- 论坛徽章:
- 7
|
本帖最后由 b114213903 于 2015-08-13 13:06 编辑
回复 21# 一串儿葡萄皮
应该是你的氨基酸序列中存在非氨基酸字符。请仔细检查!
- #!/usr/bin/perl
use strict;
use warnings;
use Bio::SeqIO;
use Bio::Seq;

# Sanitize a protein FASTA file.
#
# Usage: perl this_script.pl <protein.fasta>
#
# Every character that is not one of ABCDEFGHIKLMNPQRSTVWXYZ (case-insensitive)
# is replaced by 'X' (unknown residue). The cleaned records are written with
# Bio::SeqIO to "<name>_out<.ext>" (or "<name>_out" when the input has no
# extension). Each raw record is also echoed to STDOUT as "id<TAB>desc" followed
# by the unmodified sequence, so the offending characters can be inspected.

my $fasta = shift @ARGV;
die "Usage: $0 <protein.fasta>\n" unless defined $fasta && length $fasta;

# Build the output name by inserting "_out" before the extension. The extension
# may not contain a path separator, and a name without any extension simply gets
# "_out" appended -- otherwise the output path would equal the input path and
# opening it for writing would truncate the input.
my $out_path = $fasta;
$out_path .= '_out' unless $out_path =~ s/(\.[^.\/\\]+)$/_out$1/;

open(my $in, '<', $fasta) or die "Open $fasta failed: $!\n";
my $seq_out = Bio::SeqIO->new(-file => ">$out_path", -format => 'fasta');

my $in_record = 0;     # true once the first header line has been seen
my ($id, $desc);
my $seq = '';

while (my $line = <$in>) {
    # Drop the line ending (LF or CRLF) and any other trailing whitespace.
    $line =~ s/\s+\z//;

    if ($line =~ /^>/) {
        # A new header terminates the previous record, if there was one.
        write_record($seq_out, $id, $desc, $seq) if $in_record;

        $line =~ s/[\">]//g;    # strip '>' and double quotes from the header
        $line =~ s/^\s+//;      # tolerate "> id desc"
        ($id, $desc) = split(/\s+/, $line, 2);
        $seq       = '';
        $in_record = 1;
    }
    elsif ($in_record) {
        # Whitespace inside a sequence line is layout, not a residue; remove it
        # so that it is not turned into 'X' later.
        $line =~ s/\s+//g;
        $seq .= $line;
    }
    # Lines before the first header are ignored.
}
close($in);

# Flush the last record; nothing is written for an empty / header-less file.
write_record($seq_out, $id, $desc, $seq) if $in_record;
$seq_out->close();

# Echo one raw record to STDOUT, replace non-amino-acid characters with 'X',
# and write the cleaned record through the given Bio::SeqIO writer.
sub write_record {
    my ($writer, $rec_id, $rec_desc, $rec_seq) = @_;

    my $shown_id   = defined $rec_id   ? $rec_id   : '';
    my $shown_desc = defined $rec_desc ? $rec_desc : '';
    print "$shown_id\t$shown_desc\n$rec_seq\n";

    # Replace non-amino-acid characters with the unknown-residue code.
    $rec_seq =~ s/[^ABCDEFGHIKLMNPQRSTVWXYZ]/X/gi;

    my $seq_obj = Bio::Seq->new(
        -seq      => $rec_seq,
        -id       => $rec_id,
        -desc     => $rec_desc,
        -alphabet => 'protein',
    );
    $writer->write_seq($seq_obj);
    return;
}
复制代码
试试用这个脚本转换以后可不可以。 |
|