- 论坛徽章:
- 7
|
- #!/usr/bin/perl
- use Bio::SeqIO;
- use Bio::Seq;
- use strict;
- my $file=shift @ARGV or die "Usage:\t$0\tseq_file\n";
- my $SeqIO=Bio::SeqIO->new(-file=>"$file",-format=>"fasta") or die "Open seq_file failed!\n";
- my ($flag,$length,$sequence,%ids,%out)=();
- while(my $seq=$SeqIO->next_seq()){
- (my $id= $seq->id)=~s/\_[\s\S]+$//;
- my $tag=$seq->id;
- my $len=$seq->length;
- my $temp_seq=$seq->seq;
- if(! $ids{$id}){
- $ids{$id}++;
- if($seq and $flag){
- $out{$flag}=$sequence;
- }
- $length=$len;
- $flag=$tag;
- $sequence=$temp_seq;
- }else{
- if($length<$len){
- $length=$len;
- $flag=$tag;
- $sequence=$temp_seq;
- }
- }
- }
- $out{$flag}=$sequence;
- $SeqIO->close();
- my $Seq_OUT=Bio::SeqIO->new(-file=>">results.fasta",-format=>"fasta") or die "Write to fasta file failed!\n";
- foreach my $key (keys %out){
- print ">$key\n$out{$key}\n";
- my $Seq=Bio::Seq->new(-id=>"$key",-seq=>"$out{$key}",-alphabeta=>"PRO");
- $Seq_OUT->write_seq($Seq);
- }
- $Seq_OUT->close();
- print "Finished!\n";
复制代码
|
|