- 论坛徽章:
- 5
|
{:2_172:} 大神! 这个, 内存占用小了。- #!/usr/bin/perl
- my %code = (
- 'TCA' => 'S', # Serine
- 'TCC' => 'S', # Serine
- 'TCG' => 'S', # Serine
- 'TCT' => 'S', # Serine
- 'TTC' => 'F', # Phenylalanine
- 'TTT' => 'F', # Phenylalanine
- 'TTA' => 'L', # Leucine
- 'TTG' => 'L', # Leucine
- 'TAC' => 'Y', # Tyrosine
- 'TAT' => 'Y', # Tyrosine
- 'TAA' => '_', # Stop
- 'TAG' => '_', # Stop
- 'TGC' => 'C', # Cysteine
- 'TGT' => 'C', # Cysteine
- 'TGA' => '_', # Stop
- 'TGG' => 'W', # Tryptophan
- 'CTA' => 'L', # Leucine
- 'CTC' => 'L', # Leucine
- 'CTG' => 'L', # Leucine
- 'CTT' => 'L', # Leucine
- 'CCA' => 'P', # Proline
- 'CCC' => 'P', # Proline
- 'CCG' => 'P', # Proline
- 'CCT' => 'P', # Proline
- 'CAC' => 'H', # Histidine
- 'CAT' => 'H', # Histidine
- 'CAA' => 'Q', # Glutamine
- 'CAG' => 'Q', # Glutamine
- 'CGA' => 'R', # Arginine
- 'CGC' => 'R', # Arginine
- 'CGG' => 'R', # Arginine
- 'CGT' => 'R', # Arginine
- 'ATA' => 'I', # Isoleucine
- 'ATC' => 'I', # Isoleucine
- 'ATT' => 'I', # Isoleucine
- 'ATG' => 'M', # Methionine
- 'ACA' => 'T', # Threonine
- 'ACC' => 'T', # Threonine
- 'ACG' => 'T', # Threonine
- 'ACT' => 'T', # Threonine
- 'AAC' => 'N', # Asparagine
- 'AAT' => 'N', # Asparagine
- 'AAA' => 'K', # Lysine
- 'AAG' => 'K', # Lysine
- 'AGC' => 'S', # Serine
- 'AGT' => 'S', # Serine
- 'AGA' => 'R', # Arginine
- 'AGG' => 'R', # Arginine
- 'GTA' => 'V', # Valine
- 'GTC' => 'V', # Valine
- 'GTG' => 'V', # Valine
- 'GTT' => 'V', # Valine
- 'GCA' => 'A', # Alanine
- 'GCC' => 'A', # Alanine
- 'GCG' => 'A', # Alanine
- 'GCT' => 'A', # Alanine
- 'GAC' => 'D', # Aspartic Acid
- 'GAT' => 'D', # Aspartic Acid
- 'GAA' => 'E', # Glutamic Acid
- 'GAG' => 'E', # Glutamic Acid
- 'GGA' => 'G', # Glycine
- 'GGC' => 'G', # Glycine
- 'GGG' => 'G', # Glycine
- 'GGT' => 'G', # Glycine
- );
- # @ARGV = qw/sca cd/;
- my ( $one, $two ) = @ARGV;
- open my ($TWO), $two;
- my %cds;
- while (<$TWO>) {
- my ( $chr, $s, $e, $p ) = ( split /\s+/, $_, 6 )[ 0, 3, 4, 2 ];
- next if $p ne 'CDS';
- my $l = $e - $s + 1;
- push @{ $cds{$chr} }, [ $s, $l, $_ ];
- }
- open OUT, '>', 'Sequence_extraction_part.fa' or die "don't worry";
- open AA, '>', 'aa_extraction_part.fa' or die "be happy";
- open my ($ONE), $one;
- $/ = '>';
- <$ONE>;
- while (<$ONE>) {
- my ( $key, @seq ) = split /\n/;
- ($key) = $key =~ /^(\w+)/;
- next unless exists $cds{$key};
- my $seq = join '', @seq;
- chop $seq if $seq =~ />$/;
- for my $k ( @{ $cds{$key} } ) {
- extract( $seq, @$k );
- }
- }
- close OUT;
- close AA;
- sub extract {
- my ( $SEQ, $start, $len, $m ) = @_;
- my $Seq = substr $SEQ, $start - 1, $len;
- my $aa;
- while ( $Seq =~ /(.{3})/g ) {
- $aa .= $code{$1} || bad_codon($1);
- }
- my ( @OUT, @AA );
- my ( $lenseq, $lenaa ) = ( length($Seq), length($aa) );
- for ( my $j = 0 ; $j * 60 < $lenseq ; $j++ ) {
- push @OUT, substr( $Seq, $j * 60, 60 ), "\n";
- }
- for ( my $i = 0 ; $i * 60 < $lenaa ; $i++ ) {
- push @AA, substr( $aa, $i * 60, 60 ), "\n";
- }
- print OUT ">$m", @OUT;
- print AA ">$m", @AA;
- }
- sub bad_codon {
- my $bad = shift;
- print qq[Bad codon "$bad"!!\n];
- exit;
- }
复制代码 回复 10# huang6894
|
|