- 论坛徽章:
- 8
|
#!/usr/bin/perl
# Reads FASTA records whose headers look like ">GENE|TRANSCRIPT" and, for every
# gene, prints only its longest transcript (by total sequence length) back out
# in FASTA form. Input comes from the __DATA__ section at the end of this file.
use strict;
use warnings;

# Parse FASTA records from the filehandle $fh and pick the longest transcript
# of each gene.
#
# Returns a list of array refs [ $gene, $transcript, \@sequence_lines ], one
# per gene, in the order each gene first appears in the input. When several
# transcripts of one gene have the same length, the first one seen is kept.
# Records whose header lacks a "GENE|TRANSCRIPT" pair are skipped with a
# warning.
sub select_longest_transcripts {
    my ($fh) = @_;

    # Read one ">"-delimited record per readline; `local` restores the
    # caller's record separator when this sub returns.
    local $/ = '>';

    my %best_for;      # gene => { tran => ..., len => ..., lines => [...] }
    my @gene_order;    # genes in first-seen order, for deterministic output

    # Everything up to and including the first ">" is not a record.
    my $preamble = <$fh>;

    RECORD:
    while (my $record = <$fh>) {
        chomp $record;            # drop the trailing ">" separator
        $record =~ tr/\r//d;      # tolerate CRLF line endings

        my ($header, @seq_lines) = grep { /\S/ } split /\n/, $record;
        next RECORD if !defined $header;

        # Only the first two "|"-separated header fields are used.
        my ($gene, $tran) = split /\|/, $header;
        if (!defined $gene || !length $gene || !defined $tran || !length $tran) {
            warn "skipping record with malformed header: >$header\n";
            next RECORD;
        }

        my $len = length join q{}, @seq_lines;

        if (!exists $best_for{$gene}) {
            push @gene_order, $gene;
        }
        elsif ($len <= $best_for{$gene}{len}) {
            # Replace only on a strictly longer sequence, so ties keep the
            # transcript that was seen first.
            next RECORD;
        }

        $best_for{$gene} = { tran => $tran, len => $len, lines => \@seq_lines };
    }

    return map { [ $_, $best_for{$_}{tran}, $best_for{$_}{lines} ] } @gene_order;
}

# Print the longest transcript of every gene found in the DATA section.
sub main {
    for my $entry (select_longest_transcripts(\*DATA)) {
        my ($gene, $tran, $seq_lines) = @{$entry};
        print ">$gene|$tran\n";
        print join("\n", @{$seq_lines}), "\n";
    }
    return;
}

# Run only when executed as a script, so the file can also be loaded with
# `require` without producing output.
main() unless caller;

__DATA__
>ENSG00000104671|ENST00000522141
ATGGCGGAGAAGACTCAAAAGAGTGTGAAGATTGCTCCTGGAGCAGTTGTATGTGTAGAA
AGTGAAATCAGAGGAGATGTAACTATCGTTACCCAGATAATATCACTCCTGACACTGAAG
ATCCAGAACCAAAACCTATGA
>ENSG00000104671|ENST00000520829
ATGGCGGAGAAGACTCAAAAGAGTGTGAAGATTGCTCCTGGAGCAGTTGTATGTGTAGAA
AGTGAAATCAGAGGAGATGTAACTATCGGACCTCGGACAGTGATCCACCCTAAAGCAAGA
ATTATTGCGGAAGCCGGGCCAATAGTGATTGGCGAAGGGAACCTAATAGAAGAACAGGCC
CTTATCATAAATGCTTACCCAGATAATATCACTCCTGACACTGAAGATCCAGAACCAAAA
CCTATGATCATTGGCACCAATAATGTGTTTGAAGTTGGCTGTTATTCCCAAGCCATGAAG
ATGGGAGATAATAATGTCATTGAATCAAAAGCATATGTAGGCAGAAATGTAATATTGACA
AGTGGCTGCATCATTGGGGCTTGTTGCAACCTAAATACATTTGAAGTCATCCCTGAGAAT
ACGGTGATCTATGGTGCAGACTGCCTTCGTCGGGTGCAGACTGAGCGACCGCAGGTACTA
GAACCTCTCTTTAAAAAGAGTTCTATCTGCTGA
>ENSG00000104671|ENST00000523666
ATGGCGGAGAAGACTCAAAAGAGTGTGAAGATTGCTCCTGGAGCAGTTGTATGTGTAGAA
AGTGAAATCAGAGGAGATGTAACTATCGGACCTCGGACAGTGATCCACCCTAAAGCAAGA
ATTATTGCGGAAGCCGGGCCAATAGTGATTGGCGAAGGGAACCTAATAGAAGAACAGGCC
CTTATCATAAATGCATTCCCAAGCCATGAAGATGGGAGATAA
>ENSG00000205339|ENST00000533233
ATGGACCCCAACACCATTATCGAGGCCCTGCGGGGCACTATGGACCCAGCCCTGCGTGAG
GCCGCGGAGCGCCAGCTCAATGAAACGGAGTTTCGCTCTTGTTGCCCAGACTGGAGTGCA
ATGGCACAATCTCGGCTCGCCACAACCTCCGCCTCCCGGATTCGGGCAATTCTCCTGCCT
CAGCCTCTTGAGTAG
>ENSG00000205339|ENST00000527431
ATGATAACACAGTATTGGCCTGATCGAGAAACAGCACCAGGGGATATATCCCCTTATACT
ATTCCAGAAGAAGATCGCCATTGTATTCGAGAAAATATTGTAGAAGCCATTATCCATTCT
CCTGAGCTCATCAGGGTACAGCTTACTACATGCATTCATCACATCATCAAACATGATTAT
CCAAGCCGCTGGACTGCCATTGTGGACAAAATTGGCTTTTATCTTCAGTCCGATAACAGT
GCTTGTTGGCTAGGAATTCTTCTTTGCCTTTATCAGCTTGTGAAAAATTATGAGTATAAA
AAACCAGAGGAGCGGAGTCCATTGGTAGCAGCAATGCAGCATTTTCTGCCAGTTCTAAAG
GATCGTTTTATCCAGCTTCTTTCTGACCAGTC
复制代码 |
|