- 论坛徽章:
- 1
|
用于多个isoform的code
那个ID的问题 fix了- #!/usr/bin/perl
- use strict;
- use warnings;
- my %id;
- my %seq;
- #load orthoMCL result from MSU
- open FILE, "<$ARGV[0]" or die "$!\n";
- while(<FILE>){
- chomp;
- #skip header line
- if($. == 1){
- next;
- }
- #get ortholog groups
- my ($orthoID,undef,$speciesCount,@line)=split(/\s+/,$_);
- for(my $i=$speciesCount;$i<scalar(@line);$i++){
- $id{$orthoID}{$line[$i]}="";
- $seq{$line[$i]}="";
- }
- }
- close FILE;
- #scanning through the seq
- my $switch=0;
- my $id="";
- my $isoformID;
- open FILE, "<$ARGV[1]" or die "$!\n";
- while(<FILE>){
- chomp;
- chop;
- if($_ =~ />([\w\.]+)/){
- $switch =0;
- $id=$1;
- $isoformID=$id;
- $id=~s/(.{4,})[\._].*/$1/;
- if(defined $seq{$id}){
- $switch = 1;
- $seq{$id}.=">$isoformID\n";
- }
- }
- elsif($_ =~ /^[ATCGatcg]/ && $switch == 1){
- $seq{$id}.="$_\n";
- }
- }
- close FILE;
- foreach my $orthoID(keys %id){
- open OUT, ">$orthoID.fa" or die "$!\n";
- foreach my $id(keys %{$id{$orthoID}}){
- print OUT "$seq{$id}";
- }
- close OUT;
- }
复制代码 |
|