- 论坛徽章:
- 0
|
回复 16# sunzhiguolu
您好!这个是我根据您的脚本所撰写的脚本:
#!/usr/bin/perl
use strict;
use warnings;
my$infile=shift||die"$_";
my$outfile=shift||die"$_";
open(IN,$infile);
open(OUT,">$outfile");
my@aData=();
while(<IN>){
chomp(my@aT=split(/,/));
my($id,$val)=@aT[0,-1];
unless(@aData){
@aData=($id,$val,$_);
next;
}
if($aData[0] ne $id){
print OUT splice(@aData,2);
@aData=($id,$val,$_);
next;
}
if($aData[1] == $val){
push(@aData,$_);
next;
}
@aData=($id,$val,$_)if($aData[1]<$val);
}
print OUT splice (@aData,2);
close IN;
close OUT;
输出的结果为
--DATA--
ENSACAP00000010464 gnl|UMD3.1|GK000025.2 65.71 70 24 0 32 101 211185 211394 1.00E-32 105
ENSACAP00000010464 gnl|UMD3.1|GK000025.2 65.71 70 24 0 32 101 214239 214448 1.00E-32 105
ENSACAP00000010464 gnl|UMD3.1|GK000025.2 53.12 32 15 0 1 32 210991 211086 1.00E-32 38.5
ENSACAP00000010464 gnl|UMD3.1|GK000025.2 53.12 32 15 0 1 32 214045 214140 1.00E-32 38.5
ENSACAP00000010464 gnl|UMD3.1|GK000025.2 41.86 43 25 0 100 142 211493 211621 1.00E-32 37.7
ENSACAP00000010464 gnl|UMD3.1|GK000025.2 41.86 43 25 0 100 142 214547 214675 1.00E-32 37.7
ENSACAP00000010464 gnl|UMD3.1|GK000025.2 45.59 68 37 0 32 99 209012 209215 1.00E-22 73.2
ENSACAP00000010464 gnl|UMD3.1|GK000025.2 55.32 47 21 0 96 142 209316 209456 1.00E-22 54.3
ENSACAP00000010464 gnl|UMD3.1|GK000025.2 55.71 70 31 0 32 101 193419 193628 2.00E-19 91.7
ENSACAP00000010464 gnl|UMD3.1|GK000025.2 58.82 68 28 0 34 101 217170 217373 3.00E-19 90.9
ENSACAP00000010464 gnl|UMD3.1|GK000025.2 54.79 73 33 0 29 101 207456 207674 9.00E-19 89.7
ENSACAP00000010464 gnl|UMD3.1|GK000011.2 53.42 73 34 0 70 142 650498 650280 4.00E-16 82
ENSACAP00000010464 gnl|UMD3.1|GK000015.2 30.46 151 58 5 3 108 48997720 48998166 4.00E-11 66.6
ENSACAP00000010464 gnl|UMD3.1|GK000015.2 35.53 76 43 1 32 101 49007223 49007450 7.00E-08 57
ENSACAP00000010464 gnl|UMD3.1|GK000015.2 38.16 76 41 2 32 101 49023242 49023469 5.00E-07 54.3
ENSACAP00000010464 gnl|UMD3.1|GK000015.2 44 50 28 0 52 101 49049345 49049494 1.00E-06 53.1
ENSACAP00000010464 gnl|UMD3.1|GK000015.2 36.84 76 42 2 32 101 49341123 49340896 1.00E-06 53.1
ENSACAP00000010464 gnl|UMD3.1|GK000015.2 36.84 76 42 2 32 101 49340178 49339951 1.00E-06 53.1
ENSACAP00000010464 gnl|UMD3.1|GK000015.2 36.84 76 42 2 32 101 49073678 49073905 1.00E-06 53.1
ENSACAP00000011963 gnl|UMD3.1|GK000015.2 50 160 40 2 2 121 48997711 48998190 3.00E-31 126
ENSACAP00000011963 gnl|UMD3.1|GK000015.2 68.54 89 27 1 33 120 49039473 49039739 3.00E-26 105
ENSACAP00000011963 gnl|UMD3.1|GK000015.2 46.67 30 16 0 2 31 49039265 49039354 3.00E-26 34.7
ENSACAP00000011963 gnl|UMD3.1|GK000015.2 61.45 83 32 0 33 115 49007226 49007474 1.00E-20 95.1
ENSACAP00000011963 gnl|UMD3.1|GK000015.2 49.57 115 53 1 33 147 49023245 49023574 1.00E-19 92.4
ENSACAP00000011963 gnl|UMD3.1|GK000015.2 69.33 75 23 0 33 107 49341120 49340896 2.00E-19 91.7
ENSACAP00000011963 gnl|UMD3.1|GK000015.2 69.33 75 23 0 33 107 49340175 49339951 2.00E-19 91.7
ENSACAP00000011963 gnl|UMD3.1|GK000015.2 69.33 75 23 0 33 107 49073681 49073905 2.00E-19 91.7
ENSACAP00000011963 gnl|UMD3.1|GK000015.2 63.89 72 26 0 36 107 49049279 49049494 1.00E-17 86.7
ENSACAP00000011963 gnl|UMD3.1|GK000015.2 58.67 75 31 0 33 107 49056621 49056845 6.00E-15 78.6
ENSACAP00000011963 gnl|UMD3.1|GK000015.2 61.33 75 29 0 33 107 49012677 49012901 2.00E-14 77
ENSACAP00000011963 gnl|UMD3.1|GK000015.2 53.49 43 20 0 106 148 48999230 48999358 2.00E-07 55.8
ENSACAP00000011963 gnl|UMD3.1|GK000015.2 52.38 42 20 0 107 148 49050335 49050460 3.00E-07 55.1
ENSACAP00000011963 gnl|UMD3.1|GK000015.2 48.84 43 22 0 106 148 49008286 49008414 6.00E-07 53.9
ENSACAP00000011963 gnl|UMD3.1|GK000015.2 54.76 42 19 0 107 148 49013817 49013942 2.00E-06 52.4
ENSACAP00000011963 gnl|UMD3.1|GK000025.2 40.85 71 36 2 37 107 211200 211394 6.00E-12 52.4
ENSACAP00000011963 gnl|UMD3.1|GK000025.2 40.85 71 36 2 37 107 214254 214448 6.00E-12 52.4
ENSACAP00000011963 gnl|UMD3.1|GK000025.2 42.86 42 24 0 106 147 211493 211618 6.00E-12 39.7
ENSACAP00000011963 gnl|UMD3.1|GK000025.2 42.86 42 24 0 106 147 214547 214672 6.00E-12 39.7
ENSACAP00000011963 gnl|UMD3.1|GK000025.2 41.33 75 38 1 33 107 193422 193628 3.00E-08 58.2
ENSACAP00000011963 gnl|UMD3.1|GK000025.2 41.33 75 38 1 33 107 207468 207674 4.00E-08 57.4
ENSACAP00000011963 gnl|UMD3.1|GK000025.2 34.25 73 42 1 33 105 209015 209215 1.00E-07 48.9
ENSACAP00000011963 gnl|UMD3.1|GK000025.2 34 50 29 1 102 147 209304 209453 1.00E-07 28.1
ENSACAP00000011963 gnl|UMD3.1|GK000025.2 43.4 53 30 0 65 117 217245 217403 3.00E-06 44.7
ENSACAP00000011963 gnl|UMD3.1|GK000025.2 41.94 31 18 0 114 144 217487 217579 3.00E-06 27.7
ENSACAP00000011963 gnl|UMD3.1|GK000011.2 36.99 73 46 0 75 147 650501 650283 1.00E-06 53.1
ENSACAP00000011963 gnl|UMD3.1|GK000015.2 50 160 40 2 2 121 48997711 48998190 3.00E-31 126
ENSACAP00000011963 gnl|UMD3.1|GK000015.2 68.54 89 27 1 33 120 49039473 49039739 3.00E-26 105
ENSACAP00000011963 gnl|UMD3.1|GK000015.2 46.67 30 16 0 2 31 49039265 49039354 3.00E-26 34.7
ENSACAP00000011963 gnl|UMD3.1|GK000015.2 61.45 83 32 0 33 115 49007226 49007474 1.00E-20 95.1
ENSACAP00000011963 gnl|UMD3.1|GK000015.2 49.57 115 53 1 33 147 49023245 49023574 1.00E-19 92.4
ENSAMEP00000007230 gnl|UMD3.1|GK000025.2 62.5 72 27 0 32 103 214239 214454 1.00E-32 97.1
ENSAMEP00000007230 gnl|UMD3.1|GK000025.2 65.12 43 15 0 100 142 214547 214675 1.00E-32 58.9
ENSAMEP00000007230 gnl|UMD3.1|GK000025.2 34.38 32 21 0 1 32 214045 214140 1.00E-32 26.2
ENSAMEP00000007230 gnl|UMD3.1|GK000025.2 62.5 72 27 0 32 103 211185 211400 1.00E-31 93.2
ENSAMEP00000007230 gnl|UMD3.1|GK000025.2 65.12 43 15 0 100 142 211493 211621 1.00E-31 58.9
ENSAMEP00000007230 gnl|UMD3.1|GK000025.2 34.38 32 21 0 1 32 210991 211086 1.00E-31 26.2
ENSAMEP00000007230 gnl|UMD3.1|GK000025.2 75.71 70 17 0 32 101 193419 193628 6.00E-29 119
ENSAMEP00000007230 gnl|UMD3.1|GK000025.2 72 75 21 0 27 101 207450 207674 6.00E-29 119
ENSAMEP00000007230 gnl|UMD3.1|GK000025.2 50 68 34 0 32 99 209012 209215 4.00E-22 75.5
ENSAMEP00000007230 gnl|UMD3.1|GK000025.2 48.94 47 24 0 96 142 209316 209456 4.00E-22 50.8
ENSAMEP00000007230 gnl|UMD3.1|GK000025.2 86.05 43 6 0 100 142 193823 193951 1.00E-15 80.5
ENSAMEP00000007230 gnl|UMD3.1|GK000025.2 88.1 42 5 0 100 141 207870 207995 2.00E-15 79.3
ENSAMEP00000007230 gnl|UMD3.1|GK000025.2 45.57 79 43 0 34 112 217170 217406 3.00E-12 70.5
ENSAMEP00000007230 gnl|UMD3.1|GK000025.2 78.12 32 7 0 1 32 192943 193038 2.00E-07 55.5
ENSAMEP00000007230 gnl|UMD3.1|GK000011.2 49.32 73 37 0 70 142 650498 650280 8.00E-15 77.8
ENSAMEP00000007230 gnl|UMD3.1|GK000015.2 44.74 76 36 2 32 101 48997918 48998145 9.00E-11 65.9
ENSAMEP00000007230 gnl|UMD3.1|GK000015.2 41.11 90 46 3 32 114 49039470 49039739 5.00E-09 60.5
ENSAMEP00000007230 gnl|UMD3.1|GK000015.2 39.47 76 40 2 32 101 49007223 49007450 2.00E-08 58.2
ENSAMEP00000007230 gnl|UMD3.1|GK000015.2 39.47 76 40 1 32 101 49341123 49340896 3.00E-08 58.2
ENSAMEP00000007230 gnl|UMD3.1|GK000015.2 39.47 76 40 1 32 101 49340178 49339951 3.00E-08 58.2
ENSAMEP00000007230 gnl|UMD3.1|GK000015.2 39.47 76 40 1 32 101 49073678 49073905 3.00E-08 58.2
ENSAMEP00000007230 gnl|UMD3.1|GK000015.2 38.16 76 41 1 32 101 49023242 49023469 3.00E-07 54.7
ENSAMEP00000007230 gnl|UMD3.1|GK000015.2 38.89 72 38 2 38 103 49049285 49049500 4.00E-07 54.3
ENSAMEP00000007233 gnl|UMD3.1|GK000025.2 89.36 47 5 0 96 142 209316 209456 6.00E-34 89
ENSAMEP00000007233 gnl|UMD3.1|GK000025.2 50 68 34 0 32 99 209012 209215 6.00E-34 77
ENSAMEP00000007233 gnl|UMD3.1|GK000025.2 75.71 70 17 0 32 101 193419 193628 5.00E-30 122
ENSAMEP00000007233 gnl|UMD3.1|GK000025.2 72 75 21 0 27 101 207450 207674 8.00E-30 122
ENSAMEP00000007233 gnl|UMD3.1|GK000025.2 62.86 70 26 0 32 101 214239 214448 3.00E-29 98.6
ENSAMEP00000007233 gnl|UMD3.1|GK000025.2 51.16 43 21 0 100 142 214547 214675 3.00E-29 45.8
ENSAMEP00000007233 gnl|UMD3.1|GK000025.2 34.38 32 21 0 1 32 214045 214140 3.00E-29 26.2
ENSAMEP00000007233 gnl|UMD3.1|GK000025.2 62.86 70 26 0 32 101 211185 211394 9.00E-29 97.1
ENSAMEP00000007233 gnl|UMD3.1|GK000025.2 51.16 43 21 0 100 142 211493 211621 9.00E-29 45.8
ENSAMEP00000007233 gnl|UMD3.1|GK000025.2 34.38 32 21 0 1 32 210991 211086 9.00E-29 26.2
ENSAMEP00000007233 gnl|UMD3.1|GK000025.2 50 68 34 0 34 101 217170 217373 2.00E-13 73.6
ENSAMEP00000007233 gnl|UMD3.1|GK000025.2 78.12 32 7 0 1 32 192943 193038 2.00E-07 55.1
ENSAMEP00000007233 gnl|UMD3.1|GK000025.2 55.81 43 19 0 100 142 193823 193951 1.00E-06 52.8
ENSAMEP00000007233 gnl|UMD3.1|GK000011.2 75.34 73 18 0 70 142 650498 650280 1.00E-28 118
ENSAMEP00000007233 gnl|UMD3.1|GK000015.2 44.74 76 36 2 32 101 48997918 48998145 7.00E-11 66.2
ENSAMEP00000007233 gnl|UMD3.1|GK000015.2 40.96 83 43 2 32 108 49039470 49039718 2.00E-09 61.6
我想要每一个序列中,score值最高的那一行的内容(红字标注)。
这个脚本我写了上去,输出的结果如下,不知道是不是我哪里写错了还是添加输入文件输出文件的时候对脚本造成了影响。
这个脚本是我根据您第二个脚本写得。
#!/usr/bin/perl
use strict;
use warnings;
my $INfile=shift||die"$_";
my $OUTfile=shift||die"$_";
open(IN,$INfile);
open(OUT,">$OUTfile");
my %Data=();
while(<IN>){
chomp(my@T=split(/,/));
my($id,$score)=@T[0,-1];
push(@{$Data{$id}},[$score,$_]);
}
foreach(keys%Data){
my@T=sort{$b->[-1] <=> $a->[-1]}@{$Data{$_}};
print OUT map(@{$_}[-1],grep(@{$T[0]}[0] == @{$_}[0],@T));
}
close IN;
close OUT;
输出的结果如下:
ENSMEUP00000000639 gnl|UMD3.1|GK000025.2 44 75 36 2 31 105 211185 211391 3.00E-09 61.2
ENSMEUP00000000639 gnl|UMD3.1|GK000025.2 44 75 36 2 31 105 211185 211391 3.00E-09 61.2
ENSMEUP00000000639 gnl|UMD3.1|GK000025.2 44 75 36 2 31 105 211185 211391 3.00E-09 61.2
ENSLOCP00000009436 gnl|UMD3.1|GK000025.2 42.65 68 39 0 35 102 209012 209215 4.00E-12 52.4
ENSTGUP00000004511 gnl|UMD3.1|GK000025.2 53.06 49 23 0 95 143 193805 193951 2.00E-07 55.8
ENSOARP00000020551 gnl|UMD3.1|GK000015.2 74.68 79 20 0 56 134 49073669 49073905 1.00E-33 134
ENSOARP00000020551 gnl|UMD3.1|GK000015.2 74.68 79 20 0 56 134 49073669 49073905 1.00E-33 134
ENSMLUP00000011277 gnl|UMD3.1|GK000025.2 42.55 47 27 0 96 142 209316 209456 1.00E-16 42.7
ENSFALP00000010579 gnl|UMD3.1|GK000015.2 64.52 31 11 0 1 31 49039265 49039357 3.00E-40 43.5
ENSFALP00000010579 gnl|UMD3.1|GK000015.2 64.52 31 11 0 1 31 49039265 49039357 3.00E-40 43.5
ENSFALP00000010579 gnl|UMD3.1|GK000015.2 64.52 31 11 0 1 31 49039265 49039357 3.00E-40 43.5
ENSFALP00000010579 gnl|UMD3.1|GK000015.2 64.52 31 11 0 1 31 49039265 49039357 3.00E-40 43.5
ENSGGOP00000026624 gnl|UMD3.1|GK000015.2 43.42 76 37 2 32 101 48997918 48998145 5.00E-10 63.5
ENSMUSP00000020531 gnl|UMD3.1|GK000015.2 36.84 76 42 1 32 101 49341123 49340896 3.00E-07 55.1
ENSGACP00000019116 gnl|UMD3.1|GK000025.2 47.22 72 38 0 31 102 214239 214454 4.00E-24 78.6
ENSMICP00000033506 gnl|UMD3.1|GK000015.2 38.89 72 38 2 38 103 49049285 49049500 1.00E-06 53.1
ENSPANP00000013424 gnl|UMD3.1|GK000025.2 57.14 70 30 0 32 101 193419 193628 2.00E-17 85.9
ENSXETP00000053076 gnl|UMD3.1|GK000015.2 61.33 75 29 0 31 105 49073678 49073902 3.00E-14 76.3
结果输出,基本上每个序列都只有一个输出结果,于是我将序列代码复制下来,在原始文件中寻找了下,比如ENSMEUP00000000639,发现在原始文件中,这个序列对应的有更优的结果。其他的序列也有一些是并没有将最优结果输出的,我尝试改了下脚本,但是也没有成功。
很抱歉一直打扰您,如果您有时间的话,希望得到您的答复!感谢!
ENSMEUP00000000639 | gnl|UMD3.1|GK000015.2 | 65.97 | 144 | 10 | 1 | 1 | 105 | 48997711 | 48998142 | 3.00E-51 | 184 |
该表为ENSMEUP00000000639的最优结果。
|
|