- 论坛徽章:
- 145
|
回复 1# huang6894
Would something like this work for you?
# awk '
# Merge overlapping or directly adjacent intervals that share the same ID
# in column 5, then print each merged interval as ">ID_E<n>", with <n>
# counting up from 1 per ID in ascending start order.
# Columns as used below: $2 = start, $3 = end, $5 = ID; $1, $4 and $6 are
# passed through to the output unchanged.
# asorti() is a gawk extension, so this needs gawk.
{
    # Zero-pad the start to 20 digits so that the string sort done by
    # asorti() puts the records of one ID in numeric start order.
    key = sprintf("%s_%020d", $5, $2)

    # Same ID and same start seen before: keep the record that ends later
    # instead of letting the last line read win.
    if ((key in seen) && rec[key, 3] + 0 >= $3 + 0)
        next

    for (f = 1; f <= 6; f++)
        rec[key, f] = $f
    seen[key]
}
END {
    total = asorti(seen, order)
    cur_id = ""

    # Pass total + 1 uses an empty key; it only serves to flush the last
    # open interval.
    for (i = 1; i <= total + 1; i++) {
        key = (i <= total) ? order[i] : ""

        if (cur_id == rec[key, 5] && cur_end + 1 >= rec[key, 2] + 0) {
            # Overlaps or touches the open interval: fold the open interval
            # into this record. The end must never shrink, otherwise an
            # interval nested inside a longer one would truncate the merge.
            rec[key, 2] = cur_start
            if (cur_end + 0 > rec[key, 3] + 0)
                rec[key, 3] = cur_end
        } else if (cur_id != "") {
            print ">" cur_id "_E" (++exon), cur_name, cur_src, cur_strand, cur_start, cur_end
        }

        # A new ID restarts the numbering.
        if (cur_id != rec[key, 5])
            exon = 0

        # This record (possibly widened above) becomes the open interval.
        cur_src    = rec[key, 1]
        cur_start  = rec[key, 2]
        cur_end    = rec[key, 3]
        cur_strand = rec[key, 4]
        cur_id     = rec[key, 5]
        cur_name   = rec[key, 6]
    }
}' FILE
>NM_001002919_E1 FAM150B refseq2 - 279363 280352
>NM_001002919_E2 FAM150B refseq2 - 282911 283375
>NM_001002919_E3 FAM150B refseq2 - 285923 286543
>NM_001002919_E4 FAM150B refseq2 - 287383 288508
>NM_021170_E1 HES4 refseq1 - 934142 935752
>NM_152486_E1 SAMD11 refseq1 + 860921 861593
>NM_152486_E2 SAMD11 refseq1 + 865335 865916
>NM_152486_E3 SAMD11 refseq1 + 866219 866669
>NM_152486_E4 SAMD11 refseq1 + 870952 871476
>NM_152486_E5 SAMD11 refseq1 + 874220 875040
>NM_152486_E6 SAMD11 refseq1 + 876324 876886
>NM_152486_E7 SAMD11 refseq1 + 877316 880161
|
|