- 论坛徽章:
- 0
|
回复 5# wh7211
awk是4.01 version的
我运行了如下代码:
awk '/^>/{a=NR;b[a]=$0;next}{c[a]=c[a]?c[a]"\n"$0:$0}END{PROCINFO["sorted_in"]="@ind_num_asc";for(i in c){d=length(c[i]);printf("%s\n%s%s\n",b[i],substr(c[i],1,100),substr(c[i],d-99))}}' ptaeda_unpaired_filter.fasta|less -S
>C30462902_9_1183
aaccccaaattttgggcgccgcatggaaaccccaaattttgtgcgccgggtggaaacccc
aaattttgggtgccgggtggaaaccccaaattttgggcgaaa
ttttgggcgccgcatggaaaccccaaattttgggcgctaggtgcaaaccccaaattttgg
gcgccgcatggaaaccccaaattttgggcgctagg
>C30487642_123_1263
cccaaattttgggcgctgggtgaaaaccccaaattttgtgcgccgcatggaaaccccaaa
ttttgggcgctcggtggaaaccccaaattttgggcgccgttttggccgccgcatggaaaccccaaattttgggcgc
cgagtggaaaccccaaattttgagcaccgcatggaaaccccaaatttcgggcgccgggtg
g
>C30492480_19_1274
gcgccgcatggaaaccccaaattttggtcgccgagtggaaaccccaaatttgggcgcggc
tggaaaccccaaattttgggcgccgggtggaaaccccaagggtggaaaccccaaattttgggcgccggttggaaaccccaaa
ttttgggcgccgcatggaaaccccaaattttgggcaccgagtggaaaccccaaatt
随后计算长度
awk '/^>/{a=NR;b[a]=$0;next}{c[a]=c[a]?c[a]"\n"$0:$0}END{PROCINFO["sorted_in"]="@ind_num_asc";for(i in c){d=length(c[i]);printf("%s\n%s%s\n",b[i],substr(c[i],1,100),substr(c[i],d-99))}}' ptaeda_unpaired_filter.fasta|awk '{print $1}' | awk '/^>/&&NR>1{print "";}{ printf "%s",/^>/ ? $0" ":$0 }END{printf("%s\n", $0)}' |awk '{print $1"\t"length($2)}'|less -S
>C30462902_9_1183 197
>C30487642_123_1263 197
>C30492480_19_1274 198
很奇怪不能得到一样的结果。
以下是序列,比较长
>C30462902_9_1183
aaccccaaattttgggcgccgcatggaaaccccaaattttgtgcgccgggtggaaacccc
aaattttgggtgccgggtggaaaccccaaattttgggcgccatggcgctgcatggaaacc
ccaaattctgggtgctctgtggaaaccccaaattttggctgcggcatgcaaaccccaaat
tttgggcgccgagtggaaaccccaaatttggccgtggcatggaatccccaaattttgggc
accggggggaaaccccaaattttgagcgccgcatggaaaccccaaattttgggcgctgga
tggaaaccccaaattttacgcgccgggtggaaaccccaagttttgggcgcagcatggaaa
cccgaaatcttgggcgccgggtggaaaccccaaattttgggcgctctgtggaaacccaaa
attttggccgccacatggaaaccccaaattttcggcgccgagtggaagcctaaattttgg
gcgccgcatggaaaccccaaattttgggcgccggttggaaagcccaaattttgggcgcta
ggtggaaaccctaaatttttggccccgcatggaaaccccaaattttcggcgccgagtcga
aaccccaaatcttgggcgccatgtcgaaaccccaaatttttggcgctgggtggaaacccc
aaattttgggcgccgcatagaaaccccaaattttgggcgccgcatggaaacaccaaattt
tgggcgttgagtggaaaccccaaattttgggcggaaaccccgaattttgtgcggaaaccc
caaattttgggtggaaaccccgaatttgggcgcggcatggaaaacccaaattttgggtgc
cgggtggaaaccccaaatttagggcgctgaggcgccgcatggaaaccccaaattttgggc
gctgggggaaaccccaaattttgggtgccgcatggaaaccccaaattttgggcgttgagt
ggaaaccccaaattttgggcaccgggtggaaacccaaaattttgggcgctgggtggaaac
cccaaattttggccaccgcatagaaaccccaaattttggacaccaagtggaaagcccaaa
ttttgggcgccgcatggaaaccccaaattttgggcgctaggtgcaaaccccaaattttgg
gcgccgcatggaaaccccaaattttgggcgctagg
>C30487642_123_1263
cccaaattttgggcgctgggtgaaaaccccaaattttgtgcgccgcatggaaaccccaaa
ttttgggcgctcggtggaaaccccaaattttgggcgccgggtgtaaaccccaaatttggc
cgcagcatggaaacaccaaattttgggcaccggacggaaaccccaaattttgagcgccgc
atggaaaccccaaattgtgggcaccgggtggaaacctcaaattttgggcgccgggtggaa
accccaaattttgggcgccgggtggaaaccccaaattttgggcgccggggcgccgaatgg
aaacaccaaattttgggcgccaggtggaaaccccaaatttttggcgctaggtgaaagccc
caaattttgggtgccgcatggaaaccccaaattttgggcgccgagtggaaaccccaaatt
ttgggtgctgggtggaagccccaaattttgggcgctgggtggaagccccaaattttgggc
atcgagtgaaaaccccaaagtttgggcgccgcatggaaaccccaaattttgggcaccggg
ttaaaaccccaaattttgggagccgcatggaaaccccaaattttgggcgccgcatgcaaa
ccccaatttttggtcgttggatggaaacgccaaatttgggcgcggcatggaaaccccaaa
ttttgggcgtcgggtggaaaccccaaattttggggccgggtggaaaccccaaattttggg
cgccggctggaaaccccaaattttggcgcgggctggaaaccccaaatttgggcgtggggt
ggaaaccccaaattttgggagccgggtggaaaccccaaattttgggcgctaggtggaaac
cccaaattttgggtgccaggtggaaattgcaaattttgagcaccggatggaaacccccaa
ttttgggtgccgcgtggaaaccccaaattttgggcgccttatggaaatcccaaattttgg
gcggcgggtggaaaccccaaattttcagcgccgtggcgccgcatggaaaccccaaatttt
gggcgctcggcggaaaccccaaattttggccgccgcatggaaaccccaaattttgggcgc
cgagtggaaaccccaaattttgagcaccgcatggaaaccccaaatttcgggcgccgggtg
g
>C30492480_19_1274
gcgccgcatggaaaccccaaattttggtcgccgagtggaaaccccaaatttgggcgcggc
tggaaaccccaaattttgggcgccgggtggaaaccccaaattttgggcgccggctagaaa
ccccaaatttttggcgccagctggaaaccccaaatttgggcgcggggtggaaaccccaaa
cttggggcgcctggtggaaaccccaaattttgggtgccgggtggaaaccccaaaatttga
gcgctgcatggaaaccccaaattttgggcgtcagatggaaaccccaaattttggggccga
gtggaaaccccaatttttgggcgcagggtggaaaccccaaatttgggcgccaggtggaaa
ccccaaattttgggcgccgagtggaaaccccaaattttaggcgccaggtggaaacccgaa
attttgggtgccaagtggaaaccccaaattttgtgtgccgagtggaaaccccaagttttg
ggcgccgcatggaaaccccaaattttgggcgctcggtgggaaccccaaattttggccgcc
gcatggaaaccccaaattttggccaccgagtggaaaccccaaattttgggcgccgcatgt
aaaccccaaattttgggcgccaggtggaagccccaagttttgggcgctaggaggaaagct
caaattttcggccgcatggaaaccccaaattttgggcgccgggtggaaacccaaaatttt
gggcgccacatggaaaccccaaattttgggcgccgggtggaaaccccaaattttgggcac
cgcgtgcaaaccccaaattttgggcgctggctggaaaccccaaattttgggcgccacatg
gaaaccccaaattttgggcgtcgcacggaaaccctaaatttcgggcgccgcatggaaacc
ccaaatttgggcgcggcatggaaaccccaaatgttgggcgcggcatggaaaccccaaatg
ttgggcgcctaggggaaaccccaaattttgggcgccgggtggaaaccccaaattttgggc
accgcatggaaaccccaaattttgggtgcagggtggaaaccccaaattttgggcgcaggg
tggaaaccccaaattttgggcgctgggtggaaaccccaaattttgggcgccgggtggaaa
ccccaaatttgggcacagggtggaaaccccaaattttgggcgccggttggaaaccccaaa
ttttgggcgccgcatggaaaccccaaattttgggcaccgagtggaaaccccaaatt
|
|