# Two-pass awk over the same input: `file` is listed twice, and NR==FNR is true
# only while the first copy is being read.
#
# Every record (both passes):
#   - len = numeric value of the first "<digits>S" token found in $6
#     (0 when $6 contains none).
#   - str = $1 "_" k, where k is 1 plus the number of "-" characters that
#     gsub() strips from $9 ("_1" when $9 has no minus sign, "_2" when it has
#     one).  gsub() edits $9 in place.
# Pass 1 (NR==FNR): deletes "<n>D", "<n>S86M" and "<n>S" tokens from $6, sums
#   the numbers left between the remaining uppercase letters into `start`, and
#   prints str, substr($10,start,len), "+", substr($11,start,len) on four
#   lines followed by an empty line.
# Pass 2: saves the unmodified $6 as str2, deletes "<n>I", "<n>S86M" and "<n>S"
#   tokens from $6, sums the remainder into `start`, and prints str, str2 and
#   $4+start on one line.
# `start` is reset to 0 after every record.
#
# NOTE(review): the column usage looks like SAM ($4 POS, $6 CIGAR, $9 TLEN,
#   $10 SEQ, $11 QUAL) -- confirm against the real input.
# NOTE(review): the literal "S86M" alternative looks tailored to one sample
#   data set -- confirm before reusing on other data.
# NOTE(review): substr() starts at `start` rather than start+1 -- verify that
#   the first extracted character is the intended one.
awk '{match($6,/[0-9]+S/);len=substr($6,RSTART,RLENGTH)+0;str=$1"_"gsub("-","",$9)+1}NR==FNR{gsub(/[0-9]*D|[0-9]*S86M|[0-9]*S/,"",$6);split($6,a,/[A-Z]/);for(i in a) start+=a[i];print str"\n"substr($10,start,len)"\n+\n"substr($11,start,len)"\n";start=0;next}{str2=$6;gsub(/[0-9]*I|[0-9]*S86M|[0-9]*S/,"",$6);split($6,a,/[A-Z]/);for(i in a) start+=a[i];print str,str2,$4+start;start=0}' file file
# For every line of file `i`, print a FASTQ-style record holding the part of
# $10 (and the matching part of $11) selected by the "<n>S" op in $6.
#
# Fields used: $1 name, $6 op string, $9 signed number (a value > 0 selects the
# "_1" suffix, anything else "_2"), $10 and $11 the two strings that are cut.
# NOTE(review): this matches the SAM column layout (CIGAR / TLEN / SEQ / QUAL)
#   -- confirm against the real input.
#
# Output per record: "<name>_<1|2>", the cut from $10, "+", the cut from $11,
# then an empty line.  Records whose $6 has no S op produce empty cuts.
# NOTE(review): when $6 holds several S ops the length is taken from the last
#   one while the offset stops at the first one -- confirm this is intended.
#
# Requires GNU awk: gensub() is a gawk extension.
awk '
{
    # Length of the last "<n>S" op in $6.  gensub takes (regex, replacement,
    # how, target): how=1 and target=$6 are passed explicitly, because with
    # only three arguments $6 is read as "how" and the target becomes $0.
    clip_len = ($6 ~ /S/) ? gensub(/.*(^|[^0-9])([0-9]*)S.*/, "\\2", 1, $6) + 0 : 0

    # Drop "<n>D" ops and everything from the first "<n>S" onwards, then add
    # up what is left between M/I separators: the offset of the cut.
    ops = $6
    gsub(/[0-9]*D|[0-9]*S.*/, "", ops)
    skip = 0
    count = split(ops, piece, "[MI]")
    for (k = 1; k <= count; k++)
        skip += piece[k]

    mate = ($9 > 0) ? 1 : 2
    print $1 "_" mate "\n" substr($10, skip + 1, clip_len) "\n+\n" substr($11, skip + 1, clip_len) "\n"
}' i
# Reads records from standard input and prints "<name>_<1|2> <ops> <number>"
# lines, where <ops> is $6 unchanged and the suffix is 1 when $9 > 0, else 2.
#
# - $6 with fewer than two "<n>S" ops: one line, with <number> = $4 plus the
#   sum of the D/M lengths that precede the first S op (I ops are ignored).
# - $6 with two or more "<n>S" ops: two lines, the first with <number> = $4,
#   the second with $4 plus the sum of all D, M and S lengths left after
#   removing I ops and a trailing "<n>S".
#
# NOTE(review): the columns look like SAM ($4 POS, $6 CIGAR, $9 TLEN) --
#   confirm against the real input.
# NOTE(review): in the two-clip case the leading S length is included in the
#   sum -- confirm this is intended.
awk '
{
    tag = $1 "_" (($9 > 0) ? 1 : 2)

    # Reduce $6 to its "<n>S" ops only, so they can be counted.
    only_s = $6
    gsub(/[0-9]*[^S0-9]/, "", only_s)

    ops = $6
    if (split(only_s, unused, "S") < 3) {
        gsub(/[0-9]*(I|S.*)/, "", ops)
        split(ops, run, "[DM]")
    } else {
        print tag, $6, $4
        gsub(/[0-9]*(I|S$)/, "", ops)
        split(ops, run, "[DMS]")
    }

    shift = 0
    for (k in run)
        shift += run[k]
    print tag, $6, $4 + shift
}'
# For every line of `data`, cut pieces out of $10 and $11 using the "<n>S"
# lengths found in $6 and print each piece pair as a FASTQ-style record:
# "<name>_<1|2>", piece of $10, "+", piece of $11, then an empty line.
# The suffix is 1 when $9 > 0, else 2.  A piece pair is printed only when both
# pieces are longer than one character.
#
# NOTE(review): the columns look like SAM ($6 CIGAR, $9 TLEN, $10 SEQ,
#   $11 QUAL) -- confirm against the real input.
# NOTE(review): the start offset is accumulated only when $6 contains an I op
#   and is otherwise 0; every S length is cut from that same offset -- confirm
#   this is intended.
awk '
{
    strand = ($9 > 0) ? 1 : 2

    # Keep only the S lengths: drop every D/I/M op and a trailing "S".
    clips = $6
    gsub(/[0-9]+D|[0-9]+I|[0-9]+M|S$/, "", clips)

    # Keep only the M/I ops.
    aligned = $6
    gsub(/[0-9]+S|[0-9]+D/, "", aligned)

    # Offset = sum of the M and I lengths, but only when an I op is present.
    from = 0
    if (index(aligned, "I") > 0) {
        parts = split(aligned, run, "[MI]")
        for (k = 1; k <= parts; k++)
            from += run[k]
    }

    total = split(clips, want, "S")
    for (k = 1; k <= total; k++) {
        bases = substr($10, from, want[k])
        quals = substr($11, from, want[k])
        if (length(bases) <= 1 || length(quals) <= 1)
            continue
        printf "%s_%d\n%s\n%s\n%s\n\n", $1, strand, bases, "+", quals
    }
}' data
复制代码
问题二:可指定字符串长度(通过 -v strlen=N 指定):
# For every line of `data`, print "<name>_<1|2>\t<ops>\t<number>" followed by
# an empty line, but only when the computed span is strictly greater than the
# awk variable `strlen` (set to 10 on this command line).
# <ops> is $6 unchanged, the suffix is 1 when $9 > 0 (else 2), and <number> is
# $4 plus the span.
#
# Span: D ops in $6 are counted like I ops, "<n>S" ops are dropped, and the
# remaining M/I lengths are added up.
# NOTE(review): the span stays 0 unless $6 contains an I or D op, so such
#   records are never printed -- confirm this is intended.
# NOTE(review): the columns look like SAM ($4 POS, $6 CIGAR, $9 TLEN) --
#   confirm against the real input.
awk -v strlen=10 '
{
    ops = $6
    gsub(/D/, "I", ops)
    gsub(/[0-9]+S/, "", ops)

    span = 0
    if (index(ops, "I") > 0) {
        count = split(ops, run, "[MI]")
        for (k = 1; k <= count; k++)
            span += run[k]
    }

    if (span > strlen)
        printf "%s_%d\t%s\t%d\n\n", $1, ($9 > 0 ? 1 : 2), $6, $4 + span
}' data
# Reads records from standard input and prints "<name>_<1|2> <ops> <number>"
# lines, where <ops> is $6 unchanged and the suffix is 1 when $9 > 0, else 2.
# The computed offset must be at least `strlen` for the offset line to be
# printed; the threshold is inclusive and is set with -v (10 on this command
# line, the value that used to be hard-coded).
#
# - $6 with fewer than two "<n>S" ops: the offset is the sum of the D/M
#   lengths that precede the first S op (I ops are ignored); the line with
#   <number> = $4 + offset is printed when offset >= strlen.
# - $6 with two or more "<n>S" ops: a line with <number> = $4 is always
#   printed; the offset is the sum of all D, M and S lengths left after
#   removing I ops and a trailing "<n>S", and the line with
#   <number> = $4 + offset is printed when offset >= strlen.
#
# NOTE(review): the columns look like SAM ($4 POS, $6 CIGAR, $9 TLEN) --
#   confirm against the real input.
# NOTE(review): in the two-clip case the leading S length is included in the
#   offset -- confirm this is intended.
awk -v strlen=10 '
{
    tag = $1 "_" (($9 > 0) ? 1 : 2)

    # Reduce $6 to its "<n>S" ops only, so they can be counted.
    only_s = $6
    gsub(/[0-9]*[^S0-9]/, "", only_s)

    ops = $6
    if (split(only_s, unused, "S") < 3) {
        gsub(/[0-9]*(I|S.*)/, "", ops)
        split(ops, run, "[DM]")
    } else {
        print tag, $6, $4
        gsub(/[0-9]*(I|S$)/, "", ops)
        split(ops, run, "[DMS]")
    }

    shift = 0
    for (k in run)
        shift += run[k]
    if (shift >= strlen)
        print tag, $6, $4 + shift
}'