主要特点:
1、第7列数据中的逗号及|原样保留,不再作为分隔符处理
2、可以输出异常数据:如示例中555开头的这一行是合法数据;666开头的这一行少一个数据列,属于异常数据,不会出现在正常输出中,仅输出到异常数据中
3、可以调试
4、如果对异常数据也想正常输出,改为r=match($0,"(^[^\\|]*)\\|?([^,]*),?([^,]*),?([^,]*),?([^,]*),?([^,]*),?(.*)$",a);即可
限制:
带数组参数(第3个参数)的match函数是gawk的扩展,标准awk可能不支持;如果没有gawk,可以试试nawk,但不保证可用
# format_records: reformat "c1|c2,c3,c4,c5,c6,c7" records read from stdin.
#
# Each record is split at the FIRST "|" and then at the next FIVE commas;
# everything after the fifth comma is column 7 and is kept verbatim, so
# commas and "|" inside column 7 are not treated as separators.
# Columns 2-7 are printed left-aligned in fixed-width fields.
#
# A record that lacks the "|" or has fewer than five commas after it is
# anomalous: it is never printed on stdout.
#
# The splitting uses only index/substr, so no gawk-specific match() with an
# array argument is needed and any POSIX awk can run it.
#
# Environment (both optional):
#   FORMAT_ERROR_LOG  file to which non-empty anomalous records are appended,
#                     prefixed with their input line number; unset or empty
#                     means anomalies are silently dropped.
#   FORMAT_DEBUG      non-zero: write one trace line per record to stderr
#                     (parse status, then the lengths of the seven columns).
# Outputs: formatted rows on stdout.
format_records() {
  local error_log=${FORMAT_ERROR_LOG:-}
  local debug=${FORMAT_DEBUG:-0}
  local awk_cmd=awk

  # Fall back to gawk on systems that ship it without a plain "awk" command.
  command -v "$awk_cmd" >/dev/null 2>&1 || awk_cmd=gawk

  "$awk_cmd" -v error_log="$error_log" -v debug="$debug" '
  # Split line into col[1..7]. Returns 1 on success, 0 as soon as a required
  # separator is missing (col then holds only the columns parsed so far).
  function split_record(line, col,    bar, rest, i, comma) {
    split("", col)                      # portable way to empty the array
    bar = index(line, "|")
    if (bar == 0) {
      return 0
    }
    col[1] = substr(line, 1, bar - 1)
    rest = substr(line, bar + 1)
    for (i = 2; i <= 6; i++) {
      comma = index(rest, ",")
      if (comma == 0) {
        return 0
      }
      col[i] = substr(rest, 1, comma - 1)
      rest = substr(rest, comma + 1)
    }
    col[7] = rest                       # remainder, separators included
    return 1
  }

  {
    ok = split_record($0, col)

    if (debug) {
      # Trace goes to stderr so it never mixes with the formatted rows.
      printf("(%d;%d,%d,%d,%d,%d,%d,%d):%s\n", ok, length(col[1]), length(col[2]), length(col[3]), length(col[4]), length(col[5]), length(col[6]), length(col[7]), $0) > "/dev/stderr"
    }

    if (ok) {
      printf("%-9s,%-12s,%-7s,%-8s,%-15s,%-300s\n", col[2], col[3], col[4], col[5], col[6], col[7])
    } else if ($0 != "" && error_log != "") {
      # Blank lines are not reported; only real malformed records are logged.
      printf("%d行数据异常:%s\n", NR, $0) >> error_log
    }
  }'
}

# Demo run on the sample data. The quoted delimiter keeps the shell from
# expanding anything inside the here-document.
format_records <<'EOF'
3453343|32444,asd,42,345sa,asda5445,asdsa|,asdaas,,asd,,,asdadsad||sadad,sad~@%asdasd32432|
232|as2d,421,3452sa,asda2544,asd121sa,erasdaaaas,asdasdadsadsadad,sad~|@%asdasdasdsda|
ab2|aspp2d,ass,23,aaa,bbb,ddsdd,asdasdadsadsadad,sad~|@%asdasd
333|555|bbb,ccc,ddd
555|a2,a3,a4,a5,a6,
666|a2,a3,a4,a5,a6
EOF
输出结果:
32444 ,asd ,42 ,345sa ,asda5445 ,asdsa|,asdaas,,asd,,,asdadsad||sadad,sad~@%asdasd32432|
as2d ,421 ,3452sa ,asda2544,asd121sa ,erasdaaaas,asdasdadsadsadad,sad~|@%asdasdasdsda|
aspp2d ,ass ,23 ,aaa ,bbb ,ddsdd,asdasdadsadsadad,sad~|@%asdasd
a2 ,a3 ,a4 ,a5 ,a6 ,
[ 本帖最后由 zhangshebao 于 2009-4-21 00:03 编辑 ] |