- 论坛徽章:
- 145
|
回复 1# summeraing
- $ perl gen_ABCN.pl
- Usage : gen_ABCN.pl Table_file
- Example: gen_ABCN.pl t.txt
- === table file ===
- ChrID d1 d2 d3 d4 d5 d6
- Chr1:1 A B A C N A
- Chr1:2 - C N B A N
- ...
- $ perl gen_ABCN.pl t.txt
- Output file: A.txt
- Output file: B.txt
- Output file: C.txt
- Output file: N.txt
复制代码
- $ grep '.*' {A,B,C,N}.txt
- A.txt:site d1 d2 d3 d4 d5 d6
- A.txt:Chr1:01-10 2 0 1 1 2 2
- A.txt:Chr1:11-20 1 0 1 0 1 2
- A.txt:Chr1:21-30 4 1 1 1 2 2
- A.txt:Chr2:01-10 2 0 2 1 2 4
- A.txt:
- A.txt:Chr1total 7 1 3 2 5 6
- A.txt:Chr2total 2 0 2 1 2 4
- A.txt:totalA 9 1 5 3 7 10
- B.txt:site d1 d2 d3 d4 d5 d6
- B.txt:Chr1:01-10 0 1 1 1 1 0
- B.txt:Chr1:11-20 0 1 0 2 0 0
- B.txt:Chr1:21-30 1 0 4 0 0 2
- B.txt:Chr2:01-10 1 2 1 1 1 0
- B.txt:
- B.txt:Chr1total 1 2 5 3 1 2
- B.txt:Chr2total 1 2 1 1 1 0
- B.txt:totalB 2 4 6 4 2 2
- C.txt:site d1 d2 d3 d4 d5 d6
- C.txt:Chr1:01-10 1 1 0 1 0 1
- C.txt:Chr1:11-20 0 1 0 1 0 0
- C.txt:Chr1:21-30 1 2 1 0 2 2
- C.txt:Chr2:01-10 1 1 0 2 0 1
- C.txt:
- C.txt:Chr1total 2 4 1 2 2 3
- C.txt:Chr2total 1 1 0 2 0 1
- C.txt:totalC 3 5 1 4 2 4
- N.txt:site d1 d2 d3 d4 d5 d6
- N.txt:Chr1:01-10 0 0 1 1 1 1
- N.txt:Chr1:11-20 0 1 2 0 2 1
- N.txt:Chr1:21-30 0 2 1 2 1 0
- N.txt:Chr2:01-10 0 0 1 1 3 1
- N.txt:
- N.txt:Chr1total 0 3 4 3 4 2
- N.txt:Chr2total 0 0 1 1 3 1
- N.txt:totalN 0 3 5 4 7 3
复制代码
$ cat gen_ABCN.pl
- use strict;
- use warnings;
- my $sFile_display = 0;
- my $sItem_check = 1;
- my @aItem = qw(A B C N -);
- my %hItem = map {$_ => 1} @aItem;
- my @aHead;
- my %hChr;
- my $sHead;
- sub usage{
- print <<__USAGE__;
- Usage : $0 Table_file
- Example: $0 t.txt
- === table file ===
- ChrID d1 d2 d3 d4 d5 d6
- Chr1:1 A B A C N A
- Chr1:2 - C N B A N
- ...
- __USAGE__
- exit 1;
- }
- usage() if(@ARGV == 0);
- my $sCnt_head = 0;
- my $sCnt = 0;
- my $sLen = 0;
- while(<>){
- ++$sCnt;
- chomp;
- my $sLine = $_;
- s/^\s+|\s$//g;
- next if(m/^(#|$)/);
- # ChrID d1 d2 d3 d4 d5 d6
- # Chr1:1 A B A C N A
- my($sChr_num, @aData) = split;
- if(m/^ChrID\s/){
- @aHead = @aData;
- $sLen = scalar @aHead;
- $sHead = $sLine;
- $sCnt_head = $sCnt;
- next;
- }
- die "*** error: Cannot get 'ChrID' header\n" if($sLen == 0);
- if(scalar @aData != $sLen){
- print "*** error *** Line[$sCnt]: get wrong items number\n";
- printf(" Line[%03d]: $sHead\n",$sCnt_head);
- printf(" Line[%03d]: $sLine\n",$sCnt);
- # exit 1;
- }
- # Chr1:1 A B A C N A
- # Chr1 : 1
- my($sChr, $sNum) = split(/:/, $sChr_num);
- # @aData = A B A C N A
- foreach(0 .. $#aData){
- my $sItem = $aData[$_];
- if($sItem_check != 0 && (! exists $hItem{$sItem})){
- print "*** error *** Line[$sCnt]: get wrong item '$sItem'\n";
- print " check items: @aItem\n";
- printf(" Line[%03d]: $sLine\n", $sCnt);
- # exit 1;
- }
- # for initial value (0)
- if(! exists $hChr{$sItem}{"Total"}{$sChr}){
- @{$hChr{$sItem}{"Total"}{$sChr}} = (0) x $sLen;
- }
- if(! exists $hChr{$sItem}{"Number"}{$sChr}{int(($sNum-1)/10)}){
- @{$hChr{$sItem}{"Number"}{$sChr}{int(($sNum-1)/10)}} = (0) x $sLen;
- }
- if(! exists $hChr{$sItem}{"All"}){
- @{$hChr{$sItem}{"All"}} = (0) x $sLen;
- }
- # for Chr1:1-10, Chr2:11-20, ...
- ++$hChr{$sItem}{"Number"}{$sChr}{int(($sNum-1)/10)}[$_];
- # for Chr1total, Chr2total
- ++$hChr{$sItem}{"Total"}{$sChr}[$_];
- # for totalA, totalB, ...
- ++$hChr{$sItem}{"All"}[$_];
- }
- }
- my $sFmt = "%-12s " . (" %5s" x @aHead) . "\n";
- foreach my $sItem(sort keys %hChr){
- next if($sItem eq "-");
- # for head, site d1 d2 d3 d4 d5 d6
- my $sOut = sprintf($sFmt, "site", @aHead);
- foreach my $sChr(sort keys %{$hChr{$sItem}{"Number"}}){
- my $rhChr = $hChr{$sItem}{"Number"}{$sChr};
- # for Chr1:01-10 2 0 1 1 2 3
- # for Chr1:11-20 1 0 1 0 1 2
- foreach(sort{ $a<=>$b } keys %{$rhChr}){
- # Chr1:01-10 <== Chr1: 01 - 10
- my $sChr_num = sprintf("$sChr:%02d-%02d",$_*10+1,$_*10+10);
- $sOut .= sprintf($sFmt, $sChr_num, @{$rhChr->{$_}});
- }
- }
- $sOut .= "\n";
- # for Chr1total 7 1 3 2 5 7
- # for Chr2total ...
- foreach my $sChr(sort keys %{$hChr{$sItem}{"Total"}}){
- $sOut .= sprintf($sFmt, "${sChr}total", @{$hChr{$sItem}{"Total"}{$sChr}});
- }
- # for totalA ..., totalB ..., ...
- $sOut .= sprintf($sFmt, "total$sItem", @{$hChr{$sItem}{"All"}});
- # output file
- my $sFout = "$sItem.txt";
- open(my $FHout, ">", $sFout) or die "cannot open $sFout file\n";
- print {$FHout} $sOut;
- close $FHout;
- print "Output file: $sFout\n";
- print $sOut if($sFile_display);
-
- }
复制代码
|
|