- 论坛徽章:
- 145
|
回复 1# newfinder
- $ cat data1.txt
- A B C D E F
- A C D E F A
- B D C E F A
- E F D C G A
- $ perl get_col.pl data1.txt > data2.txt
- $ cat data2.txt
- A:2, 0.50 B:1, 0.25 C:2, 0.50 C:1, 0.25 E:1, 0.25 A:3, 0.75
- B:1, 0.25 C:1, 0.25 D:2, 0.50 D:1, 0.25 F:2, 0.50 F:1, 0.25
- E:1, 0.25 D:1, 0.25           E:2, 0.50 G:1, 0.25
-           F:1, 0.25
复制代码
$ cat get_col.pl
use strict;
use warnings;
use List::Util qw(max);
# Print a short usage message and terminate the script with exit status 1.
# Takes no arguments and never returns.
#
# The text is written to STDERR rather than STDOUT: the script's normal
# output is usually redirected into a result file, and a usage error must
# not end up inside that file.
sub mesg {
    print STDERR "Usage: $0 FILE\n";
    print STDERR "Example: $0 data1.txt\n";
    exit(1);
}
# Exactly one input file is required; otherwise show the usage text and exit.
mesg() if @ARGV != 1;

# Pass 1: for every column position, count how often each token occurs
# ('count') and how many rows have a value in that column ('total').
my @aCol;
while ( my $sInput = <> ) {
    $sInput =~ s/^\s+|\s+$//g;          # trim both ends (removes the newline too)
    next if $sInput =~ m/^(?:#|$)/;     # skip comment lines and blank lines

    my @aData = split ' ', $sInput;
    foreach my $sIdx ( 0 .. $#aData ) {
        $aCol[$sIdx]{'count'}{ $aData[$sIdx] }++;
        $aCol[$sIdx]{'total'}++;
    }
}

# Pass 2: turn each column's tally into a list of "TOKEN:COUNT, RATIO"
# cells, ordered by token (plain string sort).  RATIO is the token's share
# of the rows that have a value in this column.
my @aResult;
foreach my $rCol (@aCol) {
    my $sTotal = $rCol->{'total'};
    my @aCells;
    foreach my $sKey ( sort keys %{ $rCol->{'count'} } ) {
        my $sVal = $rCol->{'count'}{$sKey};
        push @aCells, sprintf( "%s:%d, %.2f", $sKey, $sVal, $sVal / $sTotal );
    }
    push @aResult, \@aCells;
}

# Number of output rows = the longest column's cell count; every cell is
# padded to the widest cell.  max() returns undef for an empty list, so an
# input without data rows yields 0 rows and prints nothing (no blank line).
my $sMax_cnt = max( map { scalar @{$_} } @aResult ) // 0;
my $sMax_len = max( map { length } map { @{$_} } @aResult ) // 0;

# Pass 3: print the cells row by row.  Each cell is right-aligned to the
# common width and followed by one space; columns that have run out of
# cells are filled with blanks so later columns stay aligned.
foreach my $sRow ( 0 .. $sMax_cnt - 1 ) {
    my $sLine = "";
    foreach my $rCells (@aResult) {
        $sLine .= sprintf( "%*s ", $sMax_len, $rCells->[$sRow] // "" );
    }
    print "$sLine\n";
}
复制代码
|
|