- 论坛徽章:
- 12
|
更新,先处理a-zA-Z
- =info
- Code by 523066680
- 2017-06 V2.0
- Bug1 忘了windows文件名不分大小写,"Ta.txt" == "TA.txt",改为用ASCII码
- Bug2 数值排序,而不是字符串排序 -- 例如:"9" 应小于 "21"
- =cut
#!/usr/bin/perl
use strict;
use warnings;
use IO::Handle;

# Unbuffered STDOUT so progress messages appear as soon as they are printed.
STDOUT->autoflush(1);

# Script-wide settings and state shared by the processing stages below.
our $SRC = "A.txt";        # input file to be regrouped
our $DST = "Final.txt";    # output file receiving the regrouped lines
our %letter;               # letter => number of input lines bucketed under it
our @group = map { ord($_) } ('a'..'z', 'A'..'Z');    # character codes of all bucket letters
our $wd = "./tempfolder";  # directory holding the per-letter temporary files

# Create the working directory when it is missing. Testing with -d (not -e)
# and checking mkdir means a stray plain file with the same name, or a
# permission problem, is reported here instead of as a confusing open()
# failure later on.
if ( not -d $wd )
{
    mkdir $wd or die "Cannot create directory $wd: $!";
}
# Stage 1: read $SRC line by line, append each line to the temporary file
# of the first ASCII letter it contains, and count the lines per letter in
# %letter. Blank lines and lines containing no ASCII letter are skipped.
ANALYSE_AND_EXPORT:
{
    my %FH;

    # Open one output handle per letter up front. The files are named after
    # the letter's character code rather than the letter itself, because
    # "Ta.txt" and "TA.txt" are the same file on a case-insensitive file
    # system (see the changelog in the header).
    for my $code (@group)
    {
        open $FH{$code}, ">:raw", "$wd/T${code}.txt"
            or die "Cannot write $wd/T${code}.txt: $!";
    }

    open my $in, "<:raw", $SRC or die "Cannot read $SRC: $!";
    while ( my $line = <$in> )
    {
        # Skip empty lines (LF or CRLF terminated).
        next if $line =~ /^\r?\n$/;

        # The first ASCII letter anywhere in the line selects the bucket.
        if ( my ($char) = $line =~ /([a-zA-Z])/ )
        {
            $letter{$char}++;
            print { $FH{ ord $char } } $line
                or die "Cannot write line for '$char': $!";
        }
    }
    close $in;

    # Buffered write errors only surface at close, so check every handle.
    for my $code (@group)
    {
        close $FH{$code} or die "Cannot close $wd/T${code}.txt: $!";
    }
}
# Stage 2: concatenate the per-letter temporary files into $DST, taking the
# letters in the order returned by sort_byValue (most frequent first).
COLLECT_DATA:
{
    open my $RESULT, ">:raw", $DST or die "Cannot write $DST: $!";

    # sort_byValue returns character codes, which are exactly the numbers
    # used in the temporary file names.
    my @rank = sort_byValue(\%letter);

    for my $name (@rank)
    {
        my $path = "$wd/T$name.txt";

        # Report the file that is really opened (the files are T<code>.txt).
        print "Reading T$name.txt ... \n";

        open my $TempFH, "<:raw", $path or die "Cannot read $path: $!";
        while ( my $line = <$TempFH> )
        {
            print {$RESULT} $line or die "Cannot write to $DST: $!";
        }
        close $TempFH;
    }

    # Closing explicitly lets a failed flush be reported instead of lost.
    close $RESULT or die "Cannot finish writing $DST: $!";

    print "Please check $DST\n";
}
# sort_byValue(\%count_of)
#
# Ranks the keys of a hash by their numeric values, highest value first,
# and returns the character code (ord) of each key in that order.
#
# Parameters:
#   $ref - reference to a hash mapping a character to its occurrence count.
#
# Returns:
#   A list of ord() codes, one per key, ordered by descending count. Keys
#   with equal counts are ordered by ascending character, so the result
#   does not depend on hash iteration order. An empty hash gives an empty
#   list.
#
# Side effects:
#   Prints one "char: X, times: N" line per key, in ranked order, to the
#   currently selected output handle.
sub sort_byValue
{
    my $ref = shift;

    # Compare the counts numerically ("9" must rank below "21"). Looking the
    # values up directly avoids packing them into strings and re-parsing
    # them with a regex inside the comparator.
    my @ranked =
        sort { $ref->{$b} <=> $ref->{$a} or $a cmp $b }
        keys %$ref;

    for my $char (@ranked)
    {
        print "char: $char, times: $ref->{$char}\n";
    }

    return map { ord($_) } @ranked;
}
复制代码 |
|