- 论坛徽章:
- 6
|
本帖最后由 stanley_tam 于 2013-10-14 21:34 编辑
果然是伸手党。假设文件名为test.txt,一次读取一百万行(看自己需求了,自己改)。。。看你上面的行长度,一百万估计就30M。
不过你要的是第二行排序。。。自己想办法咯{:3_195:}
- #!perl
use v5.10;
use strict;
use warnings;
use autodie qw{open close};

# Names of the sorted intermediate files that still have to be merged.
# Only the keys matter: the values are incremented when a file is
# registered but are never read anywhere in this script.
our %tmp_files = ();

# Forward declarations for the subroutines defined further down.
sub main;
sub split_file_by_line;
sub get_fh;
sub merge_sorted_file;

main();
# Entry point: split the input file into individually sorted chunk files,
# then merge those chunk files two at a time until a single sorted file
# is left, and report its name.
#
# Parameters (both optional, so the existing bare main() call is unchanged):
#   $big_file    - path of the file to sort         (default: 'test.txt')
#   $line_number - number of lines per sorted chunk (default: 1_000_000)
sub main {
    my ($big_file, $line_number) = @_;
    $big_file    //= 'test.txt';
    $line_number //= 1_000_000;

    split_file_by_line($big_file, $line_number);

    my $tag = 1;
    while (keys %tmp_files > 1) {
        # Which two files get paired does not matter for correctness:
        # every file registered in %tmp_files is already sorted.
        my ($file1, $file2) = (keys %tmp_files)[0, 1];
        my $merged_file = "merge_$tag.txt";
        merge_sorted_file($file1, $file2, $merged_file);

        for my $consumed_file ($file1, $file2) {
            # autodie is only enabled for open/close, so check unlink here;
            # a leftover temporary file is worth a warning, not an abort.
            unlink $consumed_file
                or warn "Could not remove '$consumed_file': $!\n";
            delete $tmp_files{$consumed_file};
        }

        $tmp_files{$merged_file}++;
        $tag++;
    }

    # An empty input produces no chunk file at all; fall back to an empty
    # name so the report line below is printed without an undefined value.
    my $sorted_file = (keys %tmp_files)[0] // q{};
    print "\$sorted_file => [$sorted_file]\n";

    # Block until a line arrives on STDIN before returning.
    # NOTE(review): presumably keeps a console window open - confirm.
    <STDIN>;

    return;
}
# Merge two sorted text files into one sorted output file.
#
# Parameters:
#   $file1, $file2 - paths of the input files; each must already be sorted
#                    in ascending string (cmp/le) order
#   $output_file   - path of the file to create (overwritten if it exists)
#
# Lines are compared without their trailing newline and every output line
# is written with exactly one newline. On equal lines the one from $file1
# is written first. Either input (or both) may be empty: an empty input
# contributes no lines to the output.
#
# Returns true. Open/close failures are raised by the file-level autodie.
sub merge_sorted_file {
    my ($file1, $file2, $output_file) = @_;

    my $fh1       = get_fh('<', $file1);
    my $fh2       = get_fh('<', $file2);
    my $output_fh = get_fh('>', $output_file);

    # Read one line without its newline; undef signals end of file.
    my $next_line = sub {
        my ($fh) = @_;
        my $line = <$fh>;
        chomp $line if defined $line;
        return $line;
    };

    my $line_f1 = $next_line->($fh1);
    my $line_f2 = $next_line->($fh2);

    # Classic two-way merge: emit the smaller head line and advance only
    # the file it came from. "defined" (not truthiness) is essential so
    # that lines such as "" or "0" are not mistaken for end of file.
    while (defined $line_f1 and defined $line_f2) {
        if ($line_f1 le $line_f2) {
            say {$output_fh} $line_f1;
            $line_f1 = $next_line->($fh1);
        }
        else {
            say {$output_fh} $line_f2;
            $line_f2 = $next_line->($fh2);
        }
    }

    # At most one input still has lines; copy them through unchanged.
    for my $remainder ([$line_f1, $fh1], [$line_f2, $fh2]) {
        my ($line, $fh) = @{$remainder};
        while (defined $line) {
            say {$output_fh} $line;
            $line = $next_line->($fh);
        }
    }

    close $fh1;
    close $fh2;
    close $output_fh;

    return 1;
}
# Split a text file into chunk files of at most $line_number lines each.
#
# Parameters:
#   $file        - path of the input file
#   $line_number - maximum number of lines per chunk; must be at least 1
#
# Each chunk is sorted in ascending string order and written to
# "tmp_<N>.txt" (N counting up from 1) in the current directory, one line
# per record with the original line ending normalised to a single newline.
# Every chunk file name is registered as a key in the file-level
# %tmp_files hash. An empty input file produces no chunk files.
#
# Dies if $line_number is missing or smaller than 1; previously such a
# value silently produced no chunks, so the input was dropped without any
# message.
sub split_file_by_line {
    my ($file, $line_number) = @_;

    die "split_file_by_line: lines per chunk must be at least 1\n"
        unless defined $line_number and $line_number >= 1;

    my $fh  = get_fh('<', $file);
    my $tag = 1;

    # Not at end of file here, so every pass reads at least one line and
    # the chunk written below is never empty.
    until (eof $fh) {
        my @lines;

        for (1 .. $line_number) {
            last if eof $fh;
            my $line = <$fh>;
            chomp $line;
            push @lines, $line;
        }

        # Same string ordering (cmp) that merge_sorted_file relies on (le).
        @lines = sort { $a cmp $b } @lines;

        my $tmp_file_name = "tmp_$tag.txt";
        my $tmp_fh        = get_fh('>', $tmp_file_name);
        say {$tmp_fh} $_ for @lines;
        close $tmp_fh;

        $tmp_files{$tmp_file_name}++;
        $tag++;
    }

    # Release the input handle explicitly, as merge_sorted_file does.
    close $fh;

    return;
}
# Open $file with the given three-argument open() mode (e.g. '<' or '>')
# and hand back the resulting lexical file handle.
#
# The open itself is not checked here: the file-level
# "use autodie qw{open close}" turns a failed open into an exception.
sub get_fh {
    my ($mode, $file) = @_;

    open my $handle, $mode, $file;

    return $handle;
}
- __END__
复制代码 回复 8# huang6894
|
|