- 论坛徽章:
- 5
|
本帖最后由 pitonas 于 2013-12-22 10:32 编辑
{:2_171:}小伙伴们, 0grade6.txt... 编码 UTF-16LE, 所以无法找到苹果
try
1: save all your files with UTF-8 encoding (编码 UTF-8)
2: 试试把你的所有文件转换为 UTF-8
README !!
http://mocha-c-163-com.iteye.com/blog/583064
Change File Encoding to utf-8 via vim in a script
http://stackoverflow.com/questio ... via-vim-in-a-script
试试这个脚本? 小伙伴们, 如果你不满意, 请告诉我
- #!/usr/bin/perl
use strict;
use warnings;

# Root directory that is searched (recursively) for *.txt files.
my $Dir = '/xyz';

# README: newword.txt holds one dictionary word per line. Trailing
# whitespace (including a CR from Windows line endings) is trimmed on load,
# so a stray space after a word no longer prevents it from matching.
my $newword = '/abc/newword.txt';

# phonetic.txt: each line is "<word> <phonetic transcription>".
my $phone = '/abc/phonetic.txt';

# Report file that receives the collected example sentences.
my $save = '/abc/ok.txt';

# $t matches one character that is NOT a sentence terminator,
# $e matches one sentence terminator (".", "?" or "!").
my $t = qr/[^\.\?\!]/;
my $e = qr/[\.\?\!]/;

# Three-arg open: the path is never interpreted as a mode or a pipe.
open my $dic, '<', $newword or die "$newword:\t$!";
open my $pho, '<', $phone or die "$phone:\t$!";

# %dic maps every dictionary word to an (initially empty) list of examples.
my %dic;
while ( my $line = <$dic> ) {
    $line =~ s/\s+\z//;

    # A blank line would become an empty key that matches every word.
    next if $line eq '';
    $dic{$line} = [];
}
close $dic;
my @word = keys %dic;

# %phone maps a dictionary word to its transcription. Only the first field
# is the key; everything after it is kept as the value, so a transcription
# containing spaces (or a line with no transcription at all) cannot shift
# the key/value pairing of the following entries.
my %phone;
while ( my $line = <$pho> ) {
    my ( $entry, @transcription ) = split ' ', $line;
    next unless defined $entry && exists $dic{$entry};
    $phone{$entry} = join ' ', @transcription;
}
close $pho;
# Recursively collect the paths of all files ending in ".txt" below $dir.
#
# Parameters: $dir - directory to scan.
# Returns:    list of "$dir/..." paths; empty if $dir cannot be read.
#
# The directory is read with opendir/readdir rather than glob(), because
# glob() splits its pattern on whitespace and interprets characters such as
# "[", "{" and "~", which breaks on directory names containing them.
# Entries starting with "." are skipped and names are ordered
# case-insensitively, matching what glob "$dir/*" returned.
sub findtxt {
    my ($dir) = @_;

    opendir my $dh, $dir or return;
    my @names = sort { lc $a cmp lc $b or $a cmp $b }
        grep { !/\A\./ } readdir $dh;
    closedir $dh;

    my @found;
    for my $name (@names) {
        my $path = "$dir/$name";
        if ( -d $path ) {
            push @found, findtxt($path);
        }
        elsif ( $path =~ /\.txt$/ ) {
            push @found, $path;
        }
    }
    return @found;
}
# Scan every *.txt file under $Dir and, for each dictionary word, collect the
# sentences in which a word starting with it occurs.
for my $file ( findtxt($Dir) ) {
    print "$file\n";

    # Three-arg open: a filename found on disk that starts with ">" or ends
    # with "|" must not be treated as a mode or a command.
    open my $f, '<', $file or die "$file:\t$!";
    my $data = do { local $/; <$f> };
    close $f;
    $data = '' unless defined $data;

    # Flatten the text to one line: each CR/LF and each run of two or more
    # whitespace characters becomes a single space.
    $data =~ s/[\r\n]|\s{2,}/ /g;

    for my $w (@word) {

        # \Q...\E: the dictionary word is literal text, not a regex.
        next unless $data =~ /\b\Q$w\E/i;

        # Words without a phonetic entry are reported with an empty one.
        my $phonetic = defined $phone{$w} ? $phone{$w} : '';

        # $1: at most one preceding sentence, the sentence containing the
        #     word, and at most one following sentence.
        # $2: the word as it appears in the text (dictionary word plus any
        #     trailing word characters, e.g. a plural ending).
        while ( $data =~
            /\s*\|?\s*((?:$t*?$e){0,1}$t*?\b(\Q$w\E\w*)$t*?$e(?:$t*?$e){0,1})/ig )
        {
            my $string = $1;
            my $w2     = $2;

            # Highlight every occurrence of the matched form in the excerpt.
            $string =~ s/\b\Q$w2\E/[ $w2 ]/ig;
            push @{ $dic{$w} }, "$w $phonetic\n$string\n# $file";
        }
    }
}
# Write the collected examples to $save, one group per dictionary word in
# sorted key order; words without any example are skipped.
open my $S, '>', $save or die "$save:\t$!";
for my $k ( sort keys %dic ) {
    my @entries = @{ $dic{$k} };
    next unless @entries;
    print {$S} join( "\n\n", @entries ), "\n\n" or die "$save:\t$!";
}

# Buffered write errors (e.g. a full disk) only surface at close time.
close $S or die "$save:\t$!";
复制代码 |
|