- 论坛徽章:
- 0
|
本帖最后由 大山里出来的孩子 于 2014-08-13 08:47 编辑
下面大神的那个代码你看懂了吗 ?像我这个应该怎样去修改?其中输入的两篇中英文已经经过分句处理了
my %dic = qw[
Egypt 埃及
state 国家
media 电台
reporting 报道
deaths 丧生
Egyptians 埃及人
Libyan 利比亚
capital 首都
Tripoli 的里波里
rocket 火箭
hit 击中
home 住所
near 附近
airport 机场
fighting 激战
rival 对立
militias 民兵
entering 进入
third 第三
week 星期
];
open($INCH,"< $fn1_IN ") || die " failed to open the input file1";
open($INEN,"< $fn2_IN ") || die " failed to open the input file2";
open($OUT, "> $fn_OUT") || die " failed to open the output file";
while($lines=<$INCH>){
$/ = '。';
@CH=$lines;
#print @CH;
}
while($lines=<$INEN>){
$/ = '.';
@EN=$lines;
#print @EN;
}
close($INCH);
close($INEN);
my $i = 0;
for my $e (@EN) {
my $begin = $i;
my @sentence = map { [/\w+/g] } split /,/, $e; #将读入的每句话按照,进行切分然后赋给数组
for my $ws (@sentence) {
my $index = $#{$ws} >= 3 ? [ 0, $#{$ws} / 2, -1 ] : [ 0, -1 ];
my @word = @$ws[@$index];
my $ok = 0;
say "CHECK: [ ", join( '|', @word ) . ' ]';
NEXT: for my $w (@word) {
++$ok and next unless exists $dic{$w};
my $c = $dic{$w};
if ( $CH[$i] =~ /$c/ ) {
say "$w\t$c = ok";
$ok++;
}
}
unless ( $ok > @word * 2 / 3 ) {
say "FAIL !!\tcheck next sentence";
++$i and goto NEXT;
}
sleep 1;
}
print '=' x 24;print "\n";
print "match\t\@CH[ $begin .. $i ]";print "\n";
print '-' x 24;print "\n";
print $e;print "\n";
print @CH[ $begin .. $i++ ];print "\n";
print '-' x 24;print "\n";
}
回复 4# huang6894
|
|