- 论坛徽章:
- 0
|
#! /usr/bin/env perl
use strict;
use warnings;
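# GB2312 is a double-byte encoding: both the lead byte and the trail byte lie
# in the range 0xA1-0xFE. The Chinese-character (hanzi) area starts at 0xB0A1
# and ends at 0xF7FE, i.e. hanzi have a lead byte in 0xB0-0xF7 and a trail
# byte in 0xA1-0xFE; lead bytes 0xA1-0xA9 hold symbols and punctuation.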
# Scan the hex dump in the DATA section and print every pair of adjacent
# bytes that both fall inside the GB2312 double-byte range (as decided by
# in_range); all other bytes are skipped. The pair is written with "%c%c",
# i.e. as the two byte values themselves.
#
# Each dump line is expected to look like:
#   <offset> <up to 16 two-digit hex bytes> <ASCII column>
#
# Unconsumed bytes are kept across lines, so a two-byte character whose lead
# byte is the last byte of one dump line and whose trail byte is the first
# byte of the next line is still decoded.
my @pending;
while (my $line = <DATA>) {
    my @fields = split ' ', $line;

    # Field 0 is the offset column. Collect the leading run of two-digit hex
    # tokens after it (at most 16). Stopping at the first non-hex token keeps
    # the ASCII column from being read as data on short lines, and makes
    # blank or non-dump lines contribute nothing instead of feeding undef to
    # hex().
    my @bytes;
    for my $field (@fields[1 .. $#fields]) {
        last if @bytes == 16;
        last if $field !~ /\A[0-9A-Fa-f]{2}\z/;
        push @bytes, $field;
    }
    push @pending, @bytes;

    # Consume the buffer while at least one full pair is available. A single
    # leftover byte stays in @pending for the next line.
    while (@pending >= 2) {
        if (in_range($pending[0]) && in_range($pending[1])) {
            my ($lead, $trail) = splice @pending, 0, 2;
            printf "%c%c", hex $lead, hex $trail;
        }
        else {
            # Drop only the first byte; the second one may itself be the
            # lead byte of the next character.
            shift @pending;
        }
    }
}
# in_range($hex_byte)
#
# Report whether a byte, given as a hexadecimal string such as "B0", lies in
# the range 0xA1-0xFE inclusive.
#
# Parameters:
#   $hex_byte - hexadecimal digits, optionally prefixed with "0x" or "x".
#
# Returns a true value when the numeric value is between 0xA1 and 0xFE, and a
# false value otherwise. Undefined or non-hexadecimal input is treated as
# "not in range" and does not emit a warning.
sub in_range {
    # Use a lexical instead of assigning to the global $_, which would
    # overwrite the caller's topic variable.
    my ($hex_byte) = @_;

    return q{}
        unless defined $hex_byte
        && $hex_byte =~ /\A (?: 0? [xX] )? [0-9A-Fa-f]+ \z/x;

    my $value = hex $hex_byte;
    return $value >= 0xa1 && $value <= 0xfe;
}
__DATA__
00000000 04 00 00 00 00 00 00 00 A8 0C 00 00 B0 14 00 00 ........?..?..
00000010 7C 1A 00 00 9F 86 01 00 00 00 00 00 13 00 00 00 |...焼..........
00000020 08 00 00 00 35 00 00 00 06 00 00 00 25 00 00 00 ....5.......%...
00000030 34 00 00 00 1A 00 00 00 2E 00 00 00 D6 D5 D3 DA 4...........终于
00000040 B8 E6 D2 BB B6 CE C2 E4 C1 CB B0 A1 A1 A3 CB B5 告一段落了啊。说
00000050 C6 F0 C0 B4 CB FD D2 B2 B2 EE B2 BB B6 E0 B8 C3 起来她也差不多该
00000060 C0 B4 C1 CB B0 C9 A1 AD A1 AD 00 00 FF FF FF FF 来了吧…….. |
|