- 论坛徽章:
- 0
|
本帖最后由 Perlvim 于 2013-05-03 18:26 编辑
String::NestMatch 模块的 nest_match 方法可以处理:
它接受一个首尾字符串散列作为参数,不但可以处理单字符为标志的结构,也能处理多字符为标志的结构,能够处理任意深度。使用了最基本的字符串替换算法,可以在 sed, awk, Lua, 等算法简单的语言中实现。
测试代码:- #!perl
- use 5.014;
- use YAML qw(Dump);
- use String::NestMatch qw(nest_match);
- my $str = 'if a then if b then if c then d end end end if f then g end';
- say(Dump(nest_match($str, { if => 'end'})));
- my $text = "<table><tr><td>aaa</td></tr></table>";
- say(Dump(nest_match($text, { '<tr>' => '</tr>', '<td>' => '</td>' })));
- my $str1 = 'if a then for b in c end end';
- say(Dump(nest_match($str1, { 'if' => 'end', 'for' => 'end' })));
复制代码 输出:- >perl -w test_nest_match.pl
- ---
- 1:
- - if a then if b then if c then d end end end
- - if f then g end
- 2:
- - if b then if c then d end end
- 3:
- - if c then d end
- ---
- 1:
- - '<tr><td>aaa</td></tr>'
- 2:
- - '<td>aaa</td>'
- ---
- 1:
- - if a then for b in c end end
- 2:
- - for b in c end
- >Exit code: 0 Time: 0.549
复制代码 模块源代码:- package String::NestMatch;
- use Exporter;
- our @ISA = qw(Exporter);
- our @EXPORT_OK = qw(nest_match);
- use 5.010;
- use strict;
- use warnings;
- use YAML qw(Dump);
- my $count = 127;
- my $id_char = {};
- my $char_id = {};
- sub apply_id_char {
- my $id = shift;
- $count++;
- my $char = chr($count);
- $id_char->{$id} = $char;
- $char_id->{$char} = $id;
- return $char;
- }
- sub char_id_in_text {
- my ($text, @id) = @_;
- foreach my $id (@id) {
- if (length($id) == 1) {
- $char_id->{$id} = $id;
- $id_char->{$id} = $id;
- next;
- }
- next if (exists $id_char->{$id});
- my $char = apply_id_char($id);
- if ($id =~ /^\w+$/) {
- $text =~ s/\b$id\b/$char/g;
- }
- elsif ($id =~ /\w$/) {
- $text =~ s/\Q$id\E\b/$char/g;
- }
- elsif ($id =~ /^\w/) {
- $text =~ s/\b\Q$id\E/$char/g;
- }
- else {
- $text =~ s/\Q$id\E/$char/g;
- }
- }
- return $text;
- }
- sub nest_match {
- my ($str, $rule) = @_;
- my $match_start = {};
- my $start_end_id = {};
- while (my ($start_str, $end_str) = each %$rule) {
- $str = char_id_in_text($str, $start_str, $end_str);
- # say $str;
- my $start_char = $id_char->{$start_str};
- my $end_char = $id_char->{$end_str};
- if (exists $match_start->{$start_char}) {
- $match_start->{$start_char}{$end_char} = 1;
- }
- else {
- $match_start->{$start_char} = { $end_char => 1 };
- }
- }
- # default depth
- my $depth = 0;
- my $depth_chars = { 0 => [] };
- # according depth to save matched string
- my $depth_match_str = { };
- my $depth_start_char = { 0 => '' };
- my $expect_end_chars = {};
- my @text_chars = split //, $str;
- foreach my $char (@text_chars) {
- if (exists $expect_end_chars->{$char}) {
- push @{$depth_chars->{$depth}}, $char_id->{$char};
- my $depth_str = join '', @{ $depth_chars->{$depth} };
- if (exists $depth_match_str->{$depth}) {
- push @{$depth_match_str->{$depth}}, $depth_str;
- }
- else {
- $depth_match_str->{$depth} = [ $depth_str ];
- }
- $depth = $depth - 1;
- push @{ $depth_chars->{$depth} }, $depth_str;
- my $current_start_char = $depth_start_char->{$depth};
- if ($depth == 0) {
- $expect_end_chars = {};
- } else {
- $expect_end_chars = $match_start->{$current_start_char};
- }
- if (exists $match_start->{$char}) {
- $depth = $depth + 1;
- $depth_chars->{$depth} = [ $char_id->{$char} ];
- $expect_end_chars = $match_start->{$char};
- $depth_start_char->{$depth} = $char;
- }
- }
- else {
- if (exists $match_start->{$char}) {
- $depth = $depth + 1;
- $depth_chars->{$depth} = [ $char_id->{$char} ];
- $expect_end_chars = $match_start->{$char};
- $depth_start_char->{$depth} = $char;
- }
- else {
- push @{ $depth_chars->{$depth} }, $char;
- }
- }
- }
- $count = 127; $id_char = {}; $char_id = {};
- return $depth_match_str;
- }
- 1;
复制代码 |
|