- 论坛徽章:
- 3
|
- #!/usr/bin/perl -w
- use warnings;
- use strict;
- use Encode;
- use HTML::Entities;
- use HTML::TreeBuilder;
- my $encode = 'gbk';# encoding used both to decode $html and to encode the printed output; change it to match your environment
- my $html = <<'__HTML__'; # sample HTML input with nested <span> elements; single-quoted heredoc, so nothing is interpolated
- <body lang=ZH-CN link=blue vlink=purple style='tab-interval:21.0pt;text-justify-trim:
- punctuation'>
- <p style='margin-top:0pt;margin-right:0pt;margin-bottom:0pt;margin-left:.0pt; '><span face=宋体><span class=GramE><span class=grame><span lang=EN-US style='font-size:9.0pt; font-family:宋体'><b> Perl </b><span face=宋体>是不同的语言。</span></b><span face=宋体>从一开始, </span><b><span face=宋体> Perl</span></b><span face=宋体>就设计成可以把简单工作简单化,</span><span
- face=宋体><span style="mso-spacerun:yes">同时又不失</span></span><span face=宋体>去处理困难问题</span>
- <span face=宋体>能力的语言。</span><o:p></o:p></span></span></span></span></p>
- </body>
- __HTML__
- # Collapse all <span> elements of the document into a single wrapper span:
- # the first span carrying a lang attribute is kept (minus that attribute),
- # every span is unwrapped, and the kept span is then re-wrapped around the
- # whole content of the first <p>. The resulting HTML is printed to STDOUT
- # encoded with $encode.
- {
-     my $h = HTML::TreeBuilder->new_from_content( decode($encode, $html) );
-     # In scalar context look_down returns only the first match in document order.
-     my $p = $h->look_down(_tag => q{p});
-     die "no <p> element found in the input HTML\n"
-         unless defined $p;
-     # The coderef criterion receives the candidate element as $_[0].
-     my $mainspan = $h->look_down(
-         _tag => q{span},
-         sub { defined $_[0]->attr('lang') },
-     );
-     die "no <span> with a lang attribute found in the input HTML\n"
-         unless defined $mainspan;
-     # Setting an attribute to undef removes it from the element.
-     $mainspan->attr(lang => undef);
-     #print decode_entities($mainspan->as_HTML), "\n";
-     # Unwrap every span, $mainspan included (it ends up detached and empty,
-     # which is what lets it be reused as the wrapper below). look_down builds
-     # its result list before the loop body runs, so editing the tree while
-     # iterating is safe. replace_with_content takes no arguments.
-     for my $span ( $h->look_down(_tag => q{span}) ) {
-         $span->replace_with_content;
-     }
-     # Move the now span-free content of <p> into the kept span, then make
-     # that span the only child of <p>.
-     my @content = $p->content_list;
-     $p->detach_content();
-     $mainspan->push_content(@content);
-     $p->push_content($mainspan);
-     print encode( $encode, $h->as_HTML('<>&',' ',{}) ), "\n";
-     # Explicitly release the tree.
-     $h->delete;
- }
- __END__
- <html>
- <head>
- </head>
- <body lang="ZH-CN" link="blue" style="tab-interval:21.0pt;text-justify-trim:
- punctuation" vlink="purple">
- <p style="margin-top:0pt;margin-right:0pt;margin-bottom:0pt;margin-left:.0pt; "><span style="font-size:9.0pt; font-family:宋体"><b> Perl </b>是不同的语言。从一开始, <b> Perl</b>就设计成可以把简单工作简单化,同时又不失去处理困难问题 能力的语言。</span></p>
- </body>
- </html>
复制代码 |
|