- 论坛徽章:
- 12
|
本帖最后由 523066680 于 2018-11-07 09:17 编辑
改了一下,用Mojo::UserAgent,默认keep_alive,连header都不用提供,速度快好多
- =info
- Author: 523066680/vicyang
- Date: 2018-11
- =cut
- use Encode;
- use Mojo::UserAgent;
- use File::Slurp;
- use File::Basename qw/basename/;
- use File::Path qw/mkpath/;
- STDOUT->autoflush(1);
- our $theme = "meishi";
- our $wdir = "D:/Wallpaper/zol/$theme";
- our $main = "http://desk.zol.com.cn";
- our $ua = Mojo::UserAgent->new();
- our @headers = (
- "Host" => "desk.zol.com.cn",
- "User-Agent" => "Firefox/63.0",
- );
- mkpath $wdir unless -e $wdir;
- chdir $wdir;
- # 获取所有主题链接
- my @items;
- my $iter = 1;
- while ( get_item( $main ."/${theme}/${iter}.html", \@items ) >= 1 )
- {
- $iter++;
- }
- # 遍历页面、提取图片
- my $idx = 1;
- for my $item ( @items )
- {
- printf "[%03d/%d] %s %s\n", $idx++ , $#items+1, $item->{link}, $item->{title};
- get_pages( $item->{link}, $item->{title} );
- }
- sub get_item
- {
- my ( $link, $ref ) = @_;
- my $res = $ua->get( $link )->result;
- my $dom = $res->dom;
- for my $e ( $dom->find(".photo-list-padding")->each )
- {
- printf "%s %s\n", $e->at("a")->attr("href"), $e->at("span")->attr("title");
- push @$ref, {'link' => $main . $e->at("a")->attr("href"),
- 'title' => $e->at("span")->attr("title") };
- }
- # 判断是否为最后一页
- if ( defined $dom->at("#pageNext") ) { return 1 }
- else { return 0 }
- }
- # --- Get each pages of item --- #
- sub get_pages
- {
- my ($link, $title) = @_;
- my $res = $ua->get( $link )->result;
- my $dom = $res->dom;
- my $path = "${wdir}/${title}";
- mkpath $path unless -e $path;
- chdir $path;
- # 图片数量
- my $pics = $dom->at(".photo-list-box li i")->text;
- $pics=~s/[^\d]//; #去除斜杠
- my @files = glob "*.jpg";
- if ( $#files+1 == $pics ) {
- printf "Images already exist\n";
- return;
- }
- for my $e ($dom->find(".photo-list-box a")->each )
- {
- #printf "%s\n", $e->attr("href");
- get_pic( $main . $e->attr("href") );
- }
- }
- sub get_pic
- {
- my ( $link ) = @_;
- my $res = $ua->get( $link )->result;
- return unless (defined $res);
- my $dom = $res->dom;
- my $pic_url;
- my $pic_name;
- my $obj = $dom->at(".wallpaper-down dd a");
- my $sub_url;
- while (1)
- {
- $sub_url = $obj->attr("href");
- # 某些图片没有提供指定分辨率的链接
- if ( $sub_url !~/\.html/ ) {
- printf "Did not found picture url, skip %s\n", $sub_url;
- return;
- }
- $pic_name = basename($sub_url);
- $pic_name =~ s/\.html/\.jpg/i;
- printf "%s\n", $pic_name;
- return if ( -e $pic_name );
- my $res = $ua->get( $main .$sub_url )->result;
- return unless (defined $res);
- my $dom = $res->dom;
- my $res = $ua->get( $dom->at("")->attr("src") )->result;
-
- # 如果下载失败就选择下一个分辨率的图片
- if ( $res->code == 502 ) { $obj = $obj->next; next; }
- write_file( $pic_name, {binmode=>":raw"}, $res->body );
- last;
- }
- }
复制代码
|
评分
-
查看全部评分
|