- 论坛徽章:
- 0
|
本帖最后由 nsnake 于 2010-02-26 17:07 编辑
主要就是查看该卖家在指定时期内都卖了哪些商品,数量和价格
因为淘宝卖出的数据有一定的缓冲区,所以查询的数据并非100%是准确的,但如果你查询的时间段越长,数据会越精确
使用也比较简单
使用时只需修改脚本中的这一行:my $url='http://rate.taobao.com/user-rate-a2af4af68a32e8f046e8703fed064284.htm';
把其中的地址换成你要查询的店家的信用评价页面地址即可。
-
#!/usr/bin/perl -w
######################################
# Taobao seller sales report:        #
# items sold, quantity and price     #
# BY cgi.net <loveme1314@sakuras.cn> #
# http://blog.sakuras.cn             #
######################################
use strict;
use LWP::UserAgent;
use HTTP::Cookies;
use HTML::TreeBuilder;
#use HTTP::Request;
#use Data::Dumper;

# Unbuffered output, so each row is written as soon as it is printed.
$| = 1;

# Address of the seller's credit-rating page; point this at the shop to inspect.
my $url = 'http://rate.taobao.com/user-rate-a2af4af68a32e8f046e8703fed064284.htm';

# Number of days to look back.
my $day = 7;

# Columns to show in the report (1 = show, 0 = leave empty).
# 'tite' (sic) is the key under which count() stores the item title.
my %conf = (
    num   => 1,
    tite  => 1,
    price => 1,
    url   => 0,
);
###########################################################

# The seller id is the part of the rating URL between "rate-" and ".htm".
# Stop early if it cannot be found instead of building a broken query URL.
my ($user) = $url =~ m{rate-(.*)\.htm};
die "Cannot extract the seller id from '$url'\n"
    unless defined $user && length $user;

# Request headers sent with every page fetch (read by _get_data).
# The Accept value is kept on one line: a header value must not contain a
# raw line break.
my @ns_headers = (
    'User-Agent'      => 'Mozilla/4.76 [en] (Win2000; U)',
    'Accept'          => 'image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*',
    'Accept-Charset'  => 'iso-8859-1,*,utf-8',
    'Accept-Language' => 'en-US',
);

# Only items that received a positive rating are listed.
# $good_url always holds the next page to fetch; _get_data() overwrites it
# and sets it to a false value when there is nothing left to fetch.
our $good_url = "http://rate.taobao.com/user-rate-$user--detailed|1--goodNeutralOrBad|1--timeLine|-$day--receivedOrPosted|0--buyerOrSeller|0.htm#RateType";

# Per-item tally filled in by count(): id => { num, tite, url, price }.
our %count;

while ($good_url) {
    #print '>>1' . $good_url ."\r\n";
    _get_data($good_url);
}

# Build the report, best-selling items first.
my @marksorted = sort { $count{$b}->{'num'} <=> $count{$a}->{'num'} } keys %count;

foreach my $id (@marksorted) {
    my $item = $count{$id};

    # A disabled (or missing) column is printed as an empty field, so the
    # tab-separated layout always has four columns. %count is not modified.
    my @columns = map {
        my $value = $conf{$_} ? $item->{$_} : '';
        defined $value ? $value : '';
    } qw(num tite price url);

    # Format the quantity as an unsigned integer only when it is shown;
    # feeding an empty string to %u would print 0 and raise a warning.
    $columns[0] = sprintf('%u', $columns[0]) if $conf{num};

    print join("\t", @columns), "\r\n";
}
exit;
# _get_data($page_url)
#
# Fetches one page of the seller's rating list and records every item found
# on it through count(). Item links (<a> inside <p class="exp">) are paired
# by position with prices (<em> inside <span class="price">).
#
# Side effect: stores the address of the following page in the package
# variable $good_url, or 0 when no further page was found, which is what
# ends the caller's while loop. That value is also returned.
#
# Dies when no URL is given or when the HTTP request fails.
sub _get_data {
    my ($good_url_new) = @_;
    die "_get_data: no page URL given\n" unless $good_url_new;

    # Fetch the page.
    my $browser = LWP::UserAgent->new;
    my $resp    = $browser->get($good_url_new, @ns_headers);

    # status_line already contains both the status code and the message.
    die "GET $good_url_new failed: " . $resp->status_line . "\n"
        unless $resp->is_success;

    my $tree = HTML::TreeBuilder->new;
    $tree->parse($resp->content);
    $tree->eof;    # tell the parser the document is complete

    # Item prices, in document order. Lexical on purpose: prices left over
    # from one page must not be paired with the items of the next page.
    my @prices = $tree->look_down('_tag', 'span', 'class', 'price');

    # Item names and links.
    foreach my $entry ($tree->look_down('_tag', 'p', 'class', 'exp')) {
        # Always consume one price per entry so the pairing stays aligned
        # even when an entry has to be skipped.
        my $price_node = shift @prices;
        my $link       = $entry->look_down('_tag', 'a');
        my $amount     = $price_node ? $price_node->look_down('_tag', 'em') : undef;

        if ($link && $amount) {
            count($link->attr('title'), $link->attr('href'), $amount->as_text);
        }

        # Drop the processed entry from the tree, as the script always did;
        # its links are then no longer seen by the next-page scan below.
        $entry->delete;
    }

    # Find the link to the next page: the last <a> whose markup contains an
    # entity-encoded ">>".
    # NOTE(review): the pattern used to read `>;>;`, which cannot match here
    # because as_HTML('<>&', ...) encodes every '>' as '&gt;'. '&gt;&gt;' is
    # presumably what was meant -- confirm against a live page.
    my $next_page = 0;
    foreach my $anchor ($tree->look_down('_tag', 'a')) {
        next unless $anchor->as_HTML('<>&', ' ', {}) =~ /&gt;&gt;/;
        my $href = $anchor->attr('href');
        $next_page = $href if defined $href && length $href;
    }

    # Release the parse tree explicitly.
    $tree->delete;

    # A "next" link that points back at the page just fetched would make the
    # caller loop forever; treat it as the end of the list.
    $next_page = 0 if $next_page && $next_page eq $good_url_new;

    #print '>>2' . $next_page . "\r\n";
    $good_url = $next_page;
    return $good_url;
}
# count($tite, $url, $price)
#
# Tallies one sold item in the package hash %count. Items are keyed by the
# auction id that _return_id() extracts from $url. The first sighting stores
# the title ('tite'), url and price with num = 1; later sightings only
# increment 'num'.
#
# When no auction id can be extracted, the URL itself is used as the key, so
# unrelated items are not merged into a single row. An entry with neither an
# id nor a URL is ignored.
sub count {
    my ($tite, $url, $price) = @_;

    my $id = _return_id($url);
    $id = $url unless defined $id && length $id;
    return unless defined $id && length $id;
    #print $id . "\n";

    if (exists $count{$id}) {
        ++($count{$id}->{'num'});
    }
    else {
        $count{$id} = {
            'num'   => 1,
            'tite'  => $tite,
            'url'   => $url,
            'price' => $price,
        };
    }
    return;
}
# _return_id($content)
#
# Returns the value of the "auction_id=" parameter found in $content
# (letters and digits, matched case-insensitively).
# Returns undef (an empty list in list context) when $content is undefined
# or contains no such parameter. The capture is only read after a successful
# match, so a leftover $1 from an earlier match can never be returned.
sub _return_id {
    my $content = shift;
    return unless defined $content;

    #$content =~ /auction1\.taobao\.com\/auction\/snap_detail\.htm\?trade_id=(\d+)&auction_id=([0-9a-z]+)/ig;
    if ($content =~ /auction_id=([0-9a-z]+)/i) {
        return $1;
    }
    return;
}
复制代码 |
|