- 论坛徽章:
- 0
|
用LWP很容易
用LWP模拟一个浏览器,下面是我曾经写的一个脚本,供参考:
- #!/usr/bin/perl -w
- use strict;
- use LWP::Simple;
- use LWP;
- use HTML::Parse;
- use HTTP::Cookies;
- use Term::ReadKey;
# Log in to www.gene-regulation.com with an LWP user agent, then fetch the
# entry page for every accession number given on the command line and append
# the <pre>...</pre> section of each page to transfac70.html.
#
# Usage: perl script.pl ID [ID ...]
#   ID  non-negative integer; make_url() turns it into the entry URL.
#
# NOTE(review): the script as posted called make_url($_) and `next` without
# any enclosing loop, so $_ was undefined and `next` was a fatal runtime
# error. The loop over accession numbers was evidently lost; it is restored
# here with the numbers taken from @ARGV. Adjust if a fixed range was intended.

my @accession_ids = @ARGV;
die "Usage: $0 ID [ID ...]\n" unless @accession_ids;
for my $id (@accession_ids) {
    die "Invalid accession number '$id' (expected a non-negative integer)\n"
      unless $id =~ /\A\d+\z/;
}

# --- credentials ----------------------------------------------------------

print "Please enter your username:";
my $user = <STDIN>;
die "No username given\n" unless defined $user;
chomp $user;    # chomp, not chop: only strip a trailing newline

print "Please enter your Password: (do not display)\n";
ReadMode('noecho');    # same as mode 2: cooked mode with echo off
my $password = <STDIN>;
ReadMode('restore');    # same as mode 0; restore before any die below
die "No password given\n" unless defined $password;
chomp $password;

# --- browser setup --------------------------------------------------------

my $browser = LWP::UserAgent->new();
$browser->cookie_jar( {} );    # turn on cookies
$browser->agent('Mozilla/4.0 (compatible; MSIE 5.12; Mac_PowerPC)')
  ;                            # simulate popular browser

# --- login ----------------------------------------------------------------

my $login_url = 'http://www.gene-regulation.com/login';

# Fetch the login page first; with the cookie jar enabled, any cookies the
# server sets here are sent along with the POST below.
my $first_response = $browser->get($login_url);
die "$login_url error: ", $first_response->status_line
  unless $first_response->is_success;
die "Weird content type at $login_url -- ", $first_response->content_type
  unless $first_response->content_type eq 'text/html';

my $login_response = $browser->post(
    $login_url,
    [
        'user'        => $user,
        'password'    => $password,
        'request_uri' => '/index.html',
    ],
);
die "$login_url error: ", $login_response->status_line
  unless $login_response->is_success;
die "Weird content type at $login_url -- ", $login_response->content_type
  unless $login_response->content_type eq 'text/html';

# The script treats a page containing "try again" as a rejected login.
if ( $login_response->content =~ /try again/i ) {
    print "Wrong username or password!\n";
    exit 1;
}
else {
    print "login OK!\n";
}

# --- download -------------------------------------------------------------

my $output_file = 'transfac70.html';
open my $out_fh, '>>', $output_file
  or die "Cannot open $output_file for appending: $!\n";

ENTRY:
for my $id (@accession_ids) {
    my $url       = make_url($id);
    my $response2 = $browser->get($url);

    # A failed request for one entry should not abort the whole run.
    unless ( $response2->is_success ) {
        warn "$url error: ", $response2->status_line, "\n";
        next ENTRY;
    }

    # retain the content between <pre> and </pre>; skip pages without it
    next ENTRY unless $response2->content =~ /(<pre>.*<\/pre>)/is;
    my $content_wanted = $1;

    $content_wanted =~ s/<H3.*<\/H3>//sg;    # remove the title
    print {$out_fh} $content_wanted;
}

# Buffered write errors only surface at close, so check it.
close $out_fh or die "Error closing $output_file: $!\n";

# make_url($id)
#
# Build the getTF.cgi query URL for one entry.
#
#   $id      non-negative integer (a numeric string such as '007' is accepted
#            and treated as the number 7)
#   returns  the base URL followed by the accession 'M' . $id, with $id
#            zero-padded to at least five digits (7 -> M00007, 42 -> M00042,
#            123 -> M00123)
#
# The previous hand-written padding only kept the five-digit width for IDs
# below 1000: larger IDs got a fixed 'M00' prefix (1234 -> M001234). Using
# sprintf keeps the width consistent for every ID (1234 -> M01234).
sub make_url {
    my ($id) = @_;
    my $base_url =
'http://www.gene-regulation.com/cgi-bin/pub/databases/transfac/getTF.cgi?AC=';

    return $base_url . sprintf( 'M%05d', $id );
}
复制代码 |
|