- 论坛徽章:
- 0
|
import urllib2
import cookielib
import re
# Fetch one search-results page through a cookie-aware opener and print every
# occurrence of the productTitle class attribute found in the returned HTML.

# Cookie store shared by the opener below.
# NOTE(review): the jar is given the filename "cookies", but load()/save() are
# never called in this script, so nothing is read from or written to disk.
# If persistence was intended, a concrete subclass such as LWPCookieJar is
# presumably needed -- confirm intent.
jar = cookielib.FileCookieJar("cookies")

# Target URL (the query string requests page=2 of the listing).
results_web = "http://www.amazon.com/s/qid=1246505576/ref=sr_pg_2?ie=UTF8&rs=1000&rh=n%3A!1000%2Ci%3Astripbooks&page=2"
print(results_web)

# Build the request with a browser-like User-Agent header.
request_web = urllib2.Request(results_web)
request_web.add_header('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.1) Gecko/20090624 Firefox/3.5 (.NET CLR 3.5.30729)')

# Opener that records and replays cookies via `jar`.
opener_web = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))

# Bound the request so a stalled connection cannot hang the script forever,
# and always release the underlying socket, even if read() raises.
response_web = opener_web.open(request_web, timeout=30)
try:
    text = response_web.read()
finally:
    response_web.close()

# findall returns one list entry per match, so the printed list's length is
# the number of productTitle occurrences on the page.
print(re.findall(r'class="productTitle"', text))
|