- 论坛徽章:
- 0
|
本帖最后由 t6760915 于 2010-05-31 16:44 编辑
- #coding=utf-8
- import urllib
- import urllib2
- import httplib
- import cookielib
class poster(object):
    """Minimal HTTP client that POSTs form data while keeping cookies.

    One ``cookielib.CookieJar`` is created per instance and attached to
    every request made through ``doPost``, so cookies set by an earlier
    response are sent back on later requests.
    """

    # Placeholder; replaced by a ``urllib2.HTTPCookieProcessor`` in __init__.
    httpcookie = ''

    def __init__(self):
        """Create the cookie jar and the urllib2 handler that uses it."""
        cookie = cookielib.CookieJar()
        self.httpcookie = urllib2.HTTPCookieProcessor(cookie)

    def doPost(self, url, params):
        """POST ``params`` to ``url`` and return the response body.

        Args:
            url: Target URL.
            params: Form fields, in any shape ``urllib.urlencode`` accepts
                (a mapping or a sequence of two-item pairs).

        Returns:
            The response body exactly as read from the connection, or
            ``False`` if encoding the parameters or performing the request
            raised an exception.
        """
        try:
            data = urllib.urlencode(params)
            req = urllib2.Request(url, data)
            opener = urllib2.build_opener(self.httpcookie)
            fp = opener.open(req)
            try:
                return fp.read()
            finally:
                # Release the connection even if read() fails part-way.
                fp.close()
        except Exception:
            # Best-effort contract: callers get False on any failure.
            # ``Exception`` (rather than a bare ``except``) lets
            # KeyboardInterrupt and SystemExit propagate.
            return False
if __name__ == '__main__':
    # Fetch one page of search results and save it to a.html.
    pObj = poster()

    url = 'http://vip.chinalawinfo.com/Newlaw2002/chl/result.asp'
    # 'Page' / 'CurrentPage' select which result page is requested.
    params = {'PreSearchWhere':'效力级别=%#XA01%', 'ResultID':'1', 'CurrentPage':'5', 'AllPageCount':'30', 'Page':'5', 'PageSize':'40', 'orderby':'2', 'jd':'', 'RdIsSHowMess':'', 'RdIsSHow':''}

    # This file is UTF-8 (see the coding line), so the literals above are
    # UTF-8 byte strings; re-encode keys and values as GBK before sending.
    # NOTE(review): presumably the target site expects GBK form data - confirm.
    newparams = dict(
        (key.decode('utf-8', 'ignore').encode('gbk', 'ignore'),
         val.decode('utf-8', 'ignore').encode('gbk', 'ignore'))
        for key, val in params.items()
    )

    html = pObj.doPost(url, newparams)
    if html is False:
        # doPost signals failure with False; writing that to a file would
        # raise an unrelated TypeError, so stop with a clear message instead.
        raise SystemExit('POST request to %s failed' % url)

    # Binary mode: the body is raw bytes from the server and must not go
    # through newline translation; ``with`` closes the file on any error.
    with open('a.html', 'wb') as fp:
        fp.write(html)
复制代码 我就晕了,那代码是我给你的.
我稍微改了下,抓取的页面截图是这样的...
它的页次参数是 Page,我抓的是第5页,所以就写了5;你可以循环抓取,让这个变量不断变化就可以了 |
|