- 论坛徽章:
- 0
|
#coding=gb2312
# Fetch novel chapters from the Sina reading channel
import re
import urllib as ub
# Download a run of chapter pages and append the extracted title and text of
# each one to a single text file.
#
# booklist layout, as used below:
#   [0] number placed first in the chapter URL (apparently the book id)
#   [1] first chapter number to fetch
#   [2] last chapter number to fetch (inclusive)
#   [3] base name of the output file ('.txt' is appended)
booklist = [40438,27128,27204,'浴火凤凰']
# NOTE(review): both patterns are a bare lazy group, which matches the empty
# string at the start of any input. They presumably lost their surrounding
# HTML delimiters when this script was published -- restore the real markers
# before relying on the output.
titlePre = "(.*?)"
contentsPre = "(.*?)"
start = booklist[1]
end = booklist[2]+1  # +1 so that range() includes the last chapter number

# Loop invariants: compile the patterns and build the output path once.
title_re = re.compile(titlePre)
contents_re = re.compile(contentsPre)
out_path = booklist[3]+'.txt'

for i in range(start,end):
    url = 'http://vip.book.sina.com.cn/book/chapter_%d_%d.html' % (booklist[0],i)
    # urllib.urlopen is the Python 2 API; its result is not a context
    # manager there, so close it explicitly even if read() fails.
    ufh = ub.urlopen(url)
    try:
        cont = ufh.read()
    finally:
        ufh.close()
    title = title_re.findall(cont)
    contents = contents_re.findall(cont)
    if not title or not contents:
        # Report and skip a page that does not match instead of dying with
        # an IndexError halfway through the book.
        print('chapter %d skipped: pattern not found in %s' % (i, url))
        continue
    wcs =title[0]+"\r\n\n"+contents[0]+"\r\n\n"
    # Append per chapter so everything fetched so far is on disk if a later
    # request fails; 'with' guarantees the handle is closed.
    with open(out_path,'a') as fh:
        fh.write(wcs)
本文来自ChinaUnix博客,如果查看原文请点:http://blog.chinaunix.net/u3/94441/showart_2041891.html |
|