- 论坛徽章:
- 0
|
import urllib2
import re
import sys
import string
def get(url1,boo):
request = urllib2.Request(url1)
opener = urllib2.build_opener()
opener.addheaders = [("User-agent","Mozilla/5.0")]
try:
feeddata1 = opener.open(request).read()
boo=True
return feeddata1
except urllib2.HTTPError,e:
print "http erro code" ,e.code
boo=False
return
以上是 gethtmldata 模块:用来抓取网页的函数。
import urllib2
import re
import sys
import string
import gethtmldata
def get(urlwea):
# print urlwea
bo=True
weadata=gethtmldata.get(urlwea,bo)
if bo==False:
print "getdata erro"
sys.exit()
#request = urllib2.Request(urlwea)
#opener = urllib2.build_opener()
#opener.addheaders = [("User-agent","Mozilla/5.0")]
#weadata = opener.open(request).read()
print "data get ok"
#f=open("data.html","w")
#print f,weadata
#f.close()
deg_matches=re.findall(r"\d+\b°\b",weadata)
wea_matches=re.findall(r"ALT.*TITLE",weadata)
deglen=len(deg_matches)
wealen=len(wea_matches)
ii=0
while iideglen:
# print deg_matches[ii]
stringdeg=string.replace(deg_matches[ii],"°","")
print stringdeg
ii=ii+1
ii=0
while iiwealen:
# print wea_matches[ii]
stringwea=string.replace(wea_matches[ii],'ALT="',"")
stringtemp=string.replace(stringwea,'" TITLE',"")
print stringtemp
ii=ii+1
return
以上是用正则表达式解析天气预报的函数。
这段程序用来抓取网页上的天气预报并整理输出,算是自己写的第一个能用的程序。
本文来自ChinaUnix博客,如果查看原文请点:http://blog.chinaunix.net/u2/79830/showart_1211487.html |
|