- 论坛徽章:
- 0
|
#!/usr/bin/env python
#-*- encoding: u8 -*-
import sys,re,urllib
url = str("http://www.baidu.com/s?tn=baiduhome_pg&ie=utf-8&bs=inurl%3Aaction&f=8&rsv_bp=1&rsv_spt=1&wd=%E6%95%99%E5%AD%A6&rsv_sug3=4&rsv_sug=0&rsv_sug1=4&rsv_sug4=75&inputT=4535")
oper = urllib.urlopen(url).read().replace(""," ").decode('utf-8')
urls = re.findall(r"<a href=.*?</a>",oper,re.S)
print oper
D:\script>python test.py
Traceback (most recent call last):
File "test.py", line 5, in <module>
oper = urllib.urlopen(url).read().replace(""," ").decode('utf-8')
File "C:\Python27\lib\encodings\utf_8.py", line 16, in decode
return codecs.utf_8_decode(input, errors, True)
UnicodeDecodeError: 'utf8' codec can't decode byte 0xe7 in position 341: invalid continuation byte
这里也提示错了 但是不知道怎么解决编码问题 |
|