- 论坛徽章:
- 0
|
python 得到指定网页中的图片,使用urllib
#!/usr/bin/env python
# getimg.py
import sys,os
from sgmllib import SGMLParser
class URLLister(SGMLParser):
def reset(self):
SGMLParser.reset(self)
self.urls = []
def start_img(self, attrs):
src = [v for k, v in attrs if k=='src']
if src:
self.urls.extend(src)
# Directory the downloaded pictures are written to.
imgdir = "/home/jim/pic/"


def ImgDownload(inputurl, img):
    """Download one image referenced by a page into ``imgdir``.

    Args:
        inputurl: URL of the page the image reference was found on. Relative
            image references are resolved against it.
        img: The image reference as written in the page (absolute URL,
            relative path, or protocol-relative ``//host/path``).

    Returns:
        None. The outcome is reported on stdout; download errors are
        reported, not raised, so one bad image does not abort a batch.
    """
    # Imported locally so the function does not depend on a module-level
    # ``urllib`` name being bound by the script entry point. The fallback
    # covers the Python 2 module layout.
    try:
        from urllib.parse import urljoin, urlsplit
        from urllib.request import urlretrieve
    except ImportError:
        from urlparse import urljoin, urlsplit
        from urllib import urlretrieve

    # urljoin returns absolute references unchanged and resolves everything
    # else against the page URL; plain concatenation produced bogus URLs
    # such as ".../index.htmla.png".
    imgurl = urljoin(inputurl, img)

    # Name the local file after the last path segment only, so a query
    # string or fragment does not end up in the file name.
    imgname = urlsplit(imgurl).path.split('/')[-1]
    if not imgname:
        # e.g. src="" or a URL ending in "/": there is nothing to save as.
        print("Picture(" + img + ") which come from " + inputurl + " saved failed")
        return

    imgpath = os.path.join(imgdir, imgname)
    if os.path.exists(imgpath):
        print(imgpath + " have exist, Needn't to download")
        return

    try:
        urlretrieve(imgurl, imgpath)
    except Exception:
        # Best effort: report and carry on. Unlike a bare ``except`` this
        # lets KeyboardInterrupt / SystemExit propagate.
        # Remove a partially written file so a later run retries the
        # download instead of reporting that the picture already exists.
        if os.path.exists(imgpath):
            try:
                os.remove(imgpath)
            except OSError:
                pass
        print("Picture(" + imgname + ") which come from " + inputurl + " saved failed")
        return

    print(imgname + " has save to: " + imgpath)
if __name__ == "__main__":
    # Bound here, directly under the guard, so the name is a module global.
    import urllib

    # Interactive loop: ask for a page URL, download every picture the page
    # references, and repeat until the user types "quit" or stdin is closed.
    while True:
        try:
            inputurl = raw_input("\nInput URL: ").strip()
        except EOFError:
            break
        if inputurl == 'quit':
            break
        if not inputurl:
            # Nothing typed; ask again instead of fetching "http://".
            continue

        # Only a real scheme prefix counts. Searching for "http" anywhere in
        # the text would skip the prefix for input like "example.com/http".
        if not inputurl.lower().startswith(('http://', 'https://')):
            inputurl = "http://" + inputurl

        try:
            usock = urllib.urlopen(inputurl)
            try:
                page = usock.read()
            finally:
                # Close the connection even when reading fails.
                usock.close()
        except IOError:
            # An unreachable page should not end the whole session.
            print("Can not open " + inputurl)
            continue

        parser = URLLister()
        parser.feed(page)
        parser.close()

        if not parser.urls:
            print("This page has not picture")
            continue
        for img in parser.urls:
            ImgDownload(inputurl, img)
本文来自ChinaUnix博客,如果查看原文请点:http://blog.chinaunix.net/u1/41982/showart_1902693.html |
|