- 论坛徽章:
- 0
|
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2006 UbuntuChina
# License: GPLv2
# Author: oneleaf
# hack by ct
import httplib
import re
import urllib
import os
import locale
# Script-wide settings.
fsize = 1  # lower bound on file size (M)
allowext = ['.mp3', '.wma']  # allowed file extensions

# Chart selector -> path of the chart page.
_TOP_PAGES = {
    '0': '/list/newhits.html',         # new songs 100
    '1': '/topso/mp3topsong.html',     # Top500
    '2': '/list/oldsong.html',         # classic oldies
    '3': '/list/movies.html',          # movie hits
    '4': '/list/tvs.html',             # TV songs
    '5': '/minge/mp3topsong.html',     # folk-song selection
    '6': '/xiaoyuan/mp3topsong.html',  # campus songs
    '7': '/list/liujinsuiyue.html',    # golden years (new)
    '8': '/list/yaogun.html',          # rock zone
}

# Edit this selector to choose a chart; it is then replaced by the page path.
topid = '0'
# A selector that is not in the table is left unchanged, exactly as the
# if/elif chain it replaces would have left it.
topid = _TOP_PAGES.get(topid, topid)
def getdownfileurl(url): #获取歌曲页的试听URL
url = "http://220.181.38.82/m"+url
count = url.index('" ');
url = url[:count]
tn = re.search('&tn=(.*)&word',url).group(0)
url=url.replace(tn,'&tn=baidusg,mp3%20%20&word')
print u"正在处理",url
try:
urlopen = urllib.URLopener()
fp=urlopen.open(url)
data = fp.read()
fp.close()
except IOError, errmsg:
print errmsg
expression2='"_blank">(.*)'
url = re.search(expression2, data).group(0)[16:-9]
try:
url="http://"+urllib.quote(url)
except:pass
print u"发现 "+url
return url
# Fetches the page at path `url` from mp3.baidu.com and runs two patterns over
# it: one for links under http://220.181.38.82/m and one for sizes ending in M.
# NOTE(review): this copy of the function is incomplete -- see the note at the
# `while` line below. Indentation is also missing throughout.
def getdownurl(url): # scrape the URL list from the song page
# These three lists are initialised but not used in the visible lines.
urllist=[]
urllist1=[]
urllist2=[]
# Plain HTTP GET; the whole response body is read into `html`.
conn = httplib.HTTPConnection('mp3.baidu.com')
conn.request("GET",url)
response = conn.getresponse()
html=response.read()
conn.close()
expression2='http://220.181.38.82/m(.*)target'
listSentence2 = re.findall(expression2, html) # collect the link list
# NOTE(review): '(.*)M' matches any text before an "M"; the pattern looks
# truncated -- confirm against the original script.
filesize=re.findall('(.*)M',html) # collect the file sizes
lineno=0
# NOTE(review): the next line is not valid Python. It looks like the text
# between a '<' and a later '>' was dropped, fusing the loop condition of
# getdownurl with a size check from a different function whose `def` line is
# missing. `urllists`, `filename` and `ext` below have no visible definition.
while linenofloat(fsize) : # download when the size qualifies
# Downloads urllists[lineno-1][0] with urllib and writes the bytes to
# filename+ext in binary mode.
# NOTE(review): the index here is lineno-1 while the size checks use lineno;
# the surrounding context is lost, so confirm which entry is intended.
urlopen = urllib.URLopener()
fp=urlopen.open(urllists[lineno-1][0])
data = fp.read()
fp.close()
filename=filename+ext;
file=open(filename,'w+b')
file.write(data)
file.close()
print u"下载成功!"
return 1
# NOTE(review): fragment. The first line is not valid Python; it appears to
# fuse the tail of one function with a ">= 0" test from the next one, whose
# `def` line is missing. The call `axeldownmp3(url,author,name,filelist)`
# later in the file suggests these lines belong to axeldownmp3 -- confirm
# against the original script. Indentation is missing throughout.
elif float(urllists[lineno][1])= 0: # ignore
# Reports that the file was already downloaded and returns 1.
print u"%s 文件已经下载,忽略。"%filename
return 1
print u'获取文件列表'
urllists=getdownurl(url) # get the list of file URLs
lineno=0
print u"获得",len(urllists),"个下载地址"
# NOTE(review): the loop condition below is truncated in the same way; only
# the comparison against float(fsize) survives.
while linenofloat(fsize) : # download when the size qualifies
savefilename=filename+ext;
# Runs the external `axel` program and waits for it; exit status 0 is treated
# as success.
# NOTE(review): os.spawnlp passes the arguments after the program name as the
# child's argv starting with argv[0], so '-q' occupies the argv[0] slot here,
# and '-n 20' / '-o <name>' are each passed as one argument containing a
# space -- confirm axel receives the intended options.
if os.spawnlp(os.P_WAIT,'axel','-q','-n 20','-o '+savefilename,urllists[lineno-1][0])==0:
print u"下载成功!"
return 1
# NOTE(review): fragment. The first line fuses the tail of the previous
# function with the end of a string that is presumably the `expression1`
# pattern; the enclosing `def` line (if any) and the code that fetches `html`
# are missing. `html`, `idno`, `url` and `author` have no visible definition.
# Indentation is missing throughout.
elif float(urllists[lineno][1])(.*).'
# NOTE(review): '>' alone looks truncated -- confirm the original pattern.
expression2='>'
expression3='href="http://mp3.baidu.com/m(.*)'
listSentence1 = re.findall(expression1, html) # pattern for the entry number
listSentence2 = re.findall(expression2, html) # pattern for the song title
listSentence3 = re.findall(expression3, html) # pattern for the artist name
lineno=0
# NOTE(review): the loop condition and the start of the `name` search are
# fused on the next line; it is not valid Python as it stands.
while lineno(.*)',listSentence2[lineno])
name=name.group(0)[6:]
# A '/' in the artist entry is treated as marking more than one performer.
dirty=re.search('/',listSentence3[lineno])
if dirty is not None : # joint performance (several artists)
author1=re.search('>(.*)/',listSentence3[lineno])
# NOTE(review): the pattern below appears to merge two separate searches (the
# second artist and the single-artist case); the line that assigns `author`
# is not visible.
author2=re.search('>(.*)(.*)</',listSentence3[lineno])
author=author.group(0)[6:-2]
name=name.strip()
author=author.strip()
print u"开始下载",idno,name,author,u"来自",url
# Listing of the current directory, handed to axeldownmp3.
filelist=os.listdir('.');
if axeldownmp3(url,author,name,filelist)==0: # check for failure
print u"下载",author,name,u'失败!'
lineno+=1
本文来自ChinaUnix博客,如果查看原文请点:http://blog.chinaunix.net/u3/92994/showart_1880431.html |
|