#!/usr/bin/env python2
#encoding: UTF-8
"""
Batch downloader for YinYueTai (音悦台) music videos.
2015-02-11
bc523@qq.com
"""
import urllib2
import urllib
import re
import sys
import os
import time
class Yinyuetai(object):
    """Batch downloader for the music videos listed on a YinYueTai list page.

    Constructing an instance immediately starts the crawl: listing pages are
    fetched one after another (``&page=N`` is appended to the given URL), and
    every video found on a page is downloaded into a ``flv`` directory.  The
    crawl stops at the first listing page that yields no entries.

    This code targets Python 2 (``urllib2`` / ``urllib.urlretrieve``).
    """

    # One listing-page entry: group 1 is the numeric video id, group 2 is the
    # value of the ``title`` attribute.
    _LIST_ENTRY_RE = re.compile(
        r"<h3><a\shref=\"http:\/\/v.yinyuetai.com\/video\/([0-9]+)\".*title=\"(.*)\".*")

    # A direct video file URL inside the get-video-info response; the match
    # stops just before the following ``&br`` parameter.
    _VIDEO_URL_RE = re.compile(
        r"http://\w*?\.yinyuetai\.com/uploads/videos/common/.*?(?=&br)")

    # Characters that must not reach the file system inside a file name:
    # path separators, characters Windows rejects, and control characters.
    _UNSAFE_FILENAME_RE = re.compile(u'[\\\\/:*?"<>|\\x00-\\x1f]')

    def __init__(self, url):
        """Store the settings and start downloading right away.

        @param url  address of the MV list; ``&page=N`` is appended to it
        """
        self.i = 1  # running number of the video being downloaded
        self.url = url
        self.headers = {
            'User-Agent':'Mozilla/5.0 (Windows NT 6.3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36',
            'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
        }
        self.timeout = 30  # seconds, passed to urllib2.urlopen
        self.__init()

    def __init(self, page=1):
        """Download every video from listing page ``page`` onwards.

        Pages are walked in a loop (not by recursion) so that a long listing
        cannot exhaust the interpreter's recursion limit.

        @param page  1-based number of the first listing page to fetch
        """
        while True:
            print(u"开始下载:第 %d 页 ..." % page)
            entries = self.__getMvPageList(self.url + "&page=%d" % page)
            if not entries:
                break
            for entry in entries:
                video = self.getMvUrl(entry)
                # getMvUrl returns None when no video address could be
                # obtained; skip that entry instead of crashing on it.
                if video is not None:
                    # Undecodable bytes in the title must not abort the run.
                    self.__download(video[0], video[1].decode("utf-8", "replace"))
                    self.i += 1
                # Pause between videos to avoid hammering the server.
                time.sleep(2)
            page += 1
        print(u"\n~~~~~~~~~~~~完成!~~~~~~~~~~~~")

    def __getMvPageList(self, url):
        """Fetch one listing page and extract its videos.

        @param url  full address of the listing page
        return  list of ``(video_id, title)`` tuples as found by the regex;
                an empty list when nothing matched or the page could not be
                fetched (a message is printed in the latter case)
        """
        try:
            request = urllib2.Request(url, None, self.headers)
            response = urllib2.urlopen(request, None, self.timeout)
            try:
                html = response.read()
            finally:
                response.close()
        except Exception:
            # ``Exception`` rather than a bare except: Ctrl-C must still be
            # able to stop the program.
            print(u" 读取列表页失败!\n")
            return []
        return self._LIST_ENTRY_RE.findall(html)

    @staticmethod
    def _pick_video_url(urls):
        """Choose the address to download from the candidates found.

        Returns the third address when at least three are present (per the
        original author's note, that one is the HD variant), otherwise the
        first one, or None when ``urls`` is empty.
        """
        if not urls:
            return None
        return urls[2] if len(urls) >= 3 else urls[0]

    @classmethod
    def _safe_filename(cls, name):
        """Return ``name`` with path separators and other unsafe characters
        replaced by ``_`` so it can be used as a single file name."""
        return cls._UNSAFE_FILENAME_RE.sub(u'_', name)

    def getMvUrl(self, mvlist):
        """Look up the download address of one video.

        @param mvlist  ``(video_id, title)`` pair taken from a listing page
        return  ``[video_url, title]``, or None when the video info could not
                be fetched or contained no video address (a message is
                printed in that case)
        """
        url = "http://www.yinyuetai.com/insite/get-video-info?flex=true&videoId=%d" % int(mvlist[0])
        try:
            req = urllib2.Request(url, None, self.headers)
            res = urllib2.urlopen(req, None, self.timeout)
            try:
                html = res.read()
            finally:
                res.close()
        except Exception:
            print(u" 读取视频列表失败!\n")
            return None
        video_url = self._pick_video_url(self._VIDEO_URL_RE.findall(html))
        if video_url is None:
            print(u" 读取视频列表失败!\n")
            return None
        return [video_url, mvlist[1]]

    def __download(self, url, name):
        """Download one video file into the ``flv`` directory.

        @param url   address of the video file
        @param name  video title; used as the file name (unsafe characters
                     replaced) with ``.flv`` appended
        """
        name = self._safe_filename(name) + '.flv'
        print(u" 下载:[%s] [%d]" % (name, self.i))
        local = os.path.join(self.__createDir(), name)
        try:
            urllib.urlretrieve(url, local, self.__schedule)
        except Exception:
            print(u" 下载失败!\n")
        else:
            print(u" 下载完成:[%s]\n" % name)

    def __createDir(self):
        """Make sure the download directory exists and return its path.

        The directory is ``flv`` under ``sys.path[0]`` (the directory of the
        running script when started as a script).

        return  path of the download directory
        """
        new_path = os.path.join(sys.path[0], 'flv')
        if not os.path.isdir(new_path):
            os.mkdir(new_path)
        return new_path

    def __schedule(self, a, b, c):
        """Progress callback for ``urllib.urlretrieve``.

        @param a  number of blocks transferred so far
        @param b  size of one block
        @param c  total size of the remote file
        """
        # urlretrieve reports a total size of -1 (or possibly 0) when the
        # server sends no length; a percentage cannot be computed then.
        if c <= 0:
            return
        per = min(100.0, 100.0 * a * b / c)
        sys.stdout.write(u" 进度:%.1f%%\r" % per)
        sys.stdout.flush()
if __name__ == '__main__':
    # Script entry point.  Constructing the downloader is what starts the
    # crawl, so the instance itself does not need to be kept.
    # NOTE(review): the query string looks like "all MVs sorted by weekly
    # views" -- confirm against the site if it matters.
    start_url = (
        'http://mv.yinyuetai.com/all?pageType=page&sort=weekViews&tab=allmv&parenttab=mv'
    )
    Yinyuetai(start_url)