大家好,又见面了,我是你们的朋友全栈君。
需要获取的页面:
参考了此处,做了修改,代码如下:
代码语言:javascript代码运行次数:0运行复制 1 #coding:utf-8 2 import urllib2 3 import urllib 4 import re 5 import sys 6 import os 7 import time 8 9 10 class Yinyuetai(): 11 12 #地址初始化 13 def __init__(self, url): 14 self.i = 1 15 self.url = url 16 self.headers = { 17 'User-Agent':'Mozilla/5.0 (Windows NT 6.3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36', 18 'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' 19 } 20 self.timeout = 30 21 self.__init() 22 23 24 def __init(self, page=1): 25 print u"开始下载:第 %d 页 ..." % page 26 reurl = self.url + "&page=%d" %page 27 page = self.getPage(reurl) 28 mvPageList = self.__getMvPageList(page) 29 if len(mvPageList) > 0: 30 for plist in mvPageList: 31 mvlist = self.getMvURL(plist) 32 self.downLoad(mvlist[0], mvlist[1].decode("utf-8")) 33 self.i += 1 34 time.sleep(2) 35 page += 1 36 self.__init(page) 37 else: 38 print u"\n~~~~~~~~~~~完成!~~~~~~~~~~~~~~" 39 40 41 42 #获取指定页面源码 43 def getPage(self, url): 44 try: 45 request = urllib2.Request(url, None, self.headers) 46 response = urllib2.urlopen(request, None, self.timeout) 47 return response.read() 48 except: 49 return [] 50 51 #分析列表页,返回MV地址和名字列表[0]:视频ID[1]:视频名称 52 def __getMvPageList(self, page): 53 reg = r"