m3u8视频下载加解密系列_练手spider_不定时长期此贴更新
本次目标 http://www.qiqi.la/vod-detail-id-46194.html 目的,down魔道祖师,实现 前期分析文件得到以下粗略步骤 1 进入二级页面,找到 <iframe width="100%" height="480" src="https://cn2.zuixinbo.com/share/722caafb4825ef5d8670710fa29087cf" frameborder="0" allowfullscreen=""></iframe> 得到网址 2 访问 https://cn2.zuixinbo.com/share/722caafb4825ef5d8670710fa29087cf 需要带上协议头 Referer: https://cn2.zuixinbo.com/share/722caafb4825ef5d8670710fa29087cf User-Agent: Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36 返回另一个页面response_1 得到文件标题 <title>重新压制魔道祖师 前尘篇02 序章 诛邪(下)福利加长版 高清(480P).qlv</title> 3 在response_1 得到:var main = "/20180710/4671_a5ef5a19/index.m3u8?sign=b0023d8b455da27a4294b38c7815f7b3"; 拼合网页:https://cn2.zuixinbo.com/20180710/4671_a5ef5a19/index.m3u8?sign=b0023d8b455da27a4294b38c7815f7b3 访问:得到返回结果 #EXTM3U #EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=800000,RESOLUTION=1080x608 1000k/hls/index.m3u8 4 拼合 https://cn2.zuixinbo.com/20180710/4671_a5ef5a19/1000k/hls/index.m3u8 带协议访问 Referer: https://cn2.zuixinbo.com/share/722caafb4825ef5d8670710fa29087cf User-Agent: Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36 得到.ts下载文件路径 分段下载 5 拼合.ts文件
有空更新完整代码
2018-10-21
开始编写代码
在重新写代码的过程中,发现直接在播放页面就有全部的播放地址,只不过是用UCS-2的
编码转换了一下,我们需要把其转换成ANSI编码
2 OK,这下直接拿到播放地址,做一下格式化的工作,进行第2步解析,上面的第一步工作算是白费了一片心思
3 按照上面步骤依次完成,基本没问题
# -*- coding:utf-8 -*- # @time:2018-10-21 14:43 # @Auther:1043453579@qq.com from urllib.request import Request from urllib.request import urlopen import re,time,os from concurrent.futures import ProcessPoolExecutor,ThreadPoolExecutor static_url_1 = \'http://www.qiqi.la/vod-detail-id-46194.html\' class A(object): def __init__(self,url,e=15): self.header= {\'user-agent\':\'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36\'} self.path = os.getcwd() self.e = e self.static_url=\'https://cn2.zuixinbo.com\' def num_of_e(self,url_2):#进入二级页面 res = urlopen(Request(url=url_2,headers=self.header)).read() res = res.decode() title = self.take_middle_text(res,\'<title>\',txt_e=\'</title>\') #标题 var_main=re.search(\'var main = "(.*?)";\',res).group(1) #访问下级地址 if not var_main: exit() return {\'var_main\':var_main,\'referer\':url_2,\'标题\':title} def open_3(self,url,referer=\'\',**kwargs): url=self.static_url+url a={\'Referer\': referer} a.update(self.header) res = urlopen(Request(url=url,headers=a)).read() res = res.decode() _=self.take_middle_text(res,\'1080x608\',\'.m3u8\')+\'.m3u8\' #得到ts视频地址 return {\'url\':_.split(),\'regerer1\':url} def open_4(self,url,referer1=\'\',**kwargs): referer=referer1 referer= referer.split(\'/\') referer=referer[0:-1] referer.append(*url) url=\'/\'.join(referer) print(url) a = {\'Referer\': referer1} a.update(self.header) res = urlopen(Request(url=url,headers=a)).read() res = res.decode() ts_list=[] for i in res.split(\'\n\'): try: if i[0]!=\'#\': ts_list.append(i) except:pass return {\'ts_list\':ts_list,\'url\':url} def take_middle_text(self,txt, txt_s, txt_e=\'\', seeks=0, seeke=0): # 取出中间文本,真返回中间文本,假返回False # seeks有传参,会按照取前几位取值 # seeke有传参,会按照取后几位取值 try: if txt_e or seeks or seeke: pass else: raise 1 s_1 = txt.find(txt_s) if s_1 == -1: raise 1 l_1 = len(txt_s) if txt_e: s_2 = txt.find(txt_e) if s_1 == -1 or s_2 == -1: return False return txt[s_1 + l_1:s_2] if seeks: return txt[s_1 - seeks:s_1] 
if seeke: return txt[s_1 + l_1:s_1 + l_1 + seeke] except: return \'传参错误或未找到传参文本\' def down_ts(self,dict,path_1): url = os.path.dirname(dict[\'url\'])+\'/\' ts_list=dict[\'ts_list\'] for i in ts_list: print(path_1,\'这里是path_1\') path = os.path.join(path_1, i) print(path,\'这里是path_ts文件网址\') if os.path.exists(path): print(\'已存在,跳过\') else: try: res = urlopen(Request(url=url+i,headers=self.header)).read() with open(path,\'wb\') as f: f.write(res) print(\'成功写入一条\') except: print(\'写入失败\') def main(self,url): dict_1 = self.num_of_e(url) #\'这里返回一个字典 \' dict_2 = self.open_3(dict_1[\'var_main\'],dict_1[\'referer\']) dict_3 = self.open_4(dict_2[\'url\'], dict_2[\'regerer1\']) #这里的url未提纯 title = dict_1[\'标题\'] path = os.path.join(self.path,title) #@print(title,\'这里是标题\') if not os.path.exists(path): os.mkdir(path) #没有就创建一个新的目录 self.down_ts(dict_3,path) if __name__ == \'__main__\': ex = ProcessPoolExecutor(2) a_1 = A(static_url_1, 15) with open(\'2.txt\', \'r\', encoding=\'utf8\') as f: for i in f: a = i.split()[0].split(\'$\')[1].split(\'#\')[0] print(ex.submit(a_1.main,a).result()) ex.shutdown()
View Code—第一版,用双进程当作并发,代理未加,隔几天再优化一下,先这样吧
2018-10-30
# -*- coding:utf-8 -*-
# @time:2018-10-21 14:43
# @Auther:1043453579@qq.com
from urllib.request import Request
from urllib.request import urlopen
from urllib.parse import urlsplit
import re
import time
import os
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor

static_url_1 = 'http://www.qiqi.la/vod-detail-id-46194.html'


class A(object):
    """m3u8 spider, version 2.

    All HTTP fetching goes through down_1(), which retries forever and logs
    failing urls to 3.txt. The site host is derived from each episode url.
    """

    def __init__(self):
        self.header = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36'}
        self.path = os.getcwd()
        # self.static_url='https://cn2.zuixinbo.com'
        self.static_url = ''  # filled in by main() from the episode url
        # self.r = redis.Redis(host='127.0.0.1',port=6379,db=0)

    def get_proxy(self):
        # Requires self.r (redis) to be enabled in __init__.
        # Fixed: original called the non-existent `.deocode()`.
        return {'http': self.r.randomkey().decode()}

    def down_1(self, url, referer='', code=True):
        """GET url, retrying forever.

        Returns decoded text when code=True, raw bytes otherwise. Failed
        attempts are appended to 3.txt and retried after 10 seconds.
        """
        while True:
            # proxy = self.get_proxy()  # 这里设置代理
            try:
                # Copy the shared headers: the original updated self.header in
                # place, so a stale Referer leaked into every later request.
                headers = dict(self.header)
                if referer:
                    headers['Referer'] = referer
                res = urlopen(Request(url=url, headers=headers), timeout=60).read()
                if code:
                    res = res.decode()
                if not res:
                    raise ValueError('empty response')
                time.sleep(1)  # be polite between requests
                return res
            except Exception:
                print('请求失败', url)
                with open('3.txt', 'a+') as f:
                    f.write(url)
                    f.write('\n')
                time.sleep(10)

    def num_of_e(self, url_2):
        """Fetch the share page; return title, referer and next playlist path."""
        res = self.down_1(url_2)
        title = self.take_middle_text(res, '<title>', txt_e='</title>')  # 标题
        m = re.search('var main = "(.*?)";', res)  # 访问下级地址
        if m is None or not m.group(1):
            # Original called exit(); raising keeps the worker debuggable.
            raise ValueError('var main not found in page: %s' % url_2)
        return {'var_main': m.group(1), 'referer': url_2, '标题': title}

    def open_3(self, url, referer='', **kwargs):
        """Fetch the master playlist; return the variant playlist path (as list)."""
        url = self.static_url + url
        res = self.down_1(url, referer=referer)
        _ = self.take_middle_text(res, '1080x608', '.m3u8') + '.m3u8'  # 得到ts视频地址
        return {'url': _.split(), 'regerer1': url}

    def open_4(self, url, referer1='', **kwargs):
        """Fetch the variant playlist; return its non-comment lines (.ts names)."""
        parts = referer1.split('/')[:-1]
        parts.extend(url)  # `url` is the one-element list produced by open_3
        url = '/'.join(parts)
        print(url)
        res = self.down_1(url, referer=referer1)
        ts_list = [line for line in res.split('\n') if line and not line.startswith('#')]
        return {'ts_list': ts_list, 'url': url}

    def take_middle_text(self, txt, txt_s, txt_e='', seeks=0, seeke=0):
        """Return the text between txt_s and txt_e.

        With seeks: return the `seeks` chars before txt_s.
        With seeke: return the `seeke` chars after txt_s.
        Returns False when txt_e is absent from txt, and the original error
        string when the arguments are bad or txt_s is not found.
        """
        try:
            if not (txt_e or seeks or seeke):
                raise ValueError('need txt_e, seeks or seeke')
            s_1 = txt.find(txt_s)
            if s_1 == -1:
                raise ValueError('txt_s not found')
            l_1 = len(txt_s)
            if txt_e:
                s_2 = txt.find(txt_e)
                if s_2 == -1:
                    return False
                return txt[s_1 + l_1:s_2]
            if seeks:
                return txt[s_1 - seeks:s_1]
            if seeke:
                return txt[s_1 + l_1:s_1 + l_1 + seeke]
        except ValueError:
            return '传参错误或未找到传参文本'

    def down_ts(self, info, path_1):
        """Download every segment in info['ts_list'] into directory path_1.

        Existing files are skipped; failures are timestamped into 3.txt.
        """
        base = os.path.dirname(info['url']) + '/'
        for name in info['ts_list']:
            path = os.path.join(path_1, name)
            if os.path.exists(path):
                print('已存在,跳过', name)
                continue
            try:
                res = urlopen(Request(url=base + name, headers=self.header), timeout=60).read()
                time.sleep(1)
                if not res:
                    raise ValueError('empty segment')
                with open(path, 'wb') as f:
                    f.write(res)
                print('成功写入一条', name)
            except Exception as e:
                with open('3.txt', 'a+') as f:
                    stamp = '-'.join(str(t) for t in time.localtime()[0:6])
                    # Fixed: the original concatenated the exception object to
                    # a str, raising TypeError inside this very handler.
                    f.write(stamp + '###' + str(e) + '$$$' + base)
                    f.write('\n')
                print('写入失败', name, e)
                time.sleep(5)

    def main(self, url):
        """Full pipeline for one episode url (host is taken from the url)."""
        # Fixed: url.split('com') broke on any domain with 'com' elsewhere in it.
        parts = urlsplit(url)
        self.static_url = parts.scheme + '://' + parts.netloc
        dict_1 = self.num_of_e(url)  # 这里返回一个字典
        dict_2 = self.open_3(dict_1['var_main'], dict_1['referer'])
        dict_3 = self.open_4(dict_2['url'], dict_2['regerer1'])
        path = os.path.join(self.path, dict_1['标题'])
        if not os.path.exists(path):
            os.mkdir(path)  # 没有就创建一个新的目录
        self.down_ts(dict_3, path)


if __name__ == '__main__':
    ex = ProcessPoolExecutor(3)
    a_1 = A()
    with open('2.txt', 'r', encoding='utf8') as f:
        for line in f:
            a = line.split()[0].split('$')[1].split('#')[0]
            ex.submit(a_1.main, a)
    ex.shutdown()
    # BUG在网页的提交网址中
View Code–代理未加,需要的请自行加上代理,稍微优化了一下,里面的2.txt是下载地址,见下面
第01集$https://cn2.zuixinbo.com/share/722caafb4825ef5d8670710fa29087cf# 第02集$https://cn2.zuixinbo.com/share/fbad540b2f3b5638a9be9aa6a4d8e450# 第03集$https://v-xunlei.com/share/c457d7ae48d08a6b84bc0b1b9bd7d474# 第04集$https://v-xunlei.com/share/8db1d4a631a6e9a24e2c0e842e1f1772# 第05集$https://v-xunlei.com/share/197f76fe309657064dbec74d9eea4be4# 第06集$https://v-xunlei.com/share/92b70a527191ca64ca2df1cc32142646# 第07集$https://v-xunlei.com/share/abc99d6b9938aa86d1f30f8ee0fd169f# 第08集$https://v-xunlei.com/share/22cdb13a83f73ccd1f79ffaf607b0621# 第09集$https://v-xunlei.com/share/aceacd5df18526f1d96ee1b9714e95eb# 第10集$https://v-6-cn.com/share/075b051ec3d22dac7b33f788da631fd4# 第11集$https://v-6-cn.com/share/4670c07872d5314c6ad6ffa633d4a059# 第12集$https://v-xunlei.com/share/2bba9f4124283edd644799e0cecd45ca# 第13集$https://v-cntv-cn.com/share/d87aa42cd08ba8612664a73dbdb64221# 第14集$https://v-cntv-cn.com/share/63ceea56ae1563b4477506246829b386# 第15集$https://v-cntv-cn.com/share/e8a69bf65aefc23d0f360ab695e9eac7
View Code–这里是下载地址
2020-05-02
# -*- coding:utf-8 -*-
# @time:2018-10-21 14:43
# @Auther:1043453579@qq.com
from urllib.request import Request
from urllib.request import urlopen
import re, os


class A(object):
    """m3u8 spider, version 3.

    Given a direct index.m3u8 url, downloads every numbered .ts segment
    listed in it into a directory named `title`.
    """

    def __init__(self):
        self.header = {
            'user-agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36',
            # ':authority':'m3u8.xingc.vip'
        }
        self.path = os.getcwd()
        # self.static_url='https://cn2.zuixinbo.com'
        self.static_url = ''
        # self.r = redis.Redis(host='127.0.0.1',port=6379,db=0)

    def get_proxy(self):
        # Requires self.r (redis) to be enabled in __init__.
        # Fixed: original called the non-existent `.deocode()`.
        return {'http': self.r.randomkey().decode()}

    def down_1(self, url, referer='', code=True):
        """GET url, retrying forever; text when code=True, bytes otherwise."""
        while True:
            # proxy = self.get_proxy()  # 这里设置代理
            try:
                # Copy the shared headers instead of mutating self.header,
                # which leaked a stale Referer into later requests.
                headers = dict(self.header)
                if referer:
                    headers['Referer'] = referer
                res = urlopen(Request(url=url, headers=headers), timeout=5).read()
                if code:
                    res = res.decode()
                if not res:
                    raise ValueError('empty response')
                return res
            except Exception:
                print('请求失败', url)

    def num_of_e(self, url_2):
        """Fetch url_2; return its body, or None when empty."""
        res = self.down_1(url_2)
        return res if res else None

    def take_middle_text(self, txt, txt_s, txt_e='', seeks=0, seeke=0):
        """Return the text between txt_s and txt_e.

        With seeks: return the `seeks` chars before txt_s.
        With seeke: return the `seeke` chars after txt_s.
        Returns False when txt_e is absent from txt, and the original error
        string when the arguments are bad or txt_s is not found.
        """
        try:
            if not (txt_e or seeks or seeke):
                raise ValueError('need txt_e, seeks or seeke')
            s_1 = txt.find(txt_s)
            if s_1 == -1:
                raise ValueError('txt_s not found')
            l_1 = len(txt_s)
            if txt_e:
                s_2 = txt.find(txt_e)
                if s_2 == -1:
                    return False
                return txt[s_1 + l_1:s_2]
            if seeks:
                return txt[s_1 - seeks:s_1]
            if seeke:
                return txt[s_1 + l_1:s_1 + l_1 + seeke]
        except ValueError:
            return '传参错误或未找到传参文本'

    def down_ts(self, list_1, path_1, url, temp_int):
        """Download each segment name in list_1 from base `url` into path_1.

        temp_int is the total segment count, echoed in progress output.
        Retries each segment until it succeeds (original behaviour).
        """
        import requests  # third-party; imported lazily as in the original
        for name in list_1:
            path = os.path.join(path_1, name)
            if os.path.exists(path):
                print('已存在,跳过', name)
                continue
            while True:
                try:
                    res = requests.get(url=url + name, headers=self.header, timeout=5)
                    if res:  # truthy == HTTP status < 400
                        with open(path, 'wb') as f:
                            f.write(res.content)
                        print('成功写入一条', name, temp_int)
                        break
                except Exception:
                    print('requests写入失败', temp_int)

    def main(self, url, title):
        """Download the playlist at `url` and all its segments into `title`/."""
        base = '/'.join(url.split('/')[:-1]) + '/'
        body = self.num_of_e(url)  # 这里返回m3u8的内容
        # Fixed: pattern was '\d+.ts' — unescaped dot also matched e.g. '12Xts'.
        m3u8_list = re.findall(r'\d+\.ts', body)
        print(m3u8_list)
        path = os.path.join(self.path, title)
        if not os.path.exists(path):
            os.mkdir(path)  # 没有就创建一个新的目录
        self.down_ts(m3u8_list, path, base, len(m3u8_list))


if __name__ == '__main__':
    # ex = ProcessPoolExecutor(3)
    a_1 = A()
    a = 'https:***/index.m3u8'
    a_1.main(a, 'name')
m3u8单文件,单线程下载(去代理)
2020-05-22 02:31:31
发现最近两年的视频基本都做了加密措施,综合网上的帖子,没有啥值得使用的地方,于此写下
aes-128加密系列 :男人的小视频梦想
使用工具:winhex
使用库命令:binascii.b2a_hex("二进制文本")
有一朋友问我,一个小视频APP站做加密,怎么解不出来
m3u8文件上小图,如下:
解密如下:
好的,代码就不发了,自行根据上面的代码进行整改,解密代码如下
import binascii from Crypto.Cipher import AES with open(\'0.ts\',\'rb+\') as f: a = f.read() print(len(a)%16) #取余为0 就加16位,不为0 就加(16-余数) a=a+b\'\00\'*16 #取余为0 就加16位,不为0 就加(16-余数) print(a) cryptos = AES.new(binascii.a2b_hex(\'5a43c7619623bc347fa7dcea3ddfb1b2\'), AES.MODE_CBC,bytes.fromhex(\'3e420d580bd9244dd608850e0dec7ac8\')) c= cryptos.decrypt(a) with open(\'0_2.ts\',\'wb\') as c1: c1.write(c)
View Code--解密AES