m3u8视频下载加解密系列_练手spider_不定时长期此贴更新
本次目标 http://www.qiqi.la/vod-detail-id-46194.html 目的,down魔道祖师,实现 前期分析文件得到以下粗略步骤 1 进入二级页面,找到 <iframe width="100%" height="480" src="https://cn2.zuixinbo.com/share/722caafb4825ef5d8670710fa29087cf" frameborder="0" allowfullscreen=""></iframe> 得到网址 2 访问 https://cn2.zuixinbo.com/share/722caafb4825ef5d8670710fa29087cf 需要带上协议头 Referer: https://cn2.zuixinbo.com/share/722caafb4825ef5d8670710fa29087cf User-Agent: Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36 返回另一个页面response_1 得到文件标题 <title>重新压制魔道祖师 前尘篇02 序章 诛邪(下)福利加长版 高清(480P).qlv</title> 3 在response_1 得到:var main = "/20180710/4671_a5ef5a19/index.m3u8?sign=b0023d8b455da27a4294b38c7815f7b3"; 拼合网页:https://cn2.zuixinbo.com/20180710/4671_a5ef5a19/index.m3u8?sign=b0023d8b455da27a4294b38c7815f7b3 访问:得到返回结果 #EXTM3U #EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=800000,RESOLUTION=1080x608 1000k/hls/index.m3u8 4 拼合 https://cn2.zuixinbo.com/20180710/4671_a5ef5a19/1000k/hls/index.m3u8 带协议访问 Referer: https://cn2.zuixinbo.com/share/722caafb4825ef5d8670710fa29087cf User-Agent: Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36 得到.ts下载文件路径 分段下载 5 拼合.ts文件
有空更新完整代码
2018-10-21
开始编写代码
在重新写代码的过程中,发现直接在播放页面就有全部的播放地址,只不过是用UCS-2的
编码转换了一下,我们需要把其转换成ANSI编码
2 OK,这下直接拿到播放地址,做一下格式化的工作,进行第2步解析,上面的第一步工作算是白费了一片心思
3 按照上面步骤依次完成,基本没问题
# -*- coding:utf-8 -*- # @time:2018-10-21 14:43 # @Auther:1043453579@qq.com from urllib.request import Request from urllib.request import urlopen import re,time,os from concurrent.futures import ProcessPoolExecutor,ThreadPoolExecutor static_url_1 = \'http://www.qiqi.la/vod-detail-id-46194.html\' class A(object): def __init__(self,url,e=15): self.header= {\'user-agent\':\'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36\'} self.path = os.getcwd() self.e = e self.static_url=\'https://cn2.zuixinbo.com\' def num_of_e(self,url_2):#进入二级页面 res = urlopen(Request(url=url_2,headers=self.header)).read() res = res.decode() title = self.take_middle_text(res,\'<title>\',txt_e=\'</title>\') #标题 var_main=re.search(\'var main = "(.*?)";\',res).group(1) #访问下级地址 if not var_main: exit() return {\'var_main\':var_main,\'referer\':url_2,\'标题\':title} def open_3(self,url,referer=\'\',**kwargs): url=self.static_url+url a={\'Referer\': referer} a.update(self.header) res = urlopen(Request(url=url,headers=a)).read() res = res.decode() _=self.take_middle_text(res,\'1080x608\',\'.m3u8\')+\'.m3u8\' #得到ts视频地址 return {\'url\':_.split(),\'regerer1\':url} def open_4(self,url,referer1=\'\',**kwargs): referer=referer1 referer= referer.split(\'/\') referer=referer[0:-1] referer.append(*url) url=\'/\'.join(referer) print(url) a = {\'Referer\': referer1} a.update(self.header) res = urlopen(Request(url=url,headers=a)).read() res = res.decode() ts_list=[] for i in res.split(\'\n\'): try: if i[0]!=\'#\': ts_list.append(i) except:pass return {\'ts_list\':ts_list,\'url\':url} def take_middle_text(self,txt, txt_s, txt_e=\'\', seeks=0, seeke=0): # 取出中间文本,真返回中间文本,假返回False # seeks有传参,会按照取前几位取值 # seeke有传参,会按照取后几位取值 try: if txt_e or seeks or seeke: pass else: raise 1 s_1 = txt.find(txt_s) if s_1 == -1: raise 1 l_1 = len(txt_s) if txt_e: s_2 = txt.find(txt_e) if s_1 == -1 or s_2 == -1: return False return txt[s_1 + l_1:s_2] if seeks: return txt[s_1 - seeks:s_1] 
if seeke: return txt[s_1 + l_1:s_1 + l_1 + seeke] except: return \'传参错误或未找到传参文本\' def down_ts(self,dict,path_1): url = os.path.dirname(dict[\'url\'])+\'/\' ts_list=dict[\'ts_list\'] for i in ts_list: print(path_1,\'这里是path_1\') path = os.path.join(path_1, i) print(path,\'这里是path_ts文件网址\') if os.path.exists(path): print(\'已存在,跳过\') else: try: res = urlopen(Request(url=url+i,headers=self.header)).read() with open(path,\'wb\') as f: f.write(res) print(\'成功写入一条\') except: print(\'写入失败\') def main(self,url): dict_1 = self.num_of_e(url) #\'这里返回一个字典 \' dict_2 = self.open_3(dict_1[\'var_main\'],dict_1[\'referer\']) dict_3 = self.open_4(dict_2[\'url\'], dict_2[\'regerer1\']) #这里的url未提纯 title = dict_1[\'标题\'] path = os.path.join(self.path,title) #@print(title,\'这里是标题\') if not os.path.exists(path): os.mkdir(path) #没有就创建一个新的目录 self.down_ts(dict_3,path) if __name__ == \'__main__\': ex = ProcessPoolExecutor(2) a_1 = A(static_url_1, 15) with open(\'2.txt\', \'r\', encoding=\'utf8\') as f: for i in f: a = i.split()[0].split(\'$\')[1].split(\'#\')[0] print(ex.submit(a_1.main,a).result()) ex.shutdown()
View Code—第一版,用双进程当作并发,代理未加,隔几天再优化一下,先这样吧
2018-10-30
# -*- coding:utf-8 -*-
# @time:2018-10-21 14:43
# @Auther:1043453579@qq.com
from urllib.request import Request
from urllib.request import urlopen
from urllib.parse import urlsplit
import re
import time
import os
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor

static_url_1 = 'http://www.qiqi.la/vod-detail-id-46194.html'


class A(object):
    """m3u8 spider, version 2.

    All HTTP fetching goes through down_1(), which retries forever and logs
    failing urls to 3.txt. The site host is derived from each episode url.
    """

    def __init__(self):
        self.header = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36'}
        self.path = os.getcwd()
        # self.static_url='https://cn2.zuixinbo.com'
        self.static_url = ''  # filled in by main() from the episode url
        # self.r = redis.Redis(host='127.0.0.1',port=6379,db=0)

    def get_proxy(self):
        # Requires self.r (redis) to be enabled in __init__.
        # Fixed: original called the non-existent `.deocode()`.
        return {'http': self.r.randomkey().decode()}

    def down_1(self, url, referer='', code=True):
        """GET url, retrying forever.

        Returns decoded text when code=True, raw bytes otherwise. Failed
        attempts are appended to 3.txt and retried after 10 seconds.
        """
        while True:
            # proxy = self.get_proxy()  # 这里设置代理
            try:
                # Copy the shared headers: the original updated self.header in
                # place, so a stale Referer leaked into every later request.
                headers = dict(self.header)
                if referer:
                    headers['Referer'] = referer
                res = urlopen(Request(url=url, headers=headers), timeout=60).read()
                if code:
                    res = res.decode()
                if not res:
                    raise ValueError('empty response')
                time.sleep(1)  # be polite between requests
                return res
            except Exception:
                print('请求失败', url)
                with open('3.txt', 'a+') as f:
                    f.write(url)
                    f.write('\n')
                time.sleep(10)

    def num_of_e(self, url_2):
        """Fetch the share page; return title, referer and next playlist path."""
        res = self.down_1(url_2)
        title = self.take_middle_text(res, '<title>', txt_e='</title>')  # 标题
        m = re.search('var main = "(.*?)";', res)  # 访问下级地址
        if m is None or not m.group(1):
            # Original called exit(); raising keeps the worker debuggable.
            raise ValueError('var main not found in page: %s' % url_2)
        return {'var_main': m.group(1), 'referer': url_2, '标题': title}

    def open_3(self, url, referer='', **kwargs):
        """Fetch the master playlist; return the variant playlist path (as list)."""
        url = self.static_url + url
        res = self.down_1(url, referer=referer)
        _ = self.take_middle_text(res, '1080x608', '.m3u8') + '.m3u8'  # 得到ts视频地址
        return {'url': _.split(), 'regerer1': url}

    def open_4(self, url, referer1='', **kwargs):
        """Fetch the variant playlist; return its non-comment lines (.ts names)."""
        parts = referer1.split('/')[:-1]
        parts.extend(url)  # `url` is the one-element list produced by open_3
        url = '/'.join(parts)
        print(url)
        res = self.down_1(url, referer=referer1)
        ts_list = [line for line in res.split('\n') if line and not line.startswith('#')]
        return {'ts_list': ts_list, 'url': url}

    def take_middle_text(self, txt, txt_s, txt_e='', seeks=0, seeke=0):
        """Return the text between txt_s and txt_e.

        With seeks: return the `seeks` chars before txt_s.
        With seeke: return the `seeke` chars after txt_s.
        Returns False when txt_e is absent from txt, and the original error
        string when the arguments are bad or txt_s is not found.
        """
        try:
            if not (txt_e or seeks or seeke):
                raise ValueError('need txt_e, seeks or seeke')
            s_1 = txt.find(txt_s)
            if s_1 == -1:
                raise ValueError('txt_s not found')
            l_1 = len(txt_s)
            if txt_e:
                s_2 = txt.find(txt_e)
                if s_2 == -1:
                    return False
                return txt[s_1 + l_1:s_2]
            if seeks:
                return txt[s_1 - seeks:s_1]
            if seeke:
                return txt[s_1 + l_1:s_1 + l_1 + seeke]
        except ValueError:
            return '传参错误或未找到传参文本'

    def down_ts(self, info, path_1):
        """Download every segment in info['ts_list'] into directory path_1.

        Existing files are skipped; failures are timestamped into 3.txt.
        """
        base = os.path.dirname(info['url']) + '/'
        for name in info['ts_list']:
            path = os.path.join(path_1, name)
            if os.path.exists(path):
                print('已存在,跳过', name)
                continue
            try:
                res = urlopen(Request(url=base + name, headers=self.header), timeout=60).read()
                time.sleep(1)
                if not res:
                    raise ValueError('empty segment')
                with open(path, 'wb') as f:
                    f.write(res)
                print('成功写入一条', name)
            except Exception as e:
                with open('3.txt', 'a+') as f:
                    stamp = '-'.join(str(t) for t in time.localtime()[0:6])
                    # Fixed: the original concatenated the exception object to
                    # a str, raising TypeError inside this very handler.
                    f.write(stamp + '###' + str(e) + '$$$' + base)
                    f.write('\n')
                print('写入失败', name, e)
                time.sleep(5)

    def main(self, url):
        """Full pipeline for one episode url (host is taken from the url)."""
        # Fixed: url.split('com') broke on any domain with 'com' elsewhere in it.
        parts = urlsplit(url)
        self.static_url = parts.scheme + '://' + parts.netloc
        dict_1 = self.num_of_e(url)  # 这里返回一个字典
        dict_2 = self.open_3(dict_1['var_main'], dict_1['referer'])
        dict_3 = self.open_4(dict_2['url'], dict_2['regerer1'])
        path = os.path.join(self.path, dict_1['标题'])
        if not os.path.exists(path):
            os.mkdir(path)  # 没有就创建一个新的目录
        self.down_ts(dict_3, path)


if __name__ == '__main__':
    ex = ProcessPoolExecutor(3)
    a_1 = A()
    with open('2.txt', 'r', encoding='utf8') as f:
        for line in f:
            a = line.split()[0].split('$')[1].split('#')[0]
            ex.submit(a_1.main, a)
    ex.shutdown()
    # BUG在网页的提交网址中
View Code–代理未加,需要的请自行加上代理,稍微优化了一下,里面的2.txt是下载地址,见下面
第01集$https://cn2.zuixinbo.com/share/722caafb4825ef5d8670710fa29087cf# 第02集$https://cn2.zuixinbo.com/share/fbad540b2f3b5638a9be9aa6a4d8e450# 第03集$https://v-xunlei.com/share/c457d7ae48d08a6b84bc0b1b9bd7d474# 第04集$https://v-xunlei.com/share/8db1d4a631a6e9a24e2c0e842e1f1772# 第05集$https://v-xunlei.com/share/197f76fe309657064dbec74d9eea4be4# 第06集$https://v-xunlei.com/share/92b70a527191ca64ca2df1cc32142646# 第07集$https://v-xunlei.com/share/abc99d6b9938aa86d1f30f8ee0fd169f# 第08集$https://v-xunlei.com/share/22cdb13a83f73ccd1f79ffaf607b0621# 第09集$https://v-xunlei.com/share/aceacd5df18526f1d96ee1b9714e95eb# 第10集$https://v-6-cn.com/share/075b051ec3d22dac7b33f788da631fd4# 第11集$https://v-6-cn.com/share/4670c07872d5314c6ad6ffa633d4a059# 第12集$https://v-xunlei.com/share/2bba9f4124283edd644799e0cecd45ca# 第13集$https://v-cntv-cn.com/share/d87aa42cd08ba8612664a73dbdb64221# 第14集$https://v-cntv-cn.com/share/63ceea56ae1563b4477506246829b386# 第15集$https://v-cntv-cn.com/share/e8a69bf65aefc23d0f360ab695e9eac7
View Code–这里是下载地址
2020-05-02
# -*- coding:utf-8 -*-
# @time:2018-10-21 14:43
# @Auther:1043453579@qq.com
from urllib.request import Request
from urllib.request import urlopen
import re, os


class A(object):
    """m3u8 spider, version 3.

    Given a direct index.m3u8 url, downloads every numbered .ts segment
    listed in it into a directory named `title`.
    """

    def __init__(self):
        self.header = {
            'user-agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36',
            # ':authority':'m3u8.xingc.vip'
        }
        self.path = os.getcwd()
        # self.static_url='https://cn2.zuixinbo.com'
        self.static_url = ''
        # self.r = redis.Redis(host='127.0.0.1',port=6379,db=0)

    def get_proxy(self):
        # Requires self.r (redis) to be enabled in __init__.
        # Fixed: original called the non-existent `.deocode()`.
        return {'http': self.r.randomkey().decode()}

    def down_1(self, url, referer='', code=True):
        """GET url, retrying forever; text when code=True, bytes otherwise."""
        while True:
            # proxy = self.get_proxy()  # 这里设置代理
            try:
                # Copy the shared headers instead of mutating self.header,
                # which leaked a stale Referer into later requests.
                headers = dict(self.header)
                if referer:
                    headers['Referer'] = referer
                res = urlopen(Request(url=url, headers=headers), timeout=5).read()
                if code:
                    res = res.decode()
                if not res:
                    raise ValueError('empty response')
                return res
            except Exception:
                print('请求失败', url)

    def num_of_e(self, url_2):
        """Fetch url_2; return its body, or None when empty."""
        res = self.down_1(url_2)
        return res if res else None

    def take_middle_text(self, txt, txt_s, txt_e='', seeks=0, seeke=0):
        """Return the text between txt_s and txt_e.

        With seeks: return the `seeks` chars before txt_s.
        With seeke: return the `seeke` chars after txt_s.
        Returns False when txt_e is absent from txt, and the original error
        string when the arguments are bad or txt_s is not found.
        """
        try:
            if not (txt_e or seeks or seeke):
                raise ValueError('need txt_e, seeks or seeke')
            s_1 = txt.find(txt_s)
            if s_1 == -1:
                raise ValueError('txt_s not found')
            l_1 = len(txt_s)
            if txt_e:
                s_2 = txt.find(txt_e)
                if s_2 == -1:
                    return False
                return txt[s_1 + l_1:s_2]
            if seeks:
                return txt[s_1 - seeks:s_1]
            if seeke:
                return txt[s_1 + l_1:s_1 + l_1 + seeke]
        except ValueError:
            return '传参错误或未找到传参文本'

    def down_ts(self, list_1, path_1, url, temp_int):
        """Download each segment name in list_1 from base `url` into path_1.

        temp_int is the total segment count, echoed in progress output.
        Retries each segment until it succeeds (original behaviour).
        """
        import requests  # third-party; imported lazily as in the original
        for name in list_1:
            path = os.path.join(path_1, name)
            if os.path.exists(path):
                print('已存在,跳过', name)
                continue
            while True:
                try:
                    res = requests.get(url=url + name, headers=self.header, timeout=5)
                    if res:  # truthy == HTTP status < 400
                        with open(path, 'wb') as f:
                            f.write(res.content)
                        print('成功写入一条', name, temp_int)
                        break
                except Exception:
                    print('requests写入失败', temp_int)

    def main(self, url, title):
        """Download the playlist at `url` and all its segments into `title`/."""
        base = '/'.join(url.split('/')[:-1]) + '/'
        body = self.num_of_e(url)  # 这里返回m3u8的内容
        # Fixed: pattern was '\d+.ts' — unescaped dot also matched e.g. '12Xts'.
        m3u8_list = re.findall(r'\d+\.ts', body)
        print(m3u8_list)
        path = os.path.join(self.path, title)
        if not os.path.exists(path):
            os.mkdir(path)  # 没有就创建一个新的目录
        self.down_ts(m3u8_list, path, base, len(m3u8_list))


if __name__ == '__main__':
    # ex = ProcessPoolExecutor(3)
    a_1 = A()
    a = 'https:***/index.m3u8'
    a_1.main(a, 'name')
m3u8单文件,单线程下载(去代理)
2020-05-22 02:31:31
发现最近两年的视频基本都做了加密措施,综合网上的帖子,没有啥值得使用的地方,于此写下
aes-128加密系列 :男人的小视频梦想
使用工具:winhex
使用库命令:binascii.b2a_hex("二进制文本")
有一朋友问我,一个小视频APP站做加密,怎么解不出来
m3u8文件上小图,如下:
解密如下:
好的,代码就不发了,自行根据上面的代码进行整改,解密代码如下
import binascii from Crypto.Cipher import AES with open(\'0.ts\',\'rb+\') as f: a = f.read() print(len(a)%16) #取余为0 就加16位,不为0 就加(16-余数) a=a+b\'\00\'*16 #取余为0 就加16位,不为0 就加(16-余数) print(a) cryptos = AES.new(binascii.a2b_hex(\'5a43c7619623bc347fa7dcea3ddfb1b2\'), AES.MODE_CBC,bytes.fromhex(\'3e420d580bd9244dd608850e0dec7ac8\')) c= cryptos.decrypt(a) with open(\'0_2.ts\',\'wb\') as c1: c1.write(c)
View Code--解密AES