python协程

import time
def func():
    """Print a message, block the thread for three seconds, then print again.

    Demonstrates a synchronous (blocking) call: nothing else in this thread
    can run while ``time.sleep`` is waiting.
    """
    pause_seconds = 3
    print('i love dawn')
    # time.sleep blocks the current thread; the CPU does no work for it meanwhile.
    time.sleep(pause_seconds)
    print('i love dawn true')
# Run the blocking demo only when this file is executed as a script.
if __name__ == '__main__':
    func()
# input() 等待用户输入时，程序也处于阻塞状态
#requests.get(url)在网络请求返回数据之前，程序也会处于阻塞状态
# 一般情况下，当程序处于IO操作时，线程都会处于阻塞状态，IO输入和输出
#协程：当程序遇见了IO操作的时候。可以选择性的切换到其他任务上，
# 在微观上是一个任务一个任务的进行切换。切换条件一般就是IO操作
# 在宏观上,我们能看到的其实是多个任务一起在执行
# 多任务异步操作
#上方所讲的一切，都是在单线程的条件下

#python编写协程的程序
import asyncio,time
'''async def func():
    print('i am saliy')
if __name__ =='__main__':
    g=func()#此时的函数是异步协程函数，此时函数执行得到的是一个协程对象
 #   print(g)
    asyncio.run(g)#协程程序需要asyncio模块的支持
'''
#多任务异步操作的一种写法，但一般用另一种
async def func1():
    """Print a greeting, wait three seconds without blocking, print it again."""
    pause_seconds = 3
    print('i am saliy')
    # A synchronous call here (time.sleep(3), requests.get(), ...) would block
    # the whole event loop and break the concurrency.
    # ``await`` suspends only this coroutine and hands control back to the
    # event loop so other tasks can run while it waits.
    await asyncio.sleep(pause_seconds)
    print('i am saliy')
async def func2():
    """Print a message, wait two seconds without blocking, print a second one."""
    pause_seconds = 2
    print('i am xiaoxueseh')
    # asyncio.sleep is the non-blocking counterpart of time.sleep(2).
    await asyncio.sleep(pause_seconds)
    print('i am xiaoxuesen')

async def func3():
    """Print a message, wait four seconds without blocking, print it again."""
    pause_seconds = 4
    print('i am pkq')
    # asyncio.sleep is the non-blocking counterpart of time.sleep(4).
    await asyncio.sleep(pause_seconds)
    print('i am pkq')
if __name__ =='__main__':
    async def _run_all():
        """Start the three demo coroutines together and wait for all of them."""
        # asyncio.wait() no longer accepts bare coroutine objects (deprecated
        # in Python 3.8, rejected with TypeError since 3.11), so each coroutine
        # is wrapped in a Task first. Tasks must be created inside a running
        # event loop, hence this small driver coroutine.
        tasks = [asyncio.create_task(coro) for coro in (func1(), func2(), func3())]
        await asyncio.wait(tasks)

    # Time the whole run: because the coroutines overlap, the elapsed time
    # should be close to the longest sleep rather than the sum of all sleeps.
    t1 = time.time()
    asyncio.run(_run_all())
    t2 = time.time()
    print(t2 - t1)


import asyncio,time

'''
#官方希望的写法
async def func1():
    print('i am saliy')
   #time.sleep(3)#当程序出现同步操作时，异步就中断了,requests.get()
    await  asyncio.sleep(3)#异步操作的代码,await挂起当前协程，把控制权交还给事件循环，让其他任务先运行
    print('i am saliy')
async def func2():
    print('i am xiaoxueseh')
#    time.sleep(2)
    await  asyncio.sleep(2)
    print('i am xiaoxuesen')

async def func3():
    print('i am pkq')
 #   time.sleep(4)

    await  asyncio.sleep(4)
    print('i am pkq')
async  def main():
第一种写法
        f1=func1()
        await f1#await必须在函数中，一般await挂起操作放在协程对象前面

    # 第二种写法（推荐）

tasks={asyncio.create_task(func1()),#过时警告，原本自动包装，但现在不行了，需要先包装为task对象，py3. 8以后加asyncio.create_task()

       asyncio.create_task(func2()),
       asyncio.create_task(func3())}
await asyncio.wait(tasks)



if __name__ =='__main__':
    t1 = time.time()
    asyncio.run(main())
    t2 = time.time()
    print(t2 - t1)
        '''
#在爬虫领域的 应用
async def download(url):
    """Simulate downloading ``url`` by sleeping for two seconds.

    ``url`` is accepted for illustration only; no request is actually sent.
    """
    simulated_latency = 2
    print('准备开始下载')
    # Stands in for the network request, which would likewise need ``await``.
    await asyncio.sleep(simulated_latency)
    print('end')
async def main():
    """Schedule one simulated download per URL and wait for all of them.

    All tasks are created first and awaited once afterwards. Awaiting inside
    the loop (as the original did) makes every iteration wait for the tasks
    created so far, which runs the downloads one after another.
    """
    urls = [
        # The original literal contained stray spaces (' https: // ...'),
        # which is not a valid URL.
        'https://www.bilibili.com',
        'https://www.baidu.com/',
    ]
    tasks = [asyncio.create_task(download(url)) for url in urls]
    await asyncio.wait(tasks)
# Start the event loop and run the crawler-style demo when executed as a script.
if __name__ =='__main__':
    asyncio.run(main())

#requests.get()同步的代码变为异步操作
#pip install aiohttp
import requests,asyncio,aiohttp
urls=['https://browser9.qhimg.com/bdr/__85/t01753453b660de14e9.jpg','https://browser9.qhimg.com/bdr/__85/t010448c46c1ecf7cab.jpg',
     'https://browser9.qhimg.com/bdr/__85/t01cd97ec806b712059.jpg']
async def aiodownload(url):
    """Download ``url`` with aiohttp and save it in the current directory.

    The local file name is the part of ``url`` after its last ``/``.
    """
    # rsplit cuts from the right, so index 1 is everything after the last "/".
    name = url.rsplit("/", 1)[1]
    print(name)
    # Steps: send the request, read the content, save the image.
    # aiohttp.ClientSession() plays the role of requests.session() and offers
    # .get() / .post(); ``async with`` closes the session and the response
    # automatically, so no explicit close() call is needed.
    async with aiohttp.ClientSession() as session, session.get(url) as resp:
        # The response has arrived; write it to the file.
        with open(name, mode='wb') as out_file:
            # Reading the body is itself asynchronous, so it must be awaited.
            body = await resp.content.read()
            out_file.write(body)
    print(name, 'ok')
    # aiohttp counterparts of the requests accessors:
    #   await resp.content.read()  <->  resp.content
    #   await resp.text()          <->  resp.text
    #   await resp.json()          <->  resp.json()

async def main():
    """Download every image in the module-level ``urls`` list concurrently.

    All tasks are created before the single ``await``. Awaiting inside the
    loop (as the original did) waits for each download to finish before the
    next one is even scheduled, which serializes the downloads.
    """
    tasks = [asyncio.create_task(aiodownload(url)) for url in urls]
    # asyncio.wait() raises ValueError when given an empty collection.
    if tasks:
        await asyncio.wait(tasks)
# Start the event loop and download the images when executed as a script.
if __name__=='__main__':
    asyncio.run(main())

#抓西游记
#http://dushu.baidu.com/api/pc/getCatalog?data={"book_id":"4306063500"}所有章节的内容{名称，cid}
#http://dushu.baidu.com/api/pc/getChapterContent?data={"book_id":"4306063500","cid":"4306063500|1569782244","need_bookinfo":1}小说具体内容
import requests,asyncio,aiohttp,json,aiofiles
'''
同步操作：访问getCatalog拿到所有章节的cid和名称
异步操作，访问getChapterContent下载所有章节内容
'''
async def aiodownload(cid,b_id,title):
    """Fetch one chapter from the getChapterContent API and save it as text.

    Args:
        cid: Chapter id as listed in the catalog.
        b_id: Book id; it is also used as the prefix of the API's ``cid`` field.
        title: Chapter title; the text is written to ``<title>.txt``.
    """
    # The API expects its parameters as a JSON document in the ``data`` query field.
    query = json.dumps({
        "book_id": b_id,
        "cid": f"{b_id}|{cid}",
        "need_bookinfo": 1
    })
    url = f"http://dushu.baidu.com/api/pc/getChapterContent?data={query}"
    async with aiohttp.ClientSession() as session, session.get(url) as resp:
        payload = await resp.json()
        # aiofiles keeps the file write from blocking the event loop.
        async with aiofiles.open(f"{title}.txt", mode='w', encoding='utf-8') as out_file:
            await out_file.write(payload['data']['novel']['content'])



async def getCatalog(url, book_id=None):
    """Fetch the chapter catalog, then download every chapter concurrently.

    Args:
        url: getCatalog API URL for the book.
        book_id: Book id passed on to each chapter download. When omitted, the
            module-level ``b_id`` set by the script entry point is used, which
            keeps existing ``getCatalog(url)`` calls working.
    """
    if book_id is None:
        book_id = b_id

    # The catalog is fetched synchronously with requests. This blocks, but it
    # happens before any task is scheduled, so no other coroutine is held up.
    resp = requests.get(url)
    try:
        catalog = resp.json()
    finally:
        resp.close()

    # Each item carries one chapter's title and cid; prepare one task per chapter.
    tasks = [
        asyncio.create_task(aiodownload(item['cid'], book_id, item['title']))
        for item in catalog['data']['novel']['items']
    ]
    # asyncio.wait() raises ValueError when given an empty collection.
    if tasks:
        await asyncio.wait(tasks)

# Script entry point: build the catalog URL for one book and run the crawler.
if __name__=="__main__":
    # b_id is defined at module level here; getCatalog also reads it as a global.
    b_id='4306063500'
    # The API takes its parameters as a JSON document in the ``data`` query field.
    url='http://dushu.baidu.com/api/pc/getCatalog?data={"book_id":"'+b_id+'"}'
    asyncio.run(getCatalog(url))

#一般的视频网站怎么做？
#用户上传-转码(变为清晰度不同。2k，1080等几份)-切片处理(把单个文件进行拆分)
#用户按照切片拉动进度条
#需要一个文件记录：1视频播放顺序2视频存放路径
#现在一般放在M3U文件中，按照utf8文件编码后，叫m3u8
#抓取视频
    #1找到m3u8(各种手段)
    #2通过m3u8下载到ts文件
    #可以通过各种手段把ts合并为一个MP4
    #https://m3api.awenhao.com/index.php?note=kkRgkazh3snb1fjptmd89&raw=1&n.m3u8
    #https://m3api.awenhao.com/index.php?note=kkRh4231gzj79dqmpcaer&raw=1&n.m3u8
#流程
    #1．拿到页面源代码
    # 2．从源代码中提取到m3u8的urL
    # 3．下载m3u8
    # 4 读取m3u8文件，下载视频
    # 5．合并视频
import requests,re
'''obj=re.compile(r"url: '(?P<url>.*?)',",re.S)#用来提取m3u8地址
url='http://www.91kanju2.com/vod-play/57212-1-1.html'
resp=requests.get(url)
if __name__ == '__main__':
    m3u8_url=obj.search(resp.text).group('url')#拿到m3u8地址
    print(m3u8_url)
    #下载m3u8
    resp2=requests.get(m3u8_url)
    with open('觉醒年代.m3u8',mode='wb') as f:
        f.write(resp2.content)
    resp2.close()
    print('ok')'''
#解析m3u8文件
import os

# Parse the previously saved m3u8 playlist and download every segment it
# lists into video/<n>.ts, numbered in playlist order.
# The output directory must exist before the first segment is written.
os.makedirs('video', exist_ok=True)
n=1

with open('觉醒年代.m3u8', mode='r', encoding='utf-8') as f:
    for line in f:
        # Drop surrounding whitespace and the trailing newline first.
        line = line.strip()
        # Skip blank lines (requests rejects an empty URL) and '#' directives.
        if not line or line.startswith('#'):
            continue

        # Download one video segment.
        resp3 = requests.get(line)
        try:
            # Use a separate name for the output file: the original rebound
            # ``f`` here, shadowing the playlist handle being iterated.
            with open(f'video/{n}.ts', mode='wb') as ts_file:
                ts_file.write(resp3.content)
        finally:
            resp3.close()
        n += 1
        print('over1')

'''检查页面源代码，发现没有m3u8文件，
发现网页是内嵌一个网页，在iframe框架中找到一个网址
进入网址，f12在sources中找m3u8文件
下载第一层m3u8文件， 里面存有第二层m3u8的url，下载第二层m3u8（视频存放路径）
下载视频，发现里面有加密，下载秘钥，进行解密
合并ts文件为一个mp4


'''
import requests,re,asyncio,aiohttp,aiofiles,re,os
#from bs4 import BeautifulSoup
from Crypto.Cipher import AES
#def getiframe(url):
 #   resp=requests.get(url)
  #  main_page=BeautifulSoup(resp.text,'html.parser')
   # src=main_page.find('iframe').get('src')
   # return src
def get_first_m3u8(url):
    """Return the first-level m3u8 address embedded in the page at ``url``.

    The address is taken from the ``url:'...'`` field that directly precedes
    ``urllist:{'2`` in the page source.
    """
    page_source = requests.get(url).text
    # re.S lets "." cross line breaks in case the snippet spans several lines.
    found = re.search(r"url:'(?P<m3u8_url>.*?)',urllist:{'2", page_source, re.S)
    return found.group('m3u8_url')

def download_m3u8_file(url,name):
    """Download ``url`` and store the raw response bytes in the file ``name``."""
    payload = requests.get(url).content
    with open(name, 'wb') as out_file:
        out_file.write(payload)
async def download_ts(url,name,session):
    """Download one ts segment through ``session`` and save it as video2/<name>."""
    target_path = f'video2/{name}'
    async with session.get(url) as resp, aiofiles.open(target_path, 'wb') as out_file:
        # Write the downloaded bytes to the file.
        segment_bytes = await resp.content.read()
        await out_file.write(segment_bytes)
    print('下载完毕')
async def aio_download():
    """Download every ts segment listed in the second-level m3u8 concurrently.

    Reads '镜双城1_second_m3u8.txt', where every non-comment line is expected
    to be a complete segment URL containing 'hls/'. The text after 'hls/' is
    used as the local file name under video2/.
    """
    # download_ts writes into video2/, so make sure the directory exists.
    os.makedirs('video2', exist_ok=True)
    tasks = []
    # Create the session once up front and share it between all downloads.
    async with aiohttp.ClientSession() as session:
        async with aiofiles.open('镜双城1_second_m3u8.txt', mode='r', encoding='utf-8') as f:
            async for line in f:
                # Strip the newline before testing, so blank lines are skipped
                # instead of failing on the 'hls/' split below.
                line = line.strip()
                if not line or line.startswith('#'):
                    continue
                line_name = line.rsplit('hls/')[1]
                # One task per segment.
                tasks.append(asyncio.create_task(download_ts(line, line_name, session)))
        # Wait for all downloads. asyncio.wait() raises ValueError when given
        # an empty collection, so skip it if the playlist had no segments.
        if tasks:
            await asyncio.wait(tasks)
'''def get_key(url):
    resp=requests.get(url)
    return resp
async def dec_ts(name,key):
    aes=AES.new(key=key,IV=b'0000000000000000',mode=AES.MODE_CBC)#key多长，iv偏移量就多长
    async with aiofiles.open(f'video2/{name}','rb') as f1,\
        aiofiles.open(f'video2/temp_{name}','wb') as f2:
        bs=await f1.read()#从原文件读取内容
        await f2.write(aes.decrypt(bs))#把解密好的内容写入文件
    print('ok')
async def aio_dec(key):
    #解密
    tasks=[]
    async  with aiofiles.open('镜双城1_second_m3u8.txt', mode='r', encoding='utf-8') as f:
        async for line in f:
            if line.startswith('#'):
                continue
            line = line.strip()  # 去掉没用的换行
            line_name = line.rsplit('hls/')[1]
            #开始创建异步任务
            task=asyncio.create_task(dec_ts(line,key))
            tasks.append(task)
        await  asyncio.wait(tasks)'''
def merge_ts():
    """Concatenate the downloaded ts segments into ``movie.mp4``.

    Segment order comes from '镜双城1_second_m3u8.txt': every non-blank line
    that does not start with '#' names one segment, stored as
    video2/<text after 'hls/'>. The segments are appended byte-for-byte in
    playlist order. Nothing is written when the playlist lists no segments.

    The original shelled out to the Windows ``copy`` command, but '\\b' in a
    normal string is a backspace character rather than the ``/b`` switch, and
    only the last segment name was passed instead of the joined list. Merging
    in Python is portable and not limited by command-line length.

    Raises:
        FileNotFoundError: If the playlist or a listed segment is missing.
        IndexError: If a segment line does not contain 'hls/'.
    """
    import shutil  # function-scope import: the module does not import shutil

    segment_paths = []
    with open('镜双城1_second_m3u8.txt', mode='r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()  # drop the trailing newline
            if not line or line.startswith('#'):
                continue
            line_name = line.rsplit('hls/')[1]
            segment_paths.append(f'video2/{line_name}')

    if not segment_paths:
        return

    with open('movie.mp4', mode='wb') as merged:
        for path in segment_paths:
            with open(path, mode='rb') as segment:
                # Stream each segment so it is never held in memory whole.
                shutil.copyfileobj(segment, merged)

def main(url):
    """Download the video behind the page at ``url``.

    Steps: extract the first-level m3u8 address from the page, save it, resolve
    and save the second-level m3u8 it points to, then download all ts segments
    asynchronously.

    Raises:
        ValueError: If the first-level m3u8 lists no second-level playlist.
    """
    from urllib.parse import urljoin  # function-scope import: not imported by the module

    # The original tutorial first took the URL from an <iframe>; this site has
    # no iframe, so the page URL is used directly.
    first_m3u8 = get_first_m3u8(url)
    download_m3u8_file(first_m3u8, '镜双城1_first_m3u8.txt')

    # Download the second-level m3u8. Its entry in the first-level file is a
    # path such as /20220117/bDEFx4os/1100kb/hls/index.m3u8 that has to be
    # combined with the host of the first-level URL. urljoin does this for any
    # path (and leaves absolute URLs untouched) instead of splitting on a
    # hard-coded "/20220117" date segment.
    second_m3u8 = None
    with open('镜双城1_first_m3u8.txt', mode='r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            second_m3u8 = urljoin(first_m3u8, line)
            download_m3u8_file(second_m3u8, '镜双城1_second_m3u8.txt')
    if second_m3u8 is None:
        raise ValueError('first-level m3u8 does not list a second-level playlist')

    # The ts entries in this second-level m3u8 are already complete URLs, so
    # no further joining is needed before downloading.
    asyncio.run(aio_download())
    # Fetching a decryption key would come next, but this source has none.
'''   key_url=second_m3u8_url_up+'key.key'#此处是偷懒写法，原本需要打开文件，用re正则取得名字
    key=get_key(key_url)
    #解密
    asyncio.run(aio_dec(key))'''
    #合并ts为mp4文件

# Script entry point: run the full m3u8 download pipeline for one page.
if __name__ == '__main__':
    url='https://www.btnull.re/py/QqL2O_1.html?164497'
    main(url)

本文链接：https://www.cnblogs.com/wzc6/p/16027459.html

python协程

python协程的更多相关文章

随机推荐

热门专题

目录导航