自从学会了 Python,斗图我怕过谁?
不多说,上代码:
# -*- coding: utf-8 -*-
# @Time : 2020/4/11 21:40
# @Author : zhao~xiujie
# @email : zxj0314@outlook.com
# @FileName: doutu.py.py
# @IDE: PyCharm
import requests
from lxml import etree
from urllib import request
import os
import re
# -*- coding: utf-8 -*-
# @Time : 2020/4/11 21:40
# @Author : zhao~xiujie
# @email : zxj0314@outlook.com
# @FileName: doutu.py.py
# @IDE: PyCharm
import requests
from lxml import etree
from urllib import request
import os
import re
def parse_page(url):
    """Download the images listed on one doutula listing page.

    Fetches ``url``, selects every ``<img>`` inside the
    ``page-content text-center`` container whose ``class`` is not ``"gif"``,
    and saves each one into the local ``doutula/`` directory. The file name
    is the image's ``alt`` text (with unsafe characters removed) plus the
    extension taken from its ``data-original`` URL.

    Args:
        url: Address of the listing page to scrape.

    Returns:
        None. Images are written to disk as a side effect.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
        'Referer': 'https://pos.baidu.com/wh/o.htm?ltr=',
        'Cookie': 'BAIDUID=145DFF007C12683DB04A0420F47B3EEE:FG=1',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    }
    # A timeout keeps one stalled connection from hanging the whole crawl.
    response = requests.get(url, headers=headers, timeout=10)
    text = response.content.decode('utf-8')
    html = etree.HTML(text)
    if html is None:
        # lxml returns None for an empty document; nothing to download.
        return
    imgs = html.xpath(r'//div[@class="page-content text-center"]//img[@class!="gif"]')

    output_dir = 'doutula'
    # urlretrieve does not create missing directories.
    os.makedirs(output_dir, exist_ok=True)

    # Punctuation stripped by the original pattern plus the characters that
    # are not allowed in file names on common filesystems.
    unsafe_chars = re.compile(r'[\\/:*?"<>|\.。!!??]')

    for img in imgs:
        img_url = img.get('data-original')
        if not img_url:
            # Without a source URL there is nothing to download.
            continue

        # Ignore any query string so it does not end up in the extension.
        url_path = img_url.split('?', 1)[0]
        suffix = os.path.splitext(url_path)[1]

        # The alt text is used as the file's base name; it may be missing.
        base_name = unsafe_chars.sub('', img.get('alt') or '').strip()
        if not base_name:
            # Fall back to the name used in the URL so images without usable
            # alt text do not overwrite each other as a bare ".jpg".
            base_name = os.path.splitext(os.path.basename(url_path))[0]
        if not base_name:
            continue

        file_name = base_name + suffix
        request.urlretrieve(img_url, os.path.join(output_dir, file_name))
def main(start_page=1, end_page=1000):
    """Scrape a range of doutula listing pages.

    Args:
        start_page: First page number to fetch (inclusive). Defaults to 1.
        end_page: Page number to stop before (exclusive). Defaults to 1000,
            so calling ``main()`` with no arguments fetches pages 1-999.
    """
    for page in range(start_page, end_page):
        url = 'https://www.doutula.com/photo/list/?page=%d' % page
        parse_page(url)
# Start the crawl only when this file is run as a script, not when imported.
if __name__ == '__main__':
    main()
版权声明:本文为baoshijie原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。