素材库图片下载

import re
import requests
import random
import time
import os.path
from bs4 import BeautifulSoup

import pymysql

# Pool of desktop-browser User-Agent strings; one of them is chosen below.
user_agent_list = [
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
    "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
]
# Chosen once, when this statement runs, so every request made by this script
# run carries the same User-Agent.
UA = random.choice(user_agent_list)
# Request headers passed to requests.get() for each image download.
headers = {'User-Agent': UA}

def file_extension(path):
    """Return the file extension of *path*, including the leading dot.

    *path* may be a plain file path or a URL.  A URL query string
    (``?...``) or fragment (``#...``) is dropped before the extension is
    taken, so ``http://host/a.jpg?v=2`` yields ``.jpg`` rather than
    ``.jpg?v=2``.  An empty string is returned when there is no extension.
    """
    # Everything from the first '?' or '#' onwards is not part of the file
    # name; keeping it would leak characters such as '?' into the extension.
    for separator in ("?", "#"):
        path = path.partition(separator)[0]
    return os.path.splitext(path)[1]

# Connect to the local MySQL database.
# NOTE(review): the credentials are hard-coded here; consider reading them
# from configuration or the environment instead.
conn = pymysql.connect(user='root', password='1234', host='127.0.0.1', database='sucai')

# Directory the downloaded images are written to, and the prefix that replaces
# each original image URL inside the stored markup.
save_dir = 'C:\\Users\\mydell\\Desktop\\sucai365\\'
url_prefix = '/20200708/'

try:
    # Create a cursor.
    cursor = conn.cursor()

    # Run the query; execute() returns the number of affected rows.
    effect_rows = cursor.execute('select * from sucaix_copy2 where id>10851')
    print("受影响的行数", effect_rows)

    # Fetch every result row.
    results = cursor.fetchall()
    xx = 1012642  # number used as the next image's file name; +1 per saved image
    # Keep only the columns used below: column 0 (row id) and column 2 (the
    # markup that is parsed for <img> tags).
    datalist = [[row[0], row[2]] for row in results]

    for row_id, rowcode in datalist:
        soup = BeautifulSoup(rowcode, 'lxml')

        for img in soup.find_all('img'):
            url0 = img.get('src')
            if not url0:
                # <img> without a usable src: nothing to download or rewrite.
                continue

            print(url0)
            try:
                response = requests.get(url0, headers=headers, timeout=5)
                # Do not save an HTTP error page as if it were the image.
                response.raise_for_status()
            except requests.RequestException as exc:
                # One unreachable image must not abort the whole batch; the
                # original URL is left untouched in the markup.
                print(url0, exc)
                continue

            # Name the file after the running counter plus the URL's extension.
            img_name = str(xx) + file_extension(url0)
            with open(save_dir + img_name, 'wb') as f:
                f.write(response.content)

            # Point the markup at the local copy instead of the remote URL.
            rowcode = rowcode.replace(url0, url_prefix + img_name)
            xx += 1

        # Store the rewritten markup and commit row by row, so an interrupted
        # run keeps the rows already processed.
        cursor.execute('update sucaix_copy2 set rowcode2=%s  where id=%s', [rowcode, row_id])
        conn.commit()
        print(row_id)
finally:
    # Release the connection even when a query or a file write fails.
    conn.close()

版权声明:本文为duoba原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
本文链接:https://www.cnblogs.com/duoba/p/13273623.html