Python 爬取1688货源重量,自动发邮件到指定邮箱(qq),设置定时运行程序
# -*- coding: utf-8 -*-
# @Time    : 2020/7/6 13:46
# @Author  : Chunfang
# @Email   : 3470959534@qq.com
# @File    : test02.py
# @Software: PyCharm
"""Crawl 1688 product pages for their listed weight and mail the results.

The workbook ``SKU-URL-weight.xlsx`` holds one product per row: column 1 is
the SKU, column 2 the product URL.  For every row the page is fetched with
Chrome, the weight (or an availability note) is written to column 3, the
workbook is saved, and the file is e-mailed through QQ Mail.
"""

import os,xlrd,time
import re
import datetime
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.mime.image import MIMEImage
from email.mime.application import MIMEApplication
from openpyxl import load_workbook
from selenium import webdriver
from time import sleep
from selenium.webdriver.chrome.service import Service

# Workbook with the unique SKU / URL pairs; results are written back to it.
WORKBOOK_PATH = 'SKU-URL-weight.xlsx'
CHROMEDRIVER_PATH = r'D:\Python\Scripts\chromedriver.exe'
# Reuse the local Chrome profile so the crawl runs with the saved session.
CHROME_PROFILE_ARG = r'--user-data-dir=C:\Users\Administrator\AppData\Local\Google\Chrome\User Data'

# Mail server credentials and recipients.
FROM_ADDR = '3470959534@qq.com'
# Placeholder: replace with the QQ Mail SMTP authorization code.
# Do not commit a real code to version control.
PASSWORD = '验证码'
TO_ADDRS = ['3470959534@qq.com', '1725714926@qq.com']


def _fetch_page(url):
    """Open *url* in Chrome and return the rendered page source.

    The browser and the chromedriver service are always shut down, even when
    loading the page raises, so failed rows do not leak Chrome processes.
    """
    # NOTE(review): this service is started and stopped around the browser
    # session but is not handed to webdriver.Chrome, which locates its own
    # chromedriver.  Kept as in the original; confirm against the installed
    # Selenium version before wiring it in.
    c_service = Service(CHROMEDRIVER_PATH)
    c_service.start()
    try:
        option = webdriver.ChromeOptions()
        option.add_argument(CHROME_PROFILE_ARG)
        driver = webdriver.Chrome(options=option)
        try:
            driver.implicitly_wait(3)
            driver.get(url)
            sleep(3)  # give the page time to finish rendering
            data = driver.page_source
            sleep(2)
        finally:
            driver.quit()
        sleep(2)
    finally:
        c_service.stop()
    return data


def _parse_page(data):
    """Extract the status markers from a page.

    Returns a four-item list of regex match lists, in this order:
    ``[busy, error_404, pro_weight, right]`` -- the "busy, come back later"
    notice, the "sorry" error title, the weight values, and the order button.
    """
    busy = re.findall('<div class="tips" style=".*?<p>.*?(亲.*?回来).*?</p>', data, re.S)
    error_404 = re.findall('h3 class="title">.*?<em>(抱歉.*?)</em>', data, re.S)
    pro_weight = re.findall('<span>.*?<b>.*?重量</b>.*?<em>(.*?)</em>', data, re.S)
    right = re.findall('title="点击此按钮.*?rel="nofollow"><span>(.*?订购)</span></a>', data, re.S)
    return [busy, error_404, pro_weight, right]


def _cell_value(stations):
    """Turn the parsed markers into the text stored in the result column."""
    _busy, error_404, weights, orderable = stations
    if error_404:
        return error_404[0]  # the page's own "sorry ..." message
    if weights:
        # The original always read the second match and raised IndexError on
        # pages with a single weight entry; fall back to the first match.
        return weights[1] if len(weights) > 1 else weights[0]
    if orderable:
        return '产品有货,没有标注重量'
    return '产品下架'


def _send_report(attachment_path):
    """E-mail the workbook at *attachment_path* to the configured recipients.

    Mail failures are reported on stdout and do not raise, matching the
    script's best-effort handling of the notification step.
    """
    body = MIMEText('hello! 这是跑货源结果,请查收', 'plain', 'utf-8')

    with open(attachment_path, 'rb') as fh:
        excel_apart = MIMEApplication(fh.read())
    excel_apart.add_header('Content-Disposition', 'attachment', filename=attachment_path)

    # Headers belong on the outer multipart container; set on an inner part
    # they never reach the top level of the delivered message.
    mail = MIMEMultipart()
    mail['Subject'] = '测试邮件'
    mail['From'] = FROM_ADDR
    mail['To'] = ', '.join(TO_ADDRS)
    mail.attach(body)
    mail.attach(excel_apart)

    try:
        # QQ Mail requires an encrypted connection for authenticated SMTP.
        with smtplib.SMTP_SSL('smtp.qq.com', 465) as server:
            server.login(FROM_ADDR, PASSWORD)
            server.sendmail(FROM_ADDR, TO_ADDRS, mail.as_string())
            print('success')
    except (smtplib.SMTPException, OSError) as e:
        print('error:', e)


def down_data(start_row=16635):
    """Crawl every product row from *start_row* on and mail the workbook.

    Args:
        start_row: 1-based worksheet row to start from.  The default keeps the
            row the script was previously hard-coded to resume at.
    """
    start = datetime.datetime.now()
    filepath2 = WORKBOOK_PATH

    wb2 = load_workbook(filepath2)
    ws2 = wb2.worksheets[0]

    try:
        for i in range(start_row, ws2.max_row + 1):
            print('第' + str(i) + '个sku:' + str(ws2.cell(i, 1).value))
            url = ws2.cell(i, 2).value
            if not url:
                # Trailing or blank rows have no link to fetch.
                continue

            stations = _parse_page(_fetch_page(url))
            # The site answered with its "busy" notice: reload until the real
            # page comes back.  NOTE(review): this retry is unbounded.
            while stations[0]:
                stations = _parse_page(_fetch_page(url))
            print(stations)

            ws2.cell(i, 3).value = _cell_value(stations)
            print(stations)
    finally:
        # Save even when a row fails so finished rows are not lost.
        wb2.save(filepath2)

    end = datetime.datetime.now()
    print('Running time: %s Seconds'%(end-start))

    # Send the results to the recipients' mailboxes.
    _send_report(filepath2)


down_data()
# Run the Dianxiaomi sourcing crawl at a fixed time of day.
# while True:
#     time_now = time.strftime('%H:%M:%S',time.localtime())
#
#     if time_now =="20:00:10":
#         down_data()
#         # print('Hello')
#         subject = time.strftime("%Y-%m-%d %H:%M:%S",time.localtime())+'定时发送测试'
#         print(subject)
#     time.sleep(2)