1 # -*- coding: utf-8 -*-
  2 # @Time : 2020/7/6 13:46
  3 # @Author : Chunfang
  4 # @Email : 3470959534@qq.com
  5 # @File : test02.py
  6 # @Software: PyCharm
  7 
  8 import os,xlrd,time
  9 import re
 10 import datetime
 11 from openpyxl import load_workbook
 12 from selenium import webdriver
 13 from time import sleep
 14 from selenium.webdriver.chrome.service import Service
 15 
 16 def down_data():
 17     start = datetime.datetime.now()
 18     filepath2 = \'SKU-URL-weight.xlsx\'                # 新建表格,将唯一的sku,url写入
 19 
 20     wb2 = load_workbook(filepath2)
 21     ws2 = wb2.worksheets[0]
 22 
 23     def down_data(url):         # 获取每个链接的数据
 24         # 加载浏览器带表头数据爬虫
 25         c_service = Service(r\'D:\Python\Scripts\chromedriver.exe\')
 26         c_service.command_line_args()
 27         c_service.start()
 28         profile_directory = r\'--user-data-dir=C:\Users\Administrator\AppData\Local\Google\Chrome\User Data\'
 29         option = webdriver.ChromeOptions()
 30         option.add_argument(profile_directory)
 31         driver = webdriver.Chrome(options=option)
 32         driver.implicitly_wait(3)
 33         driver.get(url)
 34         sleep(3)
 35         data = driver.page_source
 36         sleep(2)
 37         driver.quit()
 38         sleep(2)
 39         c_service.stop()
 40         return data
 41     def station(data):
 42         busy = re.findall(\'<div class="tips" style=".*?<p>.*?(亲.*?回来).*?</p>\', data, re.S)
 43         # print(busy)
 44         error_404 = re.findall(\'h3 class="title">.*?<em>(抱歉.*?)</em>\', data, re.S)
 45         # print(error_404)
 46         pro_weight = re.findall(\'<span>.*?<b>.*?重量</b>.*?<em>(.*?)</em>\', data, re.S)
 47         # print(pro_weight)
 48         right = re.findall(\'title="点击此按钮.*?rel="nofollow"><span>(.*?订购)</span></a>\', data, re.S)
 49         # print(right)
 50         stations.append(busy)
 51         stations.append(error_404)
 52         stations.append(pro_weight)
 53         stations.append(right)
 54 
 55     for i in range(16635,ws2.max_row+1):
 56         print(\'\' + str(i) + \'个sku:\' + str(ws2.cell(i, 1).value))
 57         stations = []
 58         data=down_data(ws2.cell(i,2).value)
 59         # print(data)
 60         station(data)
 61         while len(stations[0])!=0:#没有加载出来
 62             stations = []
 63             data = down_data(ws2.cell(i, 2).value)
 64             station(data)
 65             print(stations)
 66         if len(stations[1])==0:#判断404
 67             if len(stations[2])==0:#判断重量
 68                 if len(stations[3])==0:#判断产品是否有货,可订购
 69                     ws2.cell(i, 3).value = \'产品下架\'
 70                 else:
 71                     ws2.cell(i,3).value = \'产品有货,没有标注重量\'
 72             else:
 73                 ws2.cell(i, 3).value = stations[2][1]#添加重量
 74         else:
 75             ws2.cell(i,3).value = stations[1][0]#抱歉404
 76         print(stations)
 77 
 78         wb2.save(filepath2)
 79 
 80     end = datetime.datetime.now()
 81     print(\'Running time: %s Seconds\'%(end-start))
 82 
 83     #发邮件给对方邮箱
 84     import smtplib
 85     from email.mime.text import MIMEText
 86     from email.mime.multipart import MIMEMultipart
 87     from email.mime.image import MIMEImage
 88     from email.mime.application import MIMEApplication
 89 
 90     #设置服务器所需信息
 91     fromaddr =\'3470959534@qq.com\'
 92     password = \'验证码\'#qq的邮箱验证码
 93     toaddrs = [\'3470959534@qq.com\',\'1725714926@qq.com\']
 94 
 95     #邮件内容设置
 96     message = MIMEText(\'hello! 这是跑货源结果,请查收\',\'plain\',\'utf-8\')
 97     message[\'Subject\']=\'测试邮件\'
 98 
 99     excel_file =filepath2
100     excel_apart = MIMEApplication(open(excel_file,\'rb\').read())
101     excel_apart.add_header(\'Content-Disposition\',\'atttachment\',filename=excel_file)
102 
103     m = MIMEMultipart()
104     m.attach(message)
105     m.attach(excel_apart)
106 
107     try:
108         server = smtplib.SMTP(\'smtp.qq.com\')
109         server.login(fromaddr,password)
110         server.sendmail(fromaddr,toaddrs,m.as_string(),)
111         print(\'success\')
112         server.quit()
113     except smtplib.SMTPException as e:
114         print(\'error:\',e)
115 
# Script entry point: run the whole job once, immediately.
down_data()
# Scheduled run of the Dianxiaomi sourcing job (currently disabled).
# The loop below polls the wall clock and starts the job when the time
# string equals "20:00:10".
# while True:
#     time_now = time.strftime('%H:%M:%S',time.localtime())
#
#     if time_now =="20:00:10":
#         down_data()
#         # print('Hello')
#         subject = time.strftime("%Y-%m-%d %H:%M:%S",time.localtime())+'定时发送测试'
#         print(subject)
#         time.sleep(2)

 

# 版权声明:本文为chunfang原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
# 本文链接:https://www.cnblogs.com/chunfang/p/13343184.html