# Simulated login to the 12306 website using a Python (Selenium) crawler.
from selenium import webdriver
from time import sleep
from PIL import Image
from selenium.webdriver import ActionChains
from Cjy import Chaojiying_Client
# Launch Chrome through the local chromedriver binary and open the 12306 login page.
# NOTE(review): `executable_path` and `find_element_by_xpath` are Selenium 3-era
# APIs — confirm the installed Selenium version still accepts them.
bro = webdriver.Chrome(executable_path='chromedriver.exe')
bro.get('https://kyfw.12306.cn/otn/login/init')
sleep(5)  # fixed pause before the screenshot so the page can finish loading

# Take a screenshot of the page; the captcha is cut out of it below.
bro.save_screenshot('main.png')

# The <img> element that holds the captcha.
code_img_tag = bro.find_element_by_xpath('//*[@id="loginForm"]/div/ul[2]/li[4]/div/div/div[3]/img')

# Top-left corner of the captcha element.
location = code_img_tag.location
# Width and height of the captcha element.
size = code_img_tag.size

# Crop box as (left, upper, right, lower).
# NOTE(review): assumes screenshot pixels map 1:1 to the element coordinates
# (no display scaling) — TODO confirm.
ragle = (
    int(location['x']),
    int(location['y']),
    int(location['x'] + size['width']),
    int(location['y'] + size['height']),
)

# Cut the captcha out of the screenshot and save it on its own. The crop box
# must be `ragle`; passing the builtin `range` here was a bug.
with Image.open('./main.png') as i:
    frame = i.crop(ragle)
    frame.save('code.png')
# Recognise the captcha with the Chaojiying client.
def get_text(imgPath, imgType):
    """Upload an image file through ``Chaojiying_Client`` and return its answer.

    Args:
        imgPath: Path of the image file to read and upload.
        imgType: Type code forwarded unchanged to ``PostPic``.

    Returns:
        The ``'pic_str'`` entry of the value returned by ``PostPic``.
    """
    # NOTE(review): the three arguments look like hard-coded account details
    # (presumably user, password, software id) — confirm and move out of source.
    chaojiying = Chaojiying_Client('123456', '123456.', '901485')
    # Open the file named by the argument, not a file literally called 'imgPath',
    # and close the handle once the bytes are read.
    with open(imgPath, 'rb') as img_file:
        im = img_file.read()
    return chaojiying.PostPic(im, imgType)['pic_str']
# Ask Chaojiying to recognise the cropped captcha (type code 9004).
result = get_text('./code.png', 9004)

# Turn the answer into a list of [x, y] integer pairs. Points are separated by
# '|' and each point is 'x,y'. Splitting on '|' also covers the single-point
# case, because a string without '|' splits into one element.
all_list = []
for point in result.split('|'):
    coords = point.split(',')
    all_list.append([int(coords[0]), int(coords[1])])
# Click each recognised point, fill in the credentials and submit the form.
# The browser is closed in `finally` so a failed click or lookup does not
# leave a Chrome process behind.
try:
    for x, y in all_list:
        # The coordinates are relative to the captcha image, not the whole page.
        # NOTE(review): newer Selenium releases measure this offset from the
        # element's centre rather than its top-left corner — confirm against
        # the installed version.
        ActionChains(bro).move_to_element_with_offset(code_img_tag, x, y).click().perform()
        sleep(1)

    # Type the user name into its input field.
    bro.find_element_by_id('username').send_keys('123456')
    sleep(1)

    # Type the password into its input field.
    bro.find_element_by_id('password').send_keys('123456')
    sleep(1)

    # Click the login button.
    bro.find_element_by_id('loginSub').click()
    sleep(5)
finally:
    bro.quit()