Python爬虫实践——爬取京东商品信息
'''
Scrape product information from JD.com search results.

Request URL:
    https://www.jd.com/
Fields of interest:
    1. Product detail page link
    2. Product name
    3. Product price
    4. Number of reviews
    5. Seller (listed as a goal, but not extracted by the code below)
'''
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time


def _scrape_current_page(driver, out_file):
    '''Print and write every product found on the currently loaded page.

    driver:   Selenium WebDriver positioned on a search-result page.
    out_file: writable text file object that receives one record per product.
    '''
    # Scroll down via JS so that lazily loaded products get rendered.
    driver.execute_script('window.scrollTo(0,5000);')

    # Give the page time to load the extra data.
    time.sleep(2)

    # Every product on the page is an element with class "gl-item".
    good_list = driver.find_elements(By.CLASS_NAME, 'gl-item')
    for good in good_list:
        # Product link
        good_url = good.find_element(
            By.CSS_SELECTOR, '.p-img a').get_attribute('href')

        # Product name (flattened onto a single line)
        good_name = good.find_element(
            By.CSS_SELECTOR, '.p-name em').text.replace("\n", "--")

        # Product price
        good_price = good.find_element(
            By.CLASS_NAME, 'p-price').text.replace("\n", ":")

        # Number of reviews
        good_commit = good.find_element(
            By.CLASS_NAME, 'p-commit').text.replace("\n", " ")

        good_content = f'''
        商品链接: {good_url}
        商品名称: {good_name}
        商品价格: {good_price}
        评价人数: {good_commit}
        \n
        '''
        print(good_content)
        out_file.write(good_content)


def _go_to_next_page(driver):
    '''Click the "next page" button; return False when there is no next page.'''
    try:
        next_tag = driver.find_element(By.CLASS_NAME, 'pn-next')
    except NoSuchElementException:
        # No pagination button at all: this was the last (or only) page.
        return False

    # NOTE(review): assumes the last page marks the button with a "disabled"
    # class instead of removing it -- confirm against the live page markup.
    if 'disabled' in (next_tag.get_attribute('class') or '').split():
        return False

    next_tag.click()
    # Wait for the next result page to load.
    time.sleep(2)
    return True


def get_good(driver):
    '''Scrape every result page reachable from the current one into jd.txt.

    driver: Selenium WebDriver already showing the first search-result page.

    Pages are walked in a loop (not recursively), so the number of pages is
    not limited by the interpreter's recursion depth and the driver is
    closed exactly once, whether the crawl finishes or an error is raised.
    Errors raised while reading a product are propagated to the caller.
    '''
    try:
        # Open the output file once for the whole crawl instead of per item.
        with open('jd.txt', 'a', encoding='utf-8') as f:
            while True:
                _scrape_current_page(driver, f)
                if not _go_to_next_page(driver):
                    break
    finally:
        driver.close()


def main():
    '''Ask for a search keyword, run the search on JD.com and scrape it.'''
    good_name = input('请输入爬取商品信息:').strip()

    driver = webdriver.Chrome()
    driver.implicitly_wait(10)
    try:
        # 1. Open the JD.com home page.
        driver.get('https://www.jd.com/')

        # 2. Type the product name into the search box and press Enter.
        input_tag = driver.find_element(By.ID, 'key')
        input_tag.send_keys(good_name)
        input_tag.send_keys(Keys.ENTER)
        time.sleep(2)
    except Exception:
        # get_good() never ran, so nothing else will release the browser.
        driver.quit()
        raise

    # get_good() takes over ownership of the driver and closes it.
    get_good(driver)


if __name__ == '__main__':
    main()
版权声明:本文为lweiser原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。