import scrapy

from scrapy.linkextractors import LinkExtractor

from scrapy.spiders import CrawlSpider

import re

 

class QicheSpider(CrawlSpider):
    """Spider that walks the autohome.com.cn brand index pages.

    The start URLs are the per-letter ``*_photo.html`` index pages.  Each
    start response is handled by :meth:`parse_start_url`, which iterates the
    ``<dl>`` nodes of the page and pairs every producer heading with the
    ``<ul>`` node found alongside it.
    """

    name = 'qiche'

    allowed_domains = ['autohome.com.cn']

    # 123
    # One index page per upper-cased letter; range(97, 99) covers only
    # 'a' and 'b', so just the A and B pages are requested.
    start_urls = [
        'https://www.autohome.com.cn/grade/carhtml/' + i.upper() + '_photo.html'
        for i in map(chr, range(97, 99))
    ]

    # rules = (
    #     Rule(LinkExtractor(allow=()), callback='parse_item'),
    # )

    def parse_start_url(self, response):
        """Walk the brand/producer structure of one index page.

        Args:
            response: The response for one of ``start_urls``; only its
                ``xpath()`` method is used here.
        """
        brand_node_list = response.xpath("//dl")

        for brand_node in brand_node_list:
            # Brand name
            brand_name = brand_node.xpath("./dt/div/a/text()").extract_first()

            # Producer headings and their <ul> nodes are selected separately
            # and paired by position; zip() stops at the shorter of the two.
            producer_list = brand_node.xpath("./dd/div[@class='h3-tit']/text()").extract()
            producer_ul_node_list = brand_node.xpath("./dd/ul")

            for producer_name, producer_ul_list in zip(producer_list, producer_ul_node_list):
                # NOTE(review): the body of this loop is missing from the
                # source (the text is cut off right after the loop header).
                # Restore the per-producer extraction here; until then the
                # loop is a deliberate no-op so the module stays importable.
                pass

 

 

 

 

import requests

def load_page(name):
url = ‘http://music.163.com/’
headers = {
“Proxy-Connection”: ” keep-alive”,
“Cache-Control”: ” max-age=0″,
“Upgrade-Insecure-Requests”: ” 1″,
“User-Agent”: ” Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36″,
“Accept”: ” text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8″,
“Referer”: ” https://www.baidu.com/link?url=ZQWYYdadp-J_1XzVWmJ7lIE1daavW_516QtvU_47Z8u&wd=&eqid=ff7ee69e0001ca2d000000035a03d64d”,
“Accept-Encoding”: ” gzip, deflate”,
“Accept-Language”: ” zh-CN,zh;q=0.8,zh-TW;q=0.6″,

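# Copyright notice: this is an original article by ppansj, licensed under CC 4.0 BY-SA. When reposting, include a link to the original source and this notice.
# Article link: http://www.cnblogs.com/ppansj/p/7954497.html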