爬虫实战：爬取58同城房源信息
import requests
from lxml import etree
if __name__ == '__main__':
    # Scrape the listing titles from the 58.com second-hand housing
    # ("ershoufang") page and save them, one per line, to ./58.txt.

    # Fetch the page source.
    url = 'https://su.58.com/ershoufang/'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36'
    }
    # Without a timeout, requests waits indefinitely on a stalled connection.
    page_text = requests.get(url=url, headers=headers, timeout=10).text

    # Parse the HTML and collect the <li> element of every listing.
    tree = etree.HTML(page_text)
    # The class attribute value inside the XPath must be wrapped in plain
    # double quotes.
    li_list = tree.xpath('//ul[@class="house-list-wrap"]/li')

    # The context manager guarantees the file is flushed and closed even if
    # parsing one of the entries raises.
    with open('./58.txt', 'w', encoding='utf-8') as fp:
        for li in li_list:
            # Query relative to the current <li> only.
            titles = li.xpath('./div[2]/p/span/text()')
            if not titles:
                # Entries without a title node would otherwise raise
                # IndexError; skip them.
                continue
            title = titles[0]
            print(title)
            fp.write(title + '\n')
from lxml import etree
if __name__ == '__main__':
    # Scrape the listing titles from the 58.com second-hand housing
    # ("ershoufang") page and save them, one per line, to ./58.txt.

    # Fetch the page source.
    url = 'https://su.58.com/ershoufang/'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36'
    }
    # Without a timeout, requests waits indefinitely on a stalled connection.
    page_text = requests.get(url=url, headers=headers, timeout=10).text

    # Parse the HTML and collect the <li> element of every listing.
    tree = etree.HTML(page_text)
    # The class attribute value inside the XPath must be wrapped in plain
    # double quotes.
    li_list = tree.xpath('//ul[@class="house-list-wrap"]/li')

    # The context manager guarantees the file is flushed and closed even if
    # parsing one of the entries raises.
    with open('./58.txt', 'w', encoding='utf-8') as fp:
        for li in li_list:
            # Query relative to the current <li> only.
            titles = li.xpath('./div[2]/p/span/text()')
            if not titles:
                # Entries without a title node would otherwise raise
                # IndexError; skip them.
                continue
            title = titles[0]
            print(title)
            fp.write(title + '\n')
版权声明:本文为yangyang-1127原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。