# python爬虫小说代码(可用),以笔趣阁为例,需要 python3.6 以上。
# 作者:汉唐自远工程师,QQ:342290433
import requests
import re
from lxml import etree
# Start page: the URL handed to get_content() as the first page to download.
url = "https://www.biquga.com/33_33132/16700250.html"
def get_content(url: str) -> None:
    """Download the page at *url* and every following page, appending each to a file.

    For each page the function fetches the HTML, extracts the title, the body
    text and the "next" link via XPath, prints the title, and appends
    title + body to ``./全职妙手.txt`` (UTF-8). It then moves on to the next
    link and repeats until that link no longer contains ``.html`` or no next
    link is found on the page.

    Args:
        url: Address of the first page to download.

    Raises:
        IndexError: If the title element is not found on a page.
        UnicodeDecodeError: If a response body is not valid GBK.
        requests.RequestException: On network failure or timeout.
    """
    filename = './全职妙手.txt'

    # Loop instead of recursing once per page: one stack frame per chapter
    # would exceed the interpreter's recursion limit on a long book.
    while True:
        # A timeout keeps an unresponsive server from blocking the run forever.
        response = requests.get(url, timeout=30)
        # Pages are decoded as GBK — presumably the site's encoding; verify
        # if the target site changes.
        html_doc = response.content.decode('gbk')
        tree = etree.HTML(html_doc)

        # Link to the next chapter (may be absent).
        next_hrefs = tree.xpath('//*[@id="wrapper"]/div[4]/div/div[4]/a[4]//@href')
        # Chapter title.
        node_title = tree.xpath('//*[@id="wrapper"]/div[4]/div/div[2]/h1//text()')[0]
        # Chapter body: every text node under the content element.
        node_content = tree.xpath('//*[@id="content"]//text()')

        # Collect pieces and join once rather than growing a string with +=.
        parts = [node_title, '\n\n']
        for node in node_content:
            parts.append(node.strip('\r'))
            # NOTE(review): the separator is emitted after every text node;
            # the original indentation was lost, so confirm it was not meant
            # to be written only once after the loop.
            parts.append('\n\n')

        print(node_title)
        with open(filename, 'a', encoding='utf-8') as f:
            f.write(''.join(parts))

        # Save the chapter first, then decide whether there is another one.
        if not next_hrefs:
            break
        url = 'https://www.biquga.com/' + next_hrefs[0]
        # Literal substring test; a regex '.html' would let the dot match
        # any character.
        if '.html' not in url:
            break
# Script entry point: start downloading from the module-level `url`.
# This runs at import time and performs network requests and file writes.
get_content(url)