python爬虫之爬取小说(一)
爬取“盗墓笔记”小说
"""Web scraper for the novel 盗墓笔记 ("The Grave Robbers' Chronicles").

Interactively prompts for chapter numbers, downloads each chapter page
from www.taiuu.com, and appends the title and body text to a local
UTF-8 text file.
"""

import requests
from bs4 import BeautifulSoup

# Chapter N of the novel lives at BASE_URL + str(N + CHAPTER_OFFSET) + '.html'.
CHAPTER_OFFSET = 78209
BASE_URL = 'http://www.taiuu.com/0/67/'
OUTPUT_FILE = '盗墓笔记.txt'

# Present a desktop-browser User-Agent so the site does not reject the request.
HEADERS = {
    'User-Agent': ('Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                   '(KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36')
}


def open_url(url):
    """Fetch *url* and return its HTML decoded as GBK.

    Raises requests.HTTPError on a non-2xx response (the original code
    silently went on to parse error pages).
    """
    r = requests.get(url, headers=HEADERS)
    r.raise_for_status()  # fail fast instead of scraping an error page
    # The site serves GBK-encoded pages; requests' charset guess is wrong,
    # so force the decoding explicitly.
    r.encoding = 'gbk'
    return r.text


def get_title(html):
    """Return the chapter title (text of <div class="h1title">) plus a newline."""
    soup = BeautifulSoup(html, 'lxml')
    title_tag = soup.find('div', class_='h1title')
    return title_tag.text + '\n'


def get_text(html):
    """Return the chapter body text (text of <div id="htmlContent">)."""
    soup = BeautifulSoup(html, 'lxml')
    text_tag = soup.find('div', id='htmlContent')
    return text_tag.text


def save(title, text):
    """Append *title* and *text* to OUTPUT_FILE, creating it if needed."""
    with open(OUTPUT_FILE, 'a+', encoding='utf-8') as file:
        file.write(title)
        file.write(text)
    print('下载完成!')


def main():
    """Loop: ask for a chapter number, download and save it, until the user quits."""
    while True:
        try:
            # Fix: a non-numeric answer used to crash the script with ValueError.
            num = int(input('请输入你想要下载第几章:')) + CHAPTER_OFFSET
        except ValueError:
            print('请输入数字!')
            continue
        url = BASE_URL + str(num) + '.html'
        html = open_url(url)
        title = get_title(html)
        text = get_text(html)
        save(title, text)
        repeat = input('请问还要继续下载吗?(y/n)')
        if repeat != 'y':  # anything other than 'y' exits, as before
            break
    print('已退出!')


if __name__ == '__main__':
    main()
版权声明:本文为jsxxd原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。