用Python编写爬取股票信息的代码
北京理工大学嵩天老师编写的小课件，很不完善，放进来只是为了方便自己用手机查看。
import re
import traceback

import bs4
import requests

# Exchange-prefixed stock code as it appears in listing links,
# e.g. "sh600000" or "sz000001".
_STOCK_CODE_RE = re.compile(r"[s][hz]\d{6}")


def getHTMLText(url, code="utf-8", timeout=30):
    """Fetch a page and return its body decoded with the given encoding.

    Args:
        url: Address of the page to download.
        code: Encoding used to decode the response body. It is set
            explicitly instead of using ``apparent_encoding`` because
            detecting the encoding on every page is slow.
        timeout: Seconds to wait for the server before giving up, so a
            single stalled connection cannot hang the whole crawl.

    Returns:
        The decoded page text, or an empty string if the request failed
        or the server answered with an error status.
    """
    try:
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()
    except requests.RequestException:
        # Best effort: callers treat "" as "page unavailable".
        return ""
    r.encoding = code
    return r.text


def getStockList(lst, stockURL):
    """Append every stock code linked from the listing page to ``lst``.

    Args:
        lst: List that receives the codes; it is modified in place.
        stockURL: Address of the stock listing page.

    The listing page is decoded as GB2312. Each ``<a>`` tag contributes
    the first stock code found in its ``href``; links without an
    ``href`` or without a stock code are skipped.
    """
    html = getHTMLText(stockURL, "GB2312")
    soup = bs4.BeautifulSoup(html, "html.parser")
    for anchor in soup.find_all("a"):
        href = anchor.get("href")
        if not href:
            continue
        match = _STOCK_CODE_RE.search(href)
        if match:
            lst.append(match.group(0))


def _parse_stock_info(html):
    """Extract one stock's name and key/value details from its page.

    Returns:
        A dict holding the stock name plus one entry per ``<dt>``/``<dd>``
        pair, or ``None`` when the page lacks the expected
        ``stock-bets`` block or a usable name.
    """
    soup = bs4.BeautifulSoup(html, "html.parser")
    stock_info = soup.find("div", attrs={"class": "stock-bets"})
    if stock_info is None:
        return None

    name_tag = stock_info.find(attrs={"class": "bets-name"})
    if name_tag is None:
        return None
    name_parts = name_tag.text.split()
    if not name_parts:
        return None

    info = {"股票名称": name_parts[0]}
    # <dt> tags hold the field names and <dd> tags the matching values.
    # zip() stops at the shorter list, so an unmatched trailing tag is
    # ignored instead of raising IndexError.
    keys = stock_info.find_all("dt")
    values = stock_info.find_all("dd")
    for key_tag, value_tag in zip(keys, values):
        info[key_tag.text] = value_tag.text
    return info


def getStockInfo(lst, stockURL, fpath):
    """Download each stock's page and append its details to a file.

    Args:
        lst: Stock codes to look up.
        stockURL: URL prefix; the page address is
            ``stockURL + code + ".html"``.
        fpath: Output file. One ``str(dict)`` line is appended per stock
            that could be parsed.

    Returns:
        Always an empty string (kept for compatibility with existing
        callers).

    Raises:
        OSError: If the output file cannot be opened or written. This
            fails once up front rather than silently discarding every
            record.

    Progress is printed as a percentage on a single, continuously
    rewritten console line. Every stock advances the progress, including
    those whose page was empty or could not be parsed.
    """
    if not lst:
        return ""

    total = len(lst)
    # Open the output once for the whole run instead of once per stock.
    with open(fpath, "a", encoding="utf-8") as f:
        for done, stock in enumerate(lst, start=1):
            html = getHTMLText(stockURL + stock + ".html")
            if html:
                try:
                    info = _parse_stock_info(html)
                except Exception:
                    # Per-stock boundary: report the unexpected failure
                    # and carry on with the remaining stocks.
                    traceback.print_exc()
                    info = None
                if info is not None:
                    f.write(str(info) + "\n")
            print("\r当前进度:{:.2f}%".format(done * 100 / total), end="")
    return ""


def main():
    """Collect the stock codes, then scrape and save each stock's details."""
    # Page that links to every individual stock.
    stock_list_url = "http://quote.eastmoney.com/stocklist.html"
    # URL prefix of the per-stock detail pages.
    stock_info_url = "https://gupiao.baidu.com/stock/"
    # Where the scraped records are saved.
    output_file = "C:\\Users\\W419L\\Desktop\\股票爬取.txt"

    slist = []  # stock codes found on the listing page
    getStockList(slist, stock_list_url)
    getStockInfo(slist, stock_info_url, output_file)


if __name__ == "__main__":
    main()
版权声明:本文为csrw原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。