Python爬取珞珈一号卫星数据
首先登录珞珈一号数据系统查询想要的数据
利用浏览器审查元素获取包含下载信息的源码
将页面最右侧 table 对应的网页源码复制到剪贴板备用
利用Python下载数据
# -*- coding: utf-8 -*-
# Download the archives listed in a result table copied from the LuoJia-1
# data portal. Paste the HTML of the result table into ``html`` below and run
# this script; every linked archive is saved under ``data/``.
import os
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm

# Host that the relative download links in the table are resolved against.
BASE_URL = "http://59.175.109.173:8888"
# Directory (relative to the working directory) that receives the downloads.
DATA_DIR = "data"
# Bytes read from the response per iteration while streaming to disk.
CHUNK_SIZE = 64 * 1024
# (connect, read) timeouts in seconds so a stalled server cannot hang the run.
REQUEST_TIMEOUT = (10, 60)

html = '''将table的源码粘贴到这里'''


def saveFile(url, fileName):
    """Stream ``url`` to ``data/<fileName>`` while showing a progress bar.

    The payload is first written to ``<fileName>.part`` and renamed once the
    transfer has finished, so an interrupted download never leaves a
    truncated file under its final name.

    Args:
        url: Absolute URL of the file to download.
        fileName: Name of the file to create inside ``data/``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    os.makedirs(DATA_DIR, exist_ok=True)
    target = os.path.join(DATA_DIR, fileName)
    partial = target + ".part"

    with requests.get(url, stream=True, timeout=REQUEST_TIMEOUT) as r:
        # Fail loudly instead of saving an HTML error page as an archive.
        r.raise_for_status()

        # Content-Length is optional; without it tqdm shows a plain counter.
        length = r.headers.get("Content-Length")
        total = int(length) if length is not None else None

        with open(partial, "wb") as f, tqdm(
            unit="B",
            total=total,
            desc="downloading..." + fileName,
        ) as pbar:
            for chunk in r.iter_content(chunk_size=CHUNK_SIZE):
                if chunk:  # filter out keep-alive chunks
                    f.write(chunk)
                    pbar.update(len(chunk))

    os.replace(partial, target)


def parse_download_table(table_html):
    """Extract the download list from the copied result-table HTML.

    For every row, the last four ``<td>`` cells are used: the text of the
    first three becomes the ``weixing``, ``chuanganqi`` and ``time`` columns,
    and the last ``<a>`` in the fourth cell supplies the download link.

    Args:
        table_html: HTML source that contains the table's ``<tbody>``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``weixing``, ``chuanganqi``,
        ``time``, ``url`` and ``fileName``.

    Raises:
        ValueError: If ``table_html`` contains no ``<tbody>`` element.
    """
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(table_html, "html.parser")
    tbody = soup.find("tbody")
    if tbody is None:
        raise ValueError(
            "no <tbody> found: paste the result table's HTML into `html`"
        )

    records = []
    for tr in tbody.find_all("tr"):
        cells = tr.find_all("td")[-4:]
        links = cells[-1].find_all("a") if len(cells) == 4 else []
        if not links:
            # Rows without four cells or without a link carry no download.
            continue
        record = [td.text for td in cells[:-1]]
        # urljoin copes with hrefs with or without a leading slash, and with
        # hrefs that are already absolute.
        record.append(urljoin(BASE_URL, links[-1]["href"]))
        records.append(record)

    dataSet = pd.DataFrame(
        records, columns=["weixing", "chuanganqi", "time", "url"]
    )
    # The row index is appended so that the generated names stay unique.
    dataSet["fileName"] = (
        dataSet["weixing"]
        + dataSet["chuanganqi"]
        + dataSet["time"]
        + "-"
        + dataSet.index.astype(str)
        + ".tar.gz"
    )
    return dataSet


def main():
    """Parse the pasted table and download every listed file in order."""
    dataSet = parse_download_table(html)
    jobs = zip(dataSet["url"], dataSet["fileName"])
    for url, fileName in tqdm(jobs, total=len(dataSet)):
        saveFile(url, fileName)


if __name__ == "__main__":
    main()
版权声明:本文为wybert原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。