利用 Python 爬取点小图片，满足私欲（爬虫）

 1 import requests
 2 import re
 3 import os,sys
 4 
 5 
 6 
 7 
 8 def get_url(page,headers):
 9     url=\'http://www.zbjuran.com/mei/xinggan/list_13_%s.html\'%(page)
10     data=requests.get(url,headers=headers).text
11     data_use=re.findall(\'<div class="name"><a target="_blank" href=".*?" title=".*?</a></div>\',data)
12     for use in data_use:
13         link=\'http://www.zbjuran.com/\'+use.split(\'href="\')[1].split(\'" title\')[0]
14         links.append(link)
15         title=use.split(\'title="\')[1].split(\'">\')[0]
16         titles.append(title)
17         mkpath=\'/Users/b1ancheng/mzpc/%s\'%title
18         def get_pic():
19             url_data=requests.get(link).text
20             print(link)
21             try:
22                 link_page = int(url_data.split(\'<div class="page"><li><a>共\')[1].split(\'页:\')[0])
23                 for i in range(1, link_page + 1):
24                     print(\'正在下载第%s页\'%i)
25                     try:
26                         pic_url = (link[:-5] + \'_%s\' + link[-5:])%i
27                         print(pic_url)
28                         try:
29                             pic_data_link=\'http://www.zbjuran.com\'+requests.get(pic_url,headers=headers).text.split(\'<img alt="" src="\')[1].split(\'" /></div>\')[0]
30                             with open(\'/Users/b1ancheng/mzpc/%s/%s_%s.JPG\' % (title, title,i),\'wb\') as pic_download:
31                                 pic_download.write(requests.get(pic_data_link).content)
32                         except Exception as otherdown:
33                             print(otherdown)
34                             pic_data_link = \'http://www.zbjuran.com\' + requests.get(pic_url, headers=headers).text.split(\'<img src="\')[1].split(\'" /></div>\')[0]
35                             with open(\'/Users/b1ancheng/mzpc/%s/%s_%s.JPG\' % (title, title,i),\'wb\') as pic_download:
36                                 pic_download.write(requests.get(pic_data_link).content)
37                             continue
38                     except Exception as error:
39                         print(error)
40                         continue
41             except Exception as e1:
42                 print(e1)
43                 os.rmdir(mkpath)
44                 pass
45         # 创建目录          //可修改进get_pic
46         isExists = os.path.exists(mkpath)
47         if not isExists:
48             os.makedirs(mkpath)
49             get_pic()
50         else:
51             return False
if __name__ == '__main__':
    # Shared accumulators: get_url() appends every gallery link and title
    # it finds to these module-level lists.
    links, titles = [], []

    # Browser-like headers handed to get_url() for each listing page.
    headers = {}
    headers['User-Agent'] = (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/61.0.3163.100 Safari/537.36'
    )
    headers['Host'] = 'www.zbjuran.com'
    headers['Cookie'] = (
        'UM_distinctid=15ef9964528386-07264d76850875-31657c00-13c680-15ef9964529361; '
        'CNZZDATA1264461841=1179231757-1507422986-null%7C1508056601'
    )

    # Walk listing pages 1 through 87.
    for page in range(1, 88):
        get_url(page, headers=headers)
望兄多提意见，相互进步
本文链接：https://www.cnblogs.com/b1ancheng/p/7671148.html
利用python爬取点小图片，满足私欲(爬虫)

利用python爬取点小图片，满足私欲(爬虫)的更多相关文章

随机推荐

热门专题

目录导航