自己写 Python 爬虫脚本,从百度图片上下载图片
参考URL: https://blog.csdn.net/z704630835/article/details/82992036
1 下载脚本
# Required libraries
import json
import os

import requests


# Query Baidu image search and collect the JSON result pages
def getManyPages(keyword, pages):
    """Request result pages from the Baidu image-search JSON endpoint.

    Args:
        keyword: Search keyword for the images to download.
        pages: Number of result pages to request (30 results per page).

    Returns:
        A list with one entry per successfully parsed page; each entry is
        the value of the ``data`` field of the JSON response. Pages that
        fail to download, fail to parse, or carry no ``data`` are skipped.
    """
    url = 'https://image.baidu.com/search/acjson'
    urls = []
    # NOTE(review): ``pn`` looks like the result offset; the range starts at
    # 30, so offset 0 is never requested -- confirm whether that is intended.
    for offset in range(30, 30 * pages + 30, 30):
        params = {
            'tn': 'resultjson_com',
            'ipn': 'rj',
            'ct': 201326592,
            'is': '',
            'fp': 'result',
            'queryWord': keyword,
            'cl': 2,
            'lm': -1,
            'ie': 'utf-8',
            'oe': 'utf-8',
            'adpicid': '',
            'st': -1,
            'z': '',
            'ic': 0,
            'word': keyword,
            's': '',
            'se': '',
            'tab': '',
            'width': '',
            'height': '',
            'face': 0,
            'istype': 2,
            'qc': '',
            'nc': 1,
            'fr': '',
            'pn': offset,
            'rn': 30,
            'gsm': '1e',
            '1488942260214': '',
        }
        try:
            # A timeout keeps one stalled connection from hanging the run.
            data = requests.get(url, params=params, timeout=10).json().get('data')
        except ValueError:
            # Every JSON decode error (json, simplejson, requests) is a
            # ValueError subclass, so this covers all backends.
            print("解析出错")
            continue
        except requests.RequestException as err:
            print('请求失败:%s' % err)
            continue
        # Skip pages without a usable ``data`` list so that getImg never has
        # to iterate over None.
        if data:
            urls.append(data)
    return urls


# Download the images and save them to disk
def getImg(dataList, localPath):
    """Download every thumbnail referenced in ``dataList`` into ``localPath``.

    Args:
        dataList: List of result pages as returned by ``getManyPages``; each
            page is a list of dicts that may contain a ``thumbURL`` key.
        localPath: Directory where the images are stored. It is created when
            missing. Files are named ``0.jpg``, ``1.jpg``, ... in download
            order.
    """
    # Create the target directory if it does not exist yet.
    os.makedirs(localPath, exist_ok=True)

    x = 0
    for page in dataList:
        for item in page or ():
            thumb_url = item.get('thumbURL')
            if thumb_url is None:
                print('图片链接不存在')
                continue

            print('正在下载:%s' % thumb_url)
            try:
                ir = requests.get(thumb_url, timeout=10)
                ir.raise_for_status()
            except requests.RequestException as err:
                # One broken link must not abort the remaining downloads.
                print('下载失败:%s' % err)
                continue

            # os.path.join works with or without a trailing separator on
            # localPath; the with-block guarantees the file is closed.
            with open(os.path.join(localPath, '%d.jpg' % x), 'wb') as fh:
                fh.write(ir.content)
            x += 1


# Download images for a given keyword
if __name__ == '__main__':
    dataList = getManyPages('吃惊', 20)  # arg 1: keyword, arg 2: number of pages
    getImg(dataList, './data/chijing/')  # arg 2: directory to save into
2 通过人脸检测来过滤非人脸和剪切人脸
2.1 使用opencv的人脸检测 #!/usr/bin/env python # -*- coding:utf-8-*- import os import os.path as osp import cv2 import glob from io_helper import * cv_root = \'D:/install packages/opencv-3.4.2/data/haarcascades\' cv_face_model_path = cv_root + \'/haarcascade_frontalface_alt2.xml\' cv_face_model_path2 = cv_root + \'/haarcascade_profileface.xml\' def test_face_detect_cv(): classifier1 = cv2.CascadeClassifier(cv_face_model_path) # 正脸 filepath = \'\' img = cv2.imread(filepath) # 读取图片 gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # 转换灰色 faceRects = classifier1.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=1, minSize=(10, 10)) if len(faceRects): # 大于0则检测到人脸 for box in faceRects: # 单独框出每一张人脸 x, y, w, h = box cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 1) cv2.imshow(\'a\', img) cv2.waitKey(0) cv2.destroyAllWindows() def face_detect_save(): path = r\'D:/AI/DataSet/emotion/fer2013/train_class\' files = glob.glob(path + \'/**/*.jpg\') new_dir = path + \'/cut_face\' new_dir2 = path + \'/no_face\' mkdir_if_not_exist(new_dir) mkdir_if_not_exist(new_dir2) # OpenCV人脸识别分类器 classifier1 = cv2.CascadeClassifier(cv_face_model_path) # 正脸 # classifier2 = cv2.CascadeClassifier(cv_face_model_path2) # 侧脸 for filepath in files: chd_dir = new_dir + \'/\' + filepath.split(\'\\\')[-2] mkdir_if_not_exist(chd_dir) chd_dir2 = new_dir2 + \'/\' + filepath.split(\'\\\')[-2] mkdir_if_not_exist(chd_dir2) filename = osp.basename(filepath) img = cv2.imread(filepath) # 读取图片 gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # 转换灰色 color = (0, 255, 0) # 定义绘制颜色 # 调用识别 正脸人脸 faceRects = classifier1.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=1, minSize=(10, 10)) if len(faceRects): for box in faceRects: # 单独框出每一张人脸 x, y, w, h = box face_roi = img[y:y + h, x:x + w, :] file = chd_dir + \'/\' + filename cv2.imwrite(file,face_roi) else: file = chd_dir2 + \'/\' + filename shutil.copy(filepath,file) print(\'work is done .\') if __name__ == \'__main__\': face_detect_save() 2.2 使用mtcnn的包进行人脸检测 
-----------------------------------------
使用 Python 公开包 mtcnn 来进行人脸检测和关键点检测。安装命令:
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple mtcnn
import os

# Choose the GPU through environment variables. They are set before
# TensorFlow is imported so they are already in place when it loads.
gpu_id = 3
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)

import tensorflow as tf
from mtcnn.mtcnn import MTCNN

detector = MTCNN(scale_factor=0.99)

# NOTE(review): ``img`` is not defined in this snippet; it must be loaded
# beforehand. It is indexed below as [row, column, channel].
src_h, src_w = img.shape[:2]  # image height and width, used to clamp the crop

face_list = detector.detect_faces(img)
for item in face_list:
    box = item['box']
    conf = item['confidence']
    keypoints_dict = item['keypoints']
    # Example value:
    # {'left_eye': (14, 16), 'right_eye': (31, 12),
    #  'nose': (23, 25), 'mouth_left': (19, 35), 'mouth_right': (33, 32)}
    left_eyeXY = keypoints_dict['left_eye']
    right_eyeXY = keypoints_dict['right_eye']
    noseXY = keypoints_dict['nose']
    mouth_leftXY = keypoints_dict['mouth_left']
    mouth_rightXY = keypoints_dict['mouth_right']

    if conf > 0:
        print('detect a face .')
        x, y, w, h = box
        # Grow the box by ``offset`` pixels on every side, clamped so the
        # crop stays inside the image.
        offset = 5
        x = max(0, x - offset)
        y = max(0, y - offset)
        w = min(w + 2 * offset, src_w - x)
        h = min(h + 2 * offset, src_h - y)
        face_img = img[y:y + h, x:x + w, :]
-----------------------------------
2.3 使用关键点来进行人脸对齐
版权声明:本文为dxscode原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。