赞
踩
由于工作需要,要从网上获取大量图片。百度图片是一个动态加载的网页:图片列表并不在初始页面源码中,而是随着页面滚动通过异步请求(XHR)陆续加载的,因此需要抓取这些动态请求的接口来爬取图片。
import requests
import json
import os
- 下载链接分析
首先,打开百度,搜索一个内容
然后,打开抓包工具,选择XHR选项,按Ctrl+R,然后你会发现,随着你鼠标的滑动,右侧会出现一个又一个的数据包。
这里简单说一下什么是抓包工具:这里指浏览器自带的开发者工具(一般按F12打开)中的Network(网络)面板,它可以列出页面发出的每一个网络请求及其响应内容。
抓取到的数据包如下:
然后,选一个包,查看它的headers,如图:
截取Request URL中问号(?)之前的部分并保存下来,作为请求的URL(即代码中的 https://image.baidu.com/search/acjson)
Request URL中问号(?)之后的查询参数也可以保存下来,作为请求参数(对应代码中的params字典)使用,如图:
结果展示:
- 代码分析
# -*- coding: UTF-8 -*-
"""Download thumbnail images from Baidu image search for a given keyword.

The script queries the ``acjson`` endpoint page by page, collects the
``thumbURL`` of each returned entry and saves every thumbnail as a ``.jpg``
file until the requested number of images has been downloaded or the
endpoint stops returning results.
"""
import os

import requests
import tqdm

# Offset step between consecutive result pages (the "pn" query parameter).
PAGE_STRIDE = 60
# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 10


def configs(search, page, number):
    """Build the endpoint URL and query parameters for one result page.

    :param search: keyword to search for
    :param page: zero-based page index; the result offset is ``60 * page``
    :param number: how many results to ask for in this request
    :return: ``(url, params)`` suitable for ``requests.get``
    """
    url = 'https://image.baidu.com/search/acjson'
    params = {
        "tn": "resultjson_com",
        "logid": "11555092689241190059",
        "ipn": "rj",
        "ct": "201326592",
        "is": "",
        "fp": "result",
        "queryWord": search,
        "cl": "2",
        "lm": "-1",
        "ie": "utf-8",
        "oe": "utf-8",
        "adpicid": "",
        "st": "-1",
        "z": "",
        "ic": "0",
        "hd": "",
        "latest": "",
        "copyright": "",
        "word": search,
        "s": "",
        "se": "",
        "tab": "",
        "width": "",
        "height": "",
        "face": "0",
        "istype": "2",
        "qc": "",
        "nc": "1",
        "fr": "",
        "expermode": "",
        "force": "",
        "pn": str(PAGE_STRIDE * page),
        "rn": number,
        "gsm": "1e",
        "1617626956685": "",
    }
    return url, params


def loadpic(number, page):
    """Download up to ``number`` thumbnails, starting at result page ``page``.

    Reads the module-level names ``search``, ``header``, ``path`` and ``bar``
    that the ``__main__`` section sets up before calling this function.

    :param number: how many images are still wanted
    :param page: zero-based index of the first result page to fetch
    :return: None
    """
    while number > 0:
        url, params = configs(search, page, number)
        response = requests.get(url, headers=header, params=params,
                                timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        result = response.json()

        # As in the original, the last entry of "data" is dropped
        # (presumably an empty placeholder -- confirm against a live
        # response). Entries without a thumbnail URL are skipped instead of
        # raising KeyError.
        entries = (result.get('data') or [])[:-1]
        url_list = [entry['thumbURL'] for entry in entries
                    if entry.get('thumbURL')]

        # No more results: stop instead of requesting pages forever.
        if not url_list:
            break

        for i, img_url in enumerate(url_list):
            try:
                getImg(img_url, PAGE_STRIDE * page + i, path)
            except requests.RequestException:
                # A single failed thumbnail should not abort the whole run;
                # it is simply not counted towards the requested total.
                continue
            bar.update(1)
            number -= 1
            if number == 0:
                break
        page += 1
    print("\nfinish!")


def getImg(url, idx, path):
    """Download one image and store it as ``maintenanceWorker_<idx+1>.jpg``.

    Reads the module-level ``header`` dict for the request headers.

    :param url: address of the image to download
    :param idx: zero-based image index; the file name uses ``idx + 1``
    :param path: directory prefix (including the trailing separator) that the
        file name is appended to
    :return: None
    :raises requests.RequestException: if the download fails or the server
        answers with an error status
    """
    img = requests.get(url, headers=header, timeout=REQUEST_TIMEOUT)
    # Do not save an HTTP error page under a .jpg name.
    img.raise_for_status()
    if path:
        os.makedirs(path, exist_ok=True)
    # The context manager closes the file even if the write fails.
    with open(path + 'maintenanceWorker_' + str(idx + 1) + '.jpg', 'wb') as file:
        file.write(img.content)


if __name__ == '__main__':
    search = input("请输入搜索内容:")
    number = int(input("请输入需求数量:"))
    # Raw string: same value as before, without invalid "\d" / "\M" escapes.
    path = r'E:\data\MaintenanceWorker/'
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36'}
    bar = tqdm.tqdm(total=number)
    page = 0
    loadpic(number, page)
    bar.close()
谷歌图片爬取见:https://blog.csdn.net/Wenweno0o/article/details/121487706
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。