赞
踩
from requests_html import HTMLSession #载入爬虫模块
如果没有安装 requests_html 模块,可按如下方式安装:pip install requests-html
from requests_html import HTMLSession #载入爬虫模块
from requests_html import HTMLSession #载入爬虫模块
# Template handed to search_all(); the `{}` placeholder stands for the
# thumbnail URL that follows each "thumbURL" key in the page source.
img_url_regex = '"thumbURL":"{}",'
# One session, reused for every request.
session = HTMLSession()
# Example search keyword: "二傻子".
search_page_url = 'http://image.baidu.com/search/index?tn=baiduimage&fm=result&ie=utf-8&word=二傻子'
response = session.get(search_page_url)
# Collect every occurrence of the template in the fetched page.
img_url_list = response.html.search_all(img_url_regex)
from requests_html import HTMLSession  # HTML scraping session

# One session, reused for the search page and for every image download.
session = HTMLSession()
# Example search keyword: "二傻子".
response = session.get('http://image.baidu.com/search/index?tn=baiduimage&fm=result&ie=utf-8&word=二傻子')
# Template handed to search_all(); the `{}` placeholder stands for the
# thumbnail URL that follows each "thumbURL" key in the page source.
img_url_regex = '"thumbURL":"{}",'
# Collect every occurrence of the template in the fetched page.
img_url_list = response.html.search_all(img_url_regex)

# Files are numbered from 1; `mun` stays 0 when no URL was found.
mun = 0
for mun, url in enumerate(img_url_list, start=1):
    # Each match is indexable; position 0 holds the captured URL.
    # A separate name keeps the search-page `response` intact.
    img_response = session.get(url[0])
    # Write the raw bytes to a local file.
    with open(f'第{mun}张.jpg', 'wb') as fw:
        fw.write(img_response.content)
# -*- coding: utf-8 -*-
# @Author  : xionghao.chen
# @File    : BaiduImgSession.py
# @date    : 2020.03.13
# @Software: PyCharm
# Crawl images from Baidu image search.
import os
from urllib.parse import quote

from requests_html import HTMLSession


class BaiduImgSession:
    """Interactive downloader for Baidu image-search thumbnails.

    Typical use is ``BaiduImgSession().run()``: the user is asked for a
    keyword, the thumbnail URLs are collected page by page, and the first
    N of them are saved under ``images/<keyword>/``.
    """

    # HTTP session shared by all requests.
    session = HTMLSession()
    # Template for a thumbnail URL in an HTML result page; none of the
    # methods below read it.
    img_url_regex = '"thumbURL":"{}",'
    # Search URL without the trailing ``&pn=`` offset; set by get_search().
    url = ''
    # Class-level default kept for backward compatibility; __init__ gives
    # every instance its own list.
    img_url_list = []
    # Output directory ("images/<keyword>"); set by get_search().
    SearchName = ''

    def __init__(self):
        # A list defined only on the class would be shared, so URLs from
        # one instance would leak into every other instance.
        self.img_url_list = []

    def get_search(self):
        """Ask for a keyword and prepare the search URL and output folder.

        Sets ``self.url`` and ``self.SearchName`` and makes sure the
        ``images`` directory exists.
        """
        search = input('请输入你要搜索的图片')
        # Percent-encode the keyword so characters such as '&', '#' or
        # spaces cannot break the query string.
        keyword = quote(search)
        # Not every query parameter has been analysed; only the key ones
        # are filled in.
        self.url = f'https://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&ct=201326592&is=&fp=result&queryWord={keyword}&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1&z=&ic=0&hd=&latest=&copyright=&word={keyword}&s=&se=&tab=&width=&height=&face=0&istype=2&qc=&nc=1&fr=&expermode=&force=&rn=30&gsm='
        self.SearchName = 'images/' + search
        os.makedirs('images', exist_ok=True)

    def get_img_url_list(self):
        """Collect thumbnail URLs into ``self.img_url_list``.

        Pages are requested 30 results at a time until the response
        reports ``bdIsClustered == '2'``, a page yields no thumbnail, or a
        request/parsing error occurs. On error the URLs gathered so far
        are kept and the error is printed.
        """
        pn = 0
        try:
            while True:
                # Baidu limits crawling to roughly 450 (possibly 480)
                # images; this has not been analysed further.
                res = self.session.get(f'{self.url}&pn={pn}')
                payload = res.json()  # parse the body once per page
                if payload['bdIsClustered'] == '2':
                    break
                pn += 30
                page_urls = [
                    dic['thumbURL']
                    for dic in payload['data']
                    if dic.get('thumbURL')
                ]
                if not page_urls:
                    # Without this the loop would never end if the server
                    # kept answering without the end marker.
                    break
                self.img_url_list.extend(page_urls)
        except (OSError, ValueError, KeyError, TypeError, AttributeError) as e:
            # Best effort: keep what was collected, but report the failure
            # instead of hiding it.
            print("BaiduImgSession stopped collecting image urls: ", e)

    def save_img(self, numMax):
        """Download at most ``numMax`` collected images.

        Files are written to ``<SearchName>/<6-digit index>.jpg`` with the
        index starting at 1. A ``numMax`` of zero or less saves nothing.
        """
        os.makedirs(self.SearchName, exist_ok=True)
        # max(..., 0) stops a negative count from being read as a
        # "drop the last N" slice.
        wanted = self.img_url_list[:max(numMax, 0)]
        for num, url in enumerate(wanted, start=1):
            # Fetch the image.
            response = self.session.get(url)
            # Write the raw bytes to a local file.
            with open(f'{self.SearchName}/{num:06d}.jpg', 'wb') as fw:
                fw.write(response.content)
            print(f'第{num}张保存本地完毕')

    def run(self):
        """Run the whole interactive flow: search, collect, then save."""
        self.get_search()
        self.get_img_url_list()
        print("可保存的图片数量为:%d"%len(self.img_url_list))
        numMax = int(input('请输入你要保存的图片的数量'))
        self.save_img(numMax)


if __name__ == '__main__':
    try:
        imgSession = BaiduImgSession()
        imgSession.run()
        print("BaiduImgSession finish")
    except Exception as err:
        print("BaiduImgSession occur error: ",err)
    # `pause` is a Windows console command, so it is skipped elsewhere.
    # NOTE(review): running it after the try/except (not only on error) is
    # reconstructed from a flattened listing — confirm.
    if os.name == 'nt':
        os.system('pause')
*images\深度学习*
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。