赞
踩
爬取图片主要用到了 urllib.request 模块中的 urlretrieve 函数(把给定 URL 的内容直接保存为本地文件):
话不多说,直接上代码:
# -*- coding: utf-8 -*-
"""
Created on Sat Oct 31 14:41:46 2020

@author: zxw

Download the images shown on a Zhihu question page with Selenium.

Workflow (see the ``__main__`` section at the bottom):

1. ``login_cookie()`` opens a browser, waits for a manual login and saves
   the session cookies to ``COOKIE_FILE``.
2. A second browser is started and ``login()`` restores those cookies.
3. ``get_pictures()`` opens the question page, scrolls to the bottom
   repeatedly and saves every matching image with ``urlretrieve``.

``login()`` and ``get_pictures()`` read the module-level globals ``driver``
(and ``path`` for ``get_pictures``) that the ``__main__`` section assigns.
"""

# Required libraries
import json
import os
import re
import string
import time
import urllib
from urllib.request import urlretrieve

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Page used both for the manual login and for restoring the cookies.
LOGIN_URL = 'https://www.zhihu.com/'

# Where the login cookies are stored as JSON. Adjust this path for your own
# machine; login_cookie() writes it and login() reads it.
COOKIE_FILE = 'C:/Users/zxw/Desktop/修身/与自己/数据分析/数据分析/爬虫/cookies/zhihu.txt'


def get_driver():
    """Return a new WebDriver: PhantomJS if it can be started, else Firefox.

    The fallback is deliberately broad (``except Exception``): any failure
    to create the PhantomJS driver, including the attribute being absent
    from the installed Selenium, results in a Firefox driver instead.
    """
    try:
        return webdriver.PhantomJS()
    except Exception:
        return webdriver.Firefox()


# Obtain the login cookies
def login_cookie():
    """Let the user log in manually and save the cookies to ``COOKIE_FILE``.

    Opens its own browser, loads ``LOGIN_URL``, blocks on ``input()`` until
    the user presses Enter, then dumps ``driver.get_cookies()`` as JSON.
    The browser is always closed, even if one of the steps raises.
    """
    driver = get_driver()
    try:
        driver.set_page_load_timeout(20)
        driver.set_script_timeout(20)
        driver.get(LOGIN_URL)
        time.sleep(5)
        input("请登录后按 Enter")
        cookies = driver.get_cookies()
        with open(COOKIE_FILE, 'w') as f:
            f.write(json.dumps(cookies))
    finally:
        driver.quit()


# Log in again using the saved cookies
def login():
    """Restore the cookies from ``COOKIE_FILE`` into the global ``driver``.

    The page is loaded before the cookies are added and refreshed
    afterwards (presumably so the restored session takes effect).
    """
    driver.set_page_load_timeout(20)
    driver.set_script_timeout(20)
    driver.get(LOGIN_URL)
    time.sleep(5)
    # Context manager so the cookie file is closed after reading.
    with open(COOKIE_FILE) as f:
        saved_cookies = json.load(f)
    for cookie in saved_cookies:
        driver.add_cookie(cookie)
    driver.refresh()
    time.sleep(5)


# Download the images under a given question
def get_pictures(question_url):
    """Save the images found on ``question_url`` into the global ``path``.

    Uses the global ``driver``. Clicks the link whose text contains
    '查看全部', scrolls to the bottom of the page 50 times (one second
    apart), then downloads every matched image whose ``src`` starts with
    ``https`` as ``<path>/1.jpg``, ``<path>/2.jpg``, ... in page order.

    Args:
        question_url: URL of the Zhihu question/answer page to open.
    """
    driver.get(question_url)
    driver.find_element(By.PARTIAL_LINK_TEXT, '查看全部').click()

    scroll_js = "window.scrollTo(0,document.body.scrollHeight)"
    for _ in range(50):
        driver.execute_script(scroll_js)
        time.sleep(1)

    # The three class names form a compound selector, so use a CSS locator
    # rather than the class-name locator (which expects a single class).
    img_list = driver.find_elements(
        By.CSS_SELECTOR, '.origin_image.zh-lightbox-thumb.lazy')

    # urlretrieve does not create missing directories.
    os.makedirs(path, exist_ok=True)

    saved = 0
    for img in img_list:
        img_src = img.get_attribute('src')
        # get_attribute may return None; skip those and non-https sources.
        if not img_src or not img_src.startswith('https'):
            continue
        saved += 1
        urlretrieve(img_src, path + '/' + str(saved) + '.jpg')
        time.sleep(1)
        print('获得第' + str(saved) + '张图片')


if __name__ == "__main__":
    # Set the question you want to scrape
    question_url = 'https://www.zhihu.com/question/328457531/answer/855549300'
    # Directory the images are saved into; adjust for your own machine.
    path = 'C:/Users/zxw/Desktop/修身/与自己/数据分析/数据分析/爬虫/img'
    login_cookie()
    driver = get_driver()
    try:
        login()
        get_pictures(question_url)
    finally:
        # Always close the browser so no driver process is left running.
        driver.quit()
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。