# 赞 (like)
# 踩 (dislike)
import os
import ssl
import time
import urllib.parse
import urllib.request

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# NOTE(review): this swaps the ssl module's default HTTPS context factory for
# the unverified one, so HTTPS requests made through urllib in this process
# skip certificate verification. It also relies on private ``ssl`` names.
# Kept as in the original script; confirm that this is really wanted.
ssl._create_default_https_context = ssl._create_unverified_context

# Search keyword. It is also used verbatim as the output folder name.
# NOTE(review): the trailing space is preserved from the original; confirm
# whether it is intentional.
search_keyword = "cherry flower "
# Number of times the results page is scrolled to the bottom.
search_pages = 100
# Maximum number of images to download.
max_download_num = 200

# Google image-search URL template; "{}" receives the URL-encoded keyword.
google_search_url = 'https://www.google.com/search?q={}&source=lnms&tbm=isch&imgsize=large'
# Path of the chromedriver executable handed to Selenium.
chromedriver_path = '/usr/local/bin/chromedriver'
# Seconds to wait after each scroll before scrolling again.
scroll_pause_seconds = 2


def build_search_url(keyword):
    """Return the image-search URL for *keyword*.

    The keyword is encoded with ``urllib.parse.quote_plus`` so that spaces,
    ``&``, ``#`` and non-ASCII characters cannot break the query string.
    """
    return google_search_url.format(urllib.parse.quote_plus(keyword))


def create_driver():
    """Start a headless Chrome WebDriver and return it.

    The caller is responsible for calling ``quit()`` on the returned driver.
    """
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('--headless')  # run without a visible window
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--disable-dev-shm-usage')
    service = Service(chromedriver_path)
    return webdriver.Chrome(service=service, options=chrome_options)


def collect_image_sources(driver, url, pages):
    """Open *url*, scroll down *pages* times, and return the image sources.

    Args:
        driver: a Selenium WebDriver instance.
        url: the page to open.
        pages: how many times to send the END key to the page body.

    Returns:
        A list with the ``src`` attribute of every ``img.rg_i`` element;
        entries are whatever ``get_attribute`` returned, so they may be None.
    """
    driver.get(url)
    for _ in range(pages):
        # Send END to the <body> element to jump to the bottom of the page,
        # then pause before the next scroll.
        body = driver.find_element(By.TAG_NAME, 'body')
        body.send_keys(Keys.END)
        time.sleep(scroll_pause_seconds)
    img_elements = driver.find_elements(By.CSS_SELECTOR, 'img.rg_i')
    return [img_element.get_attribute('src') for img_element in img_elements]


def download_images(img_src_list, folder, limit):
    """Download up to *limit* images from *img_src_list* into *folder*.

    Files are named ``0.jpg``, ``1.jpg``, ... in download order. Entries that
    are not non-empty strings are skipped. A failed download is reported and
    skipped without consuming a file number.

    Args:
        img_src_list: iterable of image URLs (non-string entries are ignored).
        folder: output directory; created if it does not exist.
        limit: maximum number of images to save.

    Returns:
        The number of images successfully downloaded.
    """
    os.makedirs(folder, exist_ok=True)
    download_num = 0
    for img_src in img_src_list:
        # Check the limit first so that a limit of 0 downloads nothing.
        if download_num >= limit:
            break
        if not isinstance(img_src, str) or not img_src:
            continue
        img_name = os.path.join(folder, f"{download_num}.jpg")
        try:
            urllib.request.urlretrieve(img_src, img_name)
        except Exception as e:
            # Deliberately broad: one bad URL must not abort the whole batch.
            print(f"Failed to download {img_src}: {e}")
            continue
        print(f"Downloaded {img_name}")
        download_num += 1
    return download_num


def main():
    """Search for the keyword, collect image sources, and download them."""
    driver = create_driver()
    try:
        img_src_list = collect_image_sources(
            driver, build_search_url(search_keyword), search_pages
        )
    finally:
        # Always shut the browser down, even if scraping raised.
        driver.quit()
    # The sources are already plain values, so the browser is no longer needed.
    download_images(img_src_list, search_keyword, max_download_num)


if __name__ == "__main__":
    main()
# Copyright © 2003-2013 www.wpsshop.cn. All rights reserved.