当前位置:   article > 正文

爬虫selenium下载搜索百度图片_selenium 安装 爬百度图片 mac

selenium 安装 爬百度图片 mac
  1. # -- coding: utf-8 --
  import os
  import re
  import time

  import requests
  from selenium import webdriver
  from selenium.webdriver.common.by import By
  # Make sure the output directory for downloaded images exists.
  os.makedirs('imgs', exist_ok=True)

  # NOTE(review): passing the driver path positionally only works on
  # Selenium < 4.10; newer releases expect
  # webdriver.Chrome(service=Service(path)) -- confirm the installed version.
  bd_browser = webdriver.Chrome('chromedriver.exe')
  bd_browser.maximize_window()

  # Let every element lookup below wait up to 20 s for its element to appear.
  bd_browser.implicitly_wait(20)

  # The script read from stealth.min.js (presumably an anti-bot-detection
  # bundle -- confirm) has to be registered with the browser to have any
  # effect; register it before navigating so it runs ahead of the page's own
  # scripts on every new document.
  with open('stealth.min.js', 'r', encoding='utf-8') as f:
      js_code = f.read()
  bd_browser.execute_cdp_cmd(
      'Page.addScriptToEvaluateOnNewDocument',
      {'source': js_code},
  )

  # Open Baidu image search, type the user's keyword and submit the query.
  bd_browser.get('https://image.baidu.com/')
  input_jd = bd_browser.find_element(By.ID, 'kw')
  input_jd.send_keys(input("请输入要搜索图片的名字:"))
  cick_job = bd_browser.find_element(By.CLASS_NAME, 's_newBtn')
  cick_job.click()
  def drop_down(times=100, step=300, pause=0.3):
      """Scroll the current page down in small increments.

      Presumably used to make the results page load more images before they
      are collected -- confirm against the page's behavior.

      Args:
          times: Number of scroll steps to perform.
          step: Pixels scrolled per step.
          pause: Seconds to sleep before each step.
      """
      for _ in range(times):
          time.sleep(pause)
          bd_browser.execute_script('window.scrollBy(0, arguments[0])', step)
      # The implicit wait is a session-wide setting, so set it once after
      # scrolling rather than on every step.
      bd_browser.implicitly_wait(50)
  # Characters stripped from image titles before they are used as file names:
  # space | ? : / > . , _ " plus the remaining characters Windows forbids in
  # file names (\ * <).
  _TITLE_STRIP_TABLE = str.maketrans('', '', ' |?:/>.,_"\\*<')


  def _clean_title(title):
      """Return *title* with characters unsuitable for a file name removed."""
      return (title or '').translate(_TITLE_STRIP_TABLE)


  def Download():
      """Save every image found on the current results page into ``imgs/``.

      Titles (elements with class ``imgitem-title``) are paired in page order
      with images (elements with class ``main_img``). Each image is fetched
      from its ``src`` URL and written to ``imgs/<cleaned title>.png``.

      Images whose ``src`` is missing or is not an http(s) URL are skipped,
      repeated titles get a numeric suffix instead of overwriting each other,
      and a failed download or write is reported and does not stop the run.
      """
      thumbnails = bd_browser.find_elements(By.CLASS_NAME, 'main_img')
      captions = bd_browser.find_elements(By.CLASS_NAME, 'imgitem-title')
      headers = {
          "user-agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36",
      }
      used_names = {}
      for caption, thumbnail in zip(captions, thumbnails):
          url = thumbnail.get_attribute('src')
          # requests cannot fetch an absent src or a non-http(s) one such as
          # an inline data: URI.
          if not url or not url.startswith(('http://', 'https://')):
              continue

          name = _clean_title(caption.get_attribute('title')) or 'image'
          # Keep images that share a title by numbering the repeats.
          repeats = used_names.get(name, 0)
          used_names[name] = repeats + 1
          if repeats:
              name = f'{name}_{repeats}'

          try:
              response = requests.get(url, headers=headers, timeout=10)
              response.raise_for_status()
          except requests.RequestException as err:
              print(name + "下载失败:", err)
              continue

          try:
              with open('imgs/' + name + ".png", 'wb') as f:
                  f.write(response.content)
          except OSError:
              print(name + "不符合古命名规则")
  if __name__ == '__main__':
      try:
          drop_down()
          Download()
      finally:
          # Always close the browser and its driver process, even when
          # scrolling or downloading raises.
          bd_browser.quit()

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/你好赵伟/article/detail/801132
推荐阅读
相关标签
  

闽ICP备14008679号