赞
踩
- from selenium import webdriver
- import time
- import os
- import uuid
- import requests
-
# Module-level accumulator: recursion_spider() appends one
# {"title": ..., "imgSrc": ..., "price": ...} dict per scraped product.
rows = []
-
-
def _scrape_product(phone, dir_path):
    """Record one product tile in ``rows`` and download its image.

    ``phone`` is a ``.gl-item`` WebElement; ``dir_path`` is the existing
    directory that receives the image file.
    """
    title = phone.find_element_by_css_selector("div.p-img>a").get_attribute("title")

    # Look the <img> up once; lazily loaded tiles keep the real URL in
    # ``data-lazy-img`` until they scroll into view.
    img = phone.find_element_by_css_selector("div.p-img>a>img")
    img_src = img.get_attribute("src") or img.get_attribute("data-lazy-img")

    # Only protocol-relative URLs ("//host/path") need a scheme; blindly
    # prefixing anything that is not https would corrupt "http://..." URLs.
    if img_src and img_src.startswith("//"):
        img_src = "https:" + img_src

    price = phone.find_element_by_css_selector("div.p-price>strong").text

    rows.append({"title": title, "imgSrc": img_src, "price": price})
    print(price)
    print(img_src)

    if not img_src:
        # Nothing to download for this tile; the row is still recorded.
        return

    try:
        # A timeout keeps one stalled image from hanging the whole crawl.
        response = requests.get(img_src, timeout=10)
        response.raise_for_status()
    except requests.RequestException as exc:
        print("image download failed: %s (%s)" % (img_src, exc))
        return

    # Random UUID-based name, same scheme as before; ``with`` guarantees the
    # file handle is closed even if the write fails.
    with open(dir_path + "/" + str(uuid.uuid1()) + ".png", "wb") as image_file:
        image_file.write(response.content)


def recursion_spider():
    """Search JD for "金立手机" and scrape every page of results.

    For each ``.gl-item`` tile the title, image URL and price are appended to
    the module-level ``rows`` list, and the image is saved under
    ``D:/images``.

    Reads the module-level ``chrome`` (Selenium driver) and ``input`` (the
    search-box element), which must be set before this is called.

    The name is kept for existing callers, but pages are walked with a loop:
    the search is submitted exactly once and the call stack does not grow
    with the number of pages.
    """
    # Submit the search once; later pages are reached via the "next" link only.
    input.send_keys("金立手机")
    chrome.find_element_by_css_selector("button.button > i").click()

    # Create the target directory once, not once per product.
    dir_path = "D:/images"
    os.makedirs(dir_path, exist_ok=True)

    while True:
        # Wait for the page to load, then scroll to the bottom so the
        # remaining tiles get rendered.
        time.sleep(5)
        chrome.execute_script("window.scrollTo(0,document.body.scrollHeight)")

        for phone in chrome.find_elements_by_class_name("gl-item"):
            _scrape_product(phone, dir_path)

        # find_elements_* returns an empty list when nothing matches, whereas
        # find_element_* raises, so this is the reliable "no next page" test.
        next_buttons = chrome.find_elements_by_class_name("pn-next")
        if not next_buttons:
            break

        next_button = next_buttons[0]
        # NOTE(review): JD appears to keep the link on the last page but tag
        # it with a "disabled" class - confirm against the live markup.
        if "disabled" in (next_button.get_attribute("class") or "").split():
            break

        next_button.click()
-
-
# Entry point: start Chrome, open the JD home page, locate the search box and
# run the spider. recursion_spider() reads the globals `chrome` and `input`.
chrome = webdriver.Chrome()
try:
    chrome.get("https://www.jd.com/")
    # NOTE: this shadows the builtin `input`; the name is kept because
    # recursion_spider() looks the search box up under it.
    input = chrome.find_element_by_id("key")
    recursion_spider()
finally:
    # Always shut down the browser and its driver process, even when the
    # scrape raises part-way through.
    chrome.quit()

上面的代码通过递归实现了翻页查询:整个过程都由 Selenium 驱动 Chrome 浏览器、模拟用户操作来爬取数据,使用起来非常方便!
- chrome = webdriver.Chrome()
-
- chrome.get("https://www.jd.com/")
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。