In class, our teacher gave us an assignment: use Selenium to crawl product data from an e-commerce site and store it in a MySQL database. The target site is 网商园 (wsy.com), since the big e-commerce platforms have strong anti-crawling measures and are much harder to scrape. The detailed code is below.
```python
from selenium import webdriver
from selenium.webdriver.common.by import By
import pymysql
from time import sleep


# Simulated login
driver = webdriver.Chrome()
driver.get("https://www.wsy.com/member/login.htm?f=top&redirectURL=http%3A%2F%2Fwww.wsy.com%2F")
username_field = driver.find_element(By.ID, "TPL_username")
password_field = driver.find_element(By.ID, "TPL_password")
login_button = driver.find_element(By.XPATH, "//*[@id='login']")
username_field.send_keys("")  # your 网商园 account name goes here
password_field.send_keys("")  # your password goes here
login_button.click()

# Sleep for 20 seconds to lower the risk of being flagged as a crawler;
# every later sleep() serves the same purpose
sleep(20)


# Connect to the MySQL database
conn = pymysql.connect(host='localhost', user='root', password='123456',
                       database='aaa', charset='utf8', autocommit=True)
cursor = conn.cursor()


# Crawling step: five categories were scraped, the first ten pages of each;
# uncomment the category you want to crawl (one driver.get per run)
for page in range(1, 11):
    # driver.get(f"https://www.wsy.com/category.htm?&cid=50000436&page={page}")  # T-shirts
    # driver.get(f"https://www.wsy.com/category.htm?&cid=50000557&page={page}")  # sweaters
    driver.get(f"https://www.wsy.com/category.htm?&cid=50010158&page={page}")  # jackets
    # driver.get(f"https://www.wsy.com/category.htm?&cid=50010159&page={page}")  # suits
    # driver.get(f"https://www.wsy.com/category.htm?&cid=50010160&page={page}")  # hoodies
    sleep(20)
    tshirts = driver.find_elements(By.XPATH, "//*[@id='goodslist']")
    tshirt_data = []

    # Walk through every product on the page and pull out its fields
    for i in range(1, 49):
        for tshirt in tshirts:
            title = tshirt.find_element(By.XPATH, f"//*[@id='goodslist']/div[{i}]/div/div[2]/a").text
            price = tshirt.find_element(By.XPATH, f"//*[@id='goodslist']/div[{i}]/div/div[3]/div[1]/strong").text
            sales_count = tshirt.find_element(By.XPATH, f"//*[@id='goodslist']/div[{i}]/div/div[3]/div[2]/span").text
            tshirt_data.append({
                "ID": i,
                "商品名": title,
                "商品品类": "夹克",
                "商品价格": price,
                "商品销售数量": sales_count
            })
            print(f"Collected {len(tshirt_data)} jacket records so far")

            # Store the scraped record in MySQL; a parameterized query keeps
            # quotes in product names from breaking the SQL
            insert_sql = ("INSERT INTO t桖 (ID, 商品名, 商品品类, 商品价格, 商品销售数量) "
                          "VALUES (%s, %s, %s, %s, %s)")
            row = tshirt_data[i - 1]
            cursor.execute(insert_sql, (row.get('ID'), row.get('商品名'), row.get('商品品类'),
                                        row.get('商品价格'), row.get('商品销售数量')))
            conn.commit()

cursor.close()
conn.close()
```
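The INSERT statement assumes a table named t桖 already exists in the aaa database. The original post does not show its schema, so the following is only a minimal sketch of a table definition that would accept the values the scraper inserts; all column types here are assumptions.

```python
import pymysql

# Minimal sketch of a schema matching the INSERT above (column types are assumptions)
conn = pymysql.connect(host='localhost', user='root', password='123456',
                       database='aaa', charset='utf8')
with conn.cursor() as cursor:
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS t桖 (
            ID INT,                    -- position of the item on the page
            商品名 VARCHAR(255),        -- product title
            商品品类 VARCHAR(50),       -- category, e.g. 夹克
            商品价格 VARCHAR(50),       -- price text as scraped
            商品销售数量 VARCHAR(50)    -- sales-count text as scraped
        ) DEFAULT CHARSET=utf8mb4
    """)
conn.commit()
conn.close()
```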
With that, the data for every product on the site can be collected and stored in the database.
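To confirm the rows actually landed, you can read a few of them back through the same pymysql connection. This is just a hypothetical verification snippet, reusing the table and column names from the code above:

```python
import pymysql

# Hypothetical check: read back a few stored rows to confirm the scrape worked
conn = pymysql.connect(host='localhost', user='root', password='123456',
                       database='aaa', charset='utf8')
with conn.cursor() as cursor:
    cursor.execute("SELECT ID, 商品名, 商品价格, 商品销售数量 FROM t桖 LIMIT 5")
    for row in cursor.fetchall():
        print(row)
conn.close()
```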
I'm a programming newbie, 云起风程. If you found this article useful, please give it a like and show some support. This newbie would be very grateful, many thanks to you all!