赞
踩
由于源码分了几个源文件,还是比较长的,所以这里就不跟大家一一讲解了,懂爬虫的人看几遍就看懂了,不懂爬虫的说再多也是云里雾里,等以后学会了爬虫再来看就懂了。
import csv
import os
import random
import time

import wordcloud
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
def tongji():
    """Print and return a price histogram built from '前十页销量和金额.csv'.

    The CSV is read from the current working directory with the columns
    价格 / 销量 / 店铺位置.  The first row is treated as the header and
    skipped.  A leading '¥' is stripped from every price before it is
    converted to ``float``.

    Prices are counted into half-open ranges ``[low, high)``.  Anything at
    or above 200 falls outside every range and is not counted.

    Returns:
        dict: range label (e.g. ``'10~30'``) -> number of prices in it,
        in ascending range order.

    Raises:
        ValueError: if a non-blank price cell is not a valid number.
    """
    fieldnames = ['价格', '销量', '店铺位置']
    prices = []
    with open('前十页销量和金额.csv', 'r', encoding='utf-8', newline='') as f:
        reader = csv.DictReader(f, fieldnames=fieldnames)
        # Explicit fieldnames make DictReader yield the header as data, so
        # drop it here (this also discards a BOM glued to the first cell).
        next(reader, None)
        for row in reader:
            # Short rows yield None for missing columns; skip those and
            # blank cells instead of crashing on float('').
            raw = (row['价格'] or '').replace('¥', '').strip()
            if not raw:
                continue
            prices.append(float(raw))

    # (exclusive upper bound, label) in ascending order; the first bound a
    # price is below decides its range.
    buckets = [
        (10, '<10'), (30, '10~30'), (50, '30~50'),
        (70, '50~70'), (90, '70~90'), (110, '90~110'),
        (130, '110~130'), (150, '130~150'), (170, '150~170'),
        (200, '170~200'),
    ]
    counts = {label: 0 for _, label in buckets}
    for price in prices:
        for upper, label in buckets:
            if price < upper:
                counts[label] += 1
                break

    for label, count in counts.items():
        print(label, ':', count)
    return counts
def get_the_top_10(url):
    """Scrape the first ten listings of a search page sorted by sales.

    Opens ``url`` in the Chrome instance listening on 127.0.0.1:9222,
    clicks the link whose text contains '销量', scrolls to the bottom of
    the page and reads price, sales text, shop location and link from the
    first ten result cards.  Each record is printed.

    Args:
        url: address of the search-result page to open.

    Returns:
        list[dict]: one dict per listing with the keys
        价格 / 销量 / 店铺位置 / 店铺链接 (at most ten entries).
    """
    # presumably zhima1()[2] is a list of proxy addresses -- it is defined
    # in another source file; verify against that helper.
    proxy_pool = zhima1()[2]
    # Still restricted to the first 400 entries, but no longer raises
    # IndexError when the pool holds fewer than 400.
    ip = random.choice(proxy_pool[:400])

    # Runs a Quicker action; presumably it starts Chrome with remote
    # debugging on port 9222 -- TODO confirm what the action does.
    # Raw string: the path contains backslashes that are not escapes.
    os.system(r'"C:\Program Files\Quicker\QuickerStarter.exe" runaction:5e3abcd2-9271-47b6-8eaf-3e7c8f4935d8')

    options = webdriver.ChromeOptions()
    options.add_experimental_option('debuggerAddress', '127.0.0.1:9222')
    # NOTE(review): when attaching through debuggerAddress the browser is
    # already running, so this switch may have no effect -- confirm.
    options.add_argument(f'--proxy-server={ip}')
    driver = webdriver.Chrome(options=options)
    driver.implicitly_wait(3)
    driver.get(url)

    driver.find_element(By.PARTIAL_LINK_TEXT, '销量').click()
    time.sleep(1)
    driver.execute_script('window.scrollTo(0,document.body.scrollHeight)')
    time.sleep(1)

    element = driver.find_element(By.ID, 'mainsrp-itemlist').find_element(
        By.XPATH, './/div[@class="items"]')
    items = element.find_elements(By.XPATH, './/div[@data-category="auctions"]')

    top_ten = []
    for item in items[:10]:
        price = item.find_element(
            By.XPATH, './div[2]/div[1]/div[contains(@class,"price")]').text
        paid_num_data = item.find_element(
            By.XPATH, './div[2]/div[1]/div[@class="deal-cnt"]').text
        store_location = item.find_element(
            By.XPATH, './div[2]/div[3]/div[@class="location"]').text
        store_href = item.find_element(
            By.XPATH, './div[2]/div[@class="row row-2 title"]/a'
        ).get_attribute('href').strip()
        top_ten.append({
            '价格': price,
            '销量': paid_num_data,
            '店铺位置': store_location,
            '店铺链接': store_href,
        })

    for record in top_ten:
        print(record)
    return top_ten
def _open_reviews_tab(driver):
    """Click the '评价' link on the current page, trying stronger fallbacks in turn."""
    # 1) Plain click.
    try:
        driver.find_element(By.PARTIAL_LINK_TEXT, '评价').click()
        return
    except WebDriverException:
        pass
    # 2) Scroll the link into view first, then click.
    try:
        tab = driver.find_element(By.PARTIAL_LINK_TEXT, '评价')
        _ = tab.location_once_scrolled_into_view  # reading it scrolls
        tab.click()
        return
    except WebDriverException:
        pass
    # 3) Nudge the page down, scroll the link into view and click.
    try:
        driver.execute_script('var q=document.documentElement.scrollTop=100')
        tab = driver.find_element(By.PARTIAL_LINK_TEXT, '评价')
        _ = tab.location_once_scrolled_into_view
        tab.click()
        return
    except WebDriverException:
        pass
    # 4) Last resort: absolute XPath; a failure here propagates.
    driver.find_element(
        By.XPATH, '/html/body/div[6]/div/div[3]/div[2]/div/div[2]/ul/li[2]/a'
    ).click()


def _read_review_texts(driver):
    """Return the review strings visible on the current product page."""
    texts = []
    use_list_layout = False
    try:
        # Layout A: reviews are rows of the "rate-grid" table.
        rows = driver.find_elements(
            By.XPATH, '//div[@class="rate-grid"]/table/tbody/tr')
        use_list_layout = not rows
        for index, row in enumerate(rows):
            if index == 0:
                cell = row.find_element(By.XPATH, './td[1]/div[1]/div/div')
            else:
                try:
                    cell = row.find_element(
                        By.XPATH, './td[1]/div[1]/div[@class="tm-rate-fulltxt"]')
                except WebDriverException:
                    cell = row.find_element(
                        By.XPATH,
                        './td[1]/div[1]/div[@class="tm-rate-content"]/div[@class="tm-rate-fulltxt"]')
            texts.append(cell.text.strip())
    except WebDriverException:
        use_list_layout = True
    if use_list_layout:
        # Layout B: reviews are <li> items under "tb-revbd".
        entries = driver.find_elements(
            By.XPATH,
            '//div[@class="J_KgRate_MainReviews"]/div[@class="tb-revbd"]/ul/li')
        for entry in entries:
            texts.append(
                entry.find_element(By.XPATH, './div[2]/div/div[1]').text.strip())
    return texts


def get_top_10_comments(url):
    """Collect the reviews of the ten best-selling listings into a text file.

    Opens ``url`` in the Chrome instance listening on 127.0.0.1:9222,
    clicks the link whose text contains '销量', then visits the first ten
    listings one by one in a new tab, opens their '评价' tab and appends
    every review found to '排名前十评价.txt' (one per line, also printed).
    The output file is emptied at the start of every call.

    Args:
        url: address of the search-result page to open.
    """
    output_path = '排名前十评价.txt'
    # Truncate up front so a rerun never mixes old and new reviews.
    with open(output_path, 'w+', encoding='utf-8'):
        pass

    # Runs a Quicker action; presumably it starts Chrome with remote
    # debugging on port 9222 -- TODO confirm what the action does.
    os.system(r'"C:\Program Files\Quicker\QuickerStarter.exe" runaction:5e3abcd2-9271-47b6-8eaf-3e7c8f4935d8')

    options = webdriver.ChromeOptions()
    options.add_experimental_option('debuggerAddress', '127.0.0.1:9222')
    driver = webdriver.Chrome(options=options)
    driver.implicitly_wait(3)
    driver.get(url)
    driver.find_element(By.PARTIAL_LINK_TEXT, '销量').click()
    time.sleep(1)

    element = driver.find_element(By.ID, 'mainsrp-itemlist').find_element(
        By.XPATH, './/div[@class="items"]')
    items = element.find_elements(By.XPATH, './/div[@data-category="auctions"]')
    original_handle = driver.current_window_handle

    # Read every link before leaving the page: the item elements belong to
    # the search tab and are not reachable once another tab has focus.
    item_hrefs = [
        item.find_element(
            By.XPATH, './/div[2]/div[@class="row row-2 title"]/a'
        ).get_attribute('href').strip()
        for item in items[:10]
    ]

    for item_href in item_hrefs:
        # Pass the URL as an argument instead of splicing it into the
        # script text, so quotes inside the link cannot break the JS.
        driver.execute_script('window.open(arguments[0])', item_href)
        new_handle = driver.window_handles[-1]
        driver.switch_to.window(new_handle)
        try:
            _open_reviews_tab(driver)
            time.sleep(1)
            comments = _read_review_texts(driver)
        finally:
            # Close the product tab so ten tabs do not pile up; never
            # close the search tab if no new tab was actually opened.
            if new_handle != original_handle:
                driver.close()
                driver.switch_to.window(original_handle)

        with open(output_path, 'a+', encoding='utf-8') as f:
            for comment in comments:
                f.write(comment + '\n')
                print(comment)
def get_top_10_comments_wordcloud():
    """Render '排名前十评价.txt' as a word-cloud image and open it.

    Reads the whole review file, generates a 1000x700 word cloud on a
    white background and saves it as '排名前十评价词云.png' in the current
    working directory.  The image is then opened with the system viewer
    where ``os.startfile`` exists (Windows).
    """
    file = '排名前十评价.txt'
    with open(file, encoding='utf-8') as f:
        txt = f.read()

    # font_path must point at a font with CJK glyphs; 'msyh.ttc' is
    # resolved by wordcloud/PIL -- presumably Microsoft YaHei on Windows.
    w = wordcloud.WordCloud(width=1000,
                            height=700,
                            background_color='white',
                            font_path='msyh.ttc')
    w.generate(txt)

    image_path = file.replace('.txt', '') + '词云.png'
    w.to_file(image_path)
    # os.startfile only exists on Windows; skip the preview elsewhere
    # instead of failing after the image has already been written.
    if hasattr(os, 'startfile'):
        os.startfile(image_path)
def get_10_pages_datas():
with open(‘前十页销量和金额.csv’, ‘w+’, encoding=‘utf-8’, newline=‘’) as f:
f.write(‘\ufeff’)
fieldnames = [‘价格’, ‘销量’, ‘店铺位置’]
writer = csv.DictWriter(f, fieldnames=fieldnames)
writer.writeheader()
infos = []
options = webdriver.ChromeOptions()
options.add_experimental_option(‘debuggerAddress’, ‘127.0.0.1:9222’)
driver = webdriver.Chrome(options=options)
driver.implicitly_wait(3)
driver.get(url)
element = driver.find_element(By.ID, ‘mainsrp-itemlist’).find_element(By.XPATH, ‘.//div[@class=“items”]’)
items = element.find_elements(By.XPATH, ‘.//div[@data-category=“auctions”]’)
for index, item in enumerate(items):
price = item.find_element(By.XPATH, ‘./div[2]/div[1]/div[contains(@class,“price”)]’).text
paid_num_data = item.find_element(By.XPATH, ‘./div[2]/div[1]/div[@class=“deal-cnt”]’).text
store_location = item.find_element(By.XPATH, ‘./div[2]/div[3]/div[@class=“location”]’).text
infos.append(
{‘价格’: price,
‘销量’: paid_num_data,
‘店铺位置’: store_location})
try:
driver.find_element(By.PARTIAL_LINK_TEXT, ‘下一’).click()
except Exception as e:
driver.execute_script(‘window.scrollTo(0,document.body.scrollHeight)’)
driver.find_element(By.PARTIAL_LINK_TEXT, ‘下一’).click()
for i in range(9):
time.sleep(1)
driver.execute_script(‘window.scrollTo(0,document.body.scrollHeight)’)
element = driver.find_element(By.ID, ‘mainsrp-itemlist’).find_element(By.XPATH, ‘.//div[@class=“items”]’)
items = element.find_elements(By.XPATH, ‘.//div[@data-category=“auctions”]’)
for index, item in enumerate(items):
自我介绍一下,小编13年上海交大毕业,曾经在小公司待过,也去过华为、OPPO等大厂,18年进入阿里一直到现在。
深知大多数Python工程师,想要提升技能,往往是自己摸索成长或者是报班学习,但对于培训机构动辄几千的学费,着实压力不小。自己不成体系的自学低效又漫长,而且极易碰到天花板,技术停滞不前!
因此收集整理了一份《2024年Python开发全套学习资料》,初衷也很简单,就是希望能够帮助到想自学提升又不知道该从何学起的朋友,同时减轻大家的负担。
既有适合小白学习的零基础资料,也有适合3年以上经验的小伙伴深入学习提升的进阶课程,基本涵盖了95%以上Python开发知识点,真正体系化!
由于文件比较大,这里只是将部分目录大纲截图出来,每个节点里面都包含大厂面经、学习笔记、源码讲义、实战项目、讲解视频,并且后续会持续更新
如果你觉得这些内容对你有帮助,可以添加V获取:vip1024c (备注Python)
学好 Python 不论是就业还是做副业赚钱都不错,但要学会 Python 还是要有一个学习规划。最后给大家分享一份全套的 Python 学习资料,给那些想学习 Python 的小伙伴们一点帮助!
Python所有方向路线就是把Python常用的技术点做整理,形成各个领域的知识点汇总,它的用处就在于,你可以按照上面的知识点去找对应的学习资源,保证自己学得较为全面。
工欲善其事必先利其器。学习Python常用的开发软件都在这里了,给大家节省了很多时间。
书籍的好处就在于权威和体系健全,刚开始学习的时候你可以只看视频或者听某个人讲课,但等你学完之后,你觉得你掌握了,这时候建议还是得去看一下书籍,看权威技术书籍也是每个程序员必经之路。
我们在看视频学习的时候,不能光动眼动脑不动手,比较科学的学习方法是在理解之后运用它们,这时候练手项目就很适合了。
光学理论是没用的,要学会跟着一起敲,要动手实操,才能将自己的所学运用到实际当中去,这时候可以搞点实战案例来学习。
我们学习Python必然是为了找到高薪的工作,下面这些面试题是来自阿里、腾讯、字节等一线互联网大厂最新的面试资料,并且有阿里大佬给出了权威的解答,刷完这一套面试资料相信大家都能找到满意的工作。
一个人可以走得很快,但一群人才能走得更远。不论你是正从事IT行业的老鸟或是对IT行业感兴趣的新人,都欢迎扫码加入我们的圈子(技术交流、学习资源、职场吐槽、大厂内推、面试辅导),让我们一起学习成长!
我们学习Python必然是为了找到高薪的工作,下面这些面试题是来自阿里、腾讯、字节等一线互联网大厂最新的面试资料,并且有阿里大佬给出了权威的解答,刷完这一套面试资料相信大家都能找到满意的工作。
一个人可以走得很快,但一群人才能走得更远。不论你是正从事IT行业的老鸟或是对IT行业感兴趣的新人,都欢迎扫码加入我们的圈子(技术交流、学习资源、职场吐槽、大厂内推、面试辅导),让我们一起学习成长!
[外链图片转存中…(img-qzjFs2jb-1712857413540)]
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。