Python 爬取天猫 iPhone8plus 销售数据_爬取手机销售数据

作者：笔触狂放9 | 2024-05-13 20:19:15

踩

爬取手机销售数据

流程:

一，爬取数据并保存到 MySQL 数据库；

二，读取数据分析三种颜色的占比。

1，爬取数据并保存到 MySQL 数据库：


# -*- coding: utf-8 -*-
"""
Created on Mon Mar  4 11:09:45 2019
@author: Lenovo
"""""
import json
import re
import time as t
import urllib
import urllib.error
import urllib.request

import mysql.connector
 
# Default request header. urllib expects ``addheaders`` to be a list of
# (name, value) tuples, so the User-Agent string is paired with its header
# name instead of being stored as a bare string.
headers = ("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36")
opener = urllib.request.build_opener()
opener.addheaders = [headers]  # send this header with every request made through the opener
urllib.request.install_opener(opener)  # make the opener the default used by urlopen()
 
# Fetch a page through a proxy server
def use_proxy_1(url, proxy_add):
    """Download ``url`` through the proxy ``proxy_add`` and return it as text.

    The opener built here is also installed as the process-wide default, so
    later ``urllib.request.urlopen`` calls go through the same proxy.

    Args:
        url: Address of the page to fetch.
        proxy_add: Proxy address in ``host:port`` form.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
    """
    # Register the proxy for both schemes: with only 'http' mapped, an
    # https:// URL would be fetched directly and bypass the proxy.
    proxy = urllib.request.ProxyHandler({'http': proxy_add, 'https': proxy_add})
    opener = urllib.request.build_opener(proxy, urllib.request.HTTPHandler)
    urllib.request.install_opener(opener)
    # The context manager closes the response even if read/decode fails.
    with urllib.request.urlopen(url) as response:
        return response.read().decode('utf-8')
 
# Captures the payload between the first "(" and the last ")" of a JSONP
# response. Raw string so that "\(" is a regex escape, not a string escape.
_JSONP_PATTERN = re.compile(r'^[^(]*?\((.*)\)[^)]*$')

# Placeholders let the driver quote the values, so review text containing
# quotes or SQL fragments cannot break or alter the statement.
_INSERT_SQL = ("replace into iphone(name,content,time,color,rom,net) "
               "values (%s,%s,%s,%s,%s,%s)")


def _extract_rate_list(raw_text):
    """Return the ``rateDetail.rateList`` entries of a JSONP response, or None if it cannot be parsed."""
    match = _JSONP_PATTERN.search(raw_text)
    if match is None:
        return None
    try:
        return json.loads(match.group(1))['rateDetail']['rateList']
    except (ValueError, KeyError, TypeError):
        return None


def _split_sku(sku_text):
    """Return ``(color, rom, net)`` from an ``auctionSku`` string, or None if it has too few fields."""
    parts = re.split('[:;]', sku_text)
    if len(parts) < 8:
        return None
    # Splitting on ":" and ";" yields alternating label/value items; the
    # values used here sit at positions 3 (color), 7 (storage), 1 (network).
    return parts[3], parts[7], parts[1]


# Crawl review pages 25..99 and store every review row in MySQL.
for currentPage in range(25, 100):
    commt_url='https://rate.tmall.com/list_detail_rate.htm?itemId=558760911386&spuId=877095771&sellerId=2616970884&order=3&currentPage='+str(currentPage)
    proxy_add = "182.44.224.198:9999"  # proxy server used for the request

    # Only the download can raise URLError, so only it is guarded.
    try:
        commt_data = use_proxy_1(commt_url, proxy_add)  # raw review response
    except urllib.error.URLError as e:
        if hasattr(e, "code"):
            print(e.code)
        if hasattr(e, "reason"):
            print(e.reason)
        continue

    # Strip the JSONP wrapper and load the review list.
    rate_list = _extract_rate_list(commt_data)
    if rate_list is None:
        # The response was not the expected JSONP document; skip this page
        # instead of crashing the whole crawl.
        print('第'+str(currentPage)+'页返回内容无法解析，已跳过')
        continue

    # Connect to MySQL for this page.
    conn = mysql.connector.connect(host='localhost', port=3306, user='root', passwd='password', db='tianmao', charset='utf8mb4')
    try:
        cur = conn.cursor()
        print('连接成功!!!!')
        for rate in rate_list:
            sku_fields = _split_sku(rate['auctionSku'])
            if sku_fields is None:
                continue  # SKU text lacks the expected fields; nothing to store
            color, rom, net = sku_fields
            cur.execute(_INSERT_SQL, (
                rate['displayUserNick'],
                rate['rateContent'],
                rate['rateDate'],
                color,
                rom,
                net,
            ))
            # Commit each row so already-saved rows survive a later failure.
            conn.commit()
            t.sleep(2)  # pause after each row; presumably keeps the overall crawl rate low
        print('第'+str(currentPage)+'页数据保存完毕！')
    finally:
        # Always release the connection, even if a row fails.
        conn.close()

效果：

2，读取数据并分析颜色占比：


# -*- coding: utf-8 -*-
"""
Created on Mon Mar  4 16:55:25 2019
@author: Lenovo
"""
import mysql.connector
import matplotlib.pyplot as plt
 
# Use a font that contains CJK glyphs so the Chinese labels render, and keep
# the minus sign displayable with that font.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False


conn = mysql.connector.connect(host='localhost', port=3306, user='root', passwd='password', db='tianmao', charset='utf8mb4')
try:
    cur = conn.cursor()

    # count(*) yields a single one-column row; take the scalar out of it.
    cur.execute("select count(*) from iphone;")
    alldata = cur.fetchone()[0]
    print("总数: " + str(alldata))

    # Per-color counts, in the same order as the pie labels.
    xlabels = [u'银色', u'金色', u'深空灰色']
    xValues = []
    for label in xlabels:
        cur.execute("select count(*) from iphone where color=%s;", (label,))
        # plt.pie needs a flat list of numbers, not a list of row tuples.
        xValues.append(cur.fetchone()[0])
finally:
    # Release the connection whether or not the queries succeeded.
    conn.close()

fig = plt.figure()
plt.pie(xValues, labels=xlabels, autopct='%.2f%%')
# The second positional parameter of plt.title is a font dict, so the size
# has to be passed by keyword.
plt.title("颜色比例图", fontsize=14)
plt.show()

效果：

由图可见，最受欢迎的颜色为深空灰色。

词云图：


# -*- coding: utf-8 -*-
"""
Created on Mon Mar  4 16:55:25 2019
@author: Lenovo
"""
import mysql.connector
import matplotlib.pyplot as plt
 
from wordcloud import WordCloud,STOPWORDS,ImageColorGenerator
import jieba
 
# Word cloud
# Load the review text; each line of the file is one entry.
with open('content.txt', mode='r', encoding='utf-8') as f:
    comments = f.readlines()
print('readlines:' + str(len(comments)))

# Segment the text (accurate mode, not full mode). The lines are joined into
# one string first: calling str() on the list would feed the list's repr to
# the tokenizer, leaking brackets, quotes and escaped "\n" sequences into
# the words.
comment_after_split = jieba.cut(''.join(comments), cut_all=False)
words = ' '.join(comment_after_split)  # space-separated tokens for WordCloud

# Words to exclude from the cloud, on top of the library defaults.
stopwords = STOPWORDS.copy()
stopwords.update([
    '此用户没有填写评论!',
    '儿子',
    '第一次',
    '手机',
    '苏宁',
    '苹果',
    '还是',
    '不错',
    '问题',
    '收到',
    '用户没有',
    '那天',
    '非常',
])

# Background image used as the mask for the cloud.
bg_image = plt.imread('bg.jpg')
# Word cloud settings: canvas width/height, background color, mask image,
# font file, stop words, largest font size, and a fixed random seed.
wc = WordCloud(width=1024, height=768, background_color='white', mask=bg_image, font_path='STKAITI.TTF',
               stopwords=stopwords, max_font_size=400, random_state=50)
# Build the cloud from the segmented text.
wc.generate_from_text(words)
plt.imshow(wc)
plt.axis('off')  # hide the axes
plt.show()
# Save the rendered cloud to disk.
wc.to_file('词云图.jpg')

声明：本文内容由网友自发贡献，不代表【wpsshop博客】立场，版权归原作者所有，本站不承担相应法律责任。如您发现有侵权的内容，请联系我们。转载请注明出处：https://www.wpsshop.cn/w/笔触狂放9/article/detail/565383