当前位置:   article > 正文

Python 爬取天猫 iPhone8plus 销售数据_爬取手机销售数据

爬取手机销售数据

流程:

一、爬取数据并保存到 MySQL 数据库;

二、读取数据,分析三种颜色的占比。

1,爬取数据保存到mysql数据库:

  1. # -*- coding: utf-8 -*-
  2. """
  3. Created on Mon Mar 4 11:09:45 2019
  4. @author: Lenovo
  5. """""
  import json
  import re
  import time as t
  import urllib
  import urllib.error
  import urllib.request

  import mysql.connector
  # Default request header. ``addheaders`` on an opener must hold
  # (name, value) pairs, so the User-Agent string is paired with its header
  # name instead of being stored as a bare string.
  headers = (
      "User-Agent",
      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36",
  )
  opener = urllib.request.build_opener()
  opener.addheaders = [headers]  # sent with every request made through this opener
  urllib.request.install_opener(opener)  # make this opener the default for urlopen()
  # Proxy-backed download helper
  def use_proxy_1(url, proxy_add):
      """Fetch ``url`` through the proxy ``proxy_add`` and return the body as text.

      Args:
          url: Address to download.
          proxy_add: Proxy address, e.g. ``"host:port"``.

      Returns:
          The response body decoded as UTF-8.

      Raises:
          urllib.error.URLError: If the request fails.
          UnicodeDecodeError: If the body is not valid UTF-8.
      """
      # ProxyHandler maps URL schemes to proxies; register the proxy for both
      # schemes so that https:// URLs are routed through it as well.
      proxy = urllib.request.ProxyHandler({'http': proxy_add, 'https': proxy_add})
      opener = urllib.request.build_opener(proxy, urllib.request.HTTPHandler)
      # NOTE: this replaces whatever opener was installed globally before,
      # including any custom headers configured on it.
      urllib.request.install_opener(opener)
      # Close the response deterministically once the body has been read.
      with urllib.request.urlopen(url) as response:
          return response.read().decode('utf-8')
  # Download review pages 25-99 and store every review in the `iphone` table.
  # Values are bound as query parameters rather than formatted into the SQL
  # text, so quotes inside scraped review content cannot break or alter the
  # statement.
  sql_content = "replace into iphone(name,content,time,color,rom,net) values (%s,%s,%s,%s,%s,%s)"
  for currentPage in range(25, 100):
      try:
          commt_url = 'https://rate.tmall.com/list_detail_rate.htm?itemId=558760911386&spuId=877095771&sellerId=2616970884&order=3&currentPage=' + str(currentPage)
          proxy_add = "182.44.224.198:9999"  # proxy server address
          commt_data = use_proxy_1(commt_url, proxy_add)  # raw review payload of this page
          # Keep only the text between the first "(" and the last ")";
          # presumably the payload is JSON wrapped in a callback call.
          wrapped = re.search(r'^[^(]*?\((.*)\)[^)]*$', commt_data)
          if wrapped is None:
              # Nothing parseable came back; skip the page instead of
              # crashing the whole run.
              print('第' + str(currentPage) + '页没有可解析的数据,已跳过!')
              continue
          data = json.loads(wrapped.group(1))
          rate_list = data['rateDetail']['rateList']
          conn = mysql.connector.connect(host='localhost', port=3306, user='root', passwd='password', db='tianmao', charset='utf8mb4')
          try:
              cur = conn.cursor()
              print('连接成功!!!!')
              for rate in rate_list:
                  # The SKU text is split on ":" and ";"; fields 1, 3 and 7
                  # are read as network type, colour and storage size.
                  sku_fields = re.split('[:;]', rate['auctionSku'])
                  if len(sku_fields) < 8:
                      # SKU text does not have the expected number of fields.
                      continue
                  row = (
                      str(rate['displayUserNick']),
                      str(rate['rateContent']),
                      str(rate['rateDate']),
                      str(sku_fields[3]),  # colour
                      str(sku_fields[7]),  # storage size
                      str(sku_fields[1]),  # network type
                  )
                  cur.execute(sql_content, row)
              # Persist the whole page at once.
              conn.commit()
          finally:
              # Release the connection even when parsing or an insert fails.
              conn.close()
          t.sleep(2)  # pause between pages
          print('第' + str(currentPage) + '页数据保存完毕!')
      except urllib.error.URLError as e:
          if hasattr(e, "code"):
              print(e.code)
          if hasattr(e, "reason"):
              print(e.reason)

效果:

2,读取数据 并分析颜色占比

  1. # -*- coding: utf-8 -*-
  2. """
  3. Created on Mon Mar 4 16:55:25 2019
  4. @author: Lenovo
  5. """
  6. import mysql.connector
  7. import matplotlib.pyplot as plt
  # Read the stored reviews and plot the share of each colour as a pie chart.
  plt.rcParams['font.sans-serif'] = ['SimHei']  # font used so Chinese labels render correctly
  plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly
  conn = mysql.connector.connect(host='localhost', port=3306, user='root', passwd='password', db='tianmao', charset='utf8mb4')
  try:
      cur = conn.cursor()
      cur.execute("select count(*) from iphone;")
      # count(*) yields a single row with a single column; unpack the number.
      alldata = cur.fetchone()[0]
      print("总数: " + str(alldata))
      # Per-colour counts, in the same order as the labels.
      xlabels = [u'银色', u'金色', u'深空灰色']
      xValues = []
      for label in xlabels:
          cur.execute("select count(*) from iphone where color=%s;", (label,))
          xValues.append(cur.fetchone()[0])
  finally:
      # Release the connection even if a query fails.
      conn.close()
  fig = plt.figure()
  plt.pie(xValues, labels=xlabels, autopct='%.2f%%')
  # The second positional argument of title() is ``fontdict``, so the font
  # size has to be passed by keyword.
  plt.title("颜色比例图", fontsize=14)
  plt.show()

效果:

由图可见,最受欢迎的颜色为深空灰色。

词云图

  1. # -*- coding: utf-8 -*-
  2. """
  3. Created on Mon Mar 4 16:55:25 2019
  4. @author: Lenovo
  5. """
  6. import mysql.connector
  7. import matplotlib.pyplot as plt
  8. from wordcloud import WordCloud,STOPWORDS,ImageColorGenerator
  9. import jieba
  # Word cloud built from the review texts stored in content.txt.
  with open('content.txt', mode='r', encoding='utf-8') as f:
      comments = f.readlines()
  print('readlines:' + str(len(comments)))
  # Segment the raw text. Joining the lines (rather than str() of the list)
  # keeps brackets, quotes and escaped "\n" sequences of the list repr out of
  # the tokenizer input.
  comment_after_split = jieba.cut(''.join(comments), cut_all=False)  # non-full (accurate) mode
  words = ' '.join(comment_after_split)  # space-separated tokens
  # Words to leave out of the cloud, on top of the library defaults.
  stopwords = STOPWORDS.copy()
  stopwords.update([
      '此用户没有填写评论!',
      '儿子',
      '第一次',
      '手机',
      '苏宁',
      '苹果',
      '还是',
      '不错',
      '问题',
      '收到',
      '用户没有',
      '那天',
      '非常',
  ])
  # Background image used as the mask.
  bg_image = plt.imread('bg.jpg')
  # Word-cloud parameters: canvas width/height, background colour, mask shape,
  # font file, stop words, largest font size, and a fixed random seed.
  wc = WordCloud(width=1024, height=768, background_color='white', mask=bg_image, font_path='STKAITI.TTF',
                 stopwords=stopwords, max_font_size=400, random_state=50)
  # Feed the segmented text into the cloud.
  wc.generate_from_text(words)
  plt.imshow(wc)
  plt.axis('off')  # hide the axes
  plt.show()
  # Save the result to disk.
  wc.to_file('词云图.jpg')

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/笔触狂放9/article/detail/565383
推荐阅读
相关标签
  

闽ICP备14008679号