
Data Analysis and Visualization of Dangdang Bestsellers

A data analysis system based on Dangdang's 2020 book sales data.
The sections below analyze and visualize the Dangdang Bestseller Top 500 dataset.

Import the packages:

# coding:utf-8
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from wordcloud import WordCloud
import jieba
from collections import Counter

matplotlib.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese characters in plots
# matplotlib does not support Chinese by default; once a Chinese font is set,
# the minus sign renders incorrectly, so axes.unicode_minus must be disabled
# for negative numbers on the axes to display properly.
matplotlib.rcParams['axes.unicode_minus'] = False
data = pd.read_excel("当当网畅销书Top500 .xlsx")
print(data.head())  # head is a method, so it must be called to show the first rows
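Before plotting anything, it helps to confirm which columns the workbook actually provides and whether any values are missing; a quick check (the column names 作者, 定价, 开本, 简述 and 书名 used in the sections below are assumed to be present in the sheet):

print(data.columns.tolist())   # should include 作者, 定价, 开本, 简述, 书名
print(data.dtypes)             # 定价 should be numeric for the price charts
print(data.isnull().sum())     # missing values per column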

1.

Figure 1: pie chart: share of the top seven authors by book count among the first 100 books
writer = data["作者"][:100]
# count books per author among the first 100 rows and keep the seven most frequent
writers = writer.value_counts().sort_values(ascending=False)[:7]
print(writers.index.tolist())
print(writers.values)
# build "author:count" labels for the pie slices
a = []
for i in range(7):
    s = writers.index.tolist()[i] + ":" + str(writers.values[i])
    a.append(s)
print(a)
plt.pie(writers.values, labels=a, autopct='%1.1f%%')
plt.show()
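To draw attention to the most prolific author, the largest slice can be pulled out of the pie with matplotlib's explode parameter; a minimal sketch reusing the writers series and labels a from above (the 0.1 offset is an arbitrary choice):

# the first entry is the largest because the series is sorted in descending order
explode = [0.1] + [0] * (len(writers) - 1)
plt.pie(writers.values, labels=a, autopct='%1.1f%%', explode=explode)
plt.show()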

2.

Figure 2: word cloud of the book descriptions (简述)
intro = data["简述"].tolist()
intros = [str(item) for item in intro]
intro = "".join(intros)
print(intro)
# stop words to drop after segmentation
stop_words = [",",'的', '是', '在', '等',"。","、", '!', '《', '》', ' ', '“', '”',"你","和",";","+",","]
# segment the text with jieba and remove stop words
filtered_words = [word for word in jieba.cut(intro) if word not in stop_words]
# count word frequencies
word_counts = Counter(filtered_words)
# draw the word cloud; a raw string keeps the Windows font path from being read as escape sequences
wordcloud = WordCloud(background_color='white', width=800, height=400, font_path=r'C:\Windows\Fonts\SimHei.ttf').generate_from_frequencies(word_counts)
plt.imshow(wordcloud)
plt.axis('off')
plt.show()
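jieba segmentation leaves many single-character tokens (particles and stray punctuation) that tend to crowd the cloud; one optional refinement is to drop everything shorter than two characters, as in this sketch reusing intro and stop_words from above:

# keep only tokens of length >= 2 that are not stop words
filtered_words = [w for w in jieba.cut(intro) if len(w) >= 2 and w not in stop_words]
word_counts = Counter(filtered_words)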

3.

Figure 3: word cloud of the book titles (书名)
name = data["书名"].tolist()
names = [str(item) for item in name]
names = "".join(names)
stop_words = ['(', ')', ',',",",'的', '是', '在', '等',"。","、", '!', '《', '》', ' ', '“', '”',"你","和",";","+"]
# segment the concatenated titles and drop stop words
filtered_words = [word for word in jieba.cut(names) if word not in stop_words]
word_counts = Counter(filtered_words)
print(word_counts)
wordcloud = WordCloud(background_color='white', width=800, height=400, font_path=r'C:\Windows\Fonts\SimHei.ttf').generate_from_frequencies(word_counts)
plt.imshow(wordcloud)
plt.axis('off')
plt.show()
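If the word clouds need to be kept rather than only shown on screen, a WordCloud object can be written straight to an image file with to_file; a small sketch (the file name is arbitrary):

wordcloud.to_file("书名词云.png")  # save the most recent cloud as a PNG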

4.

Figure 4: bar chart: average list price (定价) grouped by author
# loc slicing is label-based and inclusive, so :100 keeps 101 rows; use :99 for exactly the first 100
price = data.loc[:100, ["作者", "定价"]]
prices = price.groupby("作者")["定价"].mean().sort_values(ascending=False)[:10]
print(prices.index.tolist())
print(prices.values.tolist())
plt.bar(prices.index.tolist(), prices.values.tolist())
# annotate each bar with its average price
for i, v in enumerate(prices.values.tolist()):
    plt.text(i, v, str(v), ha='center', va='bottom')
plt.xticks(rotation=45)
plt.show()
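A single expensive title can dominate an author's average, so it is useful to see how many books each mean is based on; a short sketch using agg on the same price frame as above:

# average price and number of titles per author, highest averages first
stats = price.groupby("作者")["定价"].agg(['mean', 'count']).sort_values('mean', ascending=False)[:10]
print(stats)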
5.
Figure 5: pie chart: share of each book format (开本)
kaiben = data.groupby("开本").size()
plt.pie(kaiben, labels=kaiben.index, autopct='%1.1f%%')  # pie chart with percentage labels
plt.title('Column Data Distribution')  # chart title
plt.legend()  # show the legend
plt.show()
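groupby("开本").size() is equivalent to value_counts() on the column; if rare formats clutter the pie, one option is to fold every format below a small count into an 其他 (other) slice, as in this sketch (the threshold of 5 is arbitrary):

counts = data["开本"].value_counts()
merged = counts[counts >= 5].copy()      # common formats
small = counts[counts < 5]               # formats with fewer than 5 books
if not small.empty:
    merged["其他"] = small.sum()          # collapse rare formats into one slice
plt.pie(merged, labels=merged.index, autopct='%1.1f%%')
plt.show()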

6.
Figure 6: pie chart: list price (定价) binned into ranges
prices = data["定价"]
ranges = [(0, 50), (50, 70), (70, 100), (100, 200), (200, 500)]
category_counts = {}
# count how many prices fall into each range
for price in prices:
    for min_price, max_price in ranges:
        if min_price <= price < max_price:
            if (min_price, max_price) in category_counts:
                category_counts[(min_price, max_price)] += 1
            else:
                category_counts[(min_price, max_price)] = 1
            break  # stop once the matching range is found
labels = [f"{min_price}-{max_price} ({category_counts[(min_price, max_price)]})" for min_price, max_price in category_counts.keys()]
sizes = list(category_counts.values())
plt.pie(sizes, labels=labels, autopct='%1.1f%%')
plt.title("Category Distribution")
plt.show()
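The manual double loop can also be expressed with pandas' built-in binning; a sketch over the same price ranges using pd.cut, with right=False so the bins are left-closed and right-open, matching the min_price <= price < max_price test above:

bins = [0, 50, 70, 100, 200, 500]
binned = pd.cut(data["定价"], bins=bins, right=False)   # intervals [0,50), [50,70), ...
counts = binned.value_counts().sort_index()
plt.pie(counts, labels=[str(iv) for iv in counts.index], autopct='%1.1f%%')
plt.title("Category Distribution (pd.cut)")
plt.show()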


                