赞
踩
import numpy as np
import pandas as pd
import pymysql
from pyecharts import options as opts
from pyecharts.charts import Bar, Line
from pyecharts.commons.utils import JsCode
from pyecharts.globals import ThemeType

# Connect to the local MySQL database that holds the playlist data.
# NOTE(review): the credentials are hard-coded; consider reading them from the
# environment before this notebook is shared.
conn = pymysql.connect(
    host='127.0.0.1',
    user='root',
    password='123',
    database='cloudmusic',
    charset='utf8',
)
# Load the whole `playlist` table into a DataFrame.
df = pd.read_sql("select * from playlist", con=conn)
df.head(5)
df.shape
共有51203行数据,16个字段
# Group by playlist type and average the play, subscribe, share and comment
# counts, rounded to two decimals.  The columns are selected with a list:
# indexing a GroupBy with a bare tuple of names is rejected by pandas >= 2.0.
type_group = (
    df.groupby("type")[['play_count', 'subscribed_count', 'share_count', 'comment_count']]
    .mean()
    .round(2)
)
# Sort by share_count in descending order and keep the top 10 types.
sorted_type_group = type_group.sort_values(by='share_count', ascending=False)[:10]
sorted_type_group
统计总数
def _with_percent(values, total):
    """Pair each value with its share of ``total`` as a percentage (2 decimals)."""
    return [
        {"value": value, "percent": round((value / total) * 100, 2)}
        for value in values
    ]


# Column totals over the top-10 types; they are the denominators of the
# percentage labels drawn on the stacked bars.
share_count_sum = sorted_type_group["share_count"].sum()
comment_count_sum = sorted_type_group["comment_count"].sum()
print(share_count_sum, comment_count_sum)
# Category axis: the playlist types.
xdata = sorted_type_group.index.tolist()
# One {"value", "percent"} item per type for each series.
ydata1 = _with_percent(sorted_type_group['share_count'].tolist(), share_count_sum)
ydata2 = _with_percent(sorted_type_group['comment_count'].tolist(), comment_count_sum)
ydata1
# Stacked bar chart: share and comment averages for the top-10 types, each bar
# segment labelled with its percentage of the column total.
bar1 = (
    Bar(init_opts=opts.InitOpts(width="1000px"))
    .add_xaxis(xdata)
    .add_yaxis("转发量", ydata1, stack="stack1", category_gap="50%", color="#009db2")
    .add_yaxis("评论数", ydata2, stack="stack1", category_gap="50%", color="#f47a75")
    .set_global_opts(
        # Title options
        title_opts=opts.TitleOpts(
            title="各类型歌单的转发量和评论数",
            subtitle="列举了前十名",
        ),
        # X-axis options
        xaxis_opts=opts.AxisOpts(
            name="类型",
        ),
        # Y-axis options (name_gap is a numeric pixel distance, not a string)
        yaxis_opts=opts.AxisOpts(
            name="数量",
            name_location="center",
            name_gap=40,
        ),
        # Brush (area selection) component
        brush_opts=opts.BrushOpts(),
        # Data-zoom options
        datazoom_opts=opts.DataZoomOpts(),
        # Toolbox component
        toolbox_opts=opts.ToolboxOpts(),
    )
    .set_series_opts(
        # Label options: show the precomputed "percent" field of each data item
        label_opts=opts.LabelOpts(
            position="right",
            formatter=JsCode(
                "function(x){return Number(x.data.percent).toFixed() + '%';}"
            ),
        )
    )
)
bar1.render_notebook()
图表特色:
type_group.head(10)
# First ten types in the group order (not the top ten by share count).
type_group10 = type_group[:10]
# Bar chart of average shares/comments with a second y-axis for subscriptions.
bar2 = (
    Bar(init_opts=opts.InitOpts(width="1000px"))
    .add_xaxis(xaxis_data=type_group10.index.tolist())
    .add_yaxis(
        series_name="平均转发量",
        yaxis_data=type_group10['share_count'].tolist(),
        color="#009db2",
        label_opts=opts.LabelOpts(is_show=False),
    )
    .add_yaxis(
        series_name="平均评论数",
        yaxis_data=type_group10['comment_count'].tolist(),
        color="#f47a75",
        label_opts=opts.LabelOpts(is_show=False),
    )
    # Secondary y-axis (index 1), labelled in thousands.
    .extend_axis(
        yaxis=opts.AxisOpts(
            name="收藏量",
            type_="value",
            axislabel_opts=opts.LabelOpts(
                formatter=JsCode(
                    "function(y){return Number(y / 1000) + 'K';}"
                ),
            ),
            axisline_opts=opts.AxisLineOpts(linestyle_opts=opts.LineStyleOpts(color="#e75840")),
        )
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="复合柱状图与折线图",
        ),
        tooltip_opts=opts.TooltipOpts(
            is_show=True,
            trigger="axis",
            axis_pointer_type="cross",
        ),
        xaxis_opts=opts.AxisOpts(
            type_="category",
            axislabel_opts={'interval': '0'},
            axispointer_opts=opts.AxisPointerOpts(is_show=True, type_="shadow"),
        ),
        yaxis_opts=opts.AxisOpts(
            name="转发量与评论数",
            type_="value",
            min_=0,
            max_=400,
            interval=100,
            axisline_opts=opts.AxisLineOpts(linestyle_opts=opts.LineStyleOpts(color="#024b51")),
            # Tick options belong in axistick_opts; they were previously
            # passed as axislabel_opts by mistake.
            axistick_opts=opts.AxisTickOpts(is_show=True),
            splitline_opts=opts.SplitLineOpts(is_show=True),
        ),
    )
)
# Line series for average subscriptions, bound to the secondary y-axis.
line2 = (
    Line()
    .add_xaxis(xaxis_data=type_group10.index.tolist())
    .add_yaxis(
        series_name="平均收藏量",
        yaxis_index=1,
        y_axis=type_group10['subscribed_count'].tolist(),
        linestyle_opts=opts.LineStyleOpts(color="#e75840", width=1),
        z=10,
        label_opts=opts.LabelOpts(color="#e75840", is_show=True),
    )
)
图表特色:
# First 30 types with every averaged column truncated to an integer.
type_group30 = type_group[:30].astype(int)
type_group30.head()
# Bar chart with a vertical data-zoom slider.
bar3 = (
    Bar()
    .add_xaxis(xaxis_data=type_group30.index.tolist())
    .add_yaxis(
        series_name="平均转发量",
        yaxis_data=type_group30['share_count'].tolist(),
        color="#009db2",
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="垂直数据缩放条",
        ),
        datazoom_opts=opts.DataZoomOpts(orient="vertical"),
    )
)
图表特色:
# Bar chart with mark points on the maximum, minimum and average values.
bar4 = (
    Bar()
    .add_xaxis(xaxis_data=type_group30.index.tolist())
    .add_yaxis(
        series_name="平均转发量",
        yaxis_data=type_group30['share_count'].tolist(),
        color="#009db2",
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="最大最小平均值标记",
        )
    )
    .set_series_opts(
        label_opts=opts.LabelOpts(is_show=False),
        markpoint_opts=opts.MarkPointOpts(
            data=[
                opts.MarkPointItem(type_="max", name="最大值"),
                opts.MarkPointItem(type_="min", name="最小值"),
                opts.MarkPointItem(type_="average", name="平均值"),
            ]
        ),
    )
)
图表特色:
# JavaScript colour callback: values in (0, 100] are green, (100, 200] orange,
# everything else red.  The text is split across adjacent literals only for
# line length; the resulting string is unchanged.
color_function = (
    " function (params) {"
    " if (params.value > 0 && params.value <= 100) { return '#71c16f'; }"
    " else if (params.value > 100 && params.value <= 200 ) { return '#f7af59'; }"
    " return '#f06464'; } "
)
# Bar chart whose bar colour is chosen per value by the callback above.
bar5 = (
    Bar()
    .add_xaxis(xaxis_data=type_group30.index.tolist())
    .add_yaxis(
        series_name="平均转发量",
        yaxis_data=type_group30['share_count'].tolist(),
        itemstyle_opts=opts.ItemStyleOpts(color=JsCode(color_function)),
    )
)
图表特色:
# Bar chart with rounded bars filled by a vertical linear gradient.
bar6 = (
    Bar()
    .add_xaxis(xaxis_data=type_group10.index.tolist())
    .add_yaxis("平均转发量", yaxis_data=type_group10["share_count"].tolist(), category_gap="50%")
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="渐变圆柱",
        ),
        xaxis_opts=opts.AxisOpts(
            type_="category",
            axislabel_opts={'interval': '0'},
        ),
    )
    .set_series_opts(
        itemstyle_opts={
            "normal": {
                # Gradient from top (offset 0) to bottom (offset 1); the JS
                # text is split across literals only for line length.
                "color": JsCode(
                    "new echarts.graphic.LinearGradient(0,0,0,1,"
                    " [{offset: 0, color: '#0780cf'},"
                    " {offset: 1, color: '#47aee3'} ], false)"
                ),
                "barBorderRadius": [30, 30, 30, 30],
            }
        }
    )
)
图表特色:
# Bar chart with a delayed elastic entry animation and two data-zoom
# controls: the default slider plus an "inside" zoom.
bar7 = Bar(
    init_opts=opts.InitOpts(
        animation_opts=opts.AnimationOpts(
            animation_delay=1000,
            animation_easing="elasticOut",
        ),
    ),
)
bar7.add_xaxis(xaxis_data=type_group30.index.tolist())
bar7.add_yaxis(
    "平均转发量",
    yaxis_data=type_group30['share_count'].tolist(),
    color="#009db2",
)
bar7.set_global_opts(
    title_opts=opts.TitleOpts(title="内外缩放"),
    datazoom_opts=[
        opts.DataZoomOpts(),
        opts.DataZoomOpts(type_="inside"),
    ],
)
图表特色:
(1)替换省份字段信息
根据年份和省份对数据进行分组,并规范省份的名称
import re
def replace_str(x):
    """Strip administrative suffixes and ethnic qualifiers from a province name.

    Every occurrence of each fragment in the list below is removed, e.g.
    "广西壮族自治区" becomes "广西".

    Args:
        x: The raw province name. Non-string values (such as a missing value)
            are returned unchanged instead of raising.

    Returns:
        The name with all listed fragments removed.
    """
    if not isinstance(x, str):
        return x
    # '维吾尔族' must be removed before its prefix '维吾尔'; otherwise a
    # stray '族' is left behind.
    rep_list = ['省', '市', '维吾尔族', '维吾尔', '自治区', '壮族', '回族', '特别行政区']
    for rep in rep_list:
        # The fragments are plain text, so str.replace is enough (no regex).
        x = x.replace(rep, '')
    return x
# Total the four count columns per (creation year, normalised province).
# The columns are selected explicitly so that text columns of the table are
# never summed (pandas >= 2.0 no longer drops them silently).
time_df = (
    df.groupby([df['create_time'].str[:4], df['province'].apply(replace_str)])
    [['play_count', 'subscribed_count', 'share_count', 'comment_count']]
    .sum()
)
time_df
(2)重建索引
# Move the group keys of time_df out of the index and back into ordinary columns.
re_time_df = time_df.reset_index()
re_time_df
(3)获取所有省份
# Every distinct province name, in order of first appearance.
province = re_time_df['province'].drop_duplicates().tolist()
province
对各年度的省份数据进行计数,发现2013年和2014年有缺失数据
# Count how many rows (one per province) each year has.
re_time_df['create_time'].value_counts()
(4)处理缺失数据
def add_province(df_data, provinces=None):
    """Return ``df_data`` with a zero-filled row for each missing (year, province).

    Args:
        df_data: Frame with a ``create_time`` (year) column, a ``province``
            column and any number of value columns.
        provinces: Province names every year should contain. Defaults to the
            distinct provinces found in ``df_data`` itself.

    Returns:
        ``df_data`` unchanged when nothing is missing; otherwise a new frame
        with the original rows followed by the filler rows, re-indexed from 0.
    """
    if provinces is None:
        provinces = df_data['province'].drop_duplicates().tolist()
    # Every column other than the two keys is filled with 0.
    value_columns = [c for c in df_data.columns if c not in ('create_time', 'province')]
    # All years, in order of first appearance.
    years = df_data['create_time'].drop_duplicates().tolist()

    filler_rows = []
    for year in years:
        # Provinces already present for this year.
        present = set(df_data.loc[df_data['create_time'] == year, 'province'])
        # Missing provinces = all provinces - this year's provinces.
        for name in provinces:
            if name not in present:
                row = {'create_time': year, 'province': name}
                row.update(dict.fromkeys(value_columns, 0))
                filler_rows.append(row)

    if not filler_rows:
        return df_data
    filler_df = pd.DataFrame(filler_rows, columns=df_data.columns)
    return pd.concat([df_data, filler_df], ignore_index=True)


re_time_df2 = add_province(re_time_df)
re_time_df2
已填充缺失数据
我们也可以分步来做这个过程
先处理2013年
# Provinces that already have a 2013 row.
new_province2013 = (
    re_time_df.loc[re_time_df['create_time'] == '2013', 'province']
    .drop_duplicates()
    .tolist()
)
# Provinces with no 2013 row.
rest_province2013 = [name for name in province if name not in new_province2013]
rest_province2013
# One zero-valued 2013 row per missing province.
rest_df2013 = pd.DataFrame(
    data=[['2013', name, 0, 0, 0, 0] for name in rest_province2013],
    columns=re_time_df.columns,
)
rest_df2013
# Append the filler rows after the original data.
re_time_df1 = pd.concat([re_time_df, rest_df2013], ignore_index=True)
再处理2014年
# Provinces that already have a 2014 row.  The mask and the frame it filters
# are both taken from re_time_df1 so that their indexes match.
new_province2014 = (
    re_time_df1.loc[re_time_df1['create_time'] == '2014', 'province']
    .drop_duplicates()
    .tolist()
)
# Provinces with no 2014 row.
rest_province2014 = [x for x in province if x not in new_province2014]
# One zero-valued 2014 row per missing province.
rest_df2014 = pd.DataFrame(
    [['2014', x, 0, 0, 0, 0] for x in rest_province2014],
    columns=re_time_df1.columns,
)
rest_df2014
re_time_df2 = pd.concat([re_time_df1, rest_df2014], ignore_index=True)
(5)重建索引,得到最后数据
# Sort by year, then province, and renumber the rows from 0.
final_time_df = re_time_df2.sort_values(by=['create_time', 'province']).reset_index(drop=True)
(6)提取图表所需数据
省份
# Distinct province names, in the row order of final_time_df.
final_province = final_time_df['province'].drop_duplicates().tolist()
年份
# Distinct years, in the row order of final_time_df.
final_year = final_time_df['create_time'].drop_duplicates().tolist()
播放量
# Play counts: year -> list of values in final_time_df row order.
data_play_count = {
    year: final_time_df.loc[final_time_df['create_time'] == year, 'play_count'].tolist()
    for year in final_year
}
收藏量
# Subscription counts: year -> list of values in final_time_df row order.
data_subscribed_count = {
    year: final_time_df.loc[final_time_df['create_time'] == year, 'subscribed_count'].tolist()
    for year in final_year
}
转发量
# Share counts: year -> list of values in final_time_df row order.
data_share_count = {
    year: final_time_df.loc[final_time_df['create_time'] == year, 'share_count'].tolist()
    for year in final_year
}
评论数
# Comment counts: year -> list of values in final_time_df row order.
data_comment_count = {
    year: final_time_df.loc[final_time_df['create_time'] == year, 'comment_count'].tolist()
    for year in final_year
}
汇总到一个字典中
total_data = {}


def format_data(data: dict) -> dict:
    """Reshape per-year value lists into chart items and add per-year summaries.

    For every year in the module-level ``final_year`` list, the list stored at
    ``data[year]`` is rewritten in place so that its i-th value becomes
    ``{"name": final_province[i], "value": value}``. Two keys are added per
    year: ``year + "max"`` (the largest value rounded down to a multiple of
    100) and ``year + "sum"`` (the total of the values).

    Args:
        data: Mapping from year to a list of numbers. It is modified in place.

    Returns:
        The same ``data`` object.
    """
    for year in final_year:
        values = data[year]
        # default=0 keeps a year with no rows from raising in max().
        max_data = max(values, default=0)
        sum_data = sum(values)
        # Slice assignment keeps the original list object, as before.
        values[:] = [
            {"name": final_province[i], "value": value}
            for i, value in enumerate(values)
        ]
        data[year + "max"] = int(max_data / 100) * 100
        data[year + "sum"] = sum_data
    return data
# Collect the four formatted metrics under one dictionary, keyed by metric name.
total_data.update({
    'play_count': format_data(data=data_play_count),
    'subscribed_count': format_data(data=data_subscribed_count),
    'share_count': format_data(data=data_share_count),
    'comment_count': format_data(data=data_comment_count),
})
from pyecharts.charts import Timeline, Pie


def get_year_overlap_chart(year: str) -> Bar:
    """Build one year's chart: a four-series bar chart overlaid with a pie.

    Args:
        year: Key into the per-year entries of ``total_data``.

    Returns:
        The bar chart with the pie overlaid on it. The bar shows the play,
        subscription, share and comment values per province; the play and
        subscription series are added with ``is_selected=False``. The pie
        shows that year's share and comment totals.
    """
    bar = (
        Bar()
        .add_xaxis(xaxis_data=final_province)
        .add_yaxis(
            series_name="播放量",
            yaxis_data=total_data['play_count'][year],
            is_selected=False,
            label_opts=opts.LabelOpts(is_show=False),
        )
        .add_yaxis(
            series_name="收藏量",
            yaxis_data=total_data['subscribed_count'][year],
            is_selected=False,
            label_opts=opts.LabelOpts(is_show=False),
        )
        .add_yaxis(
            series_name="转发量",
            yaxis_data=total_data['share_count'][year],
            label_opts=opts.LabelOpts(is_show=False),
        )
        .add_yaxis(
            series_name="评论数",
            yaxis_data=total_data['comment_count'][year],
            label_opts=opts.LabelOpts(is_show=False),
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(
                title=f"{year}年网易云音乐热门歌单数据",
                subtitle="数据来源于网易云音乐",
            ),
            tooltip_opts=opts.TooltipOpts(
                is_show=True,
                trigger="axis",
                axis_pointer_type="shadow",
            ),
        )
    )
    pie = (
        Pie()
        .add(
            series_name="收藏量/转发量/评论数占比",
            data_pair=[
                ["转发量", total_data["share_count"][f"{year}sum"]],
                ["评论数", total_data["comment_count"][f"{year}sum"]],
            ],
            center=["80%", "30%"],
            radius=["14%", "28%"],
            color=["#f47a75", "#009db2"],
        )
        .set_series_opts(tooltip_opts=opts.TooltipOpts(is_show=True, trigger="item"))
        .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
        .set_colors(["#f47a75", "#009db2"])
    )
    return bar.overlap(pie)


# Build the timeline: one overlapped chart per year, auto-playing every 2 s.
timeline = Timeline(init_opts=opts.InitOpts(width="1000px"))
for y in final_year:
    timeline.add(get_year_overlap_chart(year=y), time_point=y)
timeline.add_schema(is_auto_play=True, play_interval=2000)
图表特色:
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。