当前位置:   article > 正文

分组统计绘图处理matplotlib

分组统计绘图处理matplotlib

可以参考以下代码:

  import matplotlib.pyplot as plt
  import numpy as np

  # Grouped bar chart: two value series drawn side by side for each name,
  # with every bar annotated by its own value.

  # Select a font that can display the Chinese labels used below.
  plt.rcParams['font.sans-serif'] = ['SimHei']

  data1 = [3, 2, 4, 2]
  data2 = [5, 4, 6, 3]
  myNames = ["姓名1", "姓名2", "姓名3", "姓名4"]

  # Numeric group positions. Each series is shifted left or right of these
  # positions by half a bar width so the two bars of a group sit next to
  # each other instead of overlapping.
  x = np.arange(len(myNames))
  bar_width = 0.2
  half_width = bar_width / 2

  plt.xlabel("name")
  plt.ylabel("value")

  # (horizontal shift, values, colour, legend label) for each series.
  series = (
      (-half_width, data1, "red", "年龄"),
      (half_width, data2, "blue", "收入"),
  )
  for shift, values, colour, legend_name in series:
      plt.bar(x + shift, values, color=colour, width=bar_width,
              label=legend_name)
      # Write each bar's value just above its top, centred on the bar.
      # The same shift is reused so the text always tracks the bar.
      for x1, y1 in zip(x, values):
          plt.text(x1 + shift, y1, str(y1), ha='center', va='bottom',
                   fontsize=10, color=colour)

  # Replace the numeric tick positions with the names.
  plt.xticks(x, myNames)
  # Show the legend built from the label= arguments above.
  plt.legend()
  plt.show()

效果

 

-------------------------------------------------------------------------------------------------------------------

使用 pandas 进行分组处理,结果更清晰。

  import matplotlib.pyplot as plt
  import numpy as np
  import pandas as pd

  # Select a font that can display the Chinese labels, and keep the minus
  # sign renderable with that font.
  plt.rcParams['font.sans-serif'] = ['SimHei']
  plt.rcParams['axes.unicode_minus'] = False

  # Load the "成绩" sheet of the workbook into a DataFrame.
  df = pd.read_excel("数据可视化-1.xls", sheet_name="成绩")

  # Rows holding the highest income; print the name and income of the first.
  mymax = df["收入"].max()
  df1 = df[df["收入"] >= mymax]
  print(df1["姓名"].tolist()[0], df1["收入"].tolist()[0])

  # ---- Grouping: pie chart of the male/female split ----
  # Group by the plain column name rather than a one-element list, so the
  # scalar keys passed to get_group() below match the group keys.
  df2 = df.groupby("性别")
  dfMale = df2.get_group("male")
  dfFemale = df2.get_group("female")

  # Number of non-null income entries per gender.
  mydata = [dfMale["收入"].count(), dfFemale["收入"].count()]
  mylab = ["male", "female"]
  # Pull the second wedge slightly out of the pie.
  explode = [0, 0.05]
  plt.pie(mydata, labels=mylab, autopct='%1.2f%%', colors=["red", "blue"],
          explode=explode)
  plt.title("男女百分比", fontsize=20)
  plt.show()
  # ---- Bar chart: mean income and total points per age bucket ----
  # Bucket labels, in the same order as the three selections below.
  dfAge = ["less 30", "[30,50]", "above 50"]

  # Split the rows into three age buckets.
  age = df["年龄"]
  dfless30 = df[age < 30]
  df3050 = df[(age >= 30) & (age <= 50)]
  dfabove50 = df[age > 50]
  buckets = (dfless30, df3050, dfabove50)

  # One numeric x position per bucket.
  listX = np.arange(len(dfAge))
  dfMoney = [bucket["收入"].mean() for bucket in buckets]
  dfScore = [bucket["积分"].sum() for bucket in buckets]

  bar_width = 0.2
  plt.xlim([-0.5, 3])

  # (horizontal shift, values, colour, legend label) for each series; the
  # second series is drawn one bar width to the right of the first.
  series = (
      (0, dfMoney, "red", "收入均值"),
      (bar_width, dfScore, "blue", "积分和"),
  )
  for shift, values, colour, legend_name in series:
      plt.bar(listX + shift, values, color=colour, width=bar_width,
              label=legend_name)
      # Write each bar's value just above its top, centred on the bar.
      for x1, y1 in zip(listX, values):
          plt.text(x1 + shift, y1, str(y1), ha='center', va='bottom',
                   fontsize=10)

  # Label each pair of bars with its age bucket, centred between the two
  # bars, instead of leaving the raw positions 0, 1, 2 on the axis.
  plt.xticks(listX + bar_width / 2, dfAge)
  plt.legend(loc="upper right")
  plt.show()
  # ---- Line chart: total points and mean income per age bucket ----
  # The format string only selects the marker and line style ("o--"); the
  # colour comes from color= alone, so the two never contradict each other.
  plt.plot(dfAge, dfScore, "o--", color="r", linewidth=0.5, alpha=0.8,
           label="消费积分")
  plt.plot(dfAge, dfMoney, "o--", color="b", linewidth=0.5, alpha=0.8,
           label="平均收入")
  plt.legend(loc="upper left")
  plt.title("标题信息", fontsize=20)
  plt.xlabel("年龄阶段", fontsize=10)
  plt.ylabel("数据数量", fontsize=10)

  # Annotate every point with its value. String categories are placed at
  # x = 0, 1, 2, ... on the axis, so each value's index is its x position.
  # Income labels go 500 below the point and points labels 100 above it.
  for x1, y1 in enumerate(dfMoney):
      plt.text(x1, y1 - 500, str(y1), ha='center', va='bottom', fontsize=10)
  for x1, y1 in enumerate(dfScore):
      plt.text(x1, y1 + 100, str(y1), ha='center', va='bottom', fontsize=10)
  plt.show()

  # Stop the script here; nothing after this statement is executed.
  # SystemExit is raised directly because exit() is a helper meant for the
  # interactive interpreter.
  raise SystemExit
  # The triple-quoted string below is a bare expression statement: it is
  # never assigned or executed, so the snippets inside it (a y = x**2 line
  # plot, a pie chart and a three-series grouped bar chart) are disabled.
  """
  x=np.arange(-10,10.5,0.5)
  y=x**2
  plt.plot(x,y) #坐标绘图
  #plt.step(x,y) #梯度图
  #plt.bar(x,y) #柱状图
  #plt.scatter(x,y) #散列图
  #plt.pie(x,y)
  plt.grid(True,color='b') #网格线,默认是关闭的
  plt.xlabel("angle") #x轴标签
  plt.ylabel("sin(x)") #y轴标签
  plt.title("sin") #图形标题
  plt.xlim(-15,15) #x轴显示范围
  plt.ylim(-2,120) #y轴显示范围
  #plt.xticks(x) #x轴刻度列表
  plt.show()
  langs = ['C', 'C++', 'Java', 'Python', 'PHP']
  students = [23,17,35,29,12]
  #绘制饼状图
  plt.pie(students, labels = langs,autopct='%1.2f%%')
  plt.show()
  #准备数据
  data = [[30, 25, 50, 20],
  [40, 23, 51, 17],
  [35, 22, 45, 19]]
  X = np.arange(4)
  #绘制柱状图
  plt.bar(X + 0.00, data[0], color = 'b', width = 0.25)
  plt.bar(X + 0.25, data[1], color = 'g', width = 0.25)
  plt.bar(X + 0.50, data[2], color = 'r', width = 0.25)
  plt.show()
  """
  import numpy as np
  import matplotlib.pyplot as plt

  # Stacked bar chart of medal counts per country: bronze at the bottom,
  # silver on top of bronze, gold on top of both.
  countries = ['USA', 'India', 'China', 'Russia', 'Germany']
  bronzes = np.array([38, 17, 26, 19, 15])
  silvers = np.array([37, 23, 18, 18, 10])
  golds = np.array([46, 27, 26, 19, 17])

  # The country names themselves are used as the x positions.
  # Alternative kept from the original, which yields [0, 1, 2, 3, 4]
  # (the underscore discards the value and keeps only the index):
  #ind = [x for x, _ in enumerate(countries)]
  ind = countries

  # (counts, legend label, colour, baseline) for each layer, drawn from the
  # top layer down. A baseline of None is the plt.bar default, i.e. the
  # bars start from zero.
  layers = (
      (golds, 'golds', 'gold', silvers + bronzes),
      (silvers, 'silvers', 'silver', bronzes),
      (bronzes, 'bronzes', '#CD853F', None),
  )
  for counts, medal, shade, baseline in layers:
      plt.bar(ind, counts, width=0.5, label=medal, color=shade,
              bottom=baseline)

  # Axis ticks, labels, legend and title.
  plt.xticks(ind, countries)
  plt.ylabel("Medals")
  plt.xlabel("Countries")
  plt.legend(loc="upper right")
  plt.title("2019 Olympics Top Scorers")
  plt.show()

结果:

声明:本文内容由网友自发贡献,转载请注明出处:【wpsshop】
推荐阅读
相关标签
  

闽ICP备14008679号