当前位置:   article > 正文

数据可视化_数据集可视化

数据集可视化

1. Iris数据集每个维度画盒图

  1. import matplotlib.pyplot as plt
  2. from matplotlib.font_manager import FontProperties
  3. import numpy as np
  4. import pandas as pd
  5. def dreaw(filename):
  6. '''
  7. 数据盒图
  8. :param filename: 数据集的相对地址
  9. :return:
  10. '''
  11. df = pd.read_csv(filename) # 读文件返回DataFrame对象
  12. df = pd.DataFrame(df).drop(labels=['class'], axis=1)
  13. data_arr = np.array(df)
  14. font_set = FontProperties(fname=r"venv/Lib/site-packages/matplotlib/mpl-data/fonts/ttf/方正仿宋.TTF", size=30) # 中文字体
  15. arr=[]
  16. data=[ "sepal length in cm","sepal width in cm","petal length in cm","petal width in cm"]
  17. colr=['#9999ff','#ef476f', '#ffd166', '#118AD5']
  18. for i in range(len(data_arr.T)):
  19. num = data_arr[:, i]
  20. arr.append(num)
  21. data1=dict(zip(data,arr))
  22. data1=pd.DataFrame(data1)
  23. plt.figure(figsize=(10,8))
  24. plt.grid()
  25. f=plt.boxplot(data1,labels=data,
  26. patch_artist=True, # 要求用自定义颜色填充盒形图,默认白色填充
  27. showmeans=True, # 以点的形式显示均值
  28. #boxprops = {'color':'black','facecolor':'#9999ff'}, # 设置箱体属性,填充色和边框色
  29. flierprops = {'marker':'o','markerfacecolor':'red','color':'black'}, # 设置异常值属性,点的形状、填充色和边框色
  30. meanprops = {'marker':'D','markerfacecolor':'indianred'}, # 设置均值点的属性,点的形状、填充色
  31. medianprops = {'linestyle':'--','color':'orange'}) # 设置中位数线的属性,线的类型和颜色)
  32. for box, colr in zip(f['boxes'], colr): # 对箱线图设置颜色
  33. box.set(color='black', linewidth=2)
  34. box.set(facecolor=colr)
  35. plt.xticks(rotation=20)
  36. plt.suptitle("Iris数据集盒图",fontproperties=font_set,fontweight='bold')
  37. plt.savefig("Iris数据集盒图")
  38. plt.show()
  39. dreaw('iris.csv')

运行图示:

代码分析:将data与arr两个数组构建成字典(data:arr),然后将字典转换为DataFrame类型,然后利用 matplotlib 库中的 boxplot 函数进行画图

2. Iris数据集找分位数画直方图

  1. import matplotlib.pyplot as plt
  2. from matplotlib.font_manager import FontProperties
  3. import numpy as np
  4. import pandas as pd
  5. import math
  6. from pylab import mpl
  7. def dreaw(filename):
  8. '''
  9. 分位数直方图
  10. :param filename: 数据集的相对地址
  11. :return:
  12. '''
  13. df = pd.read_csv(filename) # 读文件返回DataFrame对象
  14. df = pd.DataFrame(df).drop(labels=['class'], axis=1)
  15. data_arr = np.array(df)
  16. mpl.rcParams['font.sans-serif'] = ['SimHei']
  17. font_set = FontProperties(size=20)
  18. font_set1 = FontProperties(size=15) # 中文字体
  19. p = [1, 3, 7, 9]
  20. hear = ["sepal length in cm", "sepal width in cm", "petal length in cm", "petal width in cm"]
  21. ax = plt.figure(figsize=(25, 5), dpi=100)
  22. for i in range(len(data_arr.T)):
  23. num = data_arr[:, i]
  24. num3 = sorted(num)
  25. n = len(num3)
  26. n1 = int(math.ceil(n * 0.25))
  27. n2 = int(math.ceil(n * 0.75))
  28. b = np.split(num3, [n1, n2])
  29. a = [i + 1 for i in range(len(num3))]
  30. a = np.split(a, [n1, n2])
  31. # plt.text(0, 2, '25%分位数{},75%分位数{}'.format(num3[n1], num3[n2]), fontsize=15, bbox=dict(fc='yellow'))
  32. ax.add_subplot(1, 4, i + 1)
  33. xpoint = np.array([n1, n1])
  34. ypoint = np.array([0, num.max()])
  35. plt.plot(xpoint, ypoint, color='r')
  36. xpoint = np.array([n2, n2])
  37. ypoint = np.array([0, num.max()])
  38. plt.plot(xpoint, ypoint, color='b')
  39. for j in range(len(b)):
  40. x = np.array(a[j])
  41. y = np.array(b[j])
  42. # plt.grid()
  43. plt.bar(x, y)
  44. plt.xlabel('株数', fontproperties=font_set)
  45. plt.ylabel('量化', fontproperties=font_set)
  46. plt.title('{}(25%分位数{},75%分位数{})'.format(hear[i], n1, n2), fontproperties=font_set1)
  47. plt.legend(labels=['25%分位数', '75%分位数', 'x<25%', 'x>25%&&x<75%', 'x<75%'], loc='upper center')
  48. plt.savefig("Iris数据集分位数直方图")
  49. plt.show()
  50. dreaw('iris.csv')
运行图示:

3. iris数据每一维属性做一个饼图

  1. import matplotlib.pyplot as plt
  2. from matplotlib.font_manager import FontProperties
  3. import numpy as np
  4. import pandas as pd
  5. def dreawCsv(filename):
  6. '''
  7. 数据饼图
  8. :param filename: 数据集的相对地址
  9. :return:
  10. '''
  11. df = pd.read_csv(filename) # 读文件返回DataFrame对象
  12. df = pd.DataFrame(df).drop(labels=['class'],axis=1)
  13. data_arr = np.array(df)
  14. font_set = FontProperties(fname=r"venv/Lib/site-packages/matplotlib/mpl-data/fonts/ttf/方正仿宋.TTF", size=100)#中文字体
  15. for i in range(len(data_arr.T)):
  16. num = data_arr[:, i]
  17. num1=pd.value_counts(num)
  18. num2=sorted(set(num),reverse=False)
  19. plt.figure(figsize=(36,36))
  20. num1=sorted(num1,reverse=False)
  21. patches,l_texl,p_text=plt.pie(num1,labels=num2,radius=1,autopct='%.2f%%')
  22. for t in l_texl:#图对的字
  23. t.set_size(60)
  24. for t in p_text:#图内的字
  25. t.set_size(35)
  26. plt.axis('equal')
  27. plt.legend()
  28. plt.suptitle("第{}列数据饼图".format(i+1),fontproperties=font_set,fontweight='bold')
  29. plt.savefig('Iris数据集第{}列数据饼图'.format(i+1))
  30. plt.show()
  31. dreawCsv('iris.csv')

运行图示:

4. Flame数据集画散点图

  1. import matplotlib.pyplot as plt
  2. from matplotlib.font_manager import FontProperties
  3. import numpy as np
  4. import pandas as pd
  5. def dreawCsv(filename):
  6. '''
  7. 数据散点图
  8. :param filename: 数据集的相对地址
  9. :return:
  10. '''
  11. df = pd.read_csv(filename) # 读文件返回DataFrame对象
  12. df = pd.DataFrame(df)
  13. data_arr = np.array(df)
  14. font_set = FontProperties(fname=r"venv/Lib/site-packages/matplotlib/mpl-data/fonts/ttf/方正仿宋.TTF", size=100) # 中文字体
  15. for i in range(len(data_arr.T)):
  16. num = data_arr[:, i]
  17. plt.figure(figsize=(25, 15), dpi=100)
  18. plt.grid(b=True, color='y', linestyle='--', linewidth=2)
  19. # x=range(len(num))
  20. y = num
  21. x = range(data_arr.T.shape[1])
  22. # y=num
  23. plt.scatter(x, y, c=num, cmap='brg')
  24. plt.colorbar()
  25. plt.suptitle("第{}列数据饼散点图".format(i + 1), fontproperties=font_set, fontweight='bold')
  26. plt.savefig('第{}列数据散点图'.format(i + 1))
  27. plt.show()
  28. dreawCsv('Flame.csv')

运行图示:

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/羊村懒王/article/detail/402266
推荐阅读
相关标签
  

闽ICP备14008679号