当前位置:   article > 正文

25个常用Matplotlib图的Python代码(四):棒棒糖图、包点图、坡度图、哑铃图、连续变量的直方图_python哑铃图

python哑铃图

1.棒棒糖图

棒棒糖图以一种更美观的方式实现与有序条形图相同的用途:按数值大小排序并比较各个类别。

  # Lollipop chart: mean city mileage ('cty') per manufacturer, ascending.
  # NOTE: assumes `pd` (pandas) and `plt` (matplotlib.pyplot) were imported
  # earlier; this listing does not import them itself.

  # Prepare Data
  df_raw = pd.read_csv("https://github.com/selva86/datasets/raw/master/mpg_ggplot2.csv")
  # Average only the numeric 'cty' column. Applying mean() to the whole group
  # frame would also try to average the string 'manufacturer' column.
  df = df_raw.groupby('manufacturer')['cty'].mean().sort_values().reset_index()

  # Draw plot: one vertical stem per manufacturer with a dot on its tip.
  fig, ax = plt.subplots(figsize=(16, 10), dpi=80)
  ax.vlines(x=df.index, ymin=0, ymax=df.cty, color='firebrick', alpha=0.7, linewidth=2)
  ax.scatter(x=df.index, y=df.cty, s=75, color='firebrick', alpha=0.7)

  # Title, Label, Ticks and Ylim
  # The plotted column is 'cty', so the title names city (not highway) mileage.
  ax.set_title('Lollipop Chart for City Mileage', fontdict={'size': 22})
  ax.set_ylabel('Miles Per Gallon')
  ax.set_xticks(df.index)
  ax.set_xticklabels(df.manufacturer.str.upper(), rotation=60,
                     fontdict={'horizontalalignment': 'right', 'size': 12})
  ax.set_ylim(0, 30)

  # Annotate: print the rounded mean just above each dot.
  for row in df.itertuples():
      ax.text(row.Index, row.cty + .5, s=str(round(row.cty, 2)),
              horizontalalignment='center', verticalalignment='bottom', fontsize=14)

  plt.show()

2.包点图(点图,Dot Plot)

点图表传达了项目的排名顺序。由于它沿水平轴对齐,因此您可以更容易地看到点彼此之间的距离。

  # Dot plot: mean city mileage ('cty') per manufacturer, ranked bottom-to-top.
  # NOTE: assumes `pd` (pandas) and `plt` (matplotlib.pyplot) were imported
  # earlier; this listing does not import them itself.

  # Prepare Data
  df_raw = pd.read_csv("https://github.com/selva86/datasets/raw/master/mpg_ggplot2.csv")
  # Average only the numeric 'cty' column. Applying mean() to the whole group
  # frame would also try to average the string 'manufacturer' column.
  df = df_raw.groupby('manufacturer')['cty'].mean().sort_values().reset_index()

  # Draw plot: a dash-dot guide line per row, with the value marked by a dot.
  fig, ax = plt.subplots(figsize=(16, 10), dpi=80)
  ax.hlines(y=df.index, xmin=11, xmax=26, color='gray', alpha=0.7, linewidth=1,
            linestyles='dashdot')
  ax.scatter(y=df.index, x=df.cty, s=75, color='firebrick', alpha=0.7)

  # Title, Label, Ticks and Xlim
  # The plotted column is 'cty', so the title names city (not highway) mileage.
  ax.set_title('Dot Plot for City Mileage', fontdict={'size': 22})
  ax.set_xlabel('Miles Per Gallon')
  ax.set_yticks(df.index)
  ax.set_yticklabels(df.manufacturer.str.title(), fontdict={'horizontalalignment': 'right'})
  ax.set_xlim(10, 27)

  plt.show()

3.坡度图 

坡度图(又称斜率图)最适合比较给定人/项目的“之前”和“之后”位置。

  # Slope chart: one segment per row, from the '1952' value to the '1957' value.
  # NOTE: assumes `pd` (pandas), `np` (numpy) and `plt` (matplotlib.pyplot)
  # were imported earlier; this listing does not import them itself.
  import matplotlib.lines as mlines

  # Import Data
  df = pd.read_csv("https://raw.githubusercontent.com/selva86/datasets/master/gdppercap.csv")

  # Text shown next to each end point: "<continent>, <rounded value>".
  left_label = [str(c) + ', ' + str(round(y)) for c, y in zip(df.continent, df['1952'])]
  right_label = [str(c) + ', ' + str(round(y)) for c, y in zip(df.continent, df['1957'])]

  # draw line
  # https://stackoverflow.com/questions/36470343/how-to-draw-a-line-with-matplotlib/36479941
  def newline(p1, p2, color='black'):
      """Add a segment with round end markers from p1 to p2 on the current axes.

      p1 and p2 are (x, y) pairs. The segment is drawn red when y decreases
      from p1 to p2 and green otherwise. `color` is accepted but not used; it
      is kept so the signature stays unchanged.

      Returns the created Line2D.
      """
      ax = plt.gca()
      segment = mlines.Line2D([p1[0], p2[0]], [p1[1], p2[1]],
                              color='red' if p1[1] - p2[1] > 0 else 'green',
                              marker='o', markersize=6)
      ax.add_line(segment)
      return segment

  fig, ax = plt.subplots(1, 1, figsize=(14, 14), dpi=80)

  # Vertical Lines: dotted guides at the "before" (x=1) and "after" (x=3) columns.
  ax.vlines(x=1, ymin=500, ymax=13000, color='black', alpha=0.7, linewidth=1, linestyles='dotted')
  ax.vlines(x=3, ymin=500, ymax=13000, color='black', alpha=0.7, linewidth=1, linestyles='dotted')

  # Points
  ax.scatter(y=df['1952'], x=np.repeat(1, df.shape[0]), s=10, color='black', alpha=0.7)
  ax.scatter(y=df['1957'], x=np.repeat(3, df.shape[0]), s=10, color='black', alpha=0.7)

  # Line Segments and Annotation (reuses the labels built above)
  for p1, p2, left, right in zip(df['1952'], df['1957'], left_label, right_label):
      newline([1, p1], [3, p2])
      ax.text(1 - 0.05, p1, left, horizontalalignment='right',
              verticalalignment='center', fontdict={'size': 14})
      ax.text(3 + 0.05, p2, right, horizontalalignment='left',
              verticalalignment='center', fontdict={'size': 14})

  # 'Before' and 'After' Annotations
  ax.text(1 - 0.05, 13000, 'BEFORE', horizontalalignment='right',
          verticalalignment='center', fontdict={'size': 18, 'weight': 700})
  ax.text(3 + 0.05, 13000, 'AFTER', horizontalalignment='left',
          verticalalignment='center', fontdict={'size': 18, 'weight': 700})

  # Decoration
  ax.set_title("Slopechart: Comparing GDP Per Capita between 1952 vs 1957", fontdict={'size': 22})
  ax.set(xlim=(0, 4), ylim=(0, 14000), ylabel='Mean GDP Per Capita')
  ax.set_xticks([1, 3])
  ax.set_xticklabels(["1952", "1957"])
  plt.yticks(np.arange(500, 13000, 2000), fontsize=12)

  # Lighten borders: make all four spines fully transparent.
  for side in ("top", "bottom", "right", "left"):
      plt.gca().spines[side].set_alpha(.0)

  plt.show()

4.哑铃图

 哑铃图传达各种项目的“前”和“后”位置以及项目的排序。如果您想要将特定项目/计划对不同对象的影响可视化,那么它非常有用。

  # Dumbbell chart: per row, a segment joining the 'pct_2013' and 'pct_2014'
  # values, with rows ordered by 'pct_2014'.
  # NOTE: assumes `pd` (pandas) and `plt` (matplotlib.pyplot) were imported
  # earlier; this listing does not import them itself.
  import matplotlib.lines as mlines

  # Import Data
  df = pd.read_csv("https://raw.githubusercontent.com/selva86/datasets/master/health.csv")
  df.sort_values('pct_2014', inplace=True)
  # Drop the old index so row positions 0..n-1 follow the sorted order.
  # Plotting against the saved pre-sort index would undo the sort.
  df.reset_index(drop=True, inplace=True)

  # Func to draw line segment
  def newline(p1, p2, color='black'):
      """Add a sky-blue segment from p1 to p2 on the current axes.

      p1 and p2 are (x, y) pairs. `color` is accepted but not used; it is
      kept so the signature stays unchanged.

      Returns the created Line2D.
      """
      ax = plt.gca()
      segment = mlines.Line2D([p1[0], p2[0]], [p1[1], p2[1]], color='skyblue')
      ax.add_line(segment)
      return segment

  # Figure and Axes
  fig, ax = plt.subplots(1, 1, figsize=(14, 14), facecolor='#f7f7f7', dpi=80)

  # Vertical Lines: dotted guides at each labelled percentage.
  for x_guide in (.05, .10, .15, .20):
      ax.vlines(x=x_guide, ymin=0, ymax=26, color='black', alpha=1, linewidth=1,
                linestyles='dotted')

  # Points
  ax.scatter(y=df.index, x=df['pct_2013'], s=50, color='#0e668b', alpha=0.7)
  ax.scatter(y=df.index, x=df['pct_2014'], s=50, color='#a3c4dc', alpha=0.7)

  # Line Segments
  for i, p1, p2 in zip(df.index, df['pct_2013'], df['pct_2014']):
      newline([p1, i], [p2, i])

  # Decoration
  ax.set_facecolor('#f7f7f7')
  ax.set_title("Dumbell Chart: Pct Change - 2013 vs 2014", fontdict={'size': 22})
  # NOTE(review): this ylabel looks copied from the GDP slope chart; the y axis
  # here is the row position after sorting -- confirm the intended label.
  ax.set(xlim=(0, .25), ylim=(-1, 27), ylabel='Mean GDP Per Capita')
  ax.set_xticks([.05, .1, .15, .20])
  # Labels must match the tick positions above one-to-one.
  ax.set_xticklabels(['5%', '10%', '15%', '20%'])

  plt.show()

5.连续变量的直方图 

直方图显示给定变量的频率分布。下面的图按分类变量对频率条进行分组(着色),从而更好地同时了解连续变量和分类变量。

  # Stacked histogram of a continuous column, one colour per category.
  # NOTE: assumes `pd` (pandas) and `plt` (matplotlib.pyplot) were imported
  # earlier; this listing does not import them itself.

  # Import Data
  df = pd.read_csv("https://github.com/selva86/datasets/raw/master/mpg_ggplot2.csv")

  # Prepare data: one list of x values per group, plus the matching group names.
  x_var = 'displ'
  groupby_var = 'class'
  df_agg = df.loc[:, [x_var, groupby_var]].groupby(groupby_var)
  group_names = [str(name) for name, _ in df_agg]
  vals = [frame[x_var].values.tolist() for _, frame in df_agg]

  # Draw
  plt.figure(figsize=(16, 9), dpi=80)
  # Spread the groups evenly over the colormap; max(..., 1) avoids a division
  # by zero when there is only one group.
  color_span = float(max(len(vals) - 1, 1))
  colors = [plt.cm.Spectral(i / color_span) for i in range(len(vals))]
  # Passing label= ties each legend entry to its own dataset and colour.
  n, bins, patches = plt.hist(vals, 30, stacked=True, density=False,
                              color=colors, label=group_names)

  # Decoration
  plt.legend()
  plt.title(f"Stacked Histogram of ${x_var}$ colored by ${groupby_var}$", fontsize=22)
  plt.xlabel(x_var)
  plt.ylabel("Frequency")
  plt.ylim(0, 25)
  # Label every third bin edge to keep the axis readable.
  plt.xticks(ticks=bins[::3], labels=[round(b, 1) for b in bins[::3]])

  plt.show()

 

声明:本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:【wpsshop博客】
推荐阅读
相关标签
  

闽ICP备14008679号