赞
踩
棒棒糖图的用途与有序条形图类似,但在视觉上更加美观。
# Prepare Data
# Average city mileage ('cty') per manufacturer, ordered from lowest to highest.
# The numeric column is selected before aggregating so that the string-valued
# 'manufacturer' column never reaches mean(); newer pandas versions raise on
# non-numeric columns instead of silently dropping them.
df_raw = pd.read_csv("https://github.com/selva86/datasets/raw/master/mpg_ggplot2.csv")
df = (
    df_raw.groupby('manufacturer', as_index=False)['cty']
    .mean()
    .sort_values('cty')
    .reset_index(drop=True)  # positions 0..n-1 double as x coordinates below
)

# Draw plot
# Each lollipop is a thin stem (vlines) topped by a marker (scatter).
fig, ax = plt.subplots(figsize=(16, 10), dpi=80)
ax.vlines(x=df.index, ymin=0, ymax=df.cty, color='firebrick', alpha=0.7, linewidth=2)
ax.scatter(x=df.index, y=df.cty, s=75, color='firebrick', alpha=0.7)

# Title, Label, Ticks and Ylim
# The plotted column is 'cty' (city mileage in the mpg dataset), so the title
# names city mileage rather than highway mileage.
ax.set_title('Lollipop Chart for City Mileage', fontdict={'size': 22})
ax.set_ylabel('Miles Per Gallon')
ax.set_xticks(df.index)
ax.set_xticklabels(df.manufacturer.str.upper(), rotation=60, fontdict={'horizontalalignment': 'right', 'size': 12})
ax.set_ylim(0, 30)

# Annotate
# Print the rounded mean just above the head of every lollipop.
for row in df.itertuples():
    ax.text(row.Index, row.cty + .5, s=round(row.cty, 2), horizontalalignment='center', verticalalignment='bottom', fontsize=14)

plt.show()
点图表传达了项目的排名顺序。由于它沿水平轴对齐,因此您可以更容易地看到点彼此之间的距离。
# Prepare Data
# Average city mileage ('cty') per manufacturer, ordered from lowest to highest.
# The numeric column is selected before aggregating so that the string-valued
# 'manufacturer' column never reaches mean(); newer pandas versions raise on
# non-numeric columns instead of silently dropping them.
df_raw = pd.read_csv("https://github.com/selva86/datasets/raw/master/mpg_ggplot2.csv")
df = (
    df_raw.groupby('manufacturer', as_index=False)['cty']
    .mean()
    .sort_values('cty')
    .reset_index(drop=True)  # positions 0..n-1 double as y coordinates below
)

# Draw plot
# One dash-dot guide line per manufacturer, with a dot at its mean mileage.
fig, ax = plt.subplots(figsize=(16, 10), dpi=80)
ax.hlines(y=df.index, xmin=11, xmax=26, color='gray', alpha=0.7, linewidth=1, linestyles='dashdot')
ax.scatter(y=df.index, x=df.cty, s=75, color='firebrick', alpha=0.7)

# Title, Label, Ticks and Xlim
# The plotted column is 'cty' (city mileage in the mpg dataset), so the title
# names city mileage rather than highway mileage.
ax.set_title('Dot Plot for City Mileage', fontdict={'size': 22})
ax.set_xlabel('Miles Per Gallon')
ax.set_yticks(df.index)
ax.set_yticklabels(df.manufacturer.str.title(), fontdict={'horizontalalignment': 'right'})
ax.set_xlim(10, 27)
plt.show()
斜率图最适合比较给定人/项目的“之前”和“之后”位置。
import matplotlib.lines as mlines

# Import Data
df = pd.read_csv("https://raw.githubusercontent.com/selva86/datasets/master/gdppercap.csv")

# Text shown next to each end point: "<continent>, <rounded value>".
left_label = [str(c) + ', ' + str(round(y)) for c, y in zip(df.continent, df['1952'])]
right_label = [str(c) + ', ' + str(round(y)) for c, y in zip(df.continent, df['1957'])]


# draw line
# https://stackoverflow.com/questions/36470343/how-to-draw-a-line-with-matplotlib/36479941
def newline(p1, p2, color=None):
    """Add a marker-ended line segment from p1 to p2 to the current axes.

    Args:
        p1: (x, y) of the start point.
        p2: (x, y) of the end point.
        color: Line colour. When None, the segment is drawn red if the y
            value drops from p1 to p2 and green otherwise.

    Returns:
        The Line2D artist that was added.
    """
    if color is None:
        color = 'red' if p1[1] - p2[1] > 0 else 'green'
    segment = mlines.Line2D([p1[0], p2[0]], [p1[1], p2[1]], color=color, marker='o', markersize=6)
    plt.gca().add_line(segment)
    return segment


fig, ax = plt.subplots(1, 1, figsize=(14, 14), dpi=80)

# Vertical Lines
# Dotted guides at x=1 (before) and x=3 (after).
ax.vlines(x=[1, 3], ymin=500, ymax=13000, color='black', alpha=0.7, linewidth=1, linestyles='dotted')

# Points
ax.scatter(y=df['1952'], x=np.repeat(1, df.shape[0]), s=10, color='black', alpha=0.7)
ax.scatter(y=df['1957'], x=np.repeat(3, df.shape[0]), s=10, color='black', alpha=0.7)

# Line Segments and Annotation
# Reuse the labels built above instead of formatting the same text again.
for p1, p2, left_text, right_text in zip(df['1952'], df['1957'], left_label, right_label):
    newline([1, p1], [3, p2])
    ax.text(1 - 0.05, p1, left_text, horizontalalignment='right', verticalalignment='center', fontdict={'size': 14})
    ax.text(3 + 0.05, p2, right_text, horizontalalignment='left', verticalalignment='center', fontdict={'size': 14})

# 'Before' and 'After' Annotations
ax.text(1 - 0.05, 13000, 'BEFORE', horizontalalignment='right', verticalalignment='center', fontdict={'size': 18, 'weight': 700})
ax.text(3 + 0.05, 13000, 'AFTER', horizontalalignment='left', verticalalignment='center', fontdict={'size': 18, 'weight': 700})

# Decoration
ax.set_title("Slopechart: Comparing GDP Per Capita between 1952 vs 1957", fontdict={'size': 22})
ax.set(xlim=(0, 4), ylim=(0, 14000), ylabel='Mean GDP Per Capita')
ax.set_xticks([1, 3])
ax.set_xticklabels(["1952", "1957"])
plt.yticks(np.arange(500, 13000, 2000), fontsize=12)

# Lighten borders
# Make all four spines fully transparent.
for side in ("top", "bottom", "right", "left"):
    plt.gca().spines[side].set_alpha(.0)
plt.show()
哑铃图传达各种项目的“前”和“后”位置以及项目的排序。如果您想要将特定项目/计划对不同对象的影响可视化,那么它非常有用。
import matplotlib.lines as mlines

# Import Data
# Sort by the 2014 value and renumber the rows, so the row position (used as
# the y coordinate below) is the item's rank. Keeping the pre-sort index and
# plotting against it would leave the sort without any visible effect.
df = pd.read_csv("https://raw.githubusercontent.com/selva86/datasets/master/health.csv")
df = df.sort_values('pct_2014').reset_index(drop=True)


# Func to draw line segment
def newline(p1, p2, color='skyblue'):
    """Add a line segment from p1 to p2 to the current axes.

    Args:
        p1: (x, y) of the start point.
        p2: (x, y) of the end point.
        color: Line colour; defaults to the sky blue used for the bars.

    Returns:
        The Line2D artist that was added.
    """
    segment = mlines.Line2D([p1[0], p2[0]], [p1[1], p2[1]], color=color)
    plt.gca().add_line(segment)
    return segment


# Figure and Axes
fig, ax = plt.subplots(1, 1, figsize=(14, 14), facecolor='#f7f7f7', dpi=80)

# Vertical Lines
# One dotted guide per x tick; the same positions drive the tick labels below.
pct_ticks = [.05, .10, .15, .20]
ax.vlines(x=pct_ticks, ymin=0, ymax=26, color='black', alpha=1, linewidth=1, linestyles='dotted')

# Points
# Darker dot = 2013 value, lighter dot = 2014 value.
ax.scatter(y=df.index, x=df['pct_2013'], s=50, color='#0e668b', alpha=0.7)
ax.scatter(y=df.index, x=df['pct_2014'], s=50, color='#a3c4dc', alpha=0.7)

# Line Segments
# Join the two dots of every row with a horizontal bar.
for i, p1, p2 in zip(df.index, df['pct_2013'], df['pct_2014']):
    newline([p1, i], [p2, i])

# Decoration
ax.set_facecolor('#f7f7f7')
ax.set_title("Dumbell Chart: Pct Change - 2013 vs 2014", fontdict={'size': 22})
# NOTE(review): the y axis carries row ranks, so this 'Mean GDP Per Capita'
# label looks copy-pasted from another chart; confirm the intended label.
ax.set(xlim=(0, .25), ylim=(-1, 27), ylabel='Mean GDP Per Capita')
ax.set_xticks(pct_ticks)
# Derive the labels from the tick positions so the two cannot drift apart:
# 5%, 10%, 15%, 20%.
ax.set_xticklabels([f'{tick:.0%}' for tick in pct_ticks])
plt.show()
直方图显示给定变量的频率分布。下面的图示按分类变量对频率条进行分组(堆叠),从而可以同时更好地了解连续变量和分类变量。
# Import Data
df = pd.read_csv("https://github.com/selva86/datasets/raw/master/mpg_ggplot2.csv")

# Prepare data
# Collect, for every category of groupby_var, its name and its x_var values.
# Both lists come from the same iteration, so names and data cannot be
# paired up incorrectly.
x_var = 'displ'
groupby_var = 'class'
df_agg = df.loc[:, [x_var, groupby_var]].groupby(groupby_var)
group_names = []
vals = []
for group_name, group_df in df_agg:
    group_names.append(group_name)
    vals.append(group_df[x_var].values.tolist())

# Draw
plt.figure(figsize=(16, 9), dpi=80)
# Spread the groups evenly over the Spectral colormap; max(..., 1) avoids a
# division by zero when there is only one group.
color_span = float(max(len(vals) - 1, 1))
colors = [plt.cm.Spectral(i / color_span) for i in range(len(vals))]
# Passing label= ties each legend entry to the dataset it describes.
n, bins, patches = plt.hist(vals, 30, stacked=True, density=False, color=colors, label=group_names)

# Decoration
plt.legend()
plt.title(f"Stacked Histogram of ${x_var}$ colored by ${groupby_var}$", fontsize=22)
plt.xlabel(x_var)
plt.ylabel("Frequency")
plt.ylim(0, 25)
# Label every third bin edge to keep the x axis readable.
plt.xticks(ticks=bins[::3], labels=[round(b, 1) for b in bins[::3]])
plt.show()
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。