赞
踩
plt.hist
(x, bins=10, range=None, normed=False, weights=None, cumulative=False, bottom=None,
histtype='bar', align='mid', orientation='vertical', rwidth=None, log=False, color=None, label=None,
stacked=False, hold=None, data=None, **kwargs)
参数讲解:(常配合密度图s.plot(kind='kde')
进行绘制)
bins
:箱子的数量(把数据范围等分成多少个区间),也可以传入区间边界的序列
normed
: 标准化(使直方图总面积为1;新版 Matplotlib 已移除该参数,请改用 density=True)
histtype
: 风格,bar,barstacked,step,stepfilled
orientation
: 水平还是垂直{'horizontal', 'vertical'}
align
: {'left', 'mid', 'right'}, optional(对齐方式)
1) 直方图 + 密度图
# Draw a normalized histogram of 1000 standard-normal samples and overlay
# its kernel density estimate as a dashed black line.
s = pd.Series(np.random.randn(1000))
s.hist(
    bins=20,
    histtype='bar',
    align='mid',
    orientation='vertical',
    alpha=0.5,
    # `normed` has been removed from Matplotlib; `density=True` is the
    # replacement and scales the bars so that the total area equals 1,
    # which puts the histogram on the same scale as the KDE curve.
    density=True,
)
s.plot(kind='kde', style='k--')
–> 输出的结果为:
2)堆叠直方图
使用DataFrame.plot.hist()
和Series.plot.hist()
方法绘制
# Four columns of 1000 normal samples, shifted by +1, 0, -1 and -2.
df = pd.DataFrame(
    {
        'a': np.random.randn(1000) + 1,
        'b': np.random.randn(1000),
        'c': np.random.randn(1000) - 1,
        'd': np.random.randn(1000) - 2,
    },
    columns=['a', 'b', 'c', 'd'],
)

# Keyword options for the stacked histogram, collected in one place.
stacked_hist_options = dict(
    stacked=True,
    bins=20,
    colormap='Greens_r',
    alpha=0.5,
    edgecolor='k',
    grid=True,
)
df.plot.hist(**stacked_hist_options)
–> 输出的结果为:
stacked
:是否堆叠
# DataFrame.hist draws one histogram per column (not stacked).
# Reuses the `df` built in the stacked-histogram example.
df.hist(edgecolor='k', bins=50)
–> 输出的结果为:(默认是不堆叠的)
plt.scatter
(x, y, s=20, c=None, marker='o', cmap=None, norm=None, vmin=None, vmax=None, alpha=None, linewidths=None, verts=None, edgecolors=None, hold=None, data=None, **kwargs)
参数讲解:
s
:散点的大小
c
:散点的颜色
vmin,vmax
:颜色映射的取值范围(标量),与 cmap 配合使用,决定 c 中的数值映射到颜色时的下限和上限
cmap
:colormap
1) plt.scatter()
散点图绘制
# Scatter plot of 1000 random points, coloured by their y value.
plt.figure(figsize=(8, 6))
x = np.random.randn(1000)
y = np.random.randn(1000)

# Marker size `s` is an area in points**2 and must be non-negative.
# randn() is negative about half the time, so take the absolute value;
# otherwise those markers get invalid sizes and are not drawn properly.
marker_sizes = np.abs(np.random.randn(1000)) * 100

plt.scatter(
    x,
    y,
    marker='.',
    s=marker_sizes,
    cmap='Reds',
    c=y,  # colour each point by its y value through the colormap
    edgecolor='k',
    alpha=0.8,
)
–> 输出的结果为:
2) pd.plotting.scatter_matrix()
矩阵散点图绘制
# Pairwise scatter plots of four random columns, with a KDE curve on the
# diagonal instead of a histogram.
df = pd.DataFrame(np.random.randn(100, 4), columns=list('abcd'))
pd.plotting.scatter_matrix(
    df,
    figsize=(10, 6),
    marker='o',
    diagonal='kde',
    alpha=0.5,
    range_padding=0.1,
)
–> 输出的结果为:
调用subplot()
创建子图时通过设置projection='polar'
,便可创建一个极坐标子图,然后调用plot()
在极坐标子图中绘图
1) 简单绘制极坐标图
# Plot the same data on polar axes (left) and Cartesian axes (right)
# so the two representations can be compared side by side.
fig = plt.figure(figsize=(6, 3))
s = pd.Series(np.arange(20))
theta = np.arange(0, 2 * np.pi, 0.02)
spiral = theta * 3

# An equivalent way to create polar axes: fig.add_subplot(111, polar=True)
ax1 = plt.subplot(121, projection='polar')
ax2 = plt.subplot(122)

# Left panel: polar coordinates (lw is the line width).
ax1.plot(theta, spiral, linestyle='--', lw=1)
ax1.plot(s, linestyle='--', marker='.', lw=2)

# Right panel: the same two data sets in Cartesian coordinates.
ax2.plot(theta, spiral, linestyle='--', lw=1)
ax2.plot(s)
–> 输出的结果为:(输出图形中的两条线段是对应的)
2) 极坐标参数设置
set_theta_direction()
:坐标轴正方向,默认逆时针
set_thetagrids()
:设置极坐标角度网格线显示及标签 → 网格和标签数量一致
set_rgrids()
:设置极径网格线显示,其中参数必须是正数
set_theta_offset()
:设置角度偏移,逆时针,弧度制
set_rlim()
:设置显示的极径范围
set_rmax()
:设置显示的极径最大值
set_rticks()
:设置极径刻度(即极径网格线)所在的位置
# Two polar axes showing the same curve: ax1 keeps the defaults, ax2
# demonstrates the polar-axis setters.
plt.figure(figsize=(8, 4))
theta = np.arange(0, 2 * np.pi, 0.02)
ax1 = plt.subplot(121, projection='polar')
ax2 = plt.subplot(122, projection='polar')
for ax in (ax1, ax2):
    ax.plot(theta, theta / 6, '--', lw=2)

# The calls below are kept in their original order, because later radial
# settings can override earlier ones.
ax2.set_theta_direction(-1)  # reverse the positive angular direction
ax2.set_thetagrids(  # one label per angular grid line
    np.arange(0.0, 360.0, 90),
    ['a', 'b', 'c', 'd'],
)
ax2.set_rgrids(np.arange(0.2, 2, 0.4))  # radial grid lines
ax2.set_theta_offset(np.pi / 2)  # angular offset, in radians
ax2.set_rlim(0.2, 1.2)  # displayed radial range
ax2.set_rmax(2)  # maximum displayed radius
ax2.set_rticks(np.arange(0.1, 1.5, 0.2))  # radial tick positions
–> 输出的结果为:(极其重要)
1) plt.plot()
简单雷达图
其实就是之前方法的综合
# Simple radar chart: three random series drawn on one polar axes.
# The original snippet was collapsed onto a single line, which made it a
# syntax error; the statements are restored one per line here.
plt.figure(figsize=(8, 4))
ax1 = plt.subplot(111, projection='polar')
ax1.set_title('radar map\n')  # chart title
ax1.set_rlim(0, 12)

# Sample data: three series of 10 random integers in [1, 10), placed at
# 10 equally spaced angles around the circle.
data1 = np.random.randint(1, 10, 10)
data2 = np.random.randint(1, 10, 10)
data3 = np.random.randint(1, 10, 10)
theta = np.arange(0, 2 * np.pi, 2 * np.pi / 10)

# Draw each series as a dotted-dashed outline with a translucent fill.
# The outlines are deliberately left open (the last point is not joined
# back to the first).
for series_label, series in (('data1', data1), ('data2', data2), ('data3', data3)):
    ax1.plot(theta, series, '.--', label=series_label)
    ax1.fill(theta, series, alpha=0.2)
–> 输出的结果为:(想一下,怎么把最后的线连接起来,也就是最后一个数据是第一个数据)
2) plt.polar()
绘制首尾相连的雷达图
# Closed radar chart drawn with plt.polar().
# The original snippet was collapsed onto a single line, which made it a
# syntax error; the statements are restored one per line here.
labels = np.array(['a', 'b', 'c', 'd', 'e', 'f'])  # axis labels
dataLenth = 6  # number of data points per series
data1 = np.random.randint(0, 10, 6)
data2 = np.random.randint(0, 10, 6)

# Split the circle into `dataLenth` equal angles.
angles = np.linspace(0, 2 * np.pi, dataLenth, endpoint=False)

# Close each polygon by appending its first point to the end.
data1 = np.concatenate((data1, [data1[0]]))
data2 = np.concatenate((data2, [data2[0]]))
angles = np.concatenate((angles, [angles[0]]))

plt.polar(angles, data1, 'o-', linewidth=1)  # outline of series 1
plt.fill(angles, data1, alpha=0.25)  # translucent fill
plt.polar(angles, data2, 'o-', linewidth=1)  # outline of series 2
plt.fill(angles, data2, alpha=0.25)  # translucent fill

# `angles` now holds 7 entries (the closing point duplicates the first),
# but there are only 6 labels. Drop the duplicate so that the number of
# grid angles matches the number of labels.
plt.thetagrids(angles[:-1] * 180 / np.pi, labels)
plt.ylim(0, 10)  # on polar axes the radial range is set through ylim
–> 输出的结果为:
也就是在极坐标图上绘制的柱状图
# Bar chart drawn on polar axes, with each bar coloured by its height.
plt.figure(figsize=(6, 3))
ax1 = plt.subplot(111, projection='polar')
ax1.set_title('radar map\n')  # chart title
ax1.set_rlim(0, 12)

# Sample data: 10 random integers in [1, 10) at 10 equally spaced angles.
data = np.random.randint(1, 10, 10)
theta = np.arange(0, 2 * np.pi, 2 * np.pi / 10)

bars = ax1.bar(theta, data, alpha=0.5, edgecolor='k')
# Use a separate loop variable so the bar container is not overwritten
# while it is being iterated.
for r, patch in zip(data, bars):
    patch.set_facecolor(plt.cm.jet(r / 10.))  # colour by bar height

# Angular grid lines every 90 degrees; the empty label list hides the text.
plt.thetagrids(np.arange(0.0, 360.0, 90), [])
箱型图(箱线图)是一种用来显示一组数据分散情况的统计图,常用来做异常值处理。包含一组数据的:最大值、最小值、中位数、上四分位数(Q3)、下四分位数(Q1)
① 中位数
→ 一组数据平均分成两份,中间的数
② 下四分位数Q1
→ 将序列从小到大排序后平均分成四份,取位于 1/4 处的数;位置的计算有(n+1)/4与(n-1)/4两种,一般使用(n+1)/4
③ 上四分位数Q3
→ 将序列从小到大排序后平均分成四份,取位于 3/4 处的数;位置为 3(n+1)/4,例如 n=8 时位置为 3×(8+1)/4=6.75
④ 内限
→ T形的盒须就是内限,最大值区间Q3+1.5IQR,最小值区间Q1-1.5IQR (IQR=Q3-Q1)
⑤ 外限
→ 外限位于内限之外,最大值区间Q3+3IQR,最小值区间Q1-3IQR (IQR=Q3-Q1)
异常值处理 → 内限之外 - 中度异常,外限之外 - 极度异常
1) df.plot.box()
绘制
参数讲解:
boxes
→ 箱线
whiskers
→ 分位数与error bar横线之间竖线的颜色
medians
→ 中位数线颜色
caps
→ error bar横线颜色
color
:样式填充
vert
:是否垂直,默认True
positions
:箱型图占位(各个箱体在坐标轴上的位置)
① 纵向箱型图绘制
# Vertical box plot of five random columns, drawn in the upper of two
# stacked subplots.
fig, axes = plt.subplots(2, 1, figsize=(10, 6))
df = pd.DataFrame(np.random.rand(10, 5), columns=list('ABCDE'))

# Colour of each box-plot component.
color = {
    'boxes': 'DarkGreen',
    'whiskers': 'DarkOrange',
    'medians': 'DarkBlue',
    'caps': 'Gray',
}
df.plot.box(ax=axes[0], color=color, grid=True, ylim=[0, 1.2])
–> 输出的结果为:
横向箱型图:vert=False
参数的使用
# Horizontal box plot in the lower subplot, with explicit box positions.
# Reuses `df`, `axes` and `color` from the vertical box-plot example.
df.plot.box(
    ax=axes[1],
    vert=False,
    positions=[1, 4, 5, 6, 8],
    color=color,
    grid=True,
)
–> 输出的结果为:
2) plt.boxplot()
绘制
plt.boxplot
(x, notch=None, sym=None, vert=None, whis=None, positions=None, widths=None, patch_artist=None, bootstrap=None,
usermedians=None, conf_intervals=None, meanline=None, showmeans=None, showcaps=None, showbox=None, showfliers=None, boxprops=None,
labels=None, flierprops=None, medianprops=None, meanprops=None, capprops=None, whiskerprops=None, manage_xticks=True, autorange=False,
zorder=None, hold=None, data=None)
参数讲解:
x
→ 数据
notch
→ 中间箱体是否缺口
sym
→ 异常点形状,参考marker
vert
→ 是否垂直
whis
→ 须线长度相对于 IQR 的倍数,默认1.5;也可以设置区间比如[5,95],代表强制上下边缘为数据95%和5%位置
patch_artist
→ 上下四分位框内是否填充,True为填充
meanline,showmeans
→ 是否有均值线及其形状
showbox
→ 是否显示箱线
showcaps
→ 是否显示边缘线
showfliers
→ 是否显示异常值
return_type
→ 返回数据的类型(该参数属于 pandas 的 df.boxplot(),可取 'axes'、'dict'、'both')
meanprops
→ 设置均值的属性,如点的大小、颜色等
# Box plot through DataFrame.boxplot, returning the drawn artists so they
# can be restyled afterwards.
# The original snippet was collapsed onto a single line and was missing
# the comma after the `meanprops` dict, so it could not be parsed.
df = pd.DataFrame(np.random.rand(10, 5), columns=['A', 'B', 'C', 'D', 'E'])
plt.figure(figsize=(10, 4))

f = df.boxplot(
    sym='o',  # marker used for outliers
    vert=True,  # vertical boxes
    whis=1.5,  # whisker length as a multiple of the IQR
    patch_artist=True,  # draw boxes as filled patches
    meanline=False,  # show the mean as a marker, not a line
    showmeans=True,
    meanprops={'marker': 'D', 'markerfacecolor': 'red'},
    showbox=True,
    showcaps=True,
    showfliers=True,
    notch=False,
    return_type='dict',  # return a dict of the drawn artists
)
plt.title('boxplot')
print(f)
–> 输出的结果为:(会返回可操作的对象)
{'caps': [<matplotlib.lines.Line2D object at 0x0000000010042CF8>, <matplotlib.lines.Line2D object at 0x0000000010047BE0>, <matplotlib.lines.Line2D object at 0x0000000010057C88>, <matplotlib.lines.Line2D object at 0x000000001005DB70>, <matplotlib.lines.Line2D object at 0x000000001006EC18>, <matplotlib.lines.Line2D object at 0x0000000010074B00>, <matplotlib.lines.Line2D object at 0x0000000010085BA8>, <matplotlib.lines.Line2D object at 0x000000001008BA90>, <matplotlib.lines.Line2D object at 0x00000000104896D8>, <matplotlib.lines.Line2D object at 0x00000000104998D0>],
'whiskers': [<matplotlib.lines.Line2D object at 0x0000000010042198>, <matplotlib.lines.Line2D object at 0x0000000010042B70>, <matplotlib.lines.Line2D object at 0x0000000010057208>, <matplotlib.lines.Line2D object at 0x0000000010057B00>, <matplotlib.lines.Line2D object at 0x000000001006E198>, <matplotlib.lines.Line2D object at 0x000000001006EA90>, <matplotlib.lines.Line2D object at 0x0000000010085128>, <matplotlib.lines.Line2D object at 0x0000000010085A20>, <matplotlib.lines.Line2D object at 0x000000001009B0B8>, <matplotlib.lines.Line2D object at 0x000000001009B9B0>],
'medians': [<matplotlib.lines.Line2D object at 0x0000000010047D68>, <matplotlib.lines.Line2D object at 0x000000001005DCF8>, <matplotlib.lines.Line2D object at 0x0000000010074C88>, <matplotlib.lines.Line2D object at 0x000000001008BC18>, <matplotlib.lines.Line2D object at 0x0000000010497828>],
'fliers': [<matplotlib.lines.Line2D object at 0x000000001004CD30>, <matplotlib.lines.Line2D object at 0x0000000010062CC0>, <matplotlib.lines.Line2D object at 0x000000001007BC50>, <matplotlib.lines.Line2D object at 0x0000000010090BE0>, <matplotlib.lines.Line2D object at 0x00000000100A37B8>],
'means': [<matplotlib.lines.Line2D object at 0x000000001004C5C0>, <matplotlib.lines.Line2D object at 0x0000000010062550>, <matplotlib.lines.Line2D object at 0x000000001007B4E0>, <matplotlib.lines.Line2D object at 0x0000000010090470>, <matplotlib.lines.Line2D object at 0x00000000102CFC18>],
'boxes': [<matplotlib.patches.PathPatch object at 0x00000000104BAB00>, <matplotlib.patches.PathPatch object at 0x0000000010051C50>, <matplotlib.patches.PathPatch object at 0x0000000010069B00>, <matplotlib.patches.PathPatch object at 0x000000001007EA90>, <matplotlib.patches.PathPatch object at 0x0000000010096B00>]}
接着就可以进行精细设置了
boxes
→ 箱线
medians
→ 中位值的横线,
whiskers
→从box到error bar之间的竖线.
fliers
→ 异常值
caps, error bar
→横线
means
→ 均值的横线,
# Restyle the individual box-plot artists held in `f`, the dict returned
# by df.boxplot(..., return_type='dict').
# The loop bodies had lost their indentation; it is restored here.
for box in f['boxes']:
    box.set(color='b', linewidth=1)  # box outline colour
    box.set(facecolor='b', alpha=0.5)  # box fill colour
for whisker in f['whiskers']:
    whisker.set(color='k', linewidth=0.5, linestyle='-')
for cap in f['caps']:
    cap.set(color='gray', linewidth=2)
for median in f['medians']:
    median.set(color='DarkBlue', linewidth=2)
for flier in f['fliers']:
    flier.set(marker='o', color='y', alpha=0.5)
–> 输出的结果为:(箱型图的参数较多,所以可以按照自己的要求绘制出想要的样式)
3) 分类箱型图
column
:按照数据的列分子图(指定要绘制的列)
by
:按照列分组做箱型图
单列分类箱型图
# Two numeric columns plus two categorical columns used for grouping.
df = pd.DataFrame(np.random.rand(10, 2), columns=['Col1', 'Col2'])
df['X'] = pd.Series(list('AAAAABBBBB'))
df['Y'] = pd.Series(list('ABABABABAB'))

# Box plots grouped by the values of column 'X'.
df.boxplot(by='X')
–> 输出的结果为:
多列分类箱型图
# Box plots of Col1 and Col2, grouped by each (X, Y) combination.
# Reuses the `df` built in the single-column grouping example.
df.boxplot(by=['X', 'Y'], column=['Col1', 'Col2'])
–> 输出的结果为:
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。