赞
踩
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif']=['SimHei']#显示中文标签
plt.rcParams['axes.unicode_minus']=False #显示负号
%matplotlib inline
f1=pd.read_excel('tips.xls')
f1.head()
| | total_bill | tip | sex | smoker | day | time | size |
---|---|---|---|---|---|---|---|
0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 |
1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 |
2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 |
3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 |
4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 |
# Summary statistics (count, mean, std, min, quartiles, max) for the table.
f1.describe()
| | total_bill | tip | size |
---|---|---|---|
count | 244.000000 | 244.000000 | 244.000000 |
mean | 19.785943 | 2.998279 | 2.569672 |
std | 8.902412 | 1.383638 | 0.951100 |
min | 3.070000 | 1.000000 | 1.000000 |
25% | 13.347500 | 2.000000 | 2.000000 |
50% | 17.795000 | 2.900000 | 2.000000 |
75% | 24.127500 | 3.562500 | 3.000000 |
max | 50.810000 | 10.000000 | 6.000000 |
# Replace the column headers positionally with Chinese labels; the trailing
# comments give the header each label takes over in the preview of the raw file.
f1.columns = [
    '总金额',    # total_bill
    '小费金额',  # tip
    '性别',      # sex
    '吸烟',      # smoker
    '星期',      # day
    '时间',      # time
    '人数',      # size
]
f1.head()
| | 总金额 | 小费金额 | 性别 | 吸烟 | 星期 | 时间 | 人数 |
---|---|---|---|---|---|---|---|
0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 |
1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 |
2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 |
3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 |
4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 |
# Per-person spend: total bill divided by party size, rounded to 2 decimals.
f1['人均消费'] = f1['总金额'].div(f1['人数']).round(2)
f1.head()
| | 总金额 | 小费金额 | 性别 | 吸烟 | 星期 | 时间 | 人数 | 人均消费 |
---|---|---|---|---|---|---|---|---|
0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 | 8.49 |
1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 | 3.45 |
2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 | 7.00 |
3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 | 11.84 |
4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 | 6.15 |
# First five rows for male smokers whose total bill is greater than 15.
# NOTE(review): the recorded output for this cell already contains a 慷慨度
# column, which is only created further down the file, so the cell was
# presumably executed out of order.
f1[
    (f1['吸烟'] == 'Yes')
    & (f1['性别'] == 'Male')
    & (f1['总金额'] > 15)
].head()
| | 总金额 | 小费金额 | 性别 | 吸烟 | 星期 | 时间 | 人数 | 人均消费 | 慷慨度 |
---|---|---|---|---|---|---|---|---|---|
56 | 38.01 | 3.00 | Male | Yes | Sat | Dinner | 4 | 9.50 | 0.073153 |
60 | 20.29 | 3.21 | Male | Yes | Sat | Dinner | 2 | 10.14 | 0.136596 |
63 | 18.29 | 3.76 | Male | Yes | Sat | Dinner | 4 | 4.57 | 0.170522 |
69 | 15.01 | 2.09 | Male | Yes | Sat | Dinner | 2 | 7.50 | 0.122222 |
76 | 17.92 | 3.08 | Male | Yes | Sat | Dinner | 2 | 8.96 | 0.146667 |
# Scatter plot of the tip amount (y) against the total bill (x).
f1.plot(
    kind='scatter',
    x='总金额',
    y='小费金额',
    color='DarkBlue',
    label='小费金额与总金额的关系',
)
# Mean tip per gender, as a bar chart.
# A Series plot draws on the current axes whenever a figure is already open,
# so each chart is given its own fresh axes; otherwise the two bar charts
# would be drawn on top of each other when run in the same cell or script.
average_tip = f1.groupby('性别')['小费金额'].mean()
average_tip.plot.bar(ax=plt.figure().add_subplot(111))

# Mean tip per day of the week, as a separate bar chart.
average_tip = f1.groupby('星期')['小费金额'].mean()
average_tip.plot.bar(ax=plt.figure().add_subplot(111))
# Build the generosity index: the tip as a fraction of bill plus tip.
f1['慷慨度'] = f1['小费金额'] / (f1['总金额'] + f1['小费金额'])

# Mean generosity for each (gender, smoker) combination.
r = f1.groupby(['性别', '吸烟'])['慷慨度'].mean()

# r is a Series, so the bars are its values against its (gender, smoker)
# index. The x=/y= selectors only apply to DataFrame plots and are omitted.
# An explicit new axes keeps this chart from being drawn over an earlier plot.
r.plot(kind='bar', ax=plt.figure().add_subplot(111))
# Mean tip per meal time, as a bar chart.
time = f1.groupby('时间')['小费金额'].mean()
# A Series plot reuses the current axes when a figure is already open, so
# draw on a fresh axes to keep this chart separate from any earlier plot.
time.plot.bar(ax=plt.figure().add_subplot(111))
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。