赞
踩
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
# Silence every warning for the rest of the session.
warnings.filterwarnings("ignore")
# Render minus signs with the ASCII hyphen rather than the Unicode minus glyph.
plt.rcParams['axes.unicode_minus'] = False
# Dark-grid seaborn theme. SimHei is listed first, presumably so the Chinese
# titles and legend labels used further down can be rendered -- confirm the
# font is installed on the machine running this.
sns.set_style('darkgrid', {'font.sans-serif':['SimHei', 'Arial']})
# Per-player box-score rows (one row per player per game).
df_player = pd.read_csv(r"C:\Users\imqqdong\Downloads\NBAdata\season_2021_detailed.csv")
# Per-game results (teams, scores, attendance, ...).
df_team = pd.read_csv(r"C:\Users\imqqdong\Downloads\NBAdata\season_2021_basic.csv")
# Preview the first rows of the game results.
df_team.head()
date | weekday | home_team | home_score | away_team | away_score | attendance | overtime | remarks | |
---|---|---|---|---|---|---|---|---|---|
0 | 2020-12-22T19:00:00 | Tuesday | Brooklyn Nets | 125 | Golden State Warriors | 99 | 0 | NaN | NaN |
1 | 2020-12-22T22:00:00 | Tuesday | Los Angeles Lakers | 109 | Los Angeles Clippers | 116 | 0 | NaN | NaN |
2 | 2020-12-23T19:00:00 | Wednesday | Cleveland Cavaliers | 121 | Charlotte Hornets | 114 | 0 | NaN | NaN |
3 | 2020-12-23T19:00:00 | Wednesday | Indiana Pacers | 121 | New York Knicks | 107 | 0 | NaN | NaN |
4 | 2020-12-23T19:00:00 | Wednesday | Orlando Magic | 113 | Miami Heat | 107 | 0 | NaN | NaN |
# Keep only the columns needed to decide who won each game.
df_team_sim = df_team.drop(columns=["weekday", "attendance", "overtime", "remarks"])

home_won = df_team_sim["home_score"] > df_team_sim["away_score"]
away_won = df_team_sim["home_score"] < df_team_sim["away_score"]
home_team = df_team_sim["home_team"]
away_team = df_team_sim["away_team"]

# Vectorised selection instead of a row loop with chained assignment
# (df["winner"][i] = ...), which pandas does not guarantee writes through to
# the frame. Rows with equal scores get a missing value in both columns, so a
# tie can never show up as a fake team in the counts computed later.
df_team_sim["winner"] = home_team.where(home_won, away_team.where(away_won))
df_team_sim["loser"] = away_team.where(home_won, home_team.where(away_won))
# One bar per team: the number of games that team won.
sns.countplot(data=df_team_sim, x="winner", color="orange")
# Team names are long, so turn the tick labels vertical.
plt.xticks(rotation=90)
plt.show()
# Wins per team. The column is named explicitly because the name pandas gives
# the value_counts() result changed in pandas 2.0 (it became "count"), and the
# rest of the notebook expects this column to be called "winner".
win_lose_times = df_team_sim["winner"].value_counts().rename("winner").to_frame()
# Column assignment aligns on the index (the team name), so each team's losses
# land on its own row regardless of the order of the two value_counts()
# results. Teams with no losses get 0 instead of NaN.
win_lose_times["lose_times"] = (
    df_team_sim["loser"].value_counts().reindex(win_lose_times.index, fill_value=0)
)
win_lose_times.head()
winner | lose_times | |
---|---|---|
Phoenix Suns | 63 | 25 |
Utah Jazz | 58 | 25 |
Los Angeles Clippers | 57 | 34 |
Milwaukee Bucks | 57 | 31 |
Philadelphia 76ers | 56 | 28 |
win_lose_times.rename(columns={"winner": "win_times"}, inplace=True)
# DataFrame.plot opens its own figure unless it is handed an Axes, which left
# the figure created here empty. Create the Axes explicitly and draw on it so
# the requested size and dpi actually apply to the chart.
fig, ax = plt.subplots(figsize=(20, 10), dpi=100)
win_lose_times.plot(
    y=["win_times", "lose_times"],
    kind="bar",
    use_index=True,
    title="2020-2021赛季NBA各队伍胜负场次",
    grid=True,
    ax=ax,
)
<AxesSubplot:title={'center':'2020-2021赛季NBA各队伍胜负场次'}>
<Figure size 2000x1000 with 0 Axes>
# Win percentage = wins / games played.
games_played = win_lose_times["win_times"] + win_lose_times["lose_times"]
win_lose_times["wp"] = win_lose_times["win_times"] / games_played
# Display the table ranked from best to worst; the frame itself is not reordered.
win_lose_times.sort_values(by="wp", ascending=False)
win_times | lose_times | wp | |
---|---|---|---|
Phoenix Suns | 63 | 25 | 0.715909 |
Utah Jazz | 58 | 25 | 0.698795 |
Philadelphia 76ers | 56 | 28 | 0.666667 |
Brooklyn Nets | 55 | 29 | 0.654762 |
Milwaukee Bucks | 57 | 31 | 0.647727 |
Los Angeles Clippers | 57 | 34 | 0.626374 |
Denver Nuggets | 51 | 31 | 0.621951 |
Atlanta Hawks | 51 | 38 | 0.573034 |
Dallas Mavericks | 45 | 34 | 0.569620 |
Los Angeles Lakers | 45 | 34 | 0.569620 |
Portland Trail Blazers | 44 | 34 | 0.564103 |
New York Knicks | 42 | 35 | 0.545455 |
Golden State Warriors | 39 | 35 | 0.527027 |
Miami Heat | 40 | 36 | 0.526316 |
Memphis Grizzlies | 41 | 38 | 0.518987 |
Boston Celtics | 38 | 40 | 0.487179 |
Indiana Pacers | 35 | 39 | 0.472973 |
Washington Wizards | 36 | 43 | 0.455696 |
San Antonio Spurs | 33 | 40 | 0.452055 |
Charlotte Hornets | 33 | 40 | 0.452055 |
New Orleans Pelicans | 31 | 41 | 0.430556 |
Chicago Bulls | 31 | 41 | 0.430556 |
Sacramento Kings | 31 | 41 | 0.430556 |
Toronto Raptors | 27 | 45 | 0.375000 |
Minnesota Timberwolves | 23 | 49 | 0.319444 |
Oklahoma City Thunder | 22 | 50 | 0.305556 |
Cleveland Cavaliers | 22 | 50 | 0.305556 |
Orlando Magic | 21 | 51 | 0.291667 |
Detroit Pistons | 20 | 52 | 0.277778 |
Houston Rockets | 17 | 55 | 0.236111 |
# Side-by-side bars per team: wins on the left of each tick, losses on the right.
fig = plt.figure(figsize=(30, 20), dpi=100)
width = 0.4
x = np.arange(len(win_lose_times.index.values))
for offset, column, label in (
    (-width / 2, "win_times", "胜利场次"),
    (width / 2, "lose_times", "失败场次"),
):
    # Shift each series by half a bar width so the two bars do not overlap.
    plt.bar(x=x + offset, height=win_lose_times[column], label=label, align="center", width=width)
plt.xticks(ticks=x, rotation=90, fontsize=25, labels=win_lose_times.index.values)
plt.yticks(fontsize=25)
plt.legend(fontsize=25)
plt.show()
# Preview the first rows of the per-player box-score data.
df_player.head()
date | team | player | role | MP | FG | FGA | FG_PCT | FG3 | FG3A | ... | ORB | DRB | TRB | AST | STL | BLK | TOV | PF | PTS | PLUS_MINUS | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2020-12-22T19:00:00 | Brooklyn Nets | Tyler Johnson | Reserve | 5.083333 | 0 | 1 | 0.000 | 0 | 0 | ... | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | -12.0 |
1 | 2020-12-22T19:00:00 | Brooklyn Nets | Kyrie Irving | Starter | 25.416667 | 10 | 16 | 0.625 | 4 | 7 | ... | 1 | 3 | 4 | 4 | 0 | 0 | 1 | 3 | 26 | 32.0 |
2 | 2020-12-22T19:00:00 | Brooklyn Nets | Kevin Durant | Starter | 24.400000 | 7 | 16 | 0.438 | 1 | 2 | ... | 1 | 4 | 5 | 3 | 3 | 1 | 1 | 3 | 22 | 26.0 |
3 | 2020-12-22T19:00:00 | Brooklyn Nets | Joe Harris | Starter | 20.333333 | 4 | 8 | 0.500 | 2 | 5 | ... | 2 | 5 | 7 | 2 | 0 | 1 | 2 | 2 | 10 | 21.0 |
4 | 2020-12-22T19:00:00 | Brooklyn Nets | Spencer Dinwiddie | Starter | 19.316667 | 2 | 6 | 0.333 | 1 | 3 | ... | 0 | 5 | 5 | 4 | 0 | 0 | 3 | 1 | 9 | 24.0 |
5 rows × 24 columns
# Look at LeBron James's rows for this season.
# .copy() makes the subset an independent frame, so modifying it later neither
# depends on nor warns about its relationship to df_player.
df_james = df_player[df_player["player"] == "LeBron James"].copy()
df_james.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 52 entries, 50 to 24083 Data columns (total 24 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 date 52 non-null object 1 team 52 non-null object 2 player 52 non-null object 3 role 52 non-null object 4 MP 52 non-null float64 5 FG 52 non-null int64 6 FGA 52 non-null int64 7 FG_PCT 52 non-null float64 8 FG3 52 non-null int64 9 FG3A 52 non-null int64 10 FG3_PCT 52 non-null float64 11 FT 52 non-null int64 12 FTA 52 non-null int64 13 FT_PCT 51 non-null float64 14 ORB 52 non-null int64 15 DRB 52 non-null int64 16 TRB 52 non-null int64 17 AST 52 non-null int64 18 STL 52 non-null int64 19 BLK 52 non-null int64 20 TOV 52 non-null int64 21 PF 52 non-null int64 22 PTS 52 non-null int64 23 PLUS_MINUS 52 non-null float64 dtypes: float64(5), int64(15), object(4) memory usage: 10.2+ KB
# Deal with the missing values: first show every column so the row can be inspected in full.
pd.set_option("display.max_columns", None)
# Rows where the free-throw percentage is missing.
df_james[df_james["FT_PCT"].isna()]
date | team | player | role | MP | FG | FGA | FG_PCT | FG3 | FG3A | FG3_PCT | FT | FTA | FT_PCT | ORB | DRB | TRB | AST | STL | BLK | TOV | PF | PTS | PLUS_MINUS | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
23939 | 2021-06-01T22:00:00 | Los Angeles Lakers | LeBron James | Starter | 31.516667 | 9 | 19 | 0.474 | 6 | 10 | 0.6 | 0 | 0 | NaN | 1 | 4 | 5 | 7 | 0 | 0 | 3 | 1 | 24 | -24.0 |
# Replace missing values with 0. Reassigning the result instead of filling in
# place avoids mutating a frame that was produced by filtering another one.
df_james = df_james.fillna(value=0)
df_james.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 52 entries, 50 to 24083 Data columns (total 24 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 date 52 non-null object 1 team 52 non-null object 2 player 52 non-null object 3 role 52 non-null object 4 MP 52 non-null float64 5 FG 52 non-null int64 6 FGA 52 non-null int64 7 FG_PCT 52 non-null float64 8 FG3 52 non-null int64 9 FG3A 52 non-null int64 10 FG3_PCT 52 non-null float64 11 FT 52 non-null int64 12 FTA 52 non-null int64 13 FT_PCT 52 non-null float64 14 ORB 52 non-null int64 15 DRB 52 non-null int64 16 TRB 52 non-null int64 17 AST 52 non-null int64 18 STL 52 non-null int64 19 BLK 52 non-null int64 20 TOV 52 non-null int64 21 PF 52 non-null int64 22 PTS 52 non-null int64 23 PLUS_MINUS 52 non-null float64 dtypes: float64(5), int64(15), object(4) memory usage: 10.2+ KB
# LeBron James averaged 33.94 minutes per game this season.
mp_mean = df_james["MP"].mean()
x = np.arange(len(df_james))
fig = plt.figure(figsize=(20, 10), dpi=100)
# One bar per game, with a horizontal line marking the season average.
df_james["MP"].plot(kind="bar")
plt.axhline(mp_mean)
plt.xticks(ticks=x, labels=df_james["date"])
plt.yticks(fontsize=15)
# Label the average just above the line, at the left edge of the chart.
plt.text(x=0, y=mp_mean + 2, s=f"Mean:{mp_mean:.2f}", fontsize=20)
plt.show()
现在看一下詹姆斯在场上的时候,湖人的胜率
# Lakers' win rate in the games LeBron James played.
def LA_WP(dates, team="Los Angeles Lakers"):
    """Summarise a team's record over the games that tipped off at ``dates``.

    Args:
        dates: Tip-off timestamps, in the same format as ``df_team_sim["date"]``.
        team: Team whose record is reported.

    Returns:
        A string with the win count, the loss count and the win rate
        (two decimals). The rate is ``nan`` when no matching game is found.
    """
    # Several games can share one tip-off timestamp, so selecting by date alone
    # also picks up other teams' games. Counting only the rows where `team`
    # itself is the winner (or the loser) keeps those games out of the record.
    games = df_team_sim[df_team_sim["date"].isin(dates)]
    win_times = int((games["winner"] == team).sum())
    lose_times = int((games["loser"] == team).sum())
    total = win_times + lose_times
    win_rate = win_times / total if total else float("nan")
    return "胜利场次:%d,失败场次:%d,胜率:%.2f" % (win_times, lose_times, win_rate)


LA_WP(df_james.date.values)
'胜利场次:23,失败场次:29,胜率:0.44'
先不管上面的错误,计算詹姆斯不在场时,湖人的胜率
# Lakers' win rate when James did not play.
# Select the Lakers games whose tip-off time is not among James's game dates
# (negated isin).
james_played = df_team_sim["date"].isin(df_james["date"])
is_lakers_game = (df_team_sim["home_team"] == "Los Angeles Lakers") | (
    df_team_sim["away_team"] == "Los Angeles Lakers"
)
df_nojames = df_team_sim[~james_played & is_lakers_game]
len(df_nojames)
27
# Split the games without James into Lakers wins and Lakers losses.
lakers_won = df_nojames["winner"].eq("Los Angeles Lakers")
lakers_lost = df_nojames["loser"].eq("Los Angeles Lakers")
win_games = df_nojames[lakers_won]
lose_games = df_nojames[lakers_lost]
# Win rate = wins / (wins + losses).
wp_nojames = len(win_games) / (len(win_games) + len(lose_games))
wp_nojames
0.4444444444444444
# Show the games the Lakers lost among those James did not play in.
lose_games
date | home_team | home_score | away_team | away_score | winner | loser | |
---|---|---|---|---|---|---|---|
523 | 2021-03-03T22:00:00 | Sacramento Kings | 123 | Los Angeles Lakers | 120 | Sacramento Kings | Los Angeles Lakers |
625 | 2021-03-21T22:00:00 | Phoenix Suns | 111 | Los Angeles Lakers | 94 | Phoenix Suns | Los Angeles Lakers |
635 | 2021-03-23T19:30:00 | New Orleans Pelicans | 128 | Los Angeles Lakers | 111 | New Orleans Pelicans | Los Angeles Lakers |
655 | 2021-03-25T22:00:00 | Los Angeles Lakers | 101 | Philadelphia 76ers | 109 | Philadelphia 76ers | Los Angeles Lakers |
704 | 2021-03-31T22:00:00 | Los Angeles Lakers | 97 | Milwaukee Bucks | 112 | Milwaukee Bucks | Los Angeles Lakers |
731 | 2021-04-04T15:30:00 | Los Angeles Clippers | 104 | Los Angeles Lakers | 86 | Los Angeles Clippers | Los Angeles Lakers |
761 | 2021-04-08T19:30:00 | Miami Heat | 110 | Los Angeles Lakers | 104 | Miami Heat | Los Angeles Lakers |
795 | 2021-04-12T19:30:00 | New York Knicks | 111 | Los Angeles Lakers | 96 | New York Knicks | Los Angeles Lakers |
824 | 2021-04-15T22:00:00 | Los Angeles Lakers | 113 | Boston Celtics | 121 | Boston Celtics | Los Angeles Lakers |
858 | 2021-04-19T22:00:00 | Los Angeles Lakers | 97 | Utah Jazz | 111 | Utah Jazz | Los Angeles Lakers |
881 | 2021-04-22T21:30:00 | Dallas Mavericks | 115 | Los Angeles Lakers | 110 | Dallas Mavericks | Los Angeles Lakers |
894 | 2021-04-24T20:30:00 | Dallas Mavericks | 108 | Los Angeles Lakers | 93 | Dallas Mavericks | Los Angeles Lakers |
923 | 2021-04-28T19:30:00 | Washington Wizards | 116 | Los Angeles Lakers | 107 | Washington Wizards | Los Angeles Lakers |
993 | 2021-05-06T22:00:00 | Los Angeles Clippers | 118 | Los Angeles Lakers | 94 | Los Angeles Clippers | Los Angeles Lakers |
1002 | 2021-05-07T22:00:00 | Portland Trail Blazers | 106 | Los Angeles Lakers | 101 | Portland Trail Blazers | Los Angeles Lakers |
# Match on tip-off time and team name to collect every game James played in
# into a single DataFrame. One boolean mask replaces the per-game loop that
# re-concatenated the growing frame on every iteration.
lakers_played = (df_team_sim["winner"] == "Los Angeles Lakers") | (
    df_team_sim["loser"] == "Los Angeles Lakers"
)
data = df_team_sim[df_team_sim["date"].isin(df_james["date"]) & lakers_played]
# Lakers' win rate with James on the floor: wp_james.
james_win = int((data["winner"] == "Los Angeles Lakers").sum())
james_lose = int((data["loser"] == "Los Angeles Lakers").sum())
wp_james = james_win / (james_win + james_lose)
wp_james
0.6346153846153846
不在场时胜率约44%,在场时胜率约63%,可见詹姆斯对球队胜率的影响还是很大的。后续将继续关注其与戴维斯合作时的相关数据,将更注重数据的可视化展现…
后续更新…
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。