赞
踩
足球作为全球最受欢迎的体育运动,其最高级别赛事世界杯自然会吸引全世界无数球迷的目光。本文将对世界杯历史数据进行可视化分析。数据集是根据FIFA官方数据整理的基础数据表,来源于天池。本文将对其中的数据集WorldCupMatches进行数据可视化。该数据集包含了1930-2014年共20届世界杯赛事信息,涉及的信息可见“变量介绍”部分。
# Horizontal bar chart with one bar per row of `c2`: bar length is
# `c2$max_attendance`, the y axis is labelled with `c2$Year`, and each bar is
# annotated with the teams and the attendance figure.
opar <- par(no.readonly = TRUE)  # snapshot graphics settings; restored below
par(pin = c(7, 10), mar = c(4.5, 7, 2, 6))
bar5 <- barplot(
  c2$max_attendance,
  horiz = TRUE,
  axes = FALSE,       # both axes are drawn manually below
  col = "#E64B35CC",
  cex.names = 0.8,    # size of the bar-name labels
  space = 0,          # gap left before each bar; 0 makes the bars touch
  main = "历届比赛中最受欢迎的场次", cex.main = 1.5,
  xlim = c(0, 190000),
  xlab = "现场观众人数(单位:万)", ylab = "年份"
)
# Ticks sit at raw head counts; the labels are rescaled to units of 10,000.
axis(side = 1, at = seq(0, 190000, 19000), labels = seq(0, 19, 1.9))
# Place the year labels at the bar midpoints returned by barplot(). A
# hard-coded 0.5:20.5 yields 21 positions, which makes axis() fail whenever
# `c2` does not have exactly 21 rows.
axis(side = 2, at = bar5, labels = c2$Year, las = 2)
# Start every annotation at x = 0 and draw it to the right of that point.
text(
  rep(0, length(c2$Year)), bar5,
  labels = paste0(
    "比赛队伍:", c2$team, "\n",
    "现场观众人数:", c2$max_attendance
  ),
  pos = 4, cex = 1
)
par(opar)
# Horizontal bar chart of `new_order_WorldCupMatches$Attendance`, one bar per
# row, labelled on the y axis with the teams and annotated inside the plot
# with teams, date, stadium and attendance.
# `n` and `new_order_WorldCupMatches` are defined outside this block.
opar <- par(no.readonly = TRUE)  # snapshot graphics settings; restored below
par(pin = c(7, 8), mar = c(4, 13, 1, 2))
bar6 <- barplot(
  new_order_WorldCupMatches$Attendance,
  horiz = TRUE,
  axes = FALSE,       # both axes are drawn manually below
  names.arg = "",
  space = 0,
  col = brewer.pal(n, "Set3"),
  cex.main = 1.5,
  xlim = c(0, 190000),
  main = "历史最受欢迎的前十场比赛", xlab = "现场观众人数"
)
axis(
  side = 1, at = seq(0, 190000, 19000),
  las = 1, font = 1, cex.axis = 1, lwd = 2, line = 0
)
# Tick positions come from the bar midpoints returned by barplot() instead of
# a hard-coded 0.5:9.5, so the labels line up for any number of rows.
axis(
  side = 2, at = bar6, labels = new_order_WorldCupMatches$team,
  las = 2, cex.axis = 1, font = 2
)
# Multi-line annotation for each bar; paste0() replaces paste(..., sep = "").
lab1 <- paste0(
  "team:", new_order_WorldCupMatches$team, "\n",
  new_order_WorldCupMatches$new_Datetime, "\n",
  new_order_WorldCupMatches$new_Stadium, "\n",
  "Attendance:", new_order_WorldCupMatches$Attendance, "人"
)
# Start every annotation at x = 0 and draw it to the right of that point.
text(rep(0, length(bar6)), bar6, labels = lab1, pos = 4, cex = 0.8)
par(opar)
# Recode `new_stage` as a factor whose levels are listed in the order given
# here. This fixes the level order but does not create an ordered factor
# (no `ordered = TRUE`).
new_WorldCupMatches[["new_stage"]] <- factor(
  x = new_WorldCupMatches[["new_stage"]],
  levels = c(
    "Group", "Round of 16", "Quarter-finals",
    "Semi-finals", "Third place", "Final"
  )
)
# Stacked bar chart: `Attendance` summed per `Year`, split by `new_stage`.
bar7 <- ggplot(
  new_WorldCupMatches,
  aes(x = factor(Year), y = Attendance, fill = new_stage)
) +
  geom_bar(stat = "identity", position = "stack") +
  scale_fill_brewer(palette = "Set1") +
  # Disabled alternative for controlling the legend order:
  # scale_fill_discrete(breaks = c("Group", "Round of 16", "Quarter-finals",
  #                                "Semi-finals", "Third place", "Final"))
  # The axis title states millions, so the tick labels run 0 to 3.6 (the
  # previous 0 to 36 were in units of 100,000). Dividing integers keeps the
  # printed labels free of floating-point noise.
  scale_y_continuous(
    limits = c(0, 3600000),
    breaks = seq(0, 3600000, 600000),
    labels = seq(0, 36, 6) / 10
  ) +
  labs(
    title = "历届比赛不同比赛阶段现场观众总人数",
    x = "年份",
    y = "现场观众人数(单位:百万)"
  ) +
  theme(
    # NOTE(review): a numeric legend.position is deprecated from ggplot2
    # 3.5.0 in favour of legend.position.inside; kept so that older versions
    # still work.
    legend.position = c(0.15, 0.9),
    legend.title = element_blank(),  # drop the legend title
    # Near-transparent fill and no border: hides the legend background.
    legend.background = element_rect(
      fill = rgb(1, 1, 1, alpha = 0.001), colour = NA
    ),
    legend.text = element_text(size = 12),
    plot.title = element_text(hjust = 0.5, size = 16),  # centred title
    # Rotated x tick labels, centred on their ticks.
    axis.text.x = element_text(size = 13, angle = 90, hjust = 1, vjust = 0.5)
  )
# Proportional stacked bar chart: share of `Attendance` per `new_stage` within
# each `Year`.
bar8 <- ggplot(
  new_WorldCupMatches,
  aes(x = factor(Year), y = Attendance, fill = new_stage)
) +
  geom_bar(stat = "identity", position = "fill") +
  scale_fill_brewer(palette = "Set1") +
  # Disabled alternative for controlling the legend order:
  # scale_fill_discrete(breaks = c("Group", "Round of 16", "Quarter-finals",
  #                                "Semi-finals", "Third place", "Final"))
  # Breaks are stated explicitly so the five percentage labels always have
  # five matching positions instead of relying on the default break
  # computation.
  scale_y_continuous(breaks = seq(0, 1, 0.25), labels = seq(0, 100, 25)) +
  labs(
    title = "历届比赛不同阶段的现场观众人数比例",
    x = "年份",
    y = "现场观众人数比例(单位:%)"
  ) +
  theme(
    legend.position = "none",  # no legend on this panel
    plot.title = element_text(hjust = 0.5, size = 16),
    axis.text.x = element_text(size = 13, angle = 90, hjust = 1, vjust = 0.5)
  )

# Draw `bar7` and `bar8` side by side in a single figure.
grid.arrange(bar7, bar8, ncol = 2)
# Horizontal bar chart of `attendance` per `stadium` from `top10_statt`, with
# bars sorted by attendance and each bar labelled with its `city`.
# `colourCount` is defined outside this block.
ggplot(
  data = top10_statt,
  mapping = aes(
    x = reorder(stadium, attendance),
    y = attendance,
    fill = stadium
  )
) +
  geom_bar(stat = "identity") +
  # City label anchored near the base of each bar (y = 5000), left-aligned.
  # Adding fontface = "bold" here would make the labels bold.
  geom_text(
    aes(x = stadium, y = 5000, label = paste0("City:", city)),
    hjust = 0,
    size = 5
  ) +
  # Interpolate `colourCount` colours from five "Blues" shades.
  scale_fill_manual(
    values = colorRampPalette(brewer.pal(5, "Blues"))(colourCount),
    breaks = top10_statt$stadium
  ) +
  coord_flip() +
  labs(
    title = "世界杯比赛中平均观众人数排名前十名的体育场",
    y = "现场观众人数",
    x = "体育场"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 20, face = "bold"),
    axis.title.x = element_text(size = 15),
    axis.title.y = element_text(size = 18),
    axis.text = element_text(size = 13),
    legend.title = element_blank()
  )
# Frequency table of `new_WorldCupMatches$City`, as a data frame sorted by
# descending count.
city <- data.frame(table(new_WorldCupMatches$City))
colnames(city)[1] <- "city"
city <- city[order(-city$Freq), ]

# Lollipop chart of the first ten rows: a grey stem from 0 up to `Freq`,
# topped with a red dot. The stems inherit `x` from the plot-level mapping.
ggplot(city[seq_len(10), ], aes(x = reorder(city, Freq), y = Freq)) +
  geom_segment(
    aes(xend = reorder(city, Freq), y = 0, yend = Freq),
    colour = "grey50"
  ) +
  geom_point(size = 5, color = "red") +
  scale_y_continuous(
    limits = c(0, 25),
    breaks = seq(0, 25, 5),
    labels = seq(0, 25, 5)
  ) +
  labs(title = "主办比赛次数排名前十的城市", x = "主办城市", y = "主办次数") +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 20, face = "bold"),
    panel.grid.major.x = element_blank()
  )
# Faceted lollipop chart of `count` per `country`, one panel per `group`, with
# a dash-dotted reference line per panel taken from column `t5` of `t6`.
# `country_fre01` and `t6` are defined outside this block.
ggplot(country_fre01, aes(x = country, y = count, color = group)) +
  geom_point() +
  # `scales` is spelled out in full; the earlier `scale =` only worked through
  # partial argument matching.
  facet_wrap(~group, scales = "free") +
  geom_hline(
    data = t6,
    aes(yintercept = t5),
    linetype = "dotdash",
    color = "gray50"
  ) +
  # Stem from 0 up to each point.
  geom_segment(aes(x = country, xend = country, y = 0, yend = count)) +
  labs(title = "各国参加世界杯比赛的次数") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    legend.position = "none",
    plot.title = element_text(hjust = 0.5, size = 20, face = "bold")
  ) +
  coord_flip()
# Bar chart of `Freq` per `country` for the first ten rows of `count_co`,
# bars sorted by descending `Freq`, with a red dash-dotted reference line at
# y = 14.
ggplot(
  data = count_co[seq_len(10), ],
  mapping = aes(x = reorder(country, -Freq), y = Freq, fill = country)
) +
  geom_bar(stat = "identity") +
  geom_hline(yintercept = 14, color = "red", linetype = "dotdash") +
  # Ten colours interpolated from five "RdYlBu" shades.
  scale_fill_manual(
    values = colorRampPalette(brewer.pal(5, "RdYlBu"))(10),
    breaks = count_co$country
  ) +
  scale_y_continuous(
    limits = c(0, 30),
    breaks = seq(0, 30, 5),
    labels = seq(0, 30, 5)
  ) +
  labs(
    title = "21届世界杯比赛中参加次数最多的前十名国家",
    x = "国家",
    y = "次数"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 20, face = "bold"),
    axis.title = element_text(size = 16),
    axis.text = element_text(size = 15),
    legend.position = "none"
  )
# Pie chart of `pro` per `Var1` from `win_lose_res`: a single stacked bar
# wrapped into polar coordinates, each slice labelled with its name and
# percentage.
ggplot(win_lose_res, aes(x = "", y = pro, fill = Var1)) +
  geom_bar(stat = "identity", position = "stack", width = 0.5) +
  # Label centred within each slice: category name, then percentage rounded
  # to two decimals.
  geom_text(
    aes(label = paste0(Var1, "\n", round(100 * pro, 2), "%")),
    position = position_stack(vjust = 0.5),
    size = 4
  ) +
  coord_polar(theta = "y") +
  scale_fill_brewer(palette = "Set2") +
  labs(title = "主客队胜负及平局比例", x = "", y = "") +  # blank axis titles
  theme_bw() +
  # Strip everything except legend, title and slices.
  theme(
    plot.title = element_text(hjust = 0.5, size = 20),
    legend.position = "top",
    legend.title = element_blank(),
    panel.border = element_blank(),
    panel.grid = element_blank(),
    axis.ticks = element_blank(),
    axis.text.x = element_blank()
  )
# Faceted lollipop chart of `Freq` per `country_or`, one panel per `group`,
# with a dash-dotted reference line per panel taken from column `ablin` of
# `t7`. `wld` and `t7` are defined outside this block.
ggplot(wld, aes(x = country_or, y = Freq, color = group)) +
  geom_point(size = 3) +
  # Stem from 0 up to each point.
  geom_segment(aes(x = country_or, xend = country_or, y = 0, yend = Freq)) +
  geom_hline(data = t7, aes(yintercept = ablin), linetype = "dotdash") +
  # NOTE(review): scale_x_reordered() presumably pairs with a
  # reorder_within() call used to build `country_or` upstream; confirm.
  scale_x_reordered() +
  # Author's note: facet_grid() cannot be used here.
  facet_wrap(. ~ group, scales = "free") +
  coord_flip() +
  labs(title = "各国取得胜负及平局的次数", x = "country") +
  theme_bw() +
  theme(
    plot.title = element_text(size = 20, hjust = 0.5, face = "plain"),
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    legend.position = "none"
  )
# Horizontal lollipop chart of column `x` per `country` for the first twenty
# rows of `sum_goal` after sorting by descending `x`. Rows with x > 100 are
# highlighted and labelled with their value.
ggplot(
  data = sum_goal[order(-sum_goal$x), ][seq_len(20), ],
  mapping = aes(reorder(country, x), x)
) +
  geom_segment(
    aes(
      x = reorder(country, x),
      xend = reorder(country, x),
      y = 0,
      yend = x
    ),
    color = "gray50"
  ) +
  geom_point(size = 4, color = "red") +
  # No padding around the value axis, so the stems start at the axis line.
  scale_y_continuous(
    expand = c(0, 0),
    limits = c(0, 250),
    breaks = seq(0, 250, 50)
  ) +
  # Kept after the geoms it is meant to affect.
  gghighlight(x > 100, label_key = x) +
  labs(
    title = "21届世界杯中进球总数最多的国家(前二十名)",
    x = "country",
    y = "count"
  ) +
  theme(
    plot.title = element_text(size = 20, hjust = 0.5, face = "plain"),
    axis.title = element_text(size = 15),
    axis.text = element_text(size = 10),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  coord_flip()
# Keep only the rows of `hag` and `hags` whose `国家` value appears in
# `WorldCupsSummary$Winner`, then join the two subsets on `国家`.
# The merged table carries both the scoring and conceding rates.
w_hag <- hag[hag$国家 %in% unique(WorldCupsSummary$Winner), ]
w_hags <- hags[hags$国家 %in% unique(WorldCupsSummary$Winner), ]
w_haga <- merge(x = w_hag, y = w_hags, by = "国家")
# Bubble chart of `失球率` against `进球率` from `w_haga`, one bubble per
# `国家`, split into four quadrants by reference lines at x = 1.7 and y = 1.2.
ggplot(data = w_haga, aes(x = 进球率, y = 失球率)) +
  # Bubble colour by `国家`; bubble size by the product of the two rates.
  geom_point(aes(color = 国家, size = 进球率 * 失球率), alpha = 0.5) +
  geom_vline(xintercept = 1.7) +
  geom_hline(yintercept = 1.2) +
  # Labels inherit x and y from the plot-level mapping; the text size is set
  # per row from `失球率`, outside aes(), so scale_size() does not apply to it.
  geom_text(aes(label = 国家), size = w_haga$失球率 * 5) +
  scale_size(range = c(15, 35)) +
  scale_x_continuous(limits = c(1.15, 2.3)) +
  scale_y_continuous(limits = c(0.85, 1.45)) +
  coord_equal(ratio = 1) +
  labs(
    x = "低<————— 进球率 —————>高",
    y = "低<————— 失球率 —————>高",
    title = "夺冠队伍的进失球矩阵分析图"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 20, face = "plain"),
    legend.position = "none"
  )
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。