赞
踩
1、读入数据
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
from pyecharts import options as opts
from pyecharts.charts import Pie
from pyecharts.globals import ThemeType
# Global plotting setup for the notebook.
plt.rcParams['font.sans-serif'] = ['SimHei'] # font used so Chinese labels render
plt.rcParams['axes.unicode_minus'] = False # render the minus sign correctly
import os
# Working directory is hard-coded; the relative CSV path read later resolves against it.
# NOTE(review): this is a raw string, so '\\' is two literal backslashes — confirm intended.
os.chdir(r'C:\Users\\ABC\Desktop')
# IPython magic (not plain Python): show matplotlib figures inline in the notebook.
%matplotlib inline
# Load the data
df = pd.read_csv('二手房数据.csv',encoding='utf-8')
# Notebook display: preview 10 random rows
df.sample(n=10)
# Drop the 'Unnamed: 0' column in place
# (presumably an index column written when the CSV was saved — confirm against the file)
df.drop(['Unnamed: 0'],axis=1,inplace=True)
# Notebook display: the frame after the drop
df
# Replace the placeholder strings 'null' and ' ' with NaN so that pandas'
# missing-value functions recognise them.
df = df.replace(['null', ' '], np.nan)
# Print the distinct values of every column to spot remaining oddities.
for column in df.columns:
    print(column, df[column].unique())
# Check whether the dataset contains duplicated rows.
# duplicated() flags every row that repeats an earlier row in all columns,
# so its sum equals len(df) - len(df.drop_duplicates()).
dupNum = int(df.duplicated().sum())
# The count is of rows, so the message says "行" (rows).
print("数据集中有%s行重复值" % dupNum)
# Missing-value handling
def missing_values_table(df):
    """Summarize the missing values of each column of *df*.

    Prints the total number of columns and how many of them contain
    missing values.

    Args:
        df: DataFrame to inspect.

    Returns:
        DataFrame indexed by column name with two columns,
        'Missing Values' (count) and '% of Total Values' (percentage,
        rounded to 1 decimal). Only columns whose percentage is non-zero
        are kept, sorted by percentage in descending order.
    """
    mis_val = df.isnull().sum()
    mis_val_percent = 100 * mis_val / len(df)
    mis_val_table = pd.concat([mis_val, mis_val_percent], axis=1)
    mis_val_table_ren_columns = mis_val_table.rename(
        columns={0: 'Missing Values', 1: '% of Total Values'})
    # Keep only the columns that actually have missing values.
    has_missing = mis_val_table_ren_columns.iloc[:, 1] != 0
    mis_val_table_ren_columns = mis_val_table_ren_columns[has_missing].sort_values(
        '% of Total Values', ascending=False).round(1)
    print("Your selected dataframe has " + str(df.shape[1]) + " columns.\n"
          "There are " + str(mis_val_table_ren_columns.shape[0]) +
          " columns that have missing values.")
    return mis_val_table_ren_columns


# Missing-value statistics
missing_values_table(df)
# Fill missing values with the mode (intended for the categorical variables).
# NOTE(review): df.mode() can return several rows when a column has tied modes;
# .max() then picks the largest tied value per column — confirm that is intended.
df = df.fillna(df.mode().max())
# Notebook display: remaining missing-value count per column
df.isna().sum()
# Remove the unit suffix from each measurement column and convert it to float.
# The vectorized .str accessor replaces three copy-pasted per-element lambdas;
# regex=False makes the unit a literal substring.
for column, unit in (("总价", "万"), ("单价", "元/平米"), ("建筑面积", "平米")):
    df[column] = df[column].str.replace(unit, "", regex=False).astype("float")
2、画图
# Mean total price ('总价') per district ('区域').
zj = pd.pivot_table(df, index=["区域"], values=["总价"], aggfunc="mean")
zj
zj.values.tolist()
# Flatten the single-column pivot into a plain list of means.
result = zj["总价"].tolist()
result
plt.figure(1, figsize=(15, 7))
# seaborn >= 0.12 accepts x/y only as keyword arguments.
sns.barplot(x=zj.index.tolist(), y=result)
plt.xticks(fontsize=13)
plt.yticks(fontsize=13)
plt.xlabel('区域二手房总价均价分析', fontsize=16)
plt.show()
# Bar chart of the mean total price per district, drawn with pandas plotting.
# '总价' is already converted to float by the unit-stripping step earlier in
# this script, so the trailing unit character is only cut off when the
# column still holds strings (slicing a float would raise TypeError).
total_price = df['总价']
if total_price.dtype == object:
    total_price = total_price.str[:-1]
    df['总价'] = total_price
df['二手房总价'] = total_price.astype(float)
import pandas as pd
import matplotlib.pyplot as plt
# Render Chinese labels correctly (plt.rcParams is matplotlib's global
# rcParams; the name `mpl` was never imported in this script).
plt.rcParams['font.sans-serif'] = ['KaiTi']
# Render the minus sign correctly
plt.rcParams['axes.unicode_minus'] = False
label_font = {
    'weight': 'bold',
    'size': 14,
    'family': 'simsun'
}
# DataFrame.plot opens its own figure unless given an Axes, so create the
# 15x7 Axes explicitly and draw into it.
fig, ax = plt.subplots(figsize=(15, 7))
pd.pivot_table(df, index=["区域"], values=["二手房总价"], aggfunc="mean").plot.bar(ax=ax)
plt.xticks(fontsize=15, rotation=0)
plt.yticks(fontsize=15)
plt.legend(prop=label_font)
plt.show()
# Mean unit price ('单价') per district ('区域').
dj = pd.pivot_table(df, index=["区域"], values=["单价"], aggfunc="mean")
# Flatten the single-column pivot into a plain list of means.
result = dj["单价"].tolist()
result
plt.figure(1, figsize=(15, 7))
# seaborn >= 0.12 accepts x/y only as keyword arguments.
sns.barplot(x=dj.index.tolist(), y=result)
plt.xticks(fontsize=13)
plt.yticks(fontsize=13)
plt.xlabel('区域二手房单价均价分析', fontsize=16)
plt.show()
# Mean floor area ('建筑面积') per district ('区域').
zf = pd.pivot_table(df, index=["区域"], values=["建筑面积"], aggfunc="mean")
# Flatten the single-column pivot into a plain list of means.
result = zf["建筑面积"].tolist()
result
plt.figure(1, figsize=(15, 7))
# seaborn >= 0.12 accepts x/y only as keyword arguments.
sns.barplot(x=zf.index.tolist(), y=result)
plt.xticks(fontsize=13)
plt.yticks(fontsize=13)
plt.xlabel('区域二手房建筑面积分析', fontsize=16)
plt.show()
# Count of listings per floor category ('楼层'), split by district ('区域').
sns.set(rc={'figure.figsize': (15, 7)})
# Render Chinese labels correctly. This is set after sns.set(), which resets
# rcParams. plt.rcParams is used because `mpl` was never imported here.
plt.rcParams['font.sans-serif'] = ['KaiTi']
# Render the minus sign correctly
plt.rcParams['axes.unicode_minus'] = False
label_font = {
    'weight': 'bold',
    'size': 14,
    'family': 'simsun'
}
sns.countplot(x='楼层', hue='区域', data=df)
# The x axis shows '楼层'; '区域' is the hue and appears in the legend.
plt.xlabel('楼层', fontsize=20)
plt.ylabel('count', fontsize=20)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
plt.legend(prop=label_font)
plt.show()
# Count of listings per orientation ('朝向'), most frequent first.
color = sns.color_palette()
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],  # render Chinese labels
    'axes.unicode_minus': False,    # render the minus sign correctly
})
plt.figure(figsize=(15, 7))
# value_counts() sorts descending, so its index gives the bar order.
orientation_order = df['朝向'].value_counts().index
sns.countplot(data=df, x='朝向', order=orientation_order)
plt.xlabel('朝向', fontsize=18)
plt.ylabel('count', fontsize=15)
plt.xticks(rotation=45, fontsize=17)
plt.yticks(fontsize=15)
plt.show()
# Horizontal bar chart: number of listings per layout ('户型').
color = sns.color_palette()
plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels
plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly
plt.figure(1, figsize=(10, 14))
# Count once instead of twice. dropna=False keeps a bucket for missing values.
layout_counts = df["户型"].value_counts(dropna=False)
# seaborn >= 0.12 accepts x/y only as keyword arguments. Plain arrays and
# lists are passed so counts and labels are paired strictly by position.
sns.barplot(x=layout_counts.to_numpy(), y=layout_counts.index.tolist())
plt.xticks(fontsize=13)
plt.yticks(fontsize=13)
plt.xlabel('户型', fontsize=16)
plt.show()
# Pie chart of the share of listings per floor category ('楼层').
plt.figure(figsize=(4, 4), dpi=150)
dataname = df['楼层']
freq = dataname.value_counts()
colors = ['#99CCFF', '#CCFF66', '#FFCC99']
# Pull out only the first (largest) slice. explode must have exactly one
# entry per slice, so it is sized from the data rather than fixed at three.
explode = [0.05 if position == 0 else 0 for position in range(len(freq))]
plt.pie(freq, labels=freq.index, explode=explode, autopct='%.1f%%',
        textprops={'fontsize': 12}, colors=colors, startangle=90,
        counterclock=False)
plt.axis('square')
plt.legend(loc='upper right', bbox_to_anchor=(1.2, 0.2), prop={'size': 10})
plt.show()
# Count plot of listings per value of the '装修' column.
color = sns.color_palette()  # NOTE(review): not referenced again within this cell
sns.countplot(x = '装修',data = df)
plt.xlabel('装修')
plt.xticks(fontsize=13)
plt.yticks(fontsize=13)
# Ring (donut) chart of the number of listings per district ('区域').
import matplotlib.pyplot as plt
import warnings
# Silences every warning from here on.
warnings.filterwarnings("ignore")
from palettable.colorbrewer.qualitative import Pastel1_7
plt.rcParams['font.sans-serif'] = ['SimHei'] # render Chinese labels
plt.rcParams['axes.unicode_minus'] = False # render the minus sign correctly
dataname = df['区域']
freq = dataname.value_counts()
plt.figure(1 , figsize = (10 , 10))
# wedgeprops width=0.3 draws each wedge as a ring segment; white edges separate them.
plt.pie(freq, labels = freq.index, colors=Pastel1_7.hex_colors,textprops={'fontsize': 15},wedgeprops=dict(width=0.3, edgecolor='w'))
plt.axis('equal')
plt.show()
# Distribution of the total price ('总价'): density histogram with a KDE curve.
plt.figure(1, figsize=(12, 8))
# sns.distplot is deprecated; this is the histplot equivalent given in the
# seaborn migration notes (density-normalised bars plus a KDE extended by cut=3).
sns.histplot(df['总价'], bins=40, kde=True, stat="density", kde_kws=dict(cut=3))
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
plt.xlabel('总价', fontsize=16)
plt.show()
# Distribution of the unit price ('单价'): density histogram with a KDE curve.
plt.figure(1, figsize=(12, 8))
# sns.distplot is deprecated; this is the histplot equivalent given in the
# seaborn migration notes (density-normalised bars plus a KDE extended by cut=3).
sns.histplot(df['单价'], bins=40, color='purple', kde=True, stat="density",
             kde_kws=dict(cut=3))
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
plt.xlabel('单价', fontsize=16)
plt.show()
# Bar chart of the number of listings per district ('区域').
# Imports used by this cell
import matplotlib.pyplot as plt
import numpy as np
# Font that can display Chinese text
plt.rcParams["font.family"] = "SimHei"
# Create the canvas and add a single subplot
fig = plt.figure(num=1, figsize=(12, 8))
ax = fig.add_subplot(111)
dataname = df['区域']
freq = dataname.value_counts()
# Draw the bars once. A plain ax.bar call drawn underneath an opaque,
# full-width styled call would be completely hidden, so only the styled
# call is kept.
ax.bar(x=freq.index, height=freq,
       color=["#FF7F50", "#FF8C00", "#FFA54F", "#FFB90F", "#FFC1C1",
              "#FFDEAD", "#FFE4E1", "#FFEFD5", "#FFFAFA"],
       bottom=0, edgecolor="gold", linewidth=2, width=1, alpha=1)
ax.set_title("某二手房区域位置统计图", fontsize=20, backgroundcolor='yellowgreen',
             fontweight='bold', color='white')
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
plt.show()
# Pie chart of the share of listings per value of the '装修' column, with
# the first slice pulled out and the others hatched.
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
from palettable.colorbrewer.qualitative import Pastel1_7
plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels
plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly
plt.figure(1, figsize=(10, 10))
dataname = df['装修']
freq = dataname.value_counts()
colors = ["#d5695d", "#377eb8", "#4daf4a", "#984ea3", "#d5695d", "#5d8ca8"]
# Pull the first slice out. explode needs one entry per slice, so it is
# sized from the data rather than fixed at six.
explode = [0.1 if position == 0 else 0 for position in range(len(freq))]
patches, texts, autotexts = plt.pie(
    freq, labels=freq.index, autopct='%.2f%%', colors=colors,
    textprops={'color': '#000000',  # text colour
               'fontsize': 16,  # text size
               'fontfamily': 'Microsoft JhengHei',  # label font
               },
    pctdistance=0.9, explode=explode, shadow=False, startangle=90)
# Style by position without assuming exactly six slices: the first label
# is enlarged, the remaining labels are red.
for label in texts[:1]:
    label.set_size('25')
for label in texts[1:]:
    label.set_color('r')
# The first wedge is more transparent. The remaining wedges get alpha 0.8
# and cycle through the hatch patterns in order.
hatches = ['x', '|', '/', '+', '|']
for wedge in patches[:1]:
    wedge.set_alpha(0.5)
for offset, wedge in enumerate(patches[1:]):
    wedge.set_alpha(0.8)
    wedge.set_hatch(hatches[offset % len(hatches)])
plt.axis('equal')
plt.show()
# Frequency of every layout ('户型') value, placeholder included.
dataname = df['户型']
freq = dataname.value_counts()
freq
# Same count with the rows whose layout is the placeholder '暂无' removed.
dataname = df.loc[df['户型'] != '暂无', '户型']
freq = dataname.value_counts()
freq
# Horizontal bar chart of the 10 most frequent layouts ('户型'), taken from
# the `freq` counts computed just before this cell.
plt.rcParams["font.family"] = "SimHei"
fig = plt.figure(num=1, figsize=(12, 10))
aa = freq[:10]
# Reverse so the most frequent layout ends up at the top of the chart.
x = aa.values.tolist()[::-1]
y = aa.index.tolist()[::-1]
ax = fig.add_subplot(111)
colors = ['#f5222d', '#FF33CC', '#fa8c16', '#a0d911', '#1890ff', '#d5695d', '#fadb14']
# One bar centre per category at 1, 3, 5, ... This is derived from the data
# so fewer than 10 categories still works.
positions = np.arange(1, 2 * len(x), 2)
ax.barh(y=positions, width=x, height=1, left=0, edgecolor="gold",
        linewidth=2, color=colors, alpha=0.8)
# Write each count just past the end of its bar.
for a, b in zip(x, positions):
    ax.text(a + 10, b - 0.2, "%s" % a, fontsize=14)
ax.spines["top"].set_visible(False)  # hide the top axis line
ax.spines["right"].set_visible(False)
ax.spines["bottom"].set_visible(False)
ax.spines["left"].set_color("gold")
ax.spines["left"].set_linewidth(3)
ax.tick_params(pad=4, left=False)
ax.tick_params(axis="y", labelrotation=10)
ax.set_title("二手房热门户型前10统计图", fontsize=28, backgroundcolor='yellowgreen',
             fontweight='bold', color='white')
ax.xaxis.grid(linewidth=0.5, color="black", alpha=0.8)
ax.set_axisbelow(True)
ax.set_yticks(positions)
ax.set_yticklabels(y, fontsize=15)
# Only resize the x tick labels. Hard-coding their text ('0'..'1000') would
# overwrite the real scale with values unrelated to the data.
ax.tick_params(axis="x", labelsize=15)
plt.show()
# Build the subset of listings that belong to the five most frequent layouts.
data = df[~df['户型'].isin(['暂无'])] # drop rows whose '户型' is '暂无'
data
# value_counts() sorts by frequency descending, so freq[:5] is the top five.
freq = data['户型'].value_counts()
freq[:5]
freq[:5].index.tolist()
data1 = data[data['户型'].isin(freq[:5].index.tolist())] # keep only rows in those five layouts
data1
# Pie chart comparing the mean unit price ('单价') of the layouts in data1.
import numpy as np
import matplotlib.pyplot as plt
plt.figure(figsize=(10, 10))
aa = pd.pivot_table(data1, index=["户型"], values=["单价"], aggfunc="mean")
# Labels and values are both read in the pivot's own order. Reversing only
# one of them would attach each label to the wrong slice.
labels = aa.index.tolist()
values = aa["单价"].tolist()
# First slice pulled out a little more. Sized from the data so fewer than
# five layouts still works.
explode = [0.05 if position == 0 else 0.01 for position in range(len(values))]
colors = ['#F5DEB3', '#87CEFA', '#FFB6C1', '#90EE90', '#D3D3D3']
_, l_text, p_text = plt.pie(values, explode=explode, labels=labels,
                            autopct='%1.1f%%', colors=colors)
# Label font size
for t in l_text:
    t.set_size(18)
# Percentage font size
for t in p_text:
    t.set_size(18)
plt.legend(loc='upper right', bbox_to_anchor=(1.4, 0.65), prop={'size': 18})
plt.show()
# Horizontal bar chart of the mean unit price per layout ('户型').
import pandas as pd
import matplotlib.pyplot as plt
label_font = {
    'weight': 'bold',
    'size': 14,
    'family': 'simsun'
}
# '二手房单价' is not created anywhere in the visible script. Derive it from
# the numeric '单价' column when absent, so the pivot does not raise KeyError.
if '二手房单价' not in df.columns:
    df['二手房单价'] = df['单价'].astype(float)
# DataFrame.plot opens its own figure unless given an Axes, so create the
# tall 10x50 Axes explicitly and draw into it.
fig, ax = plt.subplots(figsize=(10, 50))
pd.pivot_table(df, index=["户型"], values=["二手房单价"], aggfunc="mean").plot.barh(ax=ax)
plt.xticks(fontsize=15, rotation=0)
plt.yticks(fontsize=15)
plt.legend(prop=label_font)
plt.show()
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。