赞
踩
国内不同地点的招聘人数总数
不同类别岗位的平均工作年限分析
不同类别(大类、小类)的职位招聘人数总数,如“技术>测试”表示大类是技术,小类是测试
不同年份/月份发布的职位需求人数分析
不同地点不同大类职位类别的职位招聘人数总数
描述上述1-6数据分析过程中你遇到了哪些问题以及解决方法
用Python进行数据分析,分析职位需求数据,并且进行数据清洗及各种维度的数据分析及可视化
下面给出题目1-6的示例代码,上面的题目在代码中基本都有一一对应的实现。
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Job-posting analysis script.
#
# Reads 'job_data.xlsx' and uses the columns 工作地点 (location), 招聘人数
# (head-count), 职位类别 (category, written as "big>small"), 工作年限
# (required years of experience), 发布时间 (publish date, split on '-') and
# 学历要求 (education requirement). Each task prints its intermediate result
# and shows one or two matplotlib charts.

# Colours used for the per-city / per-category line charts (tasks 5 and 6).
color_list = ['#CD853F', '#DC143C', '#00FF7F', '#FF6347', '#8B008B', '#00FFFF',
              '#0000FF', '#8B0000', '#FF8C00', '#1E90FF', '#00FF00', '#FFD700',
              '#008080', '#008B8B', '#8A2BE2', '#228B22', '#FA8072', '#808080']

# Use a font that contains CJK glyphs and keep the minus sign renderable.
# These settings are global, so setting them once covers every chart below.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# ---------------------------------------------------------------------------
# Task 1: total head-count per work location
# ---------------------------------------------------------------------------
df = pd.read_excel('job_data.xlsx', header=0)  # load the data
print(df)

# Distinct locations, in value_counts() order (most frequent first).
city_list = list(df['工作地点'].value_counts().keys())
print(city_list)

city_job = []
for city in city_list:
    df_city = df[df['工作地点'].isin([city])].reset_index()  # rows for this location
    city_job.append(sum(df_city['招聘人数']))  # total head-count for this location
print(city_job)

plt.title("不同城市招聘人数")
plt.xlabel("城市")
plt.ylabel("人数 /人")
x = np.arange(len(city_list))
plt.xticks(x, city_list, fontsize=10)
plt.bar(x, city_job, color='g')  # bar chart
for x_pos, total in zip(x, city_job):
    plt.text(x_pos, total, total)  # write the value on top of each bar
plt.show()

# ---------------------------------------------------------------------------
# Task 2: average required years of experience per job category
# ---------------------------------------------------------------------------
job_class = df['职位类别'].value_counts().keys()  # distinct job categories

# Normalise the experience column to integers: '不限' -> 0, '10年以上' -> 10,
# anything else -> the integer before the first '-'.
# NOTE(review): this label-based loop assumes the default 0..n-1 row index.
for index in range(df.shape[0]):
    raw_years = df.loc[index, '工作年限']
    if raw_years == '不限':
        df.loc[index, '工作年限'] = 0
    elif raw_years == '10年以上':
        df.loc[index, '工作年限'] = 10
    else:
        df.loc[index, '工作年限'] = int(raw_years.split('-')[0])
print(city_list)

job_class_worktime = []
for job in job_class:
    df_job = df[df['职位类别'].isin([job])].reset_index()  # rows for this category
    # Mean years of experience for this category, rounded to one decimal.
    job_class_worktime.append(round(sum(df_job['工作年限']) / df_job.shape[0], 1))
print(job_class_worktime)

plt.title("不同岗位平均工作年限")
plt.xlabel("工作岗位")
plt.ylabel("工作年限 /年")
x = np.arange(len(job_class_worktime))
plt.xticks(x, job_class, fontsize=10, rotation=90)
plt.bar(x, job_class_worktime, color='b')
for x_pos, average in zip(x, job_class_worktime):
    plt.text(x_pos, average, average)
plt.show()

# Print the average for every category.
for job, average in zip(job_class, job_class_worktime):
    print(f'岗位 {job} 平均工作年限为 {average}年')

# ---------------------------------------------------------------------------
# Task 3: total head-count per top-level ("big") job category
# ---------------------------------------------------------------------------
job_class_recrutment = []
for job in job_class:
    df_job = df[df['职位类别'].isin([job])].reset_index()  # rows for this category
    job_class_recrutment.append(sum(df_job['招聘人数']))  # total head-count for this category

# The top-level category is the text before the first '>'.
job_class_big = []
for job in job_class:
    job_class_big.append(job.split('>')[0])
job_class_big = set(job_class_big)
print(job_class_big)

# Maps top-level category -> total head-count.
job_class_big_recrutment = {}
for item in job_class_big:
    job_class_big_recrutment[item] = 0
for job, total in zip(job_class, job_class_recrutment):
    # Exact lookup on the prefix. A substring test could credit the count to a
    # different top-level name that merely appears inside the sub-category.
    job_class_big_recrutment[job.split('>')[0]] += total
print(job_class_big_recrutment)

plt.title("大类工作岗位招聘人数")
plt.xlabel("工作岗位")
plt.ylabel("招聘人数 /人")
big_names = list(job_class_big_recrutment.keys())
# dict views are not sequences, so hand matplotlib real lists.
big_totals = list(job_class_big_recrutment.values())
x = np.arange(len(big_names))
plt.xticks(x, big_names, fontsize=10, rotation=90)
plt.bar(x, big_totals, color='y')
for x_pos, total in zip(x, big_totals):
    plt.text(x_pos, total, total)
plt.show()

# ---------------------------------------------------------------------------
# Task 4: head-count per publish year and per publish month
# ---------------------------------------------------------------------------
df1 = df['发布时间'].str.split('-', expand=True)  # split the date text on '-'
df1.columns = ['年', '月', '日']
print(df1)
df['年'] = df1['年']
df['月'] = df1['月']
year_recrutment = df['年'].value_counts()
month_recrutment = df['月'].value_counts()

year_recrutment_people = []
for year in year_recrutment.keys():
    df_year = df[df['年'].isin([year])].reset_index()  # rows published in this year
    year_recrutment_people.append(sum(df_year['招聘人数']))  # head-count for this year

month_recrutment_people = []
for month in month_recrutment.keys():
    df_month = df[df['月'].isin([month])].reset_index()  # rows published in this month
    month_recrutment_people.append(sum(df_month['招聘人数']))  # head-count for this month
print(year_recrutment_people)
print(month_recrutment_people)

plt.title("年 招聘人数")
plt.xlabel("年份")
plt.ylabel("招聘人数 /人")
x = np.arange(len(year_recrutment.keys()))
plt.xticks(x, year_recrutment.keys(), fontsize=10, rotation=90)
plt.bar(x, year_recrutment_people, color='y')
for x_pos, total in zip(x, year_recrutment_people):
    plt.text(x_pos, total, total)
plt.show()

plt.title("月 招聘人数")
plt.xlabel("月份")
plt.ylabel("招聘人数 /人")
x = np.arange(len(month_recrutment.keys()))
plt.xticks(x, month_recrutment.keys(), fontsize=10, rotation=90)
plt.bar(x, month_recrutment_people, color='lightgreen')
for x_pos, total in zip(x, month_recrutment_people):
    plt.text(x_pos, total, total)
plt.show()

# ---------------------------------------------------------------------------
# Task 5: head-count per top-level category, broken down by location
# ---------------------------------------------------------------------------
# Maps location -> {top-level category -> total head-count}.
city_job_class_big = {}
for city in city_list:
    df_city = df[df['工作地点'].isin([city])].reset_index()  # rows for this location
    job_class_big_recrutment = {}  # per-location totals, one entry per top-level category
    for item in job_class_big:  # initialise every category to zero
        job_class_big_recrutment[item] = 0
    for index in range(df_city.shape[0]):
        # Exact prefix lookup instead of a substring test (see task 3).
        big_name = df_city.loc[index, '职位类别'].split('>')[0]
        job_class_big_recrutment[big_name] += df_city.loc[index, '招聘人数']
    city_job_class_big[city] = job_class_big_recrutment
print(city_job_class_big)

plt.title("不同地点大类招聘人数")
plt.xlabel("工作岗位大类")
plt.ylabel("招聘总人数 /人")
x = np.arange(len(job_class_big))
# Same iteration order as the per-city dicts, which were built from this set.
plt.xticks(x, list(job_class_big), fontsize=10, rotation=90)
for index, city in enumerate(city_list):
    city_totals = list(city_job_class_big[city].values())
    # Cycle through the palette so more lines than colours cannot raise IndexError.
    plt.plot(city_totals, color=color_list[index % len(color_list)], label=city)
    for x_pos, total in zip(x, city_totals):
        plt.text(x_pos, total, total)
plt.legend()
plt.show()

# ---------------------------------------------------------------------------
# Task 6: distribution of education requirements per top-level category
# ---------------------------------------------------------------------------
education_spread = df['学历要求'].value_counts()
# Maps top-level category -> {education requirement -> number of postings}.
education_job_class_big = {}
for job in job_class_big:
    education_job_class_big[job] = {education: 0 for education in education_spread.keys()}
for category, education in zip(df['职位类别'], df['学历要求']):
    # Exact prefix lookup instead of a substring test (see task 3).
    education_job_class_big[category.split('>')[0]][education] += 1
print(education_job_class_big)

plt.title("不同大类工作学历要求分布")
# NOTE(review): the x axis shows education levels, so this label looks
# copy-pasted from task 5; kept as-is to leave the chart text unchanged.
plt.xlabel("工作岗位大类")
plt.ylabel("学历要求 /次 ")
x = np.arange(len(education_spread.keys()))
plt.xticks(x, education_spread.keys(), fontsize=10, rotation=90)
for index, job in enumerate(list(job_class_big)):
    education_counts = list(education_job_class_big[job].values())
    plt.plot(education_counts, color=color_list[index % len(color_list)], label=job)
    # The original called the undefined name `education_spreadkeys()` here.
    for x_pos, count in zip(x, education_counts):
        plt.text(x_pos, count, count)
plt.legend()
plt.show()
这个数据分析比较简单,大家仔细阅读代码应该都能看懂;如果有什么问题,欢迎联系我!
“所有的爱都很可贵,但真诚和深情更胜一筹”
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。