赞
踩
本文主要为学习笔记的整合,记录分享的同时不做过多介绍。
# Sample data set used by the analysis below: seven observed variables
# ('a'..'g'), each mapping a row index (0..49) to its measured value.
# NOTE: the list-bullet markers that the page extraction had prepended to
# every line were removed; inside the braces they parsed as unary minus
# and silently negated values/keys (e.g. `6: -4.25`, key `-14`).
new_data = {
    'a': {0: 3.75, 1: 3.38, 2: 3.88, 3: 2.88, 4: 2.75, 5: 2.5, 6: 4.25,
          7: 2.88, 8: 4.12, 9: 4.25, 10: 3.25, 11: 4.38, 12: 4.0, 13: 4.38,
          14: 3.25, 15: 4.0, 16: 4.25, 17: 4.38, 18: 3.62, 19: 3.62,
          20: 3.88, 21: 3.88, 22: 3.38, 23: 3.88, 24: 4.0, 25: 3.75,
          26: 5.75, 27: 3.5, 28: 3.38, 29: 5.62, 30: 3.62,
          31: 4.25, 32: 3.38, 33: 3.5, 34: 3.12,
          35: 3.25, 36: 3.12, 37: 3.38, 38: 3.5, 39: 3.12, 40: 2.88,
          41: 4.0, 42: 3.75, 43: 3.12, 44: 3.88, 45: 3.62, 46: 3.0, 47: 2.62,
          48: 4.25, 49: 5.88},
    'b': {0: 4.2, 1: 4.2, 2: 4.8, 3: 6.0, 4: 3.0, 5: 4.4,
          6: 4.8, 7: 4.8, 8: 7.0, 9: 7.0, 10: 5.2, 11: 6.0, 12: 6.0,
          13: 5.8, 14: 6.0, 15: 6.0, 16: 5.6, 17: 6.2, 18: 5.2, 19: 6.0,
          20: 4.8, 21: 4.8, 22: 6.0, 23: 6.8, 24: 5.4, 25: 5.0, 26: 4.2,
          27: 5.0, 28: 3.4, 29: 5.8, 30: 4.2, 31: 4.8, 32: 4.0, 33: 3.4,
          34: 4.0, 35: 3.4, 36: 3.2, 37: 3.6, 38: 3.6, 39: 2.8, 40: 5.2,
          41: 5.2, 42: 4.8, 43: 5.6, 44: 4.6, 45: 4.6, 46: 5.0, 47: 5.0,
          48: 5.0, 49: 5.0},
    'c': {0: 4.5, 1: 4.88, 2: 3.12, 3: 4.12, 4: 3.38, 5: 3.62, 6: 3.75,
          7: 5.12, 8: 7.0, 9: 5.25, 10: 5.5, 11: 6.88, 12: 5.38, 13: 5.12,
          14: 5.62, 15: 6.88, 16: 6.0, 17: 5.75, 18: 5.75, 19: 5.75, 20: 6.0,
          21: 6.25, 22: 5.62, 23: 6.25, 24: 4.88, 25: 4.5, 26: 4.75, 27: 5.62,
          28: 5.38, 29: 5.62, 30: 4.88, 31: 5.0, 32: 5.25, 33: 6.38, 34: 5.5,
          35: 5.62, 36: 6.88, 37: 6.38, 38: 7.0, 39: 5.62, 40: 6.75, 41: 6.75,
          42: 6.38, 43: 6.62, 44: 5.75, 45: 6.88, 46: 6.88, 47: 6.5,
          48: 6.12, 49: 6.62},
    'd': {0: 4.33, 1: 4.0, 2: 3.5, 3: 3.0, 4: 3.5, 5: 3.5, 6: 4.83, 7: 4.83,
          8: 6.67, 9: 5.17, 10: 5.83, 11: 6.67, 12: 5.33, 13: 5.0, 14: 6.17,
          15: 6.5, 16: 6.5, 17: 6.5, 18: 4.83, 19: 4.5, 20: 4.83, 21: 5.33,
          22: 5.33, 23: 7.0, 24: 4.83, 25: 3.33, 26: 5.67, 27: 4.33, 28: 3.33,
          29: 4.33, 30: 5.0, 31: 3.67, 32: 5.67, 33: 6.67, 34: 5.33, 35: 6.5,
          36: 5.5, 37: 4.83, 38: 6.0, 39: 6.5, 40: 5.0, 41: 3.83, 42: 4.83,
          43: 6.5, 44: 4.5, 45: 4.5, 46: 5.17, 47: 4.83, 48: 6.83, 49: 6.0},
    'e': {0: 4.8, 1: 2.2, 2: 3.2, 3: 5.0, 4: 3.0, 5: 4.8, 6: 4.4, 7: 5.2,
          8: 5.4, 9: 5.0, 10: 5.2, 11: 5.0, 12: 5.6, 13: 5.2, 14: 5.8,
          15: 6.2, 16: 5.0, 17: 5.0, 18: 4.8, 19: 4.6, 20: 5.0, 21: 5.8,
          22: 4.8, 23: 6.6, 24: 4.8, 25: 3.6, 26: 4.4, 27: 5.4, 28: 4.8,
          29: 4.2, 30: 5.4, 31: 5.2, 32: 4.8, 33: 4.4, 34: 5.2, 35: 5.0,
          36: 6.0, 37: 5.0, 38: 4.8, 39: 5.4, 40: 7.0, 41: 6.8, 42: 4.4,
          43: 6.2, 44: 5.8, 45: 5.6, 46: 6.0, 47: 6.2, 48: 5.4, 49: 6.4},
    'f': {0: 2.4, 1: 3.0, 2: 3.8, 3: 2.8, 4: 4.6, 5: 2.6, 6: 2.6, 7: 3.8,
          8: 3.6, 9: 7.0, 10: 3.2, 11: 6.6, 12: 5.0, 13: 2.8, 14: 5.6,
          15: 3.6, 16: 5.6, 17: 4.2, 18: 5.8, 19: 3.4, 20: 2.4, 21: 3.0,
          22: 3.6, 23: 5.8, 24: 5.0, 25: 3.0, 26: 5.4, 27: 4.4, 28: 4.4,
          29: 3.2, 30: 5.6, 31: 5.4, 32: 3.6, 33: 3.2, 34: 3.2, 35: 3.0,
          36: 2.6, 37: 2.6, 38: 2.4, 39: 3.0, 40: 3.8, 41: 4.6, 42: 4.0,
          43: 5.2, 44: 4.8, 45: 4.6, 46: 4.2, 47: 5.2, 48: 5.4, 49: 6.4},
    'g': {0: 2.33, 1: 5.5, 2: 3.83, 3: 4.5, 4: 4.83, 5: 3.83, 6: 3.83,
          7: 4.17, 8: 7.0, 9: 5.83, 10: 5.67, 11: 5.83, 12: 5.67, 13: 5.33,
          14: 6.0, 15: 4.5, 16: 6.33, 17: 5.67, 18: 5.5, 19: 6.0, 20: 5.5,
          21: 6.5, 22: 5.33, 23: 6.5, 24: 4.67, 25: 4.83, 26: 5.17, 27: 5.33,
          28: 5.0, 29: 4.83, 30: 4.83, 31: 4.33, 32: 5.5, 33: 5.67, 34: 6.17,
          35: 5.83, 36: 6.33, 37: 6.17, 38: 5.5, 39: 6.17, 40: 6.0, 41: 4.67,
          42: 4.67, 43: 5.5, 44: 4.83, 45: 4.17, 46: 3.83, 47: 5.17,
          48: 5.33, 49: 5.33},
}
import pingouin as pg
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from factor_analyzer import FactorAnalyzer
from factor_analyzer.factor_analyzer import calculate_kmo
from factor_analyzer.factor_analyzer import calculate_bartlett_sphericity

# Workflow of this script:
#   1. reliability test   2. validity tests   3. factor analysis
# NOTE: the original notes call step 3 "confirmatory" factor analysis, but
# FactorAnalyzer as used here fits an exploratory factor model.

# Build the variable matrix: one column per key of `new_data` ('a'..'g'),
# one row per inner index.
data = pd.DataFrame(new_data)
# Plain ndarray copy of the same matrix. (np.array(new_data) on a dict of
# dicts would yield a 0-d object array rather than a numeric matrix.)
test_data = data.to_numpy()

# ---------------------------------------------------------------------------
# 1. Reliability: checks how consistent the scale is.
#    Cronbach's alpha is used; as a rule of thumb the scale passes when
#    alpha is above 0.7. The input must be a DataFrame.
#    pg.cronbach_alpha returns (alpha, confidence interval), so `t_value`
#    actually holds the confidence interval, not a t statistic; the name is
#    kept so existing references keep working.
# ---------------------------------------------------------------------------
reliability, t_value = pg.cronbach_alpha(data)

# ---------------------------------------------------------------------------
# 2. Validity: checks how accurately the scale reflects what it measures.
#    a) Bartlett's test of sphericity: tests whether the correlation matrix
#       is an identity matrix. A p-value below 0.05 means it is not, i.e.
#       the original variables are correlated.
#    b) KMO test: compares simple correlations with partial correlations.
#       The statistic lies between 0 and 1; larger is better.
# ---------------------------------------------------------------------------
# `kafa_value` is the chi-square statistic of Bartlett's test.
kafa_value, p_value = calculate_bartlett_sphericity(data)
# `kmo_all` holds the per-variable KMO scores, `kmo_value` the overall one.
kmo_all, kmo_value = calculate_kmo(data)

# ---------------------------------------------------------------------------
# 3. Factor analysis: describe the latent variables behind the observed ones.
# ---------------------------------------------------------------------------
# 3.1 Unrotated model with as many factors as there are variables, used only
#     to decide how many common factors to retain.
model = FactorAnalyzer(rotation=None, n_factors=data.shape[1], method='principal')
model.fit(data)

# 3.2 Variance table before rotation. Usual retention rule: keep factors
#     whose eigenvalue exceeds 1 while the cumulative variance reaches ~0.7.
f_contribution_var = model.get_factor_variance()
var_df = pd.DataFrame()
var_df["旋转前特征值"] = f_contribution_var[0]
var_df["旋转前方差贡献率"] = f_contribution_var[1]
var_df["旋转前方差累计贡献率"] = f_contribution_var[2]

# 3.3 Final model: three common factors with varimax rotation.
new_model = FactorAnalyzer(3, rotation='varimax')
zd_df = data
new_model.fit(zd_df)

# Communality of every variable (sum of its squared loadings); values above
# roughly 0.3 are usually considered acceptable.
com_var_df = pd.DataFrame(new_model.get_communalities(), index=zd_df.columns)
# Eigenvalues reported by the fitted model. get_eigenvalues() returns two
# arrays (original and common-factor eigenvalues), hence two rows here.
feature_df = pd.DataFrame(new_model.get_eigenvalues())
# Factor loadings: correlation between each common factor and each variable.
corr_df = pd.DataFrame(new_model.loadings_, index=zd_df.columns)

# Per-factor variance summary (variance, proportion, cumulative proportion),
# one row per retained factor, labelled factor_1..factor_n.
index_list = ['公因子名称', "公因子特征值", "公因子方差贡献率", "公因子累计方差贡献率"]
factor_var = pd.DataFrame(new_model.get_factor_variance()).T
factor_var.index = ['factor_' + str(x + 1) for x in range(len(factor_var))]
factor_var = factor_var.reset_index()
factor_var.columns = index_list

# Factor scores of every observation on the extracted common factors.
out_data = pd.DataFrame(new_model.transform(zd_df))

# Finally, visualise how the common factors relate to the original variables.
df1 = pd.DataFrame(corr_df, index=zd_df.columns)
df2 = data.corr()

# Configure fonts BEFORE any artist is created so that every text element
# (ticks, annotations, title, axis labels) can render the Chinese strings.
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']
plt.rcParams['axes.unicode_minus'] = False  # keep the minus sign renderable

plt.figure()
ax = sns.heatmap(df1, annot=True, cmap="GnBu", linewidths=0.2)

plt.title("公共因子与原始变量相关性")
plt.ylabel("原始变量")
plt.xlabel('公共因子')

# No plt.legend() here: the heatmap has no labelled artists, so a legend
# call only emits a warning; the colorbar already serves as the scale.
plt.show()
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。