当前位置:   article > 正文

R主成分分析与因子分析_r-模式主成分荷载得分

r-模式主成分荷载得分
主成分分析
通过降维把多个变量化成几个主成分的方法
基本思想:设法将原先众多具有一定相关性的指标,重新组合为一组新的互相独立的综合指标,并代替原先的指标
# Example data: 30 observations of four numeric variables X1..X4.
student <- data.frame(
  X1 = c(
    148, 139, 160, 149, 159, 142, 153, 150, 151, 139,
    140, 161, 158, 140, 137, 152, 149, 145, 160, 156,
    151, 147, 157, 147, 157, 151, 144, 141, 139, 148
  ),
  X2 = c(
    41, 34, 49, 36, 45, 31, 43, 43, 42, 31,
    29, 47, 49, 33, 31, 35, 47, 35, 47, 44,
    42, 38, 39, 30, 48, 36, 36, 30, 32, 38
  ),
  X3 = c(
    72, 71, 77, 67, 80, 66, 76, 77, 77, 68,
    64, 78, 78, 67, 66, 73, 82, 70, 74, 78,
    73, 73, 68, 65, 80, 74, 68, 67, 68, 70
  ),
  X4 = c(
    78, 76, 86, 79, 86, 76, 83, 79, 80, 74,
    74, 84, 83, 77, 73, 79, 79, 77, 87, 85,
    82, 78, 80, 75, 88, 80, 76, 76, 73, 78
  )
)

# PCA on the correlation matrix. Equivalent to the formula form:
#   princomp(~ X1 + X2 + X3 + X4, data = student, cor = TRUE)
student.pr <- princomp(student, cor = TRUE)

# Importance of each component, together with the loadings.
summary(student.pr, loadings = TRUE)

# Component scores for the observations used in the fit.
predict(student.pr)

# Scree plot drawn as a line chart.
screeplot(student.pr, type = "lines")


# Data set used for the second PCA example: a data frame with 31 rows and
# eight numeric columns x1..x8 (31 values per column).
# Rows are labelled via row.names with the names of Chinese provincial-level
# regions (Beijing, Tianjin, Hebei, ... Xinjiang).
# NOTE(review): the meaning and units of x1..x8 are not stated here; they are
# presumably per-region consumption expenditure categories (the article later
# interprets the components as daily necessities / clothing and housing) --
# TODO confirm against the original data source.
X<-data.frame(
x1=c(2959.19, 2459.77, 1495.63, 1046.33, 1303.97, 1730.84,
1561.86, 1410.11, 3712.31, 2207.58, 2629.16, 1844.78,
2709.46, 1563.78, 1675.75, 1427.65, 1783.43, 1942.23,
3055.17, 2033.87, 2057.86, 2303.29, 1974.28, 1673.82,
2194.25, 2646.61, 1472.95, 1525.57, 1654.69, 1375.46,
1608.82),
x2=c(730.79, 495.47, 515.90, 477.77, 524.29, 553.90, 492.42,
510.71, 550.74, 449.37, 557.32, 430.29, 428.11, 303.65,
613.32, 431.79, 511.88, 512.27, 353.23, 300.82, 186.44,
589.99, 507.76, 437.75, 537.01, 839.70, 390.89, 472.98,
437.77, 480.99, 536.05),
x3=c(749.41, 697.33, 362.37, 290.15, 254.83, 246.91, 200.49,
211.88, 893.37, 572.40, 689.73, 271.28, 334.12, 233.81,
550.71, 288.55, 282.84, 401.39, 564.56, 338.65, 202.72,
516.21, 344.79, 461.61, 369.07, 204.44, 447.95, 328.90,
258.78, 273.84, 432.46),
x4=c(513.34, 302.87, 285.32, 208.57, 192.17, 279.81, 218.36,
277.11, 346.93, 211.92, 435.69, 126.33, 160.77, 107.90,
219.79, 208.14, 201.01, 206.06, 356.27, 157.78, 171.79,
236.55, 203.21, 153.32, 249.54, 209.11, 259.51, 219.86,
303.00, 317.32, 235.82),
x5=c(467.87, 284.19, 272.95, 201.50, 249.81, 239.18, 220.69,
224.65, 527.00, 302.09, 514.66, 250.56, 405.14, 209.70,
272.59, 217.00, 237.60, 321.29, 811.88, 329.06, 329.65,
403.92, 240.24, 254.66, 290.84, 379.30, 230.61, 206.65,
244.93, 251.08, 250.28),
x6=c(1141.82, 735.97, 540.58, 414.72, 463.09, 445.20, 459.62,
376.82, 1034.98, 585.23, 795.87, 513.18, 461.67, 393.99,
599.43, 337.76, 617.74, 697.22, 873.06, 621.74, 477.17,
730.05, 575.10, 445.59, 561.91, 371.04, 490.90, 449.69,
479.53, 424.75, 541.30),
x7=c(478.42, 570.84, 364.91, 281.84, 287.87, 330.24, 360.48,
317.61, 720.33, 429.77, 575.76, 314.00, 535.13, 509.39,
371.62, 421.31, 523.52, 492.60, 1082.82, 587.02, 312.93,
438.41, 430.36, 346.11, 407.70, 269.59, 469.10, 249.66,
288.56, 228.73, 344.85),
x8=c(457.64, 305.08, 188.63, 212.10, 192.96, 163.86, 147.76,
152.85, 462.03, 252.54, 323.36, 151.39, 232.29, 160.12,
211.84, 165.32, 182.52, 226.45, 420.81, 218.27, 279.19,
225.80, 223.46, 191.48, 330.95, 389.33, 191.34, 228.19,
236.51, 195.93, 214.40),
# One label per row, in the same order as the values above.
row.names=c("北京","天津","河北","山西","内蒙古","辽宁","吉林","黑龙江","上海","江苏","浙江","安徽","福建","江西","山东","河南","湖北",
"湖南","广东","广西","海南","重庆","四川","贵州","云南","西藏","陕西","甘肃","青海","宁夏","新疆")
)
# Principal component analysis of X based on the correlation matrix
# (cor = TRUE), i.e. on the eigen-decomposition of cor(X).
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
PCA <- princomp(X, cor = TRUE)

# Print the fitted object (standard deviations of the components).
PCA

# Loadings of the principal components.
PCA$loadings

# Scree plot (line chart) to help decide how many components to keep.
screeplot(PCA, type = "lines")

# Component scores; same values as predict(PCA). The component is named
# `scores` -- the full name is used instead of relying on `$` partial matching.
PCA$scores
结果解释:Z1 代表日常必需消费开支,Z2 代表衣着和居住
# Biplot of the observations and variables on the first two components.
biplot(PCA, choices = 1:2, scale = 1)

# k-means clustering (5 clusters) of the observations using their scores on
# the first two components. `scores` is written in full rather than relying on
# `$` partial matching. kmeans() picks random initial centres, so cluster
# labels can differ between runs unless the RNG seed is fixed beforehand.
kmeans(PCA$scores[, 1:2], centers = 5)


主成分回归 薛毅书P516


因子分析
降维的一种方法,是主成分分析的推广和发展
是用于分析隐藏在表面现象背后的因子作用的统计模型。试图用最少个数的不可测的
公共因子的线性函数与特殊因子之和来描述原来观测的每一分量
例子:各科学习成绩(数学能力,语言能力,运动能力等)
例子:生活满意度(工作满意度,家庭满意度)例子:薛毅书P522
因子分析的主要用途
减少分析变量个数
通过对变量间相关关系的探测,将原始变量分组,即将相关性高的变量分为一组,用
共性因子来代替该变量
使问题背后的业务因素的意义更加清晰呈现
与主成分分析的区别
主成分分析侧重“变异量”,通过转换原始变量为新的组合变量使得数据的“变异量”
最大,从而能把样本个体之间的差异最大化,但得出来的主成分往往从业务场景的
角度难以解释
因子分析更重视相关变量的“共变异量”,组合的是相关性较强的原始变量,目的是
找到在背后起作用的少量关键因子,因子分析的结果往往更容易用业务知识去加以解释

因子分析使用了复杂的数学手段
比主成分分析更加复杂的数学模型
求解模型的方法:主成分法,主因子法,极大似然法
结果还可以通过因子旋转,使得业务意义更加明显
声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/blog/article/detail/94131
推荐阅读
相关标签
  

闽ICP备14008679号