赞
踩
library(nlme)
## Warning: package 'nlme' was built under R version 3.6.3
library(lme4)
## Warning: package 'lme4' was built under R version 3.6.3
## Loading required package: Matrix
- ##
- ## Attaching package: 'lme4'
- ## The following object is masked from 'package:nlme':
- ##
- ## lmList
library(lmerTest)
## Warning: package 'lmerTest' was built under R version 3.6.3
- ##
- ## Attaching package: 'lmerTest'
- ## The following object is masked from 'package:lme4':
- ##
- ## lmer
- ## The following object is masked from 'package:stats':
- ##
- ## step
- library(AED)
- library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.3
使用示例数据集,比较不同协变量调整下线性回归模型的效能
- data("RIKZ")
-
- mod_lm1<-lm(Richness~NAP,data=RIKZ)
- mod_lm2<-lm(Richness~NAP+Exposure,data=RIKZ)
- mod_lm3<-lm(Richness~NAP+Exposure+Beach,data=RIKZ)
-
- summary(mod_lm1)
- ##
- ## Call:
- ## lm(formula = Richness ~ NAP, data = RIKZ)
- ##
- ## Residuals:
- ## Min 1Q Median 3Q Max
- ## -5.0675 -2.7607 -0.8029 1.3534 13.8723
- ##
- ## Coefficients:
- ## Estimate Std. Error t value Pr(>|t|)
- ## (Intercept) 6.6857 0.6578 10.164 5.25e-13 ***
- ## NAP -2.8669 0.6307 -4.545 4.42e-05 ***
- ## ---
- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
- ##
- ## Residual standard error: 4.16 on 43 degrees of freedom
- ## Multiple R-squared: 0.3245, Adjusted R-squared: 0.3088
- ## F-statistic: 20.66 on 1 and 43 DF, p-value: 4.418e-05

summary(mod_lm2)
- ##
- ## Call:
- ## lm(formula = Richness ~ NAP + Exposure, data = RIKZ)
- ##
- ## Residuals:
- ## Min 1Q Median 3Q Max
- ## -4.3083 -1.7107 -0.8489 0.7674 13.3264
- ##
- ## Coefficients:
- ## Estimate Std. Error t value Pr(>|t|)
- ## (Intercept) 37.2909 5.1878 7.188 7.83e-09 ***
- ## NAP -2.7252 0.4716 -5.779 8.26e-07 ***
- ## Exposure -2.9988 0.5060 -5.926 5.07e-07 ***
- ## ---
- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
- ##
- ## Residual standard error: 3.106 on 42 degrees of freedom
- ## Multiple R-squared: 0.6321, Adjusted R-squared: 0.6146
- ## F-statistic: 36.09 on 2 and 42 DF, p-value: 7.577e-10

summary(mod_lm3)
- ##
- ## Call:
- ## lm(formula = Richness ~ NAP + Exposure + Beach, data = RIKZ)
- ##
- ## Residuals:
- ## Min 1Q Median 3Q Max
- ## -4.0393 -1.6114 -0.5544 0.7196 13.5599
- ##
- ## Coefficients:
- ## Estimate Std. Error t value Pr(>|t|)
- ## (Intercept) 36.3706 5.1212 7.102 1.18e-08 ***
- ## NAP -2.5119 0.4810 -5.223 5.46e-06 ***
- ## Exposure -2.7647 0.5170 -5.348 3.64e-06 ***
- ## Beach -0.3095 0.1906 -1.623 0.112
- ## ---
- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
- ##
- ## Residual standard error: 3.048 on 41 degrees of freedom
- ## Multiple R-squared: 0.6543, Adjusted R-squared: 0.6291
- ## F-statistic: 25.87 on 3 and 41 DF, p-value: 1.484e-09

同时调整NAP,Exposure及Beach时,Beach与Richness的关联无统计学意义
绘制model1的诊断性曲线图
- par(mfrow=c(2,2))
- plot(mod_lm1)
model1的诊断性曲线提示model1中自变量不符合正态性及方差齐性,比较各个model的AIC值
AIC(mod_lm1,mod_lm2,mod_lm3)
- ## df AIC
- ## mod_lm1 3 259.9535
- ## mod_lm2 4 234.6083
- ## mod_lm3 5 233.8053
mod_lm3的AIC值最小,其中Beach无统计学意义,按Beach进行分组,绘制回归曲线
- ggplot(RIKZ,aes(NAP,Richness,group=Beach,color=Beach))+
- geom_point(size=3)+
- geom_smooth(formula = "y~x",method = "lm",se=F,size=0.5)
发现不同Beach分组的回归曲线斜率与截距不一致,提示Beach仍然是Richness的影响因素,单独调整Beach时,结果如下
- mod_lm4<-lm(Richness~NAP+Beach,data = RIKZ)
- summary(mod_lm4)
- ##
- ## Call:
- ## lm(formula = Richness ~ NAP + Beach, data = RIKZ)
- ##
- ## Residuals:
- ## Min 1Q Median 3Q Max
- ## -5.8980 -2.4944 -0.4541 1.2401 14.2386
- ##
- ## Coefficients:
- ## Estimate Std. Error t value Pr(>|t|)
- ## (Intercept) 9.5053 1.2793 7.430 3.55e-09 ***
- ## NAP -2.4363 0.6188 -3.937 0.000305 ***
- ## Beach -0.5939 0.2357 -2.520 0.015622 *
- ## ---
- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
- ##
- ## Residual standard error: 3.923 on 42 degrees of freedom
- ## Multiple R-squared: 0.4133, Adjusted R-squared: 0.3853
- ## F-statistic: 14.79 on 2 and 42 DF, p-value: 1.372e-05

Beach仍有统计学意义,提示Exposure与Beach可能存在交互作用
- par(mfrow=c(2,2))
- plot(mod_lm4)
使用lme函数,添加Beach的随机截距项
- mod_lmm1<-lme(Richness ~NAP,random = ~1|Beach,data=RIKZ)
- summary(mod_lmm1)
- ## Linear mixed-effects model fit by REML
- ## Data: RIKZ
- ## AIC BIC logLik
- ## 247.4802 254.525 -119.7401
- ##
- ## Random effects:
- ## Formula: ~1 | Beach
- ## (Intercept) Residual
- ## StdDev: 2.944065 3.05977
- ##
- ## Fixed effects: Richness ~ NAP
- ## Value Std.Error DF t-value p-value
- ## (Intercept) 6.581893 1.0957618 35 6.006682 0
- ## NAP -2.568400 0.4947246 35 -5.191574 0
- ## Correlation:
- ## (Intr)
- ## NAP -0.157
- ##
- ## Standardized Within-Group Residuals:
- ## Min Q1 Med Q3 Max
- ## -1.4227495 -0.4848006 -0.1576462 0.2518966 3.9793918
- ##
- ## Number of Observations: 45
- ## Number of Groups: 9

预测Richness的值,为fit1
- RIKZ$fit1<-predict(mod_lmm1)
- head(RIKZ)
- ## Sample Richness Exposure NAP Beach fit1
- ## 1 1 11 10 0.045 1 9.087834
- ## 2 2 10 10 -1.036 1 11.864274
- ## 3 3 13 10 -1.336 1 12.634793
- ## 4 4 11 10 0.616 1 7.621278
- ## 5 5 10 10 -0.684 1 10.960197
- ## 6 6 8 8 1.190 2 8.725105
绘制线性回归图
- ggplot(RIKZ,aes(NAP,Richness,color=factor(Beach)))+
- geom_point(size=3,alpha=0.25)+
- geom_abline(aes(intercept=6.5819),slope=-2.5684)+
- geom_point(aes(y=fit1),size=5,alpha=0.5)+
- geom_line(aes(y=fit1),size=1)+
- theme_bw()+
- theme(legend.position = "none")+
- scale_color_brewer(palette = "Set1")
- ## Warning: geom_abline(): Ignoring `mapping` because `slope` and/or `intercept`
- ## were provided.
斜率相同,但截距不同,与线性回归模型进行比较
- RIKZ$fit0<-predict(mod_lm1)
- head(RIKZ)
- ## Sample Richness Exposure NAP Beach fit1 fit0
- ## 1 1 11 10 0.045 1 9.087834 6.556653
- ## 2 2 10 10 -1.036 1 11.864274 9.655722
- ## 3 3 13 10 -1.336 1 12.634793 10.515778
- ## 4 4 11 10 0.616 1 7.621278 4.919680
- ## 5 5 10 10 -0.684 1 10.960197 8.646589
- ## 6 6 8 8 1.190 2 8.725105 3.274107
一般线性回归图
- ggplot(RIKZ,aes(NAP,Richness,color=factor(Beach)))+
- geom_point(size=3,alpha=0.25)+
- geom_abline(aes(intercept=6.5819),slope=-2.5684)+
- geom_point(aes(y=fit0),size=5,alpha=0.5)+
- geom_line(aes(y=fit0),size=1)+
- theme_bw()+
- theme(legend.position = "none")+
- scale_color_brewer(palette = "Set1")
- ## Warning: geom_abline(): Ignoring `mapping` because `slope` and/or `intercept`
- ## were provided.
线性混合模型更能体现数据分布特征,添加随机斜率项目
head(RIKZ,10)
- ## Sample Richness Exposure NAP Beach fit1 fit0
- ## 1 1 11 10 0.045 1 9.087834 6.556653
- ## 2 2 10 10 -1.036 1 11.864274 9.655722
- ## 3 3 13 10 -1.336 1 12.634793 10.515778
- ## 4 4 11 10 0.616 1 7.621278 4.919680
- ## 5 5 10 10 -0.684 1 10.960197 8.646589
- ## 6 6 8 8 1.190 2 8.725105 3.274107
- ## 7 7 9 8 0.820 2 9.675413 4.334842
- ## 8 8 8 8 0.635 2 10.150567 4.865210
- ## 9 9 19 8 0.061 2 11.624829 6.510784
- ## 10 10 17 8 -1.334 2 15.207746 10.510044
- mod_lmm2<-lme(Richness~NAP,random=~1+NAP|Beach,data=RIKZ)
-
- summary(mod_lmm2)
- ## Linear mixed-effects model fit by REML
- ## Data: RIKZ
- ## AIC BIC logLik
- ## 244.3839 254.9511 -116.1919
- ##
- ## Random effects:
- ## Formula: ~1 + NAP | Beach
- ## Structure: General positive-definite, Log-Cholesky parametrization
- ## StdDev Corr
- ## (Intercept) 3.549064 (Intr)
- ## NAP 1.714963 -0.99
- ## Residual 2.702820
- ##
- ## Fixed effects: Richness ~ NAP
- ## Value Std.Error DF t-value p-value
- ## (Intercept) 6.588706 1.264761 35 5.209448 0e+00
- ## NAP -2.830028 0.722940 35 -3.914610 4e-04
- ## Correlation:
- ## (Intr)
- ## NAP -0.819
- ##
- ## Standardized Within-Group Residuals:
- ## Min Q1 Med Q3 Max
- ## -1.8213326 -0.3411043 -0.1674617 0.1921216 3.0397049
- ##
- ## Number of Observations: 45
- ## Number of Groups: 9

绘制混合线性模型回归曲线
- RIKZ$fit2<-predict(mod_lmm2)
-
- ggplot(RIKZ,aes(NAP,Richness,color=factor(Beach)))+
- geom_point(size=3,alpha=0.25)+
- geom_abline(aes(intercept=6.5819),slope=-2.5684)+
- geom_point(aes(y=fit2),size=5,alpha=0.5)+
- geom_line(aes(y=fit2),size=1)+
- theme_bw()+
- theme(legend.position = "none")+
- scale_color_brewer(palette = "Set1")
- ## Warning: geom_abline(): Ignoring `mapping` because `slope` and/or `intercept`
- ## were provided.
可以看到不同NAP组的斜率及结局都不尽相同,与其他模型相比,更能体现数据特征
补充 lme4包实现
- library(lme4)
- mod_lmm4<-lmer(Richness~NAP+(1+NAP|Beach),REML=F,data = RIKZ)
## boundary (singular) fit: see ?isSingular
summary(mod_lmm4)
- ## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
- ## method [lmerModLmerTest]
- ## Formula: Richness ~ NAP + (1 + NAP | Beach)
- ## Data: RIKZ
- ##
- ## AIC BIC logLik deviance df.resid
- ## 246.7 257.5 -117.3 234.7 39
- ##
- ## Scaled residuals:
- ## Min 1Q Median 3Q Max
- ## -1.7985 -0.3418 -0.1827 0.1749 3.1389
- ##
- ## Random effects:
- ## Groups Name Variance Std.Dev. Corr
- ## Beach (Intercept) 10.949 3.309
- ## NAP 2.502 1.582 -1.00
- ## Residual 7.174 2.678
- ## Number of obs: 45, groups: Beach, 9
- ##
- ## Fixed effects:
- ## Estimate Std. Error df t value Pr(>|t|)
- ## (Intercept) 6.5818 1.1883 8.8936 5.539 0.000377 ***
- ## NAP -2.8293 0.6849 7.9217 -4.131 0.003366 **
- ## ---
- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
- ##
- ## Correlation of Fixed Effects:
- ## (Intr)
- ## NAP -0.810
- ## convergence code: 0
- ## boundary (singular) fit: see ?isSingular

AIC(mod_lmm1,mod_lmm2,mod_lmm4)
- ## df AIC
- ## mod_lmm1 4 247.4802
- ## mod_lmm2 6 244.3839
- ## mod_lmm4 6 246.6561
mod_lmm2的AIC最小,模型最好 注意:lme和lmer函数添加随机项目的表达式有点差别,同时,lmer函数中默认REML=TRUE)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。