赞
踩
数据准备:
1.phe 临床信息 dataframe格式 。行名顺序要与表达矩阵样本顺序一致 ,
#####至少包括是否死亡event 生存时间time 以及分类标准(基因高低 肿瘤分期 是否转移等)
2.表达矩阵
临床信息 meta信息
给感兴趣的指标进行赋值
library(survival)
library(survminer)
# 利用ggsurvplot快速绘制漂亮的生存曲线图
sfit <- survfit(data=phe,Surv(time, event)~size)
sfit
summary(sfit)
ggsurvplot(sfit, conf.int=F, pval=TRUE)
ggsurvplot(sfit,palette = c("#E7B800", "#2E9FDF"),
risk.table =TRUE,pval =TRUE,#在底部加table
conf.int =TRUE,xlab ="Time in months",
ggtheme =theme_light(),
ncensor.plot = TRUE)
sfit1=survfit(Surv(time, event)~size, data=phe)
sfit2=survfit(Surv(time, event)~grade, data=phe)
splots <- list()
splots[[1]] <- ggsurvplot(sfit1,pval =TRUE, data = phe, risk.table = TRUE)
splots[[2]] <- ggsurvplot(sfit2,pval =TRUE, data = phe, risk.table = TRUE)
# Arrange multiple ggsurvplots and print the output
arrange_ggsurvplots(splots, print = TRUE, ncol = 2, nrow = 1, risk.table.height = 0.4)
# 可以看到grade跟生存显著相关,而size跟病人生存的关系并不显著。
phe$CBX4=ifelse(exprSet['CBX4',]>median(exprSet['CBX4',]),'high','low')
head(phe)
table(phe$CBX4)
ggsurvplot(survfit(Surv(time, event)~CBX4, data=phe), conf.int=F, pval=TRUE)
方法二
ggsurvplot(survfit(Surv(time, event)~phe[,'CBX4'], data=phe), conf.int=F, pval=TRUE)
画另外一个基因的 高低表达生存分析
phe$CBX6=ifelse(exprSet['CBX6',]>median(exprSet['CBX6',]),'high','low')
head(phe)
table(phe$CBX6)
ggsurvplot(survfit(Surv(time, event)~CBX6, data=phe), conf.int=F, pval=TRUE)
## 批量生存分析 使用 logrank test 方法 mySurv=with(phe,Surv(time, event)) log_rank_p <- apply(exprSet , 1 , function(gene){ #gene=exprSet[1,] phe$group=ifelse(gene>median(gene),'high','low') data.survdiff=survdiff(mySurv~group,data=phe) p.val = 1 - pchisq(data.survdiff$chisq, length(data.survdiff$n) - 1) return(p.val) }) log_rank_p=sort(log_rank_p) head(log_rank_p) boxplot(log_rank_p) phe$H6PD=ifelse(exprSet['H6PD',]>median(exprSet['H6PD',]),'high','low') table(phe$H6PD) ggsurvplot(survfit(Surv(time, event)~H6PD, data=phe), conf.int=F, pval=TRUE) # 前面如果我们使用了WGCNA找到了跟grade相关的基因模块,然后在这里就可以跟生存分析的显著性基因做交集 # 这样就可以得到既能跟grade相关,又有临床预后意义的基因啦。
mySurv=with(phe,Surv(time, event)) for (eachgene in gene_interested) { phe$group=ifelse(exprSet[eachgene,]>median(exprSet[eachgene,]),'high','low') p=ggsurvplot(survfit(mySurv~group, data=phe), conf.int=F, pval=TRUE) pdf(paste0(eachgene, "_surv.pdf"),width = 5, height = 5) print(p, newpage = FALSE) dev.off() 方法二 #批量输出 成功! 不推荐这个方法 if(1==2){ dir.create("G:/r/duqiang_IPF/GSE70866—true—_BAL_IPF_donors_RNA-seq/survival_for_genes-1") setwd("G:/r/duqiang_IPF/GSE70866—true—_BAL_IPF_donors_RNA-seq/survival_for_genes-1") mySurv=with(phe,Surv(time, event)) for (eachgene in gene_interested) { phe[eachgene]=ifelse(exprSet[eachgene,]>median(exprSet[eachgene,]),'high','low') p=ggsurvplot(survfit(mySurv~phe[,eachgene], data=phe), conf.int=F, pval=TRUE) pdf(paste0(eachgene, "_surv.pdf"),width = 5, height = 5) print(p, newpage = FALSE) dev.off() }
colnames(phe) mySurv=with(phe,Surv(time, event)) cox_results <-apply(exprSet , 1 , function(gene){ group=ifelse(gene>median(gene),'high','low') survival_dat <- data.frame(group=group,grade=phe$grade,size=phe$size,stringsAsFactors = F) m=coxph(mySurv ~ grade + size + group, data = survival_dat) beta <- coef(m) se <- sqrt(diag(vcov(m))) HR <- exp(beta) HRse <- HR * se #summary(m) tmp <- round(cbind(coef = beta, se = se, z = beta/se, p = 1 - pchisq((beta/se)^2, 1), HR = HR, HRse = HRse, HRz = (HR - 1) / HRse, HRp = 1 - pchisq(((HR - 1)/HRse)^2, 1), HRCILL = exp(beta - qnorm(.975, 0, 1) * se), HRCIUL = exp(beta + qnorm(.975, 0, 1) * se)), 3) return(tmp['grouplow',]) }) cox_results=t(cox_results) table(cox_results[,4]<0.05) cox_results[cox_results[,4]<0.05,] length(setdiff(rownames(cox_results[cox_results[,4]<0.05,]), names(log_rank_p[log_rank_p<0.05]) )) length(setdiff( names(log_rank_p[log_rank_p<0.05]), rownames(cox_results[cox_results[,4]<0.05,]) )) length(unique( names(log_rank_p[log_rank_p<0.05]), rownames(cox_results[cox_results[,4]<0.05,]) )) save(log_rank_p,cox_results,exprSet,phe,file = 'surviva.Rdata')
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。