赞
踩
1#GWAS与eQTL共定位分析
基本思想:
第一种设想 H0: 表型1(GWAS)和 表型2 (以eQTL为例)与某个基因组区域的所有SNP位点无显著相关;
第二种设想 H1/H2: 表型1(GWAS)或表型2(以eQTL为例)与某个基因组区域的SNP位点显著相关;
第三种设想 H3: 表型1(GWAS)和 表型2 (以eQTL为例)与某个基因组区域的SNP位点显著相关,但由不同的因果变异位点驱动;
第四种设想 H4: 表型1(GWAS)和 表型2 (以eQTL为例)与某个基因组区域的SNP位点显著相关,且由同一个因果变异位点驱动;
2#coloc包分析的区域不能是整个基因组或者一整条染色体,必须是一个区域。并且要包含该区域的所有位点,不能用MAF或其他方式进行筛选;
如何确定选择区域。这里我们借用冻羊的小屋教程内容,可以参考的选择区域如下:
3#leadSNP是指与表型关联最强的SNP,通常是指可能和表型相关的关键位点。冻羊的教程中是采用p值最小的SNP作为leadSNP。我这里采用的SMR分析结果中的topSNP。
4#这里利用SMR里面所描述的方法,寻找目标基因topSNP 1000kb范围内的SNP。Linux中的命令行#双变量var1和var2,通过一个数组的方式,一一对应变化,适应于批量获取多个topSNP目标区域的所有snp。
- var1=(rs2233823
- rs1966494
- rs35675666)
- var2=(ENSG00000112667
- ENSG00000113369
- ENSG00000116288)
- length=${#var1[@]}
- for ((i=0; i<$length; i++))
- do
- ./smr_v1.3.1_linux_x86_64_static --bfile g1000_eur --beqtl-summary ./cis-eQTL/cis-eQTLs-full_eQTLGen_AF_incl_nr_formatted_20191212.new.txt_besd-dense --query 1 --snp ${var1[$i]} --snp-wind 1000 --gene ${var2[$i]} --out ./cancer/eqtl-coloc/${var1[$i]}_${var2[$i]}
- done
- ;
5#topSNP的qtl数据已获取,之后与GWAS数据合并,在R中进行,for循环merge一下。
- library(dplyr)
- library(data.table)
- breast_cancer = fread('./../../GWAS summary data/Breast cancer/Luminal_B.txt',header = T,sep = '\t')
- breast_cancer = breast_cancer[,-8]
- filest = list('rs12506352_ENSG00000038219.txt',
- 'rs17118313_ENSG00000139613.txt'
- )
- filest=as.character(filest)
- filelent <- length(filest)
- newdatat <- c()
- for(i in 1:length(filest)){
- temp <-read.table(filest[i],header = T,sep = "\t")
- newdatat = merge(temp,breast_cancer,by='SNP',all=T)
- newdatat = na.omit(newdatat)
- write.table(x= newdatat,file = filest[i],quote = F,sep = '\t',row.names = F)
- }

6#共定位分析
- library(coloc)
- library(tidyr)
- filest <- list.files(pattern = "*.txt")
- filelent <- length(filest)
- files <- gsub("\\.txt", "", filest)
- for(i in 1:length(filest)){
- data = fread(filest[i],header = T,sep = '\t')
- data = data %>% distinct(SNP,.keep_all = TRUE)
- data = select(data,c('SNP','Freq','b','SE','beta','se'))
- colnames(data) = c('snp','MAF','beta_eqtl','se_eqtl','beta_gwas','se_gwas')
- data = mutate(data,N=1387)
- data =mutate(data,varbeta_eqtl = se_eqtl*se_eqtl)
- data = mutate(data,varbeta_gwas = se_gwas*se_gwas)
- data1 = data[,c('beta_eqtl','varbeta_eqtl','snp','MAF','N')]
- data2 = data[,c('beta_gwas','varbeta_gwas','snp')]
- colnames(data2)=c("beta","varbeta","snp")
- colnames(data1)=c("beta","varbeta","snp","MAF","N")
- data1 = as.list(data1)
- data2 = as.list(data2)
- data2$type = "cc"
- data1$type = "quant"
- res = coloc.abf(data1,data2)
- res_results = res$results
- res_summary = res$summary
- write.table(res_results,file = files[i],row.names = F,sep = '\t',quote = F)
- write.table(res_summary,file = filest[i],row.names = F,sep = '\t',quote = F)
- }

Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。