赞
踩
在细胞轨迹分析中,仅需要准备三个数据即可
准备三个数据的主要原因是将seurat对象装换为monocle对象
创建好monocle对象后仅需要走差异基因流程和排序流程就可以进行细胞轨迹追踪
# clear if(T){ rm(list=ls()) setwd("C:\\Users\\yuansh\\Desktop") dir() } # options if(T){ options(stringsAsFactors = F) options(as.is = T) } # packages if(T){ library(stringr) library(magrittr) library(ggplot2) library(Seurat) library(devtools) library(clustree) library(tidyverse) library(gridExtra) library(ggridges) library(ggplot2) library(ggExtra) library(monocle) } # myfunctions if(T){ myhead = function(x){ if(dim(x)[2]>5){return(x[1:5,1:5])} else(head(x)) } myscale = function(x, scale_factor){ (x / sum(x)) *scale_factor } } # set work-dir setwd('C:\\Users\\yuansh\\Desktop\\DLTumorCell\\code') # main # 导入数据并预处理 load("../processfile/S03_Merged_main_filtered_with_neo_osi.RData") sce = main_tiss_filtered1 rm(main_tiss_filtered1) if(T){ sce@meta.data$sample_name <- as.character(sce@meta.data$sample_name) sample_name <- as.character(sce@meta.data$sample_name) # Make table tab.1 <- table(sce@meta.data$sample_name) # Which samples have less than 10 cells samples.keep <- names(which(tab.1 > 10)) metadata_keep <- filter(sce@meta.data, sample_name %in% samples.keep) # Subset Seurat object sce <- subset(sce, cells=as.character(metadata_keep$cell_id)) sce table(sce@meta.data$sample_name) table(sce@meta.data$patient_id) #save(sce, file=paste(dir,"S03_subset_preprocessed.RData", sep="")) }# 剔除组织样本中少于10个细胞的组织 sce # 导入注释信息 pre.label = read.csv("../processfile/train-predict.csv",row.names = 1) # 这里注意一下,轨迹追中只需要上皮细胞 use.cells <- row.names(pre.label) sce <-subset(sce, cells=use.cells) # 从原始数据中获取非免疫细胞表达矩阵 pre.label = pre.label$PredictLabel %>% as.data.frame() %>% set_rownames(rownames(pre.label)) sce = AddMetaData(sce,metadata = pre.label,col.name = 'prelabel') # 轨迹追踪准备数据 # 1.原始count数据 count = sce@assays$RNA@counts %>% as.matrix() # 2.基因信息 gene_ann <- data.frame( gene_short_name = row.names(count), row.names = row.names(count) ) # 3.样本注释信息 sample_ann = data.frame( group = sce@meta.data$prelabel, row.names = rownames(sce@meta.data) ) # 初始化配置文件 pd <- new("AnnotatedDataFrame", data=sample_ann) fd <- new("AnnotatedDataFrame", data=gene_ann) # 穿件对象 cds <- newCellDataSet( count, phenoData = pd, featureData =fd, expressionFamily = negbinomial.size(), lowerDetectionLimit=1) cds # 初始化对象信息 cds <- estimateSizeFactors(cds) cds <- estimateDispersions(cds) # 识别差异基因 # 注意这一步需要很长的时间 if(F){ disp_table <- dispersionTable(cds) unsup_clustering_genes <- subset(disp_table, mean_expression >= 0.1) cds <- setOrderingFilter(cds, unsup_clustering_genes$gene_id) dim(cds) diff_test_res <- differentialGeneTest(cds,fullModelFormulaStr = "~group") # 哪怕仅仅是65个单细胞,monocle的这个differentialGeneTest函数运行也不快。 ordering_genes <- row.names (subset(diff_test_res, qval < 0.01)) save(ordering_genes,file = '../processfile/ordering_genes_by_Biological_Condition_high.Rdata') } #导入轨迹分析需要的差异基因 # load(file = 'ordering_genes_by_Biological_Condition_high.Rdata') cds <- setOrderingFilter(cds, ordering_genes) plot_ordering_genes(cds) # 然后降维 cds <- reduceDimension(cds, max_components = 2, method = 'DDRTree') # 降维是为了更好的展示数据。 # 降维有很多种方法, 不同方法的最后展示的图都不太一样, 其中“DDRTree”是Monocle2使用的默认方法 # 接着对细胞进行排序 cds <- orderCells(cds) ## 最后两个可视化函数 plot_cell_trajectory(cds, color_by = "prelabel") ggsave('../plot/normal-tumor-trace.pdf',width = 8,height = 8) # 可以很明显看到细胞的发育轨迹 # 还有几个其它可视化函数,我们明天介绍 plot_cell_trajectory(cds, color_by = "State") plot_cell_trajectory(cds, color_by = "Pseudotime") plot_cell_trajectory(cds, color_by = "State") + facet_wrap(~State, nrow = 1)
流程非常非常简单,感觉没什么好说的如果有不懂的可以通过以下的方式联系我
Best Regards,
Yuan.SH
---------------------------------------
School of Basic Medical Sciences,
Fujian Medical University,
Fuzhou, Fujian, China.
please contact with me via the following ways:
(a) e-mail :yuansh3354@163.com
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。