library(AUCell)
library(Seurat)
library(SeuratData)
library(tidyverse)
library(patchwork)
celltype.markers <- FindAllMarkers(scobj, assay = "RNA", only.pos = T)
signatures <- celltype.markers %>%
filter(p_val_adj < 0.01) %>%
arrange(desc(avg_log2FC)) %>%
group_by(cluster) %>%
slice(seq(n() * .1))
signatures <- split(signatures$gene, signatures$cluster)
### AUCell评分并分配活性阈值
exprMatrix <- scobj@assays$RNA@data
# 表达矩阵转rank矩阵
cells_rankings <- AUCell_buildRankings(exprMatrix, useNames = FALSE)
# min 1% 5% 10% 50% 100%
# 212.00 325.00 434.95 539.90 816.00 3400.00
#以上数据意味着排名3400以后的基因没有价值
# 参数aucMaxRank的设置最好参考上面的结果,此处先不调整默认参数
cells_AUC <- AUCell_calcAUC(signatures, cells_rankings,
nCores = 1,
aucMaxRank = nrow(cells_rankings) * 0.05)
# 给每个基因集分配活性阈值
#cells_assignment <- AUCell_exploreThresholds(cells_AUC, plotHist=TRUE, assign=TRUE)
## 提取AUC, 可视化作图
sigmat <- getAUC(cells_AUC)
sigmat <- t(as.data.frame(sigmat))
sigmat <- sigmat[rownames(scobj@meta.data),]
colnames(sigmat) <- paste0(colnames(sigmat), "_AUCell")
scobj@meta.data <- cbind(scobj@meta.data, sigmat)
marker_genes <- colnames(scobj@meta.data)[grep("_AUCell", colnames(scobj@meta.data))]
FeaturePlot(scobj, features = marker_genes, order = T, ncol = 3)
DimPlot(scobj, label = T)
## ssGSEA(慢)
library(GSVA)
exprMatrix <- scobj@assays$RNA@data
sigmat <- gsva(exprMatrix, signatures, method = "ssgsea", parallel.sz = 6)
sigmat <- t(as.data.frame(sigmat))
sigmat <- sigmat[rownames(scobj@meta.data),]
colnames(sigmat) <- paste0(colnames(sigmat), "_ssGSEA")
scobj@meta.data <- cbind(scobj@meta.data, sigmat)
marker_genes <- colnames(scobj@meta.data)[grep("_ssGSEA", colnames(scobj@meta.data))]
FeaturePlot(scobj, features = marker_genes, order = T, ncol = 3)
## GSVA(十分慢)
exprMatrix <- scobj@assays$RNA@data
sigmat <- gsva(exprMatrix, signatures, method = "gsva", kcdf = "Gaussian", parallel.sz = 6)
sigmat <- t(as.data.frame(sigmat))
sigmat <- sigmat[rownames(scobj@meta.data),]
colnames(sigmat) <- paste0(colnames(sigmat), "_gsva")
scobj@meta.data <- cbind(scobj@meta.data, sigmat)
marker_genes <- colnames(scobj@meta.data)[grep("_gsva", colnames(scobj@meta.data))]
FeaturePlot(scobj, features = marker_genes, order = T, ncol = 3)
## AddModuleScore
scobj <- AddModuleScore(scobj, features = signatures, name = "seurat", assay = "RNA")
names(scobj@meta.data)[grep("seurat\\d", names(scobj@meta.data))] <- paste0(names(signatures), "_seurat")
marker_genes <- colnames(scobj@meta.data)[grep("_seurat", colnames(scobj@meta.data))]
FeaturePlot(scobj, features = marker_genes, order = T, ncol = 3)
## UCell
## 使用make.name 对features 的名称进行确认, 特殊符号会变成点
library(UCell)
scobj <- AddModuleScore_UCell(scobj, features = signatures, name = "_UCell")
marker_genes <- colnames(scobj@meta.data)[grep("_UCell", colnames(scobj@meta.data))]
FeaturePlot(scobj, features = marker_genes, order = T, ncol = 3)
### 相关性分析
metaNames = colnames(scobj@meta.data)
myMetaNames <- metaNames[grep("FCGR3A", metaNames)]
FeaturePlot(scobj, features = myMetaNames, order = T, ncol = 3)
### 两两相关性
M <- cor(scobj@meta.data[, myMetaNames])
library(corrplot)
corrplot(M, method = "circle")
corrplot(M, method = "pie")
### 两个因素相关性
FeatureScatter(scobj, feature1 = 'CD9', feature2 = 'CD3E')
FeatureScatter(scobj, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
FeatureScatter(scobj, feature1 = "Naive.CD4.T_UCell", feature2 = "Memory.CD4.T_UCell")
FeatureScatter(scobj, feature1 = "FCGR3A+ Mono_AUCell", feature2 = "FCGR3A..Mono_UCell")
## 修改名称
original_name = "FCGR3A+ Mono_AUCell"
expected_name = "FCGR3A_Mono_AUCell"
scobj_new = scobj
location = grep(original_name, colnames(scobj_new@meta.data))
location = grep(original_name, colnames(scobj_new@meta.data), fixed = T)
colnames(scobj_new@meta.data)[location] = expected_name
FeatureScatter(scobj_new, feature1 = "FCGR3A_Mono_AUCell", feature2 = "FCGR3A..Mono_UCell")
library(Seurat)
ComputeModuleScore <- function(x, gene.sets, min.size = 20, batch.size = 500, cores = 1, ...) {
if (!is.list(gene.sets)) {
stop("'gene.sets' should be a list or data.frame!")
}
gene.sets <- gene.sets[sapply(gene.sets, length) >= min.size]
n.cells <- ncol(x)
batches <- floor((1:n.cells - 1) / batch.size)
batch.levels <- unique(batches)
aucell <- function(i) {
dge.tmp <- x[, batches == i]
cr <- AUCell::AUCell_buildRankings(dge.tmp, nCores = 1, plotStats = F, verbose = F)
auc <- AUCell::AUCell_calcAUC(gene.sets, cr, nCores = 1, verbose = F)
AUCell::getAUC(auc)
}
auc_scores <- parallel::mclapply(batch.levels, aucell, mc.cores = cores)
do.call(cbind, auc_scores)
}
regulons <- clusterProfiler::read.gmt("data/s4_post_scenic.regulons.gmt")
regulons.list <- split(regulons$gene, regulons$term)
names(regulons.list) <- sub("[0-9]+g", "\\+", names(regulons.list))
scobj <- ComputeModuleScore(scobj, gene.sets = regulons.list, min.size = 10, cores = 1)
### 使用新的降维数据进行umap
DefaultAssay(scobj) <- "AUCell"
scobj <- RunUMAP(scobj,
features = rownames(scobj),
metric = "correlation",
reduction.name = "umap_ras",
reduction.key = "umap_ras")
p1 <- DimPlot(scobj, reduction = "umap", group.by = "celltype") + ggsci::scale_color_d3("category20")
p2 <- DimPlot(scobj, reduction = "umap", group.by = "group")
p1 + p2
## ras-umap
p3 <- DimPlot(scobj, reduction = "umap_ras", group.by = "celltype") + ggsci::scale_color_d3("category20")
p4 <- DimPlot(scobj, reduction = "umap_ras", group.by = "group")
p3 + p4
library(patchwork)
(p1 + p2) / (p3 + p4)
VarDecompose <- function(data, meta.data, vd.vars, genes = "all", cores = -1) {
## check params
if (missing(data) ||
missing(meta.data) ||
missing(vd.vars)) {
stop("Must provide 'data', 'meta.data', and 'vd.vars'.")
}
if (is.null(colnames(meta.data)) || is.null(rownames(meta.data))) {
stop("The row and column names of 'meta.data' should be provided.")
}
if (is.null(colnames(data)) || is.null(rownames(data))) {
stop("The row and column names of 'data' should be provided.")
}
if (!all(rownames(data) == rownames(meta.data))) {
stop("The row names of 'data' and 'meta.data' should be matched.")
}
if (!all(vd.vars %in% colnames(meta.data))) {
vd.vars.404 <- setdiff(vd.vars, colnames(meta.data))
stop(paste("vd.vars:", vd.vars.404, "is(are) not found in 'meta.data'"))
}
if (length(genes) == 1) {
if (genes == "all") {
genes <- colnames(data)
} else {
stop("'genes' should be 'all' or a vector.")
}
} else {
genes.404 <- setdiff(genes, colnames(data))
if (length(genes.404) > 0) {
warning(paste(length(genes.404), "gene(s) are not found in 'data'."))
genes <- setdiff(genes, genes.404)
}
}
cores <- ifelse(cores > 0, cores, parallel::detectCores())
## prepare data for VD
vd.vars.str <- sapply(vd.vars, function(xx) sprintf("(1|%s)", xx))
modelFormulaStr <- paste("expression ~", paste(vd.vars.str, collapse = "+"))
data.use <- cbind(data[, genes], meta.data)
## exe VD
vd.res <- do.call(rbind, parallel::mclapply(genes, function(genename) {
data.model <- data.use[, c(vd.vars, genename)]
colnames(data.model) <- c(vd.vars, "expression")
tryCatch({
model <- suppressWarnings(lme4::lmer(stats::as.formula(modelFormulaStr), data = data.model, REML = TRUE, verbose = FALSE))
results <- as.data.frame(lme4::VarCorr(model))
rownames(results) <- results$grp
results <- results[c(vd.vars, "Residual"),]
frac.var <- results$vcov / sum(results$vcov)
res.tmp <- c("OK", frac.var)
},
error = function(e) {
print(e)
res.tmp <- c("FAIL", rep(-1, length(vd.vars) + 1))
})
names(res.tmp) <- c("status", vd.vars, "residual")
as.data.frame(as.list(res.tmp)) # return
}, mc.cores = cores)
)
rownames(vd.res) <- genes
vd.res %<>% as.data.frame()
vd.res <- vd.res %>% dplyr::mutate(gene = rownames(vd.res), .before = 1)
# vd.res <- vd.res %>% as.data.frame() %>% dplyr::mutate(gene = rownames(.), .before=1)
for (i in 3:ncol(vd.res)) {
vd.res[[i]] %<>% as.numeric()
}
return(vd.res)
}
vd.vars <- c("celltype", "group")
meta.data <- scobj@meta.data[, vd.vars]
ras.data = t(scobj@assays$AUCell@data)
vd.res <- VarDecompose(data = ras.data,
meta.data = meta.data,
vd.vars = vd.vars, cores = 1)
## 作图检测
DefaultAssay(scobj) <- "AUCell"
FeaturePlot(scobj, features = "IRF9(+)", split.by = "group", reduction = "umap")
FeaturePlot(scobj, features = "STAT1(+)", split.by = "group", reduction = "umap")
FeaturePlot(scobj, features = "KLF4(+)", split.by = "group", reduction = "umap")
FeaturePlot(scobj, features = "IRF7(+)", split.by = "group", reduction = "umap")
FeaturePlot(scobj, features = "BACH1(+)", reduction = "umap")
FeaturePlot(scobj, features = "IRF7(+)", reduction = "umap")
## 探索
library(dplyr)
dput(vd.res %>%
arrange(desc(group)) %>%
pull(gene) %>%
head(60))
module <- c("IRF9(+)", "ELF1(+)", "IRF1(+)", "STAT1(+)", "ETV7(+)", "NFE2L3(+)",
"STAT2(+)", "IRF2(+)", "IRF7(+)", "IRF8(+)", "BATF3(+)", "NFYC(+)",
"MAX(+)", "ELK3(+)", "ZNF580(+)", "JUND(+)", "ZSCAN16(+)", "CEBPG(+)",
"JUN(+)", "HOXB2(+)", "THAP11(+)", "TCF7L2(+)", "NR3C1(+)", "CEBPD(+)",
"ZNF76(+)", "CREB1(+)", "TAF1(+)", "HDX(+)", "ETV6(+)", "FOSB(+)",
"ETS1(+)", "ZFHX3(+)", "ATF6B(+)", "PHF20(+)", "TFEC(+)", "ZBTB25(+)",
"SREBF2(+)", "ZBTB7A(+)", "BCLAF1(+)", "ATF4(+)", "TFEB(+)",
"CLOCK(+)", "FLI1(+)", "ATF5(+)", "FOSL2(+)", "YY1(+)", "ATF3(+)",
"POU2F2(+)", "KLF13(+)", "STAT6(+)", "TFE3(+)", "NFE2L2(+)",
"HMGA1(+)", "ELF2(+)", "ING4(+)", "RUNX1(+)", "NFIC(+)", "ELK1(+)",
"MAFB(+)", "NFKB2(+)")
scobj <- RunUMAP(scobj,
features = setdiff(rownames(scobj), module),
metric = "correlation",
reduction.name = "umap_ras2",
reduction.key = "umap_ras2")
p5 <- DimPlot(scobj, reduction = "umap_ras2", group.by = "celltype") + ggsci::scale_color_d3("category20")
p6 <- DimPlot(scobj, reduction = "umap_ras2", group.by = "group")
p5 + p6
### one more time
dput(vd.res %>%
arrange(desc(celltype)) %>%
pull(gene) %>%
head(80))
module <- c("KLF4(+)", "CEBPB(+)", "RXRA(+)", "BACH1(+)", "JUNB(+)", "VDR(+)",
"SPI1(+)", "FOS(+)", "ETS2(+)", "NFKB1(+)", "MAFB(+)", "IRF5(+)",
"ATF5(+)", "CREM(+)", "ATF3(+)", "MITF(+)", "NFE2L2(+)", "SPIB(+)",
"TCF4(+)", "TFEC(+)", "CEBPD(+)", "ZNF467(+)", "CUX1(+)", "ETV6(+)",
"GFI1(+)", "ETS1(+)", "TCF7L2(+)", "MYC(+)", "HMGA1(+)", "EOMES(+)",
"REL(+)", "BCLAF1(+)", "TBX21(+)", "HES1(+)", "NFATC2(+)", "NR3C1(+)",
"ATF4(+)", "ATF1(+)", "GATA3(+)", "LEF1(+)", "GABPB1(+)", "ELF4(+)",
"YY1(+)", "FLI1(+)", "RFX5(+)", "IKZF1(+)", "HOXB2(+)", "PPARG(+)",
"NFKB2(+)", "KLF2(+)", "RUNX3(+)", "ATF2(+)", "RELB(+)", "THAP11(+)",
"ARID3A(+)", "JUN(+)", "ZBTB25(+)", "TFE3(+)", "ELK4(+)", "ELK3(+)",
"ELF2(+)", "XBP1(+)", "KLF13(+)", "TFEB(+)", "HOXB4(+)", "FOSL2(+)",
"MAFF(+)", "CEBPG(+)", "THAP1(+)", "RELA(+)", "IRF8(+)", "ETV3(+)",
"IRF7(+)", "FOSB(+)", "ZBTB7A(+)", "PHF20(+)", "BATF3(+)", "MAX(+)",
"ZBTB7B(+)", "TFAP4(+)")
scobj <- RunUMAP(scobj,
features = setdiff(rownames(scobj), module),
metric = "correlation",
reduction.name = "umap_ras3",
reduction.key = "umap_ras3")
p7 <- DimPlot(scobj, reduction = "umap_ras3", group.by = "celltype") + ggsci::scale_color_d3("category20")
p8 <- DimPlot(scobj, reduction = "umap_ras3", group.by = "group")
p7 + p8
library(patchwork)
(p1 | p2 | p3 | p4) / (p5 | p6 | p7 | p8)