场景:把新的细胞比对到已经注释过的细胞集合上,获取映射后的细胞标签和UMAP坐标。
准备:
- 一个分析好的单细胞图谱数据集,作为reference数据集。
- 一个新的单细胞counts矩阵,记为 query数据集。
主要分为两个步骤:1.获取分类标签,2.获取UMAP坐标。
1. R语言代码
library(Seurat)
#1. load data ====
# Load the already-annotated PBMC object; it serves as the reference atlas.
pbmc <- readRDS("~/data/scScripts/backup/data/pbmc3k_final.rds")
DimPlot(pbmc, label = TRUE)

# Take a cell subset: inspect the CD3 genes, then keep the cells whose CD3D
# expression is above 3.
FeaturePlot(pbmc, features = c("CD3D", "CD3E", "CD3G"))
VlnPlot(pbmc, features = c("CD3D", "CD3E", "CD3G"), pt.size = 0)
small0 <- subset(pbmc, CD3D > 3)
pbmc   # 2638 -- used as the reference, the known cell atlas
small0 # 228  -- used as the query, the "unknown" cells

# Rebuild the query from the counts matrix alone, so it carries no labels.
# NOTE(review): the direct slot access assumes the RNA assay exposes a
# `counts` slot (pre-v5 Assay class) -- confirm against the installed Seurat.
small <- CreateSeuratObject(small0@assays$RNA@counts)
# Normalize the query data.
small <- NormalizeData(small)
#2. find anchor ----
# Find transfer anchors between the reference and the query, using
# dims 1:10 of the reference "pca" reduction.
anchors <- FindTransferAnchors(
  reference = pbmc,
  query = small,
  dims = 1:10,
  reference.reduction = "pca"
)
# Retained 1070 anchors

# Get the predicted IDs by transferring the reference seurat_clusters labels.
predictions <- TransferData(
  anchorset = anchors,
  refdata = pbmc$seurat_clusters,
  dims = 1:10
)

# Add the predictions to the query metadata.
small <- AddMetaData(object = small, metadata = predictions)
small
table(small$predicted.id)
#   0   1   4
# 104  58  66
# DimPlot(small, label = TRUE)
#3. UMAP projection----
# Re-run UMAP on the reference with return.model = TRUE; the resulting "umap"
# reduction is what MapQuery() is given as reduction.model below.
pbmc2 <- RunUMAP(pbmc, dims = 1:10, reduction = "pca", return.model = TRUE)
# Store the active identities as a "celltype" metadata column so they can be
# transferred alongside seurat_clusters.
pbmc2$celltype <- Idents(pbmc2)
DimPlot(pbmc2, label = TRUE)

small <- MapQuery(
  anchorset = anchors,
  reference = pbmc2,
  query = small,
  # Several labels can be mapped at once; to map a single label use
  # refdata = list(seurat_clusters = "seurat_clusters")
  refdata = list(seurat_clusters = "seurat_clusters", celltype = "celltype"),
  reference.reduction = "pca",
  reduction.model = "umap"
)
# MapQuery() is a wrapper around three functions: TransferData(), IntegrateEmbeddings(), and ProjectUMAP().

# ggtitle(), xlim() and ylim() come from ggplot2; they are namespace-qualified
# so the script does not depend on ggplot2 being attached.

# Reference vs. query, coloured by cluster label.
p1 <- DimPlot(pbmc2, reduction = "umap", group.by = "seurat_clusters",
              label = TRUE, label.size = 3, repel = TRUE) +
  NoLegend() +
  ggplot2::ggtitle("Reference")
p2 <- DimPlot(small, reduction = "ref.umap", group.by = "predicted.id",
              label = TRUE, label.size = 3, repel = TRUE) +
  NoLegend() +
  ggplot2::ggtitle("Query of small") +
  ggplot2::xlim(-7, 12) +
  ggplot2::ylim(-7, 12)
p1 + p2

# Reference vs. query, coloured by cell type.
q1 <- DimPlot(pbmc2, reduction = "umap", group.by = "celltype",
              label = TRUE, label.size = 3, repel = TRUE) +
  NoLegend() +
  ggplot2::ggtitle("Reference")
q2 <- DimPlot(small, reduction = "ref.umap", group.by = "predicted.celltype",
              label = TRUE, label.size = 3, repel = TRUE) +
  NoLegend() +
  ggplot2::ggtitle("Query of small") +
  ggplot2::xlim(-7, 12) +
  ggplot2::ylim(-7, 12)
q1 + q2
# Compare the original labels with the predicted ones.
# The two label vectors are paired by position, so both objects must hold the
# same cells in the same order; fail loudly instead of tabulating garbage.
stopifnot(identical(colnames(small0), colnames(small)))
rs <- table(small0$seurat_clusters, small$predicted.id)
# Show only the original clusters that actually occur in the query.
# drop = FALSE keeps the result two-dimensional even if a single row remains.
rs[rowSums(rs) != 0, , drop = FALSE]
#    0  1  4
# 0 97  1  0
# 1  5 56  0
# 4  2  1 64
# 6  0  0  2
# Rows are the original labels; columns are the predicted labels.
2. 效果图
图2:
这样,就把未知细胞映射到已经注释好的细胞图谱中了。
除了映射 seurat_clusters 外,还能映射 celltype 等自定义metadata。
refer
- https://satijalab.org/seurat/articles/multimodal_reference_mapping.html
- https://satijalab.org/seurat/articles/covid_sctmapping