目的:使用bulk 数据,查看HeLa 双胸苷阻断法 细胞同步化 释放 [0, 3, 4.5, 6, 9, 10.5, 12, 15, 18, 19.5, 21, 22.5, 25.5, 30] 小时后 cell cycle 基因的表达情况。
1.结果
S phase
G2M phase
S + G2M phase
不方便看,横过来看:
2.代码
counts_dir="/home/wangjl/data/rsa/HeLa/"
# 1.load data----
## load raw from featureCounts ----
HeLa.raw=read.table( paste0(counts_dir, "counts/featureCounts_HeLa_CellCycle16Points_matrix.txt"),
skip = 1, row.names = 1, header = T)
gene.counts = HeLa.raw[, 6:ncol(HeLa.raw)] #col5 length
colnames(gene.counts) = sub("map.", "", colnames(gene.counts))
colnames(gene.counts) = sub("_Aligned.sortedByCoord.out.bam", "", colnames(gene.counts))
colnames(gene.counts)|>jsonlite::toJSON()
# ["SRR3535826","SRR3535828","SRR3535830","SRR3535832","SRR3535834","SRR3535835","SRR3535836","SRR3535837",
# "SRR3535838","SRR3535839","SRR3535840","SRR3535841","SRR3535842","SRR3535843","SRR3616961","SRR3616962"]
gene.counts[1:5, 1:2]
# rm all 0 rows
dim(gene.counts) #61209 16
gene.counts = gene.counts[ rowSums(gene.counts)>0, ]
dim(gene.counts) #42530 16
gene.counts[1:5, 1:10]
gene.length = HeLa.raw[rownames(gene.counts),'Length']
#2. normalize to TPM ----
gene.tpm=apply(gene.counts, 2, function(x){
x1=x/gene.length
x1/sum(x1)*1e6
})
dim(gene.tpm)
head(gene.tpm)
if(0){
write.table(gene.tpm, paste0(counts_dir, "counts/HeLa_cellcycle16points.gene.tpm.txt") )
write.table(gene.length, paste0(counts_dir, "counts/HeLa_cellcycle16points.gene.length.txt") )
write.table(gene.counts, paste0(counts_dir, "counts/HeLa_cellcycle16points.gene.counts.txt") )
}
# (1c) cycle genes from Seurat
tmp.genes=c(
Seurat::cc.genes.updated.2019$s.genes,
Seurat::cc.genes.updated.2019$g2m.genes
)
laply(cc.genes, length)
# (2) get tpm
setdiff(tmp.genes, rownames(gene.tpm))
dat.heatmap=gene.tpm[ intersect(rownames(gene.tpm), tmp.genes),]
# OR use all matrix
#dat.heatmap=gene.tpm #[ intersect(rownames(gene.tpm), tmp.genes),]
# rm all 0 rows
dat.heatmap=dat.heatmap[rowSums(dat.heatmap)>0,]
head(dat.heatmap[,1:5])
dim(dat.heatmap)
library(pheatmap) #https://www.jianshu.com/p/1c55ea64ff3f
# anno columns
annotation_col <- data.frame(
#type = gene.long$type,
phase = c("S", "G2", "G2", "G2", "M", "G1", "G1", "S", #1-8
"G2", "G2", "M", "M", "G1", "S", "G1", "G1"), #9-16
time=c(0, 3, 4.5, 6, 9, 10.5, 12, 15, 18, 19.5, 21, 22.5, 25.5, 30, 31, 32),
row.names = rownames(gene.long)
)
# set colors
ann_colors = list(
phase = c('G1'="#66C2A5", 'S'="#FC8D62", 'G2'="#8DA0CB", 'M'="deeppink")
#type = c('S'="#ed553b", 'ctrl'="#99b433")
)
pheatmap(
dat.heatmap[,1:14],
#log( dat.heatmap[,1:14] + 1 ),
#log(dat.heatmap[, 1:10] + 1),
# log(dat.heatmap[, c(16, 1:10, 11:15)] + 1),
border_color =NA, # "white",
color = colorRampPalette(c("navy", "white", "firebrick3"))(50), #自定义颜色
scale="row",
cluster_cols = F,
#cluster_rows = F,
annotation_col = annotation_col, #set anno for column
annotation_colors = ann_colors, #set colors
show_colnames = T,
#show_rownames = F,
#angle_col = 315,
#filename =paste0("tmp/HeLa_sync16Timepoints_pheatmap-2.pdf"), width=18.6, height=3.5,
main="HeLa after sync: 14 time points\n(Seurat::cc.genes)",
clustering_method = "ward.D2") #聚类方法