指定顺序展示富集分析的term
调整热图的label角度
h1=ggheatmap(
dat[cg1,],
cluster_rows = T, #是否对行聚类
cluster_cols = T, #是否对列聚类
tree_height_rows = 0.28, #行聚类树高度
tree_height_cols = 0.1, #列聚类树高度
annotation_cols = group_list, #为列添加分组
annotation_color = col,#分组颜色
text_show_rows = FALSE,
# text_show_rows = mark #显示指定的基因标签
scale = "row" #选择对row/column/none进行归一化
) %>% ggheatmap_theme(1,
theme =list(
theme(axis.text.x = element_text(angle = 90,face = "bold",size = 10),
axis.text.y = element_text(colour = "red",face = "bold")),
theme(legend.title = element_text(face = "bold"))
))
特殊字符正则匹配
colnames(exp)
dat=exp[,c(grep(pattern = "Normal",colnames(exp),value = T),
grep(pattern = "LCT\\(14\\+14\\)-MSC-D14",colnames(exp),value = T),
grep(pattern = "LCT\\(14\\+28\\)-MSC-D28",colnames(exp),value = TRUE))
]
富集分析term缩短
p2=ggplot(df, aes(x = Cluster, y = Description)) +
geom_point(aes(color = p.adjust, size = Count)) +
scale_color_gradient(low = "red", high = "blue") +
xlab("Cluster") +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, face = "bold", size = 10)) +
# Edit legends
guides(
# Reverse color order (higher value on top)
color = guide_colorbar(reverse = TRUE)
) +
guides(
size = guide_legend(order = 2),
color = guide_colorbar(order = 1)
)+
scale_y_discrete(labels=function(y) str_wrap(y,width = 50))
.libPaths(c("/home/data/t040413/R/x86_64-pc-linux-gnu-library/4.2","/home/data/t040413/R/yll/usr/local/lib/R/site-library", "/usr/local/lib/R/library"))
library(dplyr)
library(ggplot2)
#library(tinyarray)
library(edgeR)
library(patchwork)
#devtools::install_github("XiaoLuo-boy/ggheatmap")
library(ggheatmap)
library(tidyr)
library(DESeq2)
library(stringr)
getwd()
dir.create("/home/data/t040413/wpx/wpx_transcriptomics_proteinomics_Metabolomics/transcriptomics/4_d28_vs_d14_treatment_effective_2")
setwd("/home/data/t040413/wpx/wpx_transcriptomics_proteinomics_Metabolomics/transcriptomics/4_d28_vs_d14_treatment_effective_2")
getwd()
load("/home/data/t040413/wpx/wpx_transcriptomics_proteinomics_Metabolomics/transcriptomics/step1_get_matrix_dd.Rdata")
load("/home/data/t040413/wpx/wpx_transcriptomics_proteinomics_Metabolomics/transcriptomics/step1_get_matrix.Rdata")
load("/home/data/t040413/wpx/wpx_transcriptomics_proteinomics_Metabolomics/transcriptomics/1_model_success_2/exp_orders.rds")
colnames(exp)=a
exp=as.matrix(exp)
1 #提取想要比较的组别
ANSWER="LCT(14+14)-MSC-D14 VS Normal control"
{ #LCT(14+14)-MSC-D14 VS Normal control
print(getwd())
print(colnames(exp))
dat=exp[,c(grep(pattern = "Normal",colnames(exp),value = T),
# grep(pattern = "P.*D14",colnames(exp),value = T),
grep(pattern = "LCT\\(14\\+14\\)-MSC-D14",colnames(exp),value = TRUE))
]
dat = log2(cpm(dat)+1) # dat 为使用count数据转化而成的cpm数据,使用这个数据画图,而count数据只是用来做差异分析
{ # contro+shift+c 选中
# # 计算每六列的平均值
# new_matrix <- matrix(0, nrow = nrow(dat), ncol = ceiling(ncol(dat) / 6))
# colnames(new_matrix) <- c("Normal control","LCT(14+0)-NT-D0")
# rownames(new_matrix) <- rownames(dat)
#
# for (i in 1:ncol(new_matrix)) {
# start_col <- (i - 1) * 6 + 1
# end_col <- min(start_col + 5, ncol(dat))
# new_matrix[, i] <- rowMeans(dat[, start_col:end_col], na.rm = TRUE)
# }
# head(new_matrix)
# dat<-new_matrix
}
boxplot(dat,las=2)
dat[dat>15]=15 #可视化 抹去异常值
group_list=data.frame(
row.names = colnames(dat),
group=rep(c("Normal control","LCT(14+14)-MSC-D14"),each=6)
)
group_list
group_for_pca=factor(group_list$group,levels = c("Normal control","LCT(14+14)-MSC-D14"))
#设置组别颜色
group_col2 <- c("#bfb2d5","#f1937f")
names(group_col2) <- c("Normal control","LCT(14+14)-MSC-D14") #LCT(14+0)-NT-D0 VS normal control
col=list(group=group_col2)
col
pca.plot = draw_pca(dat,group_list = group_for_pca);pca.plot
resultsNames(dds)
#res <- results(dds, name="condition_LCT.14.14..MSC.D14_vs_LCT.14.14..NT.D14")
res <- results(dds, contrast = c("condition", "LCT(14+14)-MSC-D14", "Normal control")) #因子名称 谁比上谁
head(res);print("----");head(ids);dim(ids)
{
res$gene_symbol=ids[ids$symbol %in% rownames(res),]$symbol
head(res);dim(res)
DEG1 <- as.data.frame(res)
DEG1 <- DEG1[order(DEG1$pvalue),]
DEG1 = na.omit(DEG1)
head(DEG1);dim(DEG1)
#添加change列标记基因上调下调
logFC_t = 1
pvalue_t = 0.05
k1 = (DEG1$pvalue < pvalue_t)&(DEG1$log2FoldChange < -logFC_t);table(k1)
k2 = (DEG1$pvalue < pvalue_t)&(DEG1$log2FoldChange > logFC_t);table(k2)
DEG1$change = ifelse(k1,"DOWN",ifelse(k2,"UP","NOT"))
table(DEG1$change);head(DEG1)
print(paste("下面是padj<0.05----------:"))
print(paste( table((DEG1$padj < pvalue_t)&(DEG1$log2FoldChange < -logFC_t)),
sep = "___",
table((DEG1$padj < pvalue_t)&(DEG1$log2FoldChange > logFC_t))
))
# print(paste(group_name,sep = "_---------:-----_",table(DEG1$change)))
print(table(DEG1$change))
#draw_heatmap(log2(cpm(exp)+1) [cg1,],Group,n_cutoff = 2,legend = TRUE, annotation_legend =TRUE)
cg1 = rownames(DEG1)[DEG1$change !="NOT"]
}
# h1 = draw_heatmap(dat[cg1,],group_list = group_list,n_cutoff = 2,legend = TRUE, annotation_legend =TRUE)
#https://mp.weixin.qq.com/s/Yqs6cmtVVHJjsdKXjj3srw
#热图
{
h1=ggheatmap(
dat[cg1,],
cluster_rows = T, #是否对行聚类
cluster_cols = T, #是否对列聚类
tree_height_rows = 0.28, #行聚类树高度
tree_height_cols = 0.1, #列聚类树高度
annotation_cols = group_list, #为列添加分组
annotation_color = col,#分组颜色
text_show_rows = FALSE,
# text_show_rows = mark #显示指定的基因标签
scale = "row" #选择对row/column/none进行归一化
) %>% ggheatmap_theme(1,
theme =list(
theme(axis.text.x = element_text(angle = 90,face = "bold",size = 10),
axis.text.y = element_text(colour = "red",face = "bold")),
theme(legend.title = element_text(face = "bold"))
))
}
h1
if (F) {
ggheatmap(
dat[cg1,],hclust_method = "average",
cluster_rows = T, #是否对行聚类
cluster_cols = T, #是否对列聚类
tree_height_rows = 0.28, #行聚类树高度
tree_height_cols = 0.1, #列聚类树高度
annotation_cols = group_list, #为列添加分组
annotation_color = col,#分组颜色
text_show_rows = FALSE,
# text_show_rows = mark #显示指定的基因标签
scale = "row" #选择对row/column/none进行归一化
)
}
#火山图
v1 = draw_volcano(DEG1,logFC_cutoff = logFC_t, #adjust = TRUE,
xlab.package = FALSE)+xlab("log2FoldChange")+
scale_x_continuous(limits = c(-3, 3), breaks = seq(-3, 3, by = 1))+
# coord_cartesian(ylim = c(0, 10)) +
scale_y_continuous(expand = expansion(add = c(2, 0)),limits = c(0, 10), breaks = seq(0, 40, by = 2))
v1
#(h1 ) / (v1) +plot_layout(guides = 'collect') &theme(legend.position = "none")
head(DEG1)
table(DEG1$change)
DEG1$group=ANSWER
#plot
{
pdf(paste(gsub("[[:punct:]]", " ", paste(ANSWER,"_heatmap.pdf",sep = "_")),".pdf")
,width = 12,height = 7)
print(h1)
dev.off()
pdf(paste(gsub("[[:punct:]]", " ", paste(ANSWER,"_volcano.pdf",sep = "_")),".pdf"))
print(v1)
dev.off()
pdf(paste(gsub("[[:punct:]]", " ", paste(ANSWER,"_pca.plot.pdf",sep = "_")),".pdf"))
print(pca.plot)
dev.off()
pdf(paste(gsub("[[:punct:]]", " ", paste(ANSWER,"_pca_plot.pdf",sep = "_")),".pdf"))
print(pca.plot)
dev.off()
DEG1$gene=rownames(DEG1)
write.csv(DEG1,file = paste(ANSWER,"DEGS_.CSV"))
}
}
2.#提取想要比较的组别
ANSWER="LCT(14+28)-MSC-D28 VS Normal control"
{ #LCT(14+28)-MSC-D28 VS Normal control
print(colnames(exp))
dat=exp[,c(grep(pattern = "Normal",colnames(exp),value = T),
# grep(pattern = "P.*D14",colnames(exp),value = T),
grep(pattern = "LCT\\(14\\+28\\)-MSC-D28",colnames(exp),value = TRUE))
]
dat = log2(cpm(dat)+1) # dat 为使用count数据转化而成的cpm数据,使用这个数据画图,而count数据只是用来做差异分析
{ # contro+shift+c 选中
# # 计算每六列的平均值
# new_matrix <- matrix(0, nrow = nrow(dat), ncol = ceiling(ncol(dat) / 6))
# colnames(new_matrix) <- c("Normal control","LCT(14+0)-NT-D0")
# rownames(new_matrix) <- rownames(dat)
#
# for (i in 1:ncol(new_matrix)) {
# start_col <- (i - 1) * 6 + 1
# end_col <- min(start_col + 5, ncol(dat))
# new_matrix[, i] <- rowMeans(dat[, start_col:end_col], na.rm = TRUE)
# }
# head(new_matrix)
# dat<-new_matrix
}
boxplot(dat,las=2)
dat[dat>15]=15 #可视化 抹去异常值
group_list=data.frame(
row.names = colnames(dat),
group=rep(c("Normal control","LCT(14+28)-MSC-D28"),each=6)
)
group_list
group_for_pca=factor(group_list$group,levels = c("Normal control","LCT(14+28)-MSC-D28"))
#设置组别颜色
group_col2 <- c("#bfb2d5","#f1937f")
names(group_col2) <- c("Normal control","LCT(14+28)-MSC-D28") #LCT(14+0)-NT-D0 VS normal control
col=list(group=group_col2)
col
pca.plot = draw_pca(dat,group_list = group_for_pca);pca.plot
resultsNames(dds)
#res <- results(dds, name="condition_LCT.14.14..MSC.D14_vs_LCT.14.14..NT.D14")
res <- results(dds, contrast = c("condition", "LCT(14+28)-MSC-D28", "Normal control")) #因子名称 谁比上谁
head(res);print("----");head(ids);dim(ids)
{
res$gene_symbol=ids[ids$symbol %in% rownames(res),]$symbol
head(res);dim(res)
DEG1 <- as.data.frame(res)
DEG1 <- DEG1[order(DEG1$pvalue),]
DEG1 = na.omit(DEG1)
head(DEG1);dim(DEG1)
#添加change列标记基因上调下调
logFC_t = 1
pvalue_t = 0.05
k1 = (DEG1$pvalue < pvalue_t)&(DEG1$log2FoldChange < -logFC_t);table(k1)
k2 = (DEG1$pvalue < pvalue_t)&(DEG1$log2FoldChange > logFC_t);table(k2)
DEG1$change = ifelse(k1,"DOWN",ifelse(k2,"UP","NOT"))
table(DEG1$change);head(DEG1)
print(paste("下面是padj<0.05----------:"))
print(paste( table((DEG1$padj < pvalue_t)&(DEG1$log2FoldChange < -logFC_t)),
sep = "___",
table((DEG1$padj < pvalue_t)&(DEG1$log2FoldChange > logFC_t))
))
# print(paste(group_name,sep = "_---------:-----_",table(DEG1$change)))
print(table(DEG1$change))
#draw_heatmap(log2(cpm(exp)+1) [cg1,],Group,n_cutoff = 2,legend = TRUE, annotation_legend =TRUE)
cg1 = rownames(DEG1)[DEG1$change !="NOT"]
}
# h1 = draw_heatmap(dat[cg1,],group_list = group_list,n_cutoff = 2,legend = TRUE, annotation_legend =TRUE)
#https://mp.weixin.qq.com/s/Yqs6cmtVVHJjsdKXjj3srw
#热图
{
h1=ggheatmap(
dat[cg1,],
cluster_rows = T, #是否对行聚类
cluster_cols = T, #是否对列聚类
tree_height_rows = 0.28, #行聚类树高度
tree_height_cols = 0.1, #列聚类树高度
annotation_cols = group_list, #为列添加分组
annotation_color = col,#分组颜色
text_show_rows = FALSE,
# text_show_rows = mark #显示指定的基因标签
scale = "row" #选择对row/column/none进行归一化
) %>% ggheatmap_theme(1,
theme =list(
theme(axis.text.x = element_text(angle = 90,face = "bold",size = 10),
axis.text.y = element_text(colour = "red",face = "bold")),
theme(legend.title = element_text(face = "bold"))
))
}
h1
if (F) {
ggheatmap(
dat[cg1,],hclust_method = "average",
cluster_rows = T, #是否对行聚类
cluster_cols = T, #是否对列聚类
tree_height_rows = 0.28, #行聚类树高度
tree_height_cols = 0.1, #列聚类树高度
annotation_cols = group_list, #为列添加分组
annotation_color = col,#分组颜色
text_show_rows = FALSE,
# text_show_rows = mark #显示指定的基因标签
scale = "row" #选择对row/column/none进行归一化
)
}
#火山图
v1 = draw_volcano(DEG1,logFC_cutoff = logFC_t, #adjust = TRUE,
xlab.package = FALSE)+xlab("log2FoldChange")+
scale_x_continuous(limits = c(-3, 3), breaks = seq(-3, 3, by = 1))+
# coord_cartesian(ylim = c(0, 10)) +
scale_y_continuous(expand = expansion(add = c(2, 0)),limits = c(0, 10), breaks = seq(0, 40, by = 2))
v1
#(h1 ) / (v1) +plot_layout(guides = 'collect') &theme(legend.position = "none")
head(DEG1)
table(DEG1$change)
DEG1$group=ANSWER
#plot
{
pdf(paste(gsub("[[:punct:]]", " ", paste(ANSWER,"_heatmap.pdf",sep = "_")),".pdf")
,width = 12,height = 7)
print(h1)
dev.off()
pdf(paste(gsub("[[:punct:]]", " ", paste(ANSWER,"_volcano.pdf",sep = "_")),".pdf"))
print(v1)
dev.off()
pdf(paste(gsub("[[:punct:]]", " ", paste(ANSWER,"_pca.plot.pdf",sep = "_")),".pdf"))
print(pca.plot)
dev.off()
pdf(paste(gsub("[[:punct:]]", " ", paste(ANSWER,"_pca_plot.pdf",sep = "_")),".pdf"))
print(pca.plot)
dev.off()
DEG1$gene=rownames(DEG1)
write.csv(DEG1,file = paste(ANSWER,"DEGS_.CSV"))
}
}
#--------------------------------------______________韦恩图------------
#安装加载VennDiagram包;
#install.packages("VennDiagram")
#加载VennDiagram包;
library(VennDiagram)
getwd()
setwd("/home/data/t040413/wpx/wpx_transcriptomics_proteinomics_Metabolomics/transcriptomics/4_d28_vs_d14_treatment_effective_2")
path_degs=list.files(all.files = T,full.names = T,pattern = ".CSV")
path_degs
degs_list <- lapply(path_degs, read.csv)
head(degs_list)
names(degs_list)=path_degs
head(degs_list)
# 使用do.call和rbind将它们合并
merged_df <- do.call(rbind, degs_list)
head(merged_df)
table(merged_df$group)
table(merged_df$change)
#---------------------------------------====================------pie plot--------=饼图
if (T) {
{
head(merged_df)
# Calculate the percentage of UP and DOWN changes within each group
group_percentages <- merged_df[merged_df$change!="NOT",] %>%
group_by(group, change) %>%
dplyr:: summarize(count = n()) %>%
ungroup() %>%
group_by(group) %>%
mutate(percentage = count / sum(count) * 100) %>%
mutate(percent = ifelse(is.na(percentage), 0, percentage),
percent_formatted = paste0(round(percent, 2), "%"))
# mutate(text_y = cumsum(as.numeric(percentage)) - as.numeric(percentage) / 2)
group_percentages
# Plot the pie chart
library(ggrepel)
p5= ggplot(group_percentages, aes(x = "", y = percentage, fill = change)) +
# geom_bar(stat = "identity", width = 1) +
geom_col()+
coord_polar(theta = "y") +
facet_wrap(~group) +
theme_void() +
labs(fill = "Change")
ggsave(plot = p5,filename = "pie_plot_nolabel.pdf",width = 7,height = 7,limitsize = FALSE)
openxlsx::write.xlsx(group_percentages,file = "pie_plot_data_group_percentages.xlsx")
}
{
group_percentages <- merged_df[merged_df$change!="NOT",] %>%
group_by(group, change) %>%
dplyr:: summarize(count = n()) %>%
ungroup() %>%
group_by(group) %>%
mutate(percentage = count / sum(count) * 100) %>%
mutate(percent = ifelse(is.na(percentage), 0, percentage),
percent_formatted = paste0(round(percent, 2), "%")) %>%
mutate(text_y = cumsum(as.numeric(percentage)) - as.numeric(percentage) / 2)
# mutate(text_y = cumsum(as.numeric(percentage)) - as.numeric(percentage) / 2)
group_percentages
# Plot the pie chart
library(ggrepel)
p6= ggplot(group_percentages, aes(x = "", y = percentage, fill = change)) +
# geom_bar(stat = "identity", width = 1) +
geom_col()+
coord_polar(theta = "y") +
facet_wrap(~group) +
theme_void() +
labs(fill = "Change")+
geom_label_repel(aes(label = count, y = c(0.01,1,0.01,1)), show.legend = FALSE,
# box.padding = unit(1.8, "lines"),
segment.color = "grey50", segment.size = 0.5)
ggsave(plot = p6,filename = "pie_plot.pdf",width = 7,height = 7,limitsize = FALSE)
}
}
if (F) {
{
# Calculate the percentage of UP and DOWN changes within each group
group_percentages <- merged_df[merged_df$change != "NOT", ] %>%
group_by(group, change) %>%
summarize(count = n()) %>%
ungroup() %>%
group_by(group) %>%
mutate(percentage = count / sum(count) * 100) %>%
mutate(percent = ifelse(is.na(percentage), 0, percentage),
percent_formatted = paste0(round(percent, 2), "%")) %>%
mutate(text_y = cumsum(as.numeric(percentage)) - as.numeric(percentage) / 2)
group_percentages
# Plot the pie chart
ggplot(group_percentages, aes(x = "", y = percentage, fill = change)) +
geom_col() +
coord_polar(theta = "y") +
facet_wrap(~group) +
theme_void() +
labs(fill = "Change") +
geom_label_repel(aes(label = count, y = text_y), show.legend = FALSE,
box.padding = unit(1.8, "lines"),
segment.color = "grey50", segment.size = 0.5)
}
{
head(merged_df)
# Calculate the percentage of UP and DOWN changes within each group
group_percentages <- merged_df[merged_df$change!="NOT",] %>%
group_by(group, change) %>%
summarize(count = n()) %>%
ungroup() %>%
group_by(group) %>%
mutate(percentage = count / sum(count) * 100) %>%
mutate(percent = ifelse(is.na(percentage), 0, percentage),
percent_formatted = paste0(round(percent, 2), "%"))
# mutate(text_y = cumsum(as.numeric(percentage)) - as.numeric(percentage) / 2)
group_percentages
# Plot the pie chart
library(ggrepel)
p5= ggplot(group_percentages, aes(x = "", y = percentage, fill = change)) +
# geom_bar(stat = "identity", width = 1) +
geom_col()+
coord_polar(theta = "y") +
facet_wrap(~group) +
theme_void() +
labs(fill = "Change")
ggsave(plot = p5,filename = "pie_plot_nolabel.pdf")
}
}
#https://zhuanlan.zhihu.com/p/370916031
{
list_for_venn=list()
for ( each_degs in unique(merged_df$group)) {
list_for_venn[[each_degs]]=merged_df[merged_df$group==each_degs
&merged_df$change %in% c("DOWN","UP"),]$gene
}
str(list_for_venn)
#选择韦恩图的颜色
{
#载入个人收藏的wesanderson包颜色列表;
Chevalier1<-c("#355243","#fbca50","#c9d5d4","#baa28a")
FantasticFox1<-c("#d37a20","#dbcb09","#3a9cbc","#dd7208","#a30019")
Moonrise3<-c("#75cbdc","#f0a4af","#8a863a","#c2b479","#f8d068")
Cavalcanti1<-c("#ceab0d","#083215","#919562","#6f997a","#831e11")
Darjeeling2<-c("#e6c09e","#0d5888","#cb8b3e","#9cd6d6","#000000")
Darjeeling1<-c("#fb0007","#139177","#ed9e08","#f56f08","#4caecc")
Royal2<-c("#e4c9b2","#f1c2a5","#f49d98","#fcd68f","#629076")
IsleofDogs2<-c("#e4c9b2","#998273","#a6723d","#2b2523","#151213")
#设置颜色列表,因为最多只能画五个分组的韦恩图,这里最多设置5种颜色;
color_list = Darjeeling1
#依照指定的新分组数选取颜色列;
fill_colors = color_list[1:length(list_for_venn)]
fill_colors
}
#绘制韦恩图,并导出到工作目录;https://cran.r-project.org/web/packages/VennDiagram/VennDiagram.pdf
#VennDiagram::draw.triple.venn
{venn.diagram(list_for_venn,
# col="white",
fill=fill_colors,lwd=.05,
# cat.cex=0.1 ,#size of the category name
resolution = 700,
# cat.pos = c(-20, 0, 20),
# cat.dist = c(1.05, 1.05, 1.02),
cex = 0.5,
cat.cex = 0.3, #字体大小
margin=0.1, #页边距
filename="venn_.tiff",
width=1900,height=1900)
}
#继续以上述3个分组为例,组间交集元素获得
venn_list=list_for_venn
inter <- get.venn.partitions(venn_list) %>%as.data.frame()
# Convert each element in the list to a character vector and encode special characters
#inter<- lapply(inter[-c(5, 6)], function(x) iconv(as.character(x), "UTF-8", "ASCII", sub = ""))
# Modify special characters in the 'inter' object
inter
for (i in 1:nrow(inter)) inter[i,'values'] <- paste(inter[[i,'..values..']], collapse = ', ')
openxlsx::write.xlsx(as.data.frame(inter[-c(5, 6)]), 'venn_inter.xlsx')
#for (i in 1:nrow(inter)) inter[i,'values'] <- paste(inter[[i,'..values..']], collapse = ', ')
#inter <- lapply(inter[-c(5, 6)], function(x) gsub("[:punct:]", "-", as.character(x)))
# 修改inter对象中的斜杠
#inter <- lapply(inter[-c(5, 6)], function(x) gsub("\\\\", "-", as.character(x)))
# Modify backslashes in the specific column of the data frame
#inter[,3] <- gsub("\\\\", "-", inter[,3], fixed = TRUE)
#openxlsx::write.xlsx(inter[-c(5, 6)], 'venn_inter.xlsx', row.names = FALSE, sep = ',', quote = FALSE)
#交集的基因
cg_inter=inter[1,"..values.."]
cg_inter=cg_inter[[1]]
cg_inter
}
#-----------------------------------------------------==########=========------------pheatmap four groups‘ 热图##########################3
figurename="d28_d14_control"
#pheatmap
{
colnames(exp)
dat=exp[,c(grep(pattern = "Normal",colnames(exp),value = T),
grep(pattern = "LCT\\(14\\+14\\)-MSC-D14",colnames(exp),value = T),
grep(pattern = "LCT\\(14\\+28\\)-MSC-D28",colnames(exp),value = TRUE))
]
dim(dat)
dat = log2(cpm(dat)+1) # dat 为使用count数据转化而成的cpm数据,使用这个数据画图,而count数据只是用来做差异分析
boxplot(dat,las=2)
dat[dat>15]=15 #可视化 抹去异常值
#分组信息 phe_Data
group_list=data.frame(
row.names = colnames(dat),
group=rep(c("Normal control","LCT(14+14)-MSC-D14","LCT(14+28)-MSC-D28"),each=6)
)
group_list$group=factor(group_list$group,levels =c("Normal control","LCT(14+14)-MSC-D14","LCT(14+28)-MSC-D28") )
group_list
#设置组别颜色 https://zhuanlan.zhihu.com/p/366674882
group_col2 <- c("#bfb2d5","#f1937f","red")
names(group_col2) <- unique(group_list$group) #LCT(14+28)-NT-D28 VS normal control
col=list(group=group_col2)
col
library(pheatmap)
ph1=pheatmap(dat[cg_inter,], annotation_col = group_list,color = colorRampPalette(c("blue", "white", "red"))(50),
cutree_rows=2,cutree_cols=3, #切分成几份
cluster_cols = FALSE, #是否对列 样本聚类
#annotation_row = annotation_row,
# cellwidth = 20, cellheight = 15,
scale = "row" ,
fontsize_row = 4,
annotation_legend = TRUE)
pdf(paste(figurename,"pheatmap1.pdf"))
print(ph1)
dev.off()
#带聚类的热图
{
ph2=pheatmap(dat[cg_inter,], annotation_col = group_list,color = colorRampPalette(c("blue", "white", "red"))(50),
# cutree_rows=2,cutree_cols=3, #切分成几份
cluster_cols = T,
#clustering_method = "average", #是否对列 样本聚类
#annotation_row = annotation_row,
# cellwidth = 20, cellheight = 15,
scale = "row" ,
fontsize_row = 4,
annotation_legend = TRUE)
pdf(paste(figurename,"pheatmap__clustering.pdf"))
print(ph2)
dev.off()}
}
#六组求平均值 # 计算每六列的平均值 差异基因的热图
if (T) {
{
colnames(exp)
dat=exp[,c(grep(pattern = "Normal",colnames(exp),value = T),
grep(pattern = "LCT\\(14\\+14\\)-MSC-D14",colnames(exp),value = T),
grep(pattern = "LCT\\(14\\+28\\)-MSC-D28",colnames(exp),value = TRUE))
]
dim(dat)
dat = log2(cpm(dat)+1) # dat 为使用count数据转化而成的cpm数据,使用这个数据画图,而count数据只是用来做差异分析
boxplot(dat,las=2)
dat[dat>15]=15 #可视化 抹去异常值
#分组信息 phe_Data
group_list=data.frame(
row.names = colnames(dat),
group=rep(c("Normal control","LCT(14+14)-MSC-D14","LCT(14+28)-MSC-D28"),each=6)
)
group_list$group=factor(group_list$group,levels =c("Normal control","LCT(14+14)-MSC-D14","LCT(14+28)-MSC-D28") )
group_list
# 计算每六列的平均值
group_list
new_matrix <- matrix(0, nrow = nrow(dat), ncol = ceiling(ncol(dat) / 6))
colnames(new_matrix) <- unique(group_list$group)
rownames(new_matrix) <- rownames(dat)
for (i in 1:ncol(new_matrix)) {
start_col <- (i - 1) * 6 + 1
end_col <- min(start_col + 5, ncol(dat))
new_matrix[, i] <- rowMeans(dat[, start_col:end_col], na.rm = TRUE)
}
head(new_matrix)
dat<-new_matrix
boxplot(dat,las=2)
dat[dat>15]=15 #可视化 抹去异常值
ph3=pheatmap(dat[cg_inter,], #annotation_col = group_list,
color = colorRampPalette(c("blue", "white", "red"))(50),
# cutree_rows=2,cutree_cols=3, #切分成几份
cluster_cols = T, #是否对列 样本聚类
#annotation_row = annotation_row,
# cellwidth = 20, cellheight = 15,
scale = "row" ,
fontsize_row = 3,
annotation_legend = TRUE)
pdf(paste(figurename,"pheatmap__average_expression_clustering_differential_degs.pdf"))
print(ph3)
dev.off()
}
}
getwd()
#六组求平均值 # 计算每六列的平均值 seletec_genes的热图
seletec_genes=openxlsx::read.xlsx("/home/data/t040413/wpx/wpx_transcriptomics_proteinomics_Metabolomics/副本wpx-炎症或缺氧相关的因子 (1).xlsx",
sheet = 2)
seletec_genes=seletec_genes$Gene_name %>%tolower() %>% str_to_title()
seletec_genes
{
colnames(exp)
dat=exp[,c(grep(pattern = "Normal",colnames(exp),value = T),
grep(pattern = "LCT\\(14\\+14\\)-MSC-D14",colnames(exp),value = T),
grep(pattern = "LCT\\(14\\+28\\)-MSC-D28",colnames(exp),value = TRUE))
]
dim(dat)
dat = log2(cpm(dat)+1) # dat 为使用count数据转化而成的cpm数据,使用这个数据画图,而count数据只是用来做差异分析
boxplot(dat,las=2)
dat[dat>15]=15 #可视化 抹去异常值
#分组信息 phe_Data
group_list=data.frame(
row.names = colnames(dat),
group=rep(c("Normal control","LCT(14+14)-MSC-D14","LCT(14+28)-MSC-D28"),each=6)
)
group_list$group=factor(group_list$group,levels =c("Normal control","LCT(14+14)-MSC-D14","LCT(14+28)-MSC-D28") )
group_list
# 计算每六列的平均值
group_list
new_matrix <- matrix(0, nrow = nrow(dat), ncol = ceiling(ncol(dat) / 6))
colnames(new_matrix) <- unique(group_list$group)
rownames(new_matrix) <- rownames(dat)
for (i in 1:ncol(new_matrix)) {
start_col <- (i - 1) * 6 + 1
end_col <- min(start_col + 5, ncol(dat))
new_matrix[, i] <- rowMeans(dat[, start_col:end_col], na.rm = TRUE)
}
head(new_matrix)
dat<-new_matrix
boxplot(dat,las=2)
dat[dat>15]=15 #可视化 抹去异常值
print(length(seletec_genes))
ph4=pheatmap(dat[ rownames(dat)[rownames(dat) %in% seletec_genes ] ,],
#annotation_col = group_list,
color = colorRampPalette(c("blue", "white", "red"))(50),
# cutree_rows=2,cutree_cols=3, #切分成几份
cluster_cols = T, #是否对列 样本聚类
#annotation_row = annotation_row,
# cellwidth = 20, cellheight = 15,
scale = "row" ,
fontsize_row = 5,
annotation_legend = TRUE)
pdf(paste(figurename,"pheatmap__average_expression_clustering_selected_genes.pdf"))
print(ph4)
dev.off()
}
print(getwd())
cg_inter
##重新画图 selected pathway
if (TRUE) {
print(getwd())#=--------------------------------===============================#####################_+==============================
#-----------------------https://www.jianshu.com/p/eee2cc315f77-----------------------------------------------------------
library(clusterProfiler)
library(ggplot2)
library(enrichplot)
library(DOSE)
print(getwd())
xx=openxlsx::read.xlsx("./compareCluster-GO_enrichment.xlsx")
gg=openxlsx::read.xlsx("./compareCluster-KEGG_enrichment.xlsx")
xx=xx[xx$excellent=="1" &
!is.na(xx$excellent),]
gg=gg[gg$excellent=="1" &
!is.na(gg$excellent),]
gg
head(xx)
#XX
if (T) {
df=xx
df$GeneRatio <- DOSE::parse_ratio(df$GeneRatio)
# Find unique levels and sort them alphabetically
# unique_descriptions <- sort(unique(df$Description))
# # Convert Description column into a factor with sorted levels
# df$Description <- factor(df$Description, levels = unique_descriptions)
if (T) {
library(dplyr)
# First, ensure the 'Description' column is of type character (if not already)
df$Description <- as.character(df$Description)
# Use add_count() to add a new column 'DescriptionFreq' containing the frequency of each 'Description'
df <- df %>%
add_count(Description, name = "DescriptionFreq")
# Now, arrange the dataframe based on the frequency and the 'Description' column
df <- df %>%
arrange(DescriptionFreq, Description) %>%
select(-DescriptionFreq) # Remove the temporary frequency column if you don't need it in the final result
# Now the 'Description' column is reordered as you desired, with non-repeated descriptions at the beginning, followed by descriptions with one repetition, and so on.
}
df=df %>%
group_by(ID) %>%
add_count() %>% group_by(Description) %>% mutate(number=rownames(df))#%>%ungroup()
xx=df
xx
dim(xx)
xx$GeneRatio=as.numeric(xx$GeneRatio)
library(forcats)
if (T) {
#如何取消ggplot的y轴默认排序,但同时我的y轴又存在重复
# Convert 'Description' to a factor while preserving the order and duplicates
df$Description <- fct_inorder(df$Description)
# Now create the ggplot using the 'Description' column for y-axis
p2=ggplot(df, aes(x = Cluster, y = Description)) +
geom_point(aes(color = p.adjust, size = Count)) +
scale_color_gradient(low = "red", high = "blue") +
xlab("Cluster") +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, face = "bold", size = 10)) +
# Edit legends
guides(
# Reverse color order (higher value on top)
color = guide_colorbar(reverse = TRUE)
) +
guides(
size = guide_legend(order = 2),
color = guide_colorbar(order = 1)
)
}
if (F) {
# str(xx)
# p2=ggplot(df,aes(x = Cluster,y = as.character(Description)))+
# geom_point(aes(color = p.adjust,
# size = Count))+
# scale_color_gradient(low = "red", high = "blue") +
# xlab("Cluster")+
# theme_bw()+
# theme(axis.text.x = element_text(angle = 90,face = "bold",size = 10))+
# # scale_y_continuous(labels = NULL) + # Remove custom labels for y-axis
# #edit legends
# guides(
# #reverse color order (higher value on top)
# color = guide_colorbar(reverse = TRUE)) +
# guides( size = guide_legend(order = 2),
# color = guide_colorbar(order = 1))
#
# #reverse size order (higher diameter on top)
# #size = guide_legend(reverse = TRUE))
#
}
ggsave('degs_compareCluster-GO_enrichment-2_selected_go.pdf',
plot = p2,width = 7,height = 13,limitsize = F)
}
#GG
if (T) {
xx=gg
df=xx
df$GeneRatio <- DOSE::parse_ratio(df$GeneRatio)
# Find unique levels and sort them alphabetically
# unique_descriptions <- sort(unique(df$Description))
# # Convert Description column into a factor with sorted levels
# df$Description <- factor(df$Description, levels = unique_descriptions)
if (T) {
library(dplyr)
# First, ensure the 'Description' column is of type character (if not already)
df$Description <- as.character(df$Description)
# Use add_count() to add a new column 'DescriptionFreq' containing the frequency of each 'Description'
df <- df %>%
add_count(Description, name = "DescriptionFreq")
# Now, arrange the dataframe based on the frequency and the 'Description' column
df <- df %>%
arrange(DescriptionFreq, Description) %>%
select(-DescriptionFreq) # Remove the temporary frequency column if you don't need it in the final result
# Now the 'Description' column is reordered as you desired, with non-repeated descriptions at the beginning, followed by descriptions with one repetition, and so on.
}
df=df %>%
group_by(ID) %>%
add_count() %>% group_by(Description) %>% mutate(number=rownames(df))#%>%ungroup()
xx=df
xx
dim(xx)
xx$GeneRatio=as.numeric(xx$GeneRatio)
library(forcats)
if (T) {
#如何取消ggplot的y轴默认排序,但同时我的y轴又存在重复
# Convert 'Description' to a factor while preserving the order and duplicates
df$Description <- fct_inorder(df$Description)
# Now create the ggplot using the 'Description' column for y-axis
p2=ggplot(df, aes(x = Cluster, y = Description)) +
geom_point(aes(color = p.adjust, size = Count)) +
scale_color_gradient(low = "red", high = "blue") +
xlab("Cluster") +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, face = "bold", size = 10)) +
# Edit legends
guides(
# Reverse color order (higher value on top)
color = guide_colorbar(reverse = TRUE)
) +
guides(
size = guide_legend(order = 2),
color = guide_colorbar(order = 1)
)
}
if (F) {
# str(xx)
# p2=ggplot(df,aes(x = Cluster,y = as.character(Description)))+
# geom_point(aes(color = p.adjust,
# size = Count))+
# scale_color_gradient(low = "red", high = "blue") +
# xlab("Cluster")+
# theme_bw()+
# theme(axis.text.x = element_text(angle = 90,face = "bold",size = 10))+
# # scale_y_continuous(labels = NULL) + # Remove custom labels for y-axis
# #edit legends
# guides(
# #reverse color order (higher value on top)
# color = guide_colorbar(reverse = TRUE)) +
# guides( size = guide_legend(order = 2),
# color = guide_colorbar(order = 1))
#
# #reverse size order (higher diameter on top)
# #size = guide_legend(reverse = TRUE))
#
}
ggsave('degs_compareCluster-KEGG_enrichment-2_selected_KEGG.pdf',
plot = p2,width = 6,height = 8,limitsize = F)
}
}
############-----------------------------------------------------------富集分析---------
getwd()
#openxlsx::write.xlsx(merged_df,file = "/home/data/t040413/wpx/wpx_transcriptomics_proteinomics_Metabolomics/transcriptomics/4_d28_vs_d14_treatment_effective/merged_df.xlsx")
head(merged_df)
{
##########################----------------------enrichment analysis==================================================
#https://mp.weixin.qq.com/s/WyT-7yKB9YKkZjjyraZdPg
df=merged_df
head(df)
# ##筛选阈值确定:p<0.05,|log2FC|>1
# p_val_adj = 0.05
# avg_log2FC = 0.6
# #根据阈值添加上下调分组标签:
# df$direction <- case_when(
# df$avg_log2FC > avg_log2FC & df$p_val_adj < p_val_adj ~ "up",
# df$avg_log2FC < -avg_log2FC & df$p_val_adj < p_val_adj ~ "down",
# TRUE ~ 'none'
# )
# head(df)
df$direction=df$change
df=df[df$direction!="NOT",]
head(df)
dim(df)
df$mygroup=paste(df$group,df$direction,sep = "_")
head(df)
dim(df)
#https://mp.weixin.qq.com/s/WyT-7yKB9YKkZjjyraZdPg
{
library(clusterProfiler)
# library(org.Hs.eg.db)
# BiocManager::install("org.Rn.eg.db")
# install.packages("org.Rn.eg.db")
library(org.Rn.eg.db)
library(ggplot2)
# degs_for_nlung_vs_tlung$gene=rownames(degs_for_nlung_vs_tlung)
sce.markers=df
head(sce.markers)
ids=bitr(sce.markers$gene,'SYMBOL','ENTREZID','org.Rn.eg.db')
head(ids)
sce.markers=merge(sce.markers,ids,by.x='gene',by.y='SYMBOL')
head(sce.markers)
sce.markers=sce.markers[sce.markers$change!="NOT",]
dim(sce.markers)
head(sce.markers)
sce.markers$cluster=sce.markers$mygroup
gcSample=split(sce.markers$ENTREZID, sce.markers$cluster)
gcSample # entrez id , compareCluster
#https://zhuanlan.zhihu.com/p/561522453
#https://evvail.com/2021/07/13/2456.html
print("===========开始go kegg============")
xx <-clusterProfiler::compareCluster(gcSample, fun="enrichGO",OrgDb="org.Rn.eg.db",
readable=TRUE,
ont = 'ALL', #GO Ontology,可选 BP、MF、CC,也可以指定 ALL 同时计算 3 者
pvalueCutoff=0.05) #organism="hsa", #'org.Hs.eg.db',
gg<-clusterProfiler::compareCluster(gcSample,fun = "enrichKEGG",
keyType = 'kegg', #KEGG 富集
organism="rno",
pvalueCutoff = 0.05 #指定 p 值阈值(可指定 1 以输出全部
)
p=dotplot(xx)
p2=p+ theme(axis.text.x = element_text(angle = 90,
vjust = 0.5, hjust=0.5))
p2
ggsave('degs_compareCluster-GO_enrichment-2.pdf',plot = p2,width = 6,height = 12,limitsize = F)
xx
openxlsx::write.xlsx(xx,file = "compareCluster-GO_enrichment.xlsx")
# -----
p=dotplot(gg)
p4=p+ theme(axis.text.x = element_text(angle = 90,
vjust = 0.5, hjust=0.5))
p4
print(paste("保存位置",getwd(),sep = " : "))
ggsave('degs_compareCluster-KEGG_enrichment-2.pdf',plot = p4,width = 6,height = 12,limitsize = F)
gg
openxlsx::write.xlsx(gg,file = "compareCluster-KEGG_enrichment.xlsx")
}
getwd()
setwd("/home/data/t040413/wpx/wpx_transcriptomics_proteinomics_Metabolomics/transcriptomics/4_d28_vs_d14_treatment_effective/")
if (F) {
{
go=openxlsx::read.xlsx("/home/data/t040413/wpx/wpx_transcriptomics_proteinomics_Metabolomics/transcriptomics/4_d28_vs_d14_treatment_effective/compareCluster-GO_enrichment.xlsx")
head(go)
# 或者使用一些作图包(如ggplot2)读取输出结果做个展示,不再多说。
library(ggplot2)
go <- xx@compareClusterResult
head(go)
go$term <- paste(go$ID, go$Description, sep = ': ')
# go <- go[order(go$ONTOLOGY, go$p.adjust, decreasing = c(TRUE, TRUE)), ]
go <- go[order( go$p.adjust, decreasing = c( FALSE) ), ]
head(go)
go$term <- factor(go$term, levels = unique(go$term))
print(dim(go))
#library(tidyr)
# install.packages("BiocManager")
# library(BiocManager)
# BiocManager::install("dplyr")
head(go)
go <-go %>%
group_by(ONTOLOGY) %>%
mutate(group_size = n()) %>%
filter(row_number() <= ifelse(group_size > 10, 10, group_size)) %>%
ungroup() %>%
select(-group_size)
go
p3=ggplot(go, aes(term, -log10(p.adjust))) +
geom_col(aes(fill = ONTOLOGY), width = 0.5, show.legend = FALSE) +
scale_fill_manual(values = c('#D06660', '#5AAD36', '#6C85F5')) +
facet_grid(ONTOLOGY~., scale = 'free_y', space = 'free_y') +
theme(panel.grid = element_blank(), panel.background = element_rect(color = 'black', fill = 'transparent')) +
scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
coord_flip() +
labs(x = '', y = '-Log10 P-Value\n')
ggsave(plot = p3,filename = "")
}
}
}