内容如下:
1.外泌体和肝癌TCGA数据下载
2.数据格式整理
3.差异表达基因筛选
4.预后相关外泌体基因确定
5.拷贝数变异及突变图谱
6.外泌体基因功能注释
7.LASSO回归筛选外泌体预后模型
8.预后模型验证
9.预后模型鲁棒性分析
10.独立预后因素分析及与临床的相关性分析
11.列线图,ROC曲线,校准曲线,DCA曲线
12.外部数据集验证
13.外泌体模型与免疫的关系
14.外泌体模型与单细胞测序
########################### 4.预后相关外泌体基因确定 ############################
下面将差异表达基因的表达数据和临床数据进行合并。
准备的数据除了上面的数据外,还有一个临床数据,可以在我的资源下载,也可以在TCGA数据库下载:
代码如下:
# Step 4 (data preparation): take the genes that are differentially expressed
# in the same direction in both the TCGA-LIHC and the exoRBase tables, pull
# their expression in TCGA tumor samples, and merge it with overall-survival
# data into one table that is written to
# "LIHC_clinical_mRNA_expression_data.csv".

# NOTE: machine-specific working directory; point it at the folder that holds
# the input files before running.
setwd("C:\\Users\\ASUS\\Desktop\\自噬")
## install.packages("pheatmap")

# Show at most the first `n_row` rows and `n_col` columns of a table without
# failing (or padding with NA rows) when the table is smaller than that.
preview <- function(x, n_row = 5, n_col = 5) {
  x[seq_len(min(n_row, nrow(x))), seq_len(min(n_col, ncol(x))), drop = FALSE]
}

# ---- Differentially expressed genes shared by both sources ----
# Column `X` holds the gene identifier (presumably the unnamed first CSV
# column, which read.csv() names "X" -- confirm against the input files).
LIHC <- read.csv("TCGA_LIHC_diff_expression.csv", header = TRUE, sep = ",")
Exorbase <- read.csv("ExoRbase_LIHC_diff_expression.csv", header = TRUE, sep = ",")

# which() drops rows whose logFC / adj.P.Val is NA; plain logical subsetting
# would turn each of them into an all-NA row (and an NA gene id).
LIHC_UP <- LIHC[which(LIHC$logFC > 0.5 & LIHC$adj.P.Val < 0.05), ]
LIHC_DOWN <- LIHC[which(LIHC$logFC < -0.5 & LIHC$adj.P.Val < 0.05), ]
Exorbase_UP <- Exorbase[which(Exorbase$logFC > 0.5 & Exorbase$adj.P.Val < 0.05), ]
Exorbase_DOWN <- Exorbase[which(Exorbase$logFC < -0.5 & Exorbase$adj.P.Val < 0.05), ]

commonUP <- intersect(LIHC_UP$X, Exorbase_UP$X)
commonUP
commonDOWN <- intersect(LIHC_DOWN$X, Exorbase_DOWN$X)
commonDOWN
gene <- c(commonDOWN, commonUP)
gene

# ---- Expression of the shared genes in TCGA-LIHC ----
library(pheatmap)
dir()
rt <- read.csv("TCGA-LIHC.csv", sep = ",", header = TRUE) # read the expression matrix
preview(rt, 10, 5)

# Genes absent from the expression matrix give NA positions; indexing with
# them would create all-NA rows and make the row-name assignment below fail.
gene_idx <- match(gene, rt$X)
if (anyNA(gene_idx)) {
  warning(
    "Genes not found in TCGA-LIHC.csv and skipped: ",
    paste(gene[is.na(gene_idx)], collapse = ", "),
    call. = FALSE
  )
  gene_idx <- gene_idx[!is.na(gene_idx)]
}
gene_idx
data <- rt[gene_idx, ]
row.names(data) <- data$X
preview(data, 10, 5)
data <- data[, -1, drop = FALSE]
preview(data)

# Keep columns whose barcode has a sample-type code 00-09 followed by a
# letter (e.g. "TCGA.XX.XXXX.01A").
tumor_cols <- grep("^TCGA[.]([a-zA-Z0-9]{2})[.]([a-zA-Z0-9]{4})[.]([0][0-9][A-Z])", colnames(data))
length(tumor_cols)
tumor_cols
tumor <- data[, tumor_cols, drop = FALSE]
dim(tumor)

# Samples in rows, genes in columns; sort by barcode and keep the first
# sample per 12-character patient id. drop = FALSE keeps a data frame even
# when only one gene was selected.
tumor <- as.data.frame(t(tumor))
tumor <- tumor[order(rownames(tumor)), , drop = FALSE]
tumor <- tumor[!duplicated(substr(rownames(tumor), 1, 12)), , drop = FALSE]
dim(tumor)
rownames(tumor) <- substr(rownames(tumor), 1, 12)
preview(tumor, 4, 4)

# ---- Survival data ----
clinical <- read.csv("LIHC_clinicalMatrix", header = TRUE, sep = "\t")
preview(clinical)
clinical <- clinical[, c("sampleID", "OS.time", "OS")]
head(clinical)
# Reduce sample ids to the 12-character patient id and use "." as separator
# so they are comparable with the expression column names.
clinical$sampleID <- substr(clinical$sampleID, 1, 12)
clinical$sampleID <- gsub("-", ".", clinical$sampleID)
dim(clinical)

# Align clinical rows to the tumor samples. Patients without a clinical
# record keep their id (with NA survival) instead of an NA sampleID.
clin_idx <- match(rownames(tumor), clinical$sampleID)
if (anyNA(clin_idx)) {
  warning(
    sum(is.na(clin_idx)),
    " tumor sample(s) have no clinical record; OS.time and OS are NA for them.",
    call. = FALSE
  )
}
clinical <- clinical[clin_idx, ]
clinical$sampleID <- rownames(tumor)
dim(clinical)
identical(clinical$sampleID, rownames(tumor))

# ---- Merge and export ----
data <- cbind(clinical, tumor)
preview(data)
write.csv(data, "LIHC_clinical_mRNA_expression_data.csv", row.names = FALSE)
最终得到的结果是一张合并了临床预后数据(OS.time、OS)和外泌体相关基因表达量的数据表。
#> data[1:5,1:5]
# sampleID OS.time OS BACH2 CDHR2
#1 TCGA.2V.A95S NA NA 0.5868962 3.48179570
#2 TCGA.2Y.A9GS 724 1 1.8384745 0.34802353
#3 TCGA.2Y.A9GT 1624 1 1.0528859 0.08873198
#4 TCGA.2Y.A9GU 1939 0 1.1823522 0.19297482
#5 TCGA.2Y.A9GV 2532 1 1.4611460 5.27112276
下一节进行单因素Cox回归分析。