Bootstrap验证(Bootstrap Validation):从原始数据中有放回地抽取样本,抽取的样本数量和重抽样次数都可以设定。根据每次抽取的样本训练并检验模型的性能,不断重复Bootstrap抽样,即可得出平均的性能结果。
方法1:使用caret
#bootstrap
library(caret)
library(Ecdat)
# Method 1: bootstrap validation of a classifier for `chas` with caret.
data(Hedonic)

# Resampling setup: bootstrap with class probabilities switched on, because
# twoClassSummary needs them to compute ROC, sensitivity and specificity.
train.control <- trainControl(
  method = "boot",
  number = 1000, # 1000 bootstrap resamples
  classProbs = TRUE,
  summaryFunction = twoClassSummary
)

# Quick look at the data; show the class counts of the outcome rather than
# printing every value of the column.
str(Hedonic)
table(Hedonic$chas)

# Seed right before training so the resamples are reproducible.
set.seed(123)
mod <- train(
  chas ~ mv + crim + zn + indus
    + age + nox + rm + tax + dis
    + rad + ptratio + blacks + lstat,
  data = Hedonic,
  trControl = train.control,
  # twoClassSummary does not report "Accuracy" (caret's default metric for
  # classification), so name the metric explicitly to avoid the warning and
  # the implicit fallback.
  metric = "ROC",
  method = "glm"
)
方法2:使用boot
library(boot)
library(pROC)
# Reproducible toy data set: an alternating binary outcome (1/2) and four
# independent normally distributed predictors, 20 rows in total.
set.seed(123)
data <- data.frame(
  chas  = rep(c(1, 2), times = 10),
  mv    = rnorm(n = 20),
  crim  = rnorm(n = 20, mean = 10, sd = 4),
  zn    = rnorm(n = 20, mean = 5, sd = 6),
  indus = rnorm(n = 20, mean = 5, sd = 7)
)
# Bootstrap statistic for boot::boot(): refit the model on one resample and
# return its AUC.
#
# Args:
#   data:    the full data frame handed to boot(); must contain the binary
#            outcome `chas` and the predictors mv, crim, zn and indus.
#   indices: row indices of the current bootstrap resample (supplied by boot).
#
# Returns:
#   The AUC of the refitted model evaluated on that same resample, rounded to
#   three decimals, or NA_real_ when the resample contains fewer than two
#   outcome classes (an ROC curve is undefined in that case).
glm_mo <- function(data, indices) {
  dat <- data[indices, ]

  # Take the control/case order from the full data so every resample is scored
  # with the same orientation.
  outcome_levels <- sort(unique(data$chas))
  if (length(unique(dat$chas)) < 2L) {
    return(NA_real_)
  }

  vames <- c("mv", "crim", "zn", "indus")
  fml <- as.formula(paste("chas~", paste(vames, collapse = "+")))
  # NOTE(review): glm() is called with its default family, so this is a linear
  # fit on the 1/2-coded outcome; AUC only uses the ranking of the predictions.
  # Confirm whether a binomial (logistic) fit was intended.
  fit <- glm(fml, data = dat)
  pr1 <- predict(fit, newdata = dat)

  # Explicit levels and direction: pROC's automatic direction choice, applied
  # per resample, biases resampled AUCs upward. With both fixed no
  # informational messages are produced either.
  roc_obj <- pROC::roc(
    response = dat$chas,
    predictor = pr1,
    levels = outcome_levels[1:2],
    direction = "<",
    quiet = TRUE
  )

  # The auc element is already numeric; no pipe or string parsing is needed.
  round(as.numeric(roc_obj[["auc"]]), 3)
}
# Bootstrap the AUC statistic over the toy data and show the summary
# (original estimate, bias and standard error).
results <- boot(
  data = data,
  statistic = glm_mo,
  R = 500 # number of bootstrap resamples
)
results