在R中使用xgboost
假设 X 为训练数据(特征矩阵),y 为标签(取值为 0 或 1)。用 xgboost 建立二分类模型的代码如下:
调用 caret 包中的 createFolds 方法,进行 10 折交叉验证
最后画出每一折的 ROC 曲线,并在图例中标注对应的 AUC 值
library(xgboost)
library(caret)
library(caTools)
library(pROC)
# Fix the RNG seed so fold assignment and xgboost's row/column subsampling are
# reproducible between runs.
set.seed(123)

# Pass the label as a factor so createFolds() stratifies by class. A numeric
# 0/1 vector is binned by quantiles instead, which can put both classes in the
# same stratum and produce folds with a skewed (or missing) class.
folds <- createFolds(factor(y), k = 10)

# For each fold: train on all other rows, score the held-out rows, and return
# the pROC "roc" object for that fold. `cv` is a list named after the folds
# (Fold01, Fold02, ...), one ROC object per fold.
cv <- lapply(folds, function(test_idx) {
  # drop = FALSE keeps a two-dimensional object even when X has one column.
  train_x <- as.matrix(X[-test_idx, , drop = FALSE])
  train_y <- y[-test_idx]
  test_x <- as.matrix(X[test_idx, , drop = FALSE])
  test_y <- y[test_idx]

  # we stick our XGBoost classifier in here
  classifier <- xgboost(
    data = train_x,
    label = train_y,
    nrounds = 100,
    verbose = 0,
    params = list(
      objective = "binary:logistic",
      eval_metric = "auc",
      max_depth = 5,
      eta = 0.4,
      min_child_weight = 7,
      subsample = 0.8,
      colsample_bytree = 0.9
    )
  )

  # binary:logistic yields the predicted probability of class 1.
  test_prob <- predict(classifier, newdata = test_x)

  # Pin the class order (0 = control, 1 = case) and the direction (higher score
  # means case). With pROC's automatic direction a fold that scores worse than
  # chance would be silently flipped, inflating its AUC. quiet = TRUE suppresses
  # the per-fold informational messages.
  pROC::roc(
    response = test_y,
    predictor = test_prob,
    levels = c(0, 1),
    direction = "<",
    quiet = TRUE
  )
})
# plot
# Overlay the ROC curve of every fold on one set of axes and label each curve
# with its AUC in the legend. Curves and legend share one colour vector so they
# cannot drift apart, and nothing is hard-coded to exactly 10 folds.
n_folds <- length(cv)

# brewer.pal() accepts 3..12 colours for "Set3": clamp the request to that
# range, then truncate/recycle to one colour per fold.
fold_cols <- rep_len(
  RColorBrewer::brewer.pal(n = min(max(n_folds, 3), 12), name = "Set3"),
  n_folds
)

# AUC of each fold, rounded to two decimals for the legend text.
fold_auc <- vapply(
  cv,
  function(fold_roc) round(as.numeric(fold_roc$auc), 2),
  numeric(1)
)

# The first fold opens the plot; the remaining folds are added as lines.
plot.roc(cv[[1]], col = fold_cols[1])
for (i in seq_len(n_folds)[-1]) {
  lines.roc(cv[[i]], col = fold_cols[i])
}

legend(
  "bottomright",
  legend = paste0('Fold', seq_len(n_folds), ': auc ', fold_auc),
  col = fold_cols,
  lty = 1,
  box.lty = 0,
  bg = "transparent"
)