### 11.6 Class prediction with support vector machines ###
# Build a two-class subset of iris: every row whose species is not 'virginica'.
# Predictors are columns 2 and 3 (Sepal.Width, Petal.Length).
X <- subset(iris, Species != "virginica", select = 2:3)
# Response: the species label of the same rows.
y <- iris$Species[iris$Species != "virginica"]
# Scatter plot of the subset, coloured and shaped by species.
plot(X, cex = 1.5, pch = as.numeric(y) + 15, col = y)
# Fit a linear SVM classifier on the unscaled predictors.
library(e1071)
svm.model <- svm(x = X, y = y, scale = FALSE, kernel = "linear", degree = 1)
summary(svm.model)
svm.model$index # indices of the support vectors
svm.model$nSV   # number of support vectors in each class
svm.model$SV    # predictor values of the support vectors
# Overlay a fitted two-feature linear SVM on the current plot: the decision
# boundary (solid red), the two margin lines (dashed grey) and a blue ring
# around every support vector.
#
# Args:
#   svm.model: a list-like fit exposing `coefs`, `SV`, `rho` and `index`
#              (the components read below), e.g. the result of e1071::svm()
#              with a linear kernel on two predictors and two classes.
#   X:         the two-column data frame or matrix the model was fitted on;
#              rows `svm.model$index` are highlighted as support vectors.
#
# Returns: NULL, invisibly. The function is called for its drawing side effect
# and needs an existing plot to draw onto.
plot_svc_decision_boundary <- function(svm.model, X) {
  # Weight vector and intercept of the separating line w1*x1 + w2*x2 + b = 0.
  w <- t(svm.model$coefs) %*% svm.model$SV
  b <- -svm.model$rho
  w1 <- w[1, 1]
  w2 <- w[1, 2]

  if (!all(is.finite(c(w1, w2, b))) || (w1 == 0 && w2 == 0)) {
    stop("cannot draw a decision boundary: weights are zero or non-finite",
         call. = FALSE)
  }

  # drop = FALSE keeps a single support vector as a one-row table, so
  # points() still reads it as an (x1, x2) pair when X is a matrix.
  support_vectors <- X[svm.model$index, , drop = FALSE]

  if (abs(w2) <= .Machine$double.eps * abs(w1)) {
    # Vertical boundary: slope/intercept form would divide by zero, so draw
    # x1 = -b / w1 and the margins w1*x1 + b = +/-1 as vertical lines.
    abline(v = -b / w1, col = "red", lwd = 2)
    points(support_vectors, col = "blue", cex = 2.5, lwd = 2)
    abline(v = (c(1, -1) - b) / w1, col = "grey", lwd = 2, lty = 2)
  } else {
    # x2 = intercept + slope * x1; the margins are the same line shifted
    # vertically by 1 / w2 (where w . x + b = +/-1).
    slope <- -w1 / w2
    intercept <- -b / w2
    margin <- 1 / w2
    abline(a = intercept, b = slope, col = "red", lwd = 2)
    points(support_vectors, col = "blue", cex = 2.5, lwd = 2)
    abline(a = intercept + margin, b = slope, col = "grey", lwd = 2, lty = 2)
    abline(a = intercept - margin, b = slope, col = "grey", lwd = 2, lty = 2)
  }
  invisible(NULL)
}
# Scatter plot with the separating line added.
plot(X, cex = 1.5, pch = as.numeric(y) + 15, col = y) # draw the scatter plot
plot_svc_decision_boundary(svm.model, X) # add the decision boundary and mark the support vectors
# SVMs are sensitive to feature scaling.
Xs <- data.frame(
  x1 = c(1, 5, 3, 5),
  x2 = c(50, 20, 80, 60)
)
ys <- factor(c(0, 0, 1, 1))
# Linear SVM fitted on the raw features.
svm_clf <- svm(
  x = Xs, y = ys,
  kernel = "linear", cost = 100, scale = FALSE
)
# Standardise each column by hand, then fit the same model on the result.
Xs_scale <- apply(Xs, 2, scale)
svm_clf1 <- svm(
  x = Xs_scale, y = ys,
  kernel = "linear", cost = 100, scale = FALSE
)
# Show the two fits side by side.
par(mfrow = c(1, 2))
plot(Xs, main = "Unscaled", cex = 1.5, pch = as.numeric(ys) + 15, col = ys)
plot_svc_decision_boundary(svm_clf, Xs)
plot(Xs_scale, main = "scaled", cex = 1.5, pch = as.numeric(ys) + 15, col = ys)
plot_svc_decision_boundary(svm_clf1, Xs_scale)
par(mfrow = c(1, 1))
# Same model with the scale argument set to TRUE.
svm_clf2 <- svm(
  x = Xs, y = ys,
  kernel = "linear", cost = 100, scale = TRUE
)
# Centre and scale recorded on the fitted model.
svm_clf2$x.scale
# Mean and standard deviation computed by hand, for comparison.
apply(Xs, 2, function(column) {
  c("center" = mean(column, na.rm = TRUE), "scale" = sd(column, na.rm = TRUE))
})
# Soft-margin classification.
# Predictors are now columns 1 and 2 (Sepal.Length, Sepal.Width).
X <- iris[iris$Species != "virginica", 1:2]
y <- iris[iris$Species != "virginica", "Species"]
# Two linear fits that differ only in the cost parameter.
svm_smallC <- svm(
  x = X, y = y,
  kernel = "linear", scale = FALSE, cost = 1
)
svm_largeC <- svm(
  x = X, y = y,
  kernel = "linear", scale = FALSE, cost = 100
)
# Compare both fits side by side.
par(mfrow = c(1, 2))
plot(X, main = "small cost", pch = as.numeric(y) + 15, col = y)
plot_svc_decision_boundary(svm_smallC, X)
plot(X, main = "large cost", pch = as.numeric(y) + 15, col = y)
plot_svc_decision_boundary(svm_largeC, X)
par(mfrow = c(1, 1))
# Non-linear SVM classification.
# Load the data set from the working directory.
moons <- read.csv(file = "moons.csv")
# Inspect the structure of the data.
str(moons)
# Draw the decision regions of a fitted classifier over a rectangular grid and
# plot the observations on top of them.
#
# Args:
#   model: a fitted model with a predict() method that accepts a data frame
#          whose columns are named like those of X.
#   X:     two-column data frame or matrix of predictors; its column names
#          label the axes and name the grid columns passed to predict().
#   y:     class labels of the rows of X (numeric, character or factor).
#   xlim:  length-2 numeric range of the grid along the first predictor.
#   ylim:  length-2 numeric range of the grid along the second predictor.
#   title: main title drawn after the contour plot (default NA).
#
# Returns: the value of the final title() call; used for its plot.
visualize_classifier <- function(model, X, y, xlim, ylim, title = NA) {
  x1s <- seq(xlim[1], xlim[2], length.out = 200)
  x2s <- seq(ylim[1], ylim[2], length.out = 200)
  grid <- expand.grid(x1s, x2s)
  colnames(grid) <- colnames(X)

  y_pred <- predict(model, grid, type = "class")
  if (!is.factor(y_pred)) {
    y_pred <- factor(y_pred)
  }
  n_class <- nlevels(y_pred)
  # matrix() on a factor yields a character matrix, which filled.contour()
  # cannot contour unless the labels happen to parse as numbers. Use the
  # integer class codes instead. expand.grid() varies x1 fastest, so filling
  # column-wise gives z[i, j] for (x1s[i], x2s[j]).
  z <- matrix(as.integer(y_pred), nrow = length(x1s))

  # One band per class, cut half-way between consecutive codes, so the colours
  # stay tied to the classes even if the grid is predicted as a single class.
  class_breaks <- seq(0.5, n_class + 0.5, by = 1)
  # brewer.pal() provides at least 3 colours; keep the first n_class of them.
  region_cols <- RColorBrewer::brewer.pal(max(3L, n_class), "Pastel1")
  region_cols <- region_cols[seq_len(n_class)]

  # Code the observed labels 1, 2, ... whatever their type, so symbols and
  # colours do not depend on whether y is integer 0/1 or a factor. For 0/1
  # labels this gives pch 16/17 and col 2/3.
  y_code <- as.integer(factor(y))

  filled.contour(x1s, x2s, z,
    levels = class_breaks,
    col = region_cols,
    key.axes = FALSE,
    plot.axes = {
      axis(1)
      axis(2)
      points(X[, 1], X[, 2], pch = y_code + 15, col = y_code + 1, cex = 1.5)
    },
    xlab = colnames(X)[1], ylab = colnames(X)[2]
  )
  title(main = title)
}
# Plotting window shared by all decision-region plots below.
xlim <- c(-1.5, 2.5)
ylim <- c(-1, 1.5)
# Fit a linear SVM classifier.
svm_linear <- svm(
  x = moons[, 1:2], y = factor(moons[, 3]),
  kernel = "linear", degree = 1, cost = 10
)
# Draw its decision regions.
visualize_classifier(
  svm_linear, moons[, 1:2], moons[, 3],
  xlim, ylim,
  title = '线性支持向量机分类'
)
# Fit a non-linear SVM classifier (degree-3 polynomial kernel).
svm_poly <- svm(
  x = moons[, 1:2], y = factor(moons[, 3]),
  kernel = "polynomial", degree = 3, cost = 5
)
# Draw its decision regions.
visualize_classifier(
  svm_poly, moons[, 1:2], moons[, 3],
  xlim, ylim,
  title = '非线性支持向量机分类'
)
# Polynomial kernel with coef0 = 1.
svm_poly1 <- svm(
  x = moons[, 1:2], y = factor(moons[, 3]),
  kernel = "polynomial", degree = 3, cost = 5, coef0 = 1
)
visualize_classifier(
  svm_poly1, moons[, 1:2], moons[, 3],
  xlim, ylim,
  title = '多项式核'
)
# Adding similarity features: radial kernel with three gamma / cost settings.
svm_rbf <- svm(
  x = moons[, 1:2], y = factor(moons[, 3]),
  kernel = "radial", gamma = 0.1, cost = 0.01
)
svm_rbf1 <- svm(
  x = moons[, 1:2], y = factor(moons[, 3]),
  kernel = "radial", gamma = 0.1, cost = 1000
)
svm_rbf2 <- svm(
  x = moons[, 1:2], y = factor(moons[, 3]),
  kernel = "radial", gamma = 5, cost = 1000
)
visualize_classifier(
  svm_rbf, moons[, 1:2], moons[, 3],
  xlim, ylim,
  title = 'gamma = 0.1, cost = 0.01'
)
visualize_classifier(
  svm_rbf1, moons[, 1:2], moons[, 3],
  xlim, ylim,
  title = 'gamma = 0.1, cost = 1000'
)
visualize_classifier(
  svm_rbf2, moons[, 1:2], moons[, 3],
  xlim, ylim,
  title = 'gamma = 5, cost = 1000'
)
# Tuning the support vector machine.
# Grid-search gamma and cost with tune.svm().
# NOTE(review): assumes the label column of moons.csv is named `y`; the code
# above addresses it by position (column 3) - confirm they are the same column.
moons$y <- as.factor(moons$y)
# tune.svm() scores every parameter combination by cross-validation on randomly
# drawn folds, so fix the RNG seed to make the selected parameters (and the
# confusion matrix below) reproducible from run to run.
set.seed(1234)
tuned <- tune.svm(y ~ .,
  data = moons,
  gamma = 10^(-5:-1), cost = 10^(1:3)
)
summary(tuned) # report the tuning results
# Refit the SVM with the best parameter combination found.
model.tuned <- svm(y ~ .,
  data = moons,
  gamma = tuned$best.parameters$gamma,
  cost = tuned$best.parameters$cost
)
# Predict the class of every training observation.
pred <- predict(model.tuned, newdata = moons[, 1:2])
# Confusion matrix of actual vs predicted classes on the training data.
table(
  'actual' = moons$y,
  'prediction' = pred
)