SVM
- 线性可分SVM
- 题目
- 绘制决策边界
- 改变C,观察决策边界
- 代码
- 线性不可分SVM
- 核函数
- 代码
- 寻找最优C、gamma
- 垃圾邮件过滤
线性可分SVM
题目
数据分布
绘制决策边界
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as sio
from scipy.optimize import minimize
import pandas as pd
from sklearn.svm import SVC
def plot_data():
    """Scatter the samples held in the module-level ``x`` and ``y``.

    Column 0 of ``x`` goes on the horizontal axis and column 1 on the
    vertical axis; the flattened ``y`` selects each point's colour.
    """
    horizontal, vertical = x[:, 0], x[:, 1]
    colours = y.flatten()
    plt.scatter(horizontal, vertical, c=colours, cmap='jet')
    plt.xlabel('x1')
    plt.ylabel('y1')
"""
绘制决策边界
"""
def plot_boundary(model, x_range=(-0.5, 4.5), y_range=(1.3, 5), resolution=500):
    """Draw the decision boundary of a fitted classifier.

    The classifier is evaluated on a regular grid and the predicted values
    are contoured, so the contour lines follow the places where the
    prediction changes.

    Args:
        model: Object with a ``predict`` method that accepts a two-column
            array and returns one value per row.
        x_range: ``(min, max)`` of the grid along the horizontal axis.
        y_range: ``(min, max)`` of the grid along the vertical axis.
        resolution: Number of grid points along each axis.
    """
    xx, yy = np.meshgrid(np.linspace(*x_range, resolution),
                         np.linspace(*y_range, resolution))
    # One row per grid node: (horizontal coordinate, vertical coordinate).
    grid_points = np.c_[xx.flatten(), yy.flatten()]
    zz = model.predict(grid_points).reshape(xx.shape)
    plt.contour(xx, yy, zz)
# Load the first data set: 'X' holds the samples, 'y' the labels.
data = sio.loadmat("E:/学习/研究生阶段/python-learning/吴恩达机器学习课后作业/code/ex6-SVM/data/ex6data1.mat")
x, y = data['X'], data['y']

plot_data()

# Linear SVM with C=1: print the accuracy on the training data and draw the
# boundary on the same figure as the scatter plot.
svc1 = SVC(C=1, kernel='linear')
svc1.fit(x, y.flatten())
print(svc1.score(x, y.flatten()))
plot_boundary(svc1)
plt.show()
改变C,观察决策边界
# Same linear SVM with a much larger penalty, C=100.
svc100 = SVC(C=100, kernel='linear')
svc100.fit(x, y.flatten())
print(svc100.score(x, y.flatten()))
# The earlier plt.show() already displayed the previous figure, so the data
# points are scattered again; otherwise this figure would contain only the
# contour line.
plot_data()
plot_boundary(svc100)
plt.show()
代码
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as sio
from scipy.optimize import minimize
import pandas as pd
from sklearn.svm import SVC
def plot_data():
    """Scatter-plot the module-level samples ``x``, coloured by ``y``.

    The first column of ``x`` is the horizontal coordinate, the second the
    vertical one; ``y`` is flattened before being used as the colour values.
    """
    colour_values = y.flatten()
    col0 = x[:, 0]
    col1 = x[:, 1]
    plt.scatter(col0, col1, c=colour_values, cmap='jet')
    plt.xlabel('x1')
    plt.ylabel('y1')
"""
绘制决策边界
"""
def plot_boundary(model, x_range=(-0.5, 4.5), y_range=(1.3, 5), resolution=500):
    """Contour the predictions of ``model`` over a rectangular grid.

    Args:
        model: Fitted object exposing ``predict`` for a two-column array,
            returning one value per row.
        x_range: ``(min, max)`` of the grid along the horizontal axis.
        y_range: ``(min, max)`` of the grid along the vertical axis.
        resolution: Number of grid points along each axis.
    """
    x_min, x_max = x_range
    y_min, y_max = y_range
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, resolution),
                         np.linspace(y_min, y_max, resolution))
    # Predict every grid node at once, then restore the grid shape so the
    # result can be contoured against xx / yy.
    z = model.predict(np.c_[xx.flatten(), yy.flatten()])
    plt.contour(xx, yy, z.reshape(xx.shape))
# Load the first data set: 'X' holds the samples, 'y' the labels.
data = sio.loadmat("E:/学习/研究生阶段/python-learning/吴恩达机器学习课后作业/code/ex6-SVM/data/ex6data1.mat")
x, y = data['X'], data['y']
# The estimator calls below all receive the flattened labels; flatten once.
y_flat = y.flatten()

plot_data()

# Both boundaries end up on the same figure as the data, because plt.show()
# is only called once, at the very end.
svc1 = SVC(C=1, kernel='linear')
svc1.fit(x, y_flat)
print(svc1.score(x, y_flat))
plot_boundary(svc1)

svc100 = SVC(C=100, kernel='linear')
svc100.fit(x, y_flat)
print(svc100.score(x, y_flat))
plot_boundary(svc100)
plt.show()
线性不可分SVM
核函数
数据集
# RBF-kernel SVM with C=1 and gamma=1, fitted on the current x / y.
svc1 = SVC(C=1, kernel='rbf', gamma=1)
svc1.fit(x, y.flatten())
# Print the accuracy on the training data; as a bare expression the value
# was computed and then thrown away.
print(svc1.score(x, y.flatten()))
plot_boundary(svc1)
plot_data()
plt.show()
Gamma越小,模型复杂度越低(决策边界更平滑,容易欠拟合);Gamma越大,模型复杂度越高(决策边界更曲折,容易过拟合)。
gamma=1
gamma=50
gamma=1000
代码
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as sio
from scipy.optimize import minimize
import pandas as pd
from sklearn.svm import SVC
def plot_data():
    """Draw the module-level samples ``x`` as a scatter plot.

    Points are positioned by the first two columns of ``x`` and coloured by
    the flattened module-level ``y``.
    """
    xs = x[:, 0]
    ys = x[:, 1]
    plt.scatter(xs, ys, c=y.flatten(), cmap='jet')
    plt.xlabel('x1')
    plt.ylabel('y1')
def plot_boundary(model, x_range=(0, 1), y_range=(0.4, 1), resolution=500):
    """Draw the decision boundary of a fitted classifier.

    The classifier is evaluated on a regular grid and its predictions are
    contoured.

    Args:
        model: Object with a ``predict`` method that accepts a two-column
            array and returns one value per row.
        x_range: ``(min, max)`` of the grid along the horizontal axis.
        y_range: ``(min, max)`` of the grid along the vertical axis.
        resolution: Number of grid points along each axis.
    """
    xx, yy = np.meshgrid(np.linspace(*x_range, resolution),
                         np.linspace(*y_range, resolution))
    # One row per grid node: (horizontal coordinate, vertical coordinate).
    grid_points = np.c_[xx.flatten(), yy.flatten()]
    zz = model.predict(grid_points).reshape(xx.shape)
    plt.contour(xx, yy, zz)
# Load the second data set: 'X' holds the samples, 'y' the labels.
data = sio.loadmat("E:/学习/研究生阶段/python-learning/吴恩达机器学习课后作业/code/ex6-SVM/data/ex6data2.mat")
x, y = data['X'], data['y']

# RBF-kernel SVM with C=1 and gamma=1000.
svc1 = SVC(C=1, kernel='rbf', gamma=1000)
svc1.fit(x, y.flatten())
# Print the accuracy on the training data; as a bare expression the value
# was computed and then thrown away.
print(svc1.score(x, y.flatten()))
plot_boundary(svc1)
plot_data()
plt.show()
寻找最优C、gamma
数据集
# Candidate values for the penalty C and for the RBF kernel's gamma.
Cvalues = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100]
gammas = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100]

# Best validation accuracy seen so far and the (C, gamma) that produced it.
best_score = 0
best_params = (0, 0)

# The label vectors do not change inside the loops, so flatten them once.
y_train = y.flatten()
y_valid = yval.flatten()

# Exhaustive search: fit on (x, y), score on the validation set (xval, yval).
for c in Cvalues:
    for gamma in gammas:
        svc = SVC(C=c, kernel='rbf', gamma=gamma)
        svc.fit(x, y_train)
        score = svc.score(xval, y_valid)
        # Strict '>' keeps the first pair encountered when scores tie.
        if score > best_score:
            best_score = score
            best_params = (c, gamma)
print(best_score, best_params)
结果
0.965 (0.3, 100)
绘制决策边界
# Fit an RBF SVM with the selected (C, gamma) pair, then draw its boundary
# followed by the data points. fit() returns the estimator itself, so the
# construction and fitting can be chained.
svc1 = SVC(kernel='rbf', C=best_params[0], gamma=best_params[1]).fit(x, y.flatten())
plot_boundary(svc1)
plot_data()
plt.show()
全部代码
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as sio
from scipy.optimize import minimize
import pandas as pd
from sklearn.svm import SVC
def plot_boundary(model, x_range=(-0.6, 0.4), y_range=(-0.7, 0.6), resolution=500):
    """Contour the predictions of ``model`` over a rectangular grid.

    Args:
        model: Fitted object exposing ``predict`` for a two-column array,
            returning one value per row.
        x_range: ``(min, max)`` of the grid along the horizontal axis.
        y_range: ``(min, max)`` of the grid along the vertical axis.
        resolution: Number of grid points along each axis.
    """
    x_min, x_max = x_range
    y_min, y_max = y_range
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, resolution),
                         np.linspace(y_min, y_max, resolution))
    # Predict every grid node at once, then restore the grid shape so the
    # result can be contoured against xx / yy.
    z = model.predict(np.c_[xx.flatten(), yy.flatten()])
    plt.contour(xx, yy, z.reshape(xx.shape))
def plot_data():
    """Scatter the module-level samples ``x``, coloured by the flattened ``y``.

    Column 0 of ``x`` is plotted horizontally and column 1 vertically.
    """
    feature_a, feature_b = x[:, 0], x[:, 1]
    plt.scatter(feature_a, feature_b, c=y.flatten(), cmap='jet')
    plt.xlabel('x1')
    plt.ylabel('y1')
# Load the third data set: training samples/labels plus a validation set.
mat = sio.loadmat("E:/学习/研究生阶段/python-learning/吴恩达机器学习课后作业/code/ex6-SVM/data/ex6data3.mat")
x, y = mat['X'], mat['y']
xval, yval = mat['Xval'], mat['yval']

# Candidate values for the penalty C and for the RBF kernel's gamma.
Cvalues = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100]
gammas = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100]

# Best validation accuracy seen so far and the (C, gamma) that produced it.
best_score = 0
best_params = (0, 0)

# The label vectors do not change inside the loops, so flatten them once.
y_train = y.flatten()
y_valid = yval.flatten()

# Exhaustive search: fit on the training set, score on the validation set.
for c in Cvalues:
    for gamma in gammas:
        svc = SVC(C=c, kernel='rbf', gamma=gamma)
        svc.fit(x, y_train)
        score = svc.score(xval, y_valid)
        # Strict '>' keeps the first pair encountered when scores tie.
        if score > best_score:
            best_score = score
            best_params = (c, gamma)
print(best_score, best_params)

# Refit with the selected pair and show its boundary together with the data.
# The data is scattered once here; no figure is shown before this point, so
# an earlier plot_data() call would only draw the same points twice.
svc1 = SVC(C=best_params[0], kernel='rbf', gamma=best_params[1])
svc1.fit(x, y_train)
plot_boundary(svc1)
plot_data()
plt.show()
垃圾邮件过滤
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as sio
from scipy.optimize import minimize
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

# Training and test sets for the spam classifier.
data1 = sio.loadmat("E:/学习/研究生阶段/python-learning/吴恩达机器学习课后作业/code/ex6-SVM/data/spamTrain.mat")
x, y = data1['X'], data1['y']
data2 = sio.loadmat("E:/学习/研究生阶段/python-learning/吴恩达机器学习课后作业/code/ex6-SVM/data/spamTest.mat")
xtest, ytest = data2['Xtest'], data2['ytest']

y_flat = y.flatten()
ytest_flat = ytest.flatten()

# Hold out part of the TRAINING data for choosing C. Picking C by its score
# on the test set and then reporting that same test score would make the
# reported test accuracy optimistically biased.
x_fit, x_val, y_fit, y_val = train_test_split(
    x, y_flat, test_size=0.2, random_state=0, stratify=y_flat)

Cvalues = [3, 10, 30, 100, 0.01, 0.03, 0.1, 0.3, 1]
best_score = 0
best_param = 0
for c in Cvalues:
    svc = SVC(C=c, kernel='linear')
    svc.fit(x_fit, y_fit)
    score = svc.score(x_val, y_val)
    # Strict '>' keeps the first C encountered when scores tie.
    if score > best_score:
        best_score = score
        best_param = c

# Refit on the full training set with the selected C; the test set is used
# only here, for the final report.
svc2 = SVC(C=best_param, kernel='linear')
svc2.fit(x, y_flat)
score_train = svc2.score(x, y_flat)
score_test = svc2.score(xtest, ytest_flat)
print(score_train, score_test)