实战:使用岭回归模型(先用线性回归模型作为对比基准)
完整代码:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes  # correct module path is sklearn.datasets

# Baseline: fit an ordinary least-squares model on the diabetes dataset.
# The diabetes data is used (rather than a synthetic make_regression sample)
# so that the printed equation and score line up with the recorded output.
data = load_diabetes()
X, y = data.data, data.target

# A fixed random_state keeps the train/test split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=8)
lr = LinearRegression().fit(X_train, y_train)

# Only coef_[0] and coef_[1] are printed; any further coefficients are omitted.
print(f"y = {lr.coef_[0]:.3f}X1 + {lr.coef_[1]:.3f}X2 + {lr.intercept_:.3f}")
# Score of the fitted model on the held-out test split.
print(f"score = {lr.score(X_test, y_test)}")
输出:
y = 11.512X1 + -282.514X2 + 152.563
score = 0.4593422174874441
调整岭回归的 alpha 参数
完整代码:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import Ridge
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes
# Fetch the diabetes dataset and separate the features from the target.
diabetes = load_diabetes()
X = diabetes.data
y = diabetes.target

# Hold out a test split; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=8)

# Build a ridge model with alpha=0.1, then fit it on the training split.
lr = Ridge(alpha=0.1)
lr.fit(X_train, y_train)

# Report the first two coefficients and the intercept, then the test score.
print(f"y = {lr.coef_[0]:.3f}x1 + {lr.coef_[1]:.3f}x2 + {lr.intercept_:.3f}")
print(f"score = {lr.score(X_test, y_test)}")
输出:
y = 24.780x1 + -228.334x2 + 152.481
score = 0.4734012479881621
绘制学习曲线
完整代码:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import Ridge, LinearRegression
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes
from sklearn.model_selection import learning_curve, KFold
def plot_learning_curve(est, X, y):
    """Plot the mean training and test scores of ``est`` against training-set size.

    The estimator is evaluated with ``learning_curve`` and two lines are added
    through ``plt.plot``: a dashed line for the training score and a solid
    line of the same color for the test score. Axis labels and the y-range
    are set as well; the caller is responsible for the legend and for
    showing the figure.

    Args:
        est: Estimator passed to ``learning_curve``; its class name is used
            in the line labels.
        X: Feature data passed to ``learning_curve``.
        y: Target data passed to ``learning_curve``.

    Returns:
        None.
    """
    # Score the model at 20 training-set fractions (0.1 to 1) with a
    # shuffled 20-fold split; the fixed seed keeps the folds reproducible.
    training_set_size, train_scores, test_scores = learning_curve(
        est,
        X,
        y,
        train_sizes=np.linspace(.1, 1, 20),
        cv=KFold(20, shuffle=True, random_state=1)
    )
    # Use the estimator's class name to label its lines.
    estimator_name = est.__class__.__name__
    # Plot scores averaged over axis 1 of the returned score arrays.
    line = plt.plot(training_set_size, train_scores.mean(axis=1), "--", label="training " + estimator_name)
    # Reuse the training line's color so both lines of one model match.
    plt.plot(training_set_size, test_scores.mean(axis=1), "-", label="test " + estimator_name, c=line[0].get_color())
    plt.xlabel("Training set size")
    plt.ylabel("Score")
    plt.ylim(0, 1.1)
# Load the diabetes data and separate the features from the target.
diabetes = load_diabetes()
X = diabetes.data
y = diabetes.target

# Draw the learning curves of the ridge model and the plain linear model
# on the same figure so they can be compared directly.
for estimator in (Ridge(alpha=1), LinearRegression()):
    plot_learning_curve(estimator, X, y)

# Place the legend above the axes in two columns, then display the figure.
plt.legend(loc=(0, 1.05), ncol=2, fontsize=11)
plt.show()
输出结果如下: