首先导入所需要的库
import sklearn
import matplotlib.pyplot as plt
from matplotlib import font_manager
from matplotlib import rcParams
from sklearn.datasets import load_boston
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
# Use the SimHei font so the Chinese axis labels and title of the plot render.
rcParams['font.family'] = 'SimHei'
# CJK fonts such as SimHei typically lack a glyph for the Unicode minus sign
# (U+2212); use the ASCII hyphen instead so negative tick labels do not show
# up as empty boxes.
rcParams['axes.unicode_minus'] = False
然后加载波士顿房价数据集，并将其拆分为特征 x 和目标值 y：
# Load the Boston housing data as a (features, targets) pair.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this needs an older scikit-learn release -- confirm the pinned version.
boston = load_boston(return_X_y=True)
x = boston[0]
y = boston[1]
# Print the shape of the feature matrix, then of the target vector.
print(x.shape, y.shape, sep='\n')
x 和 y 的维度分别是：
(506, 13) (506,)
接着划分训练集和测试集，用最小二乘线性回归进行训练，并在测试集上计算均方误差：
# Hold out 20% of the samples as a test set; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=2
)

# Fit an ordinary least-squares linear model on the training portion.
# fit() returns the estimator itself, so construction and training are chained.
regression = linear_model.LinearRegression().fit(x_train, y_train)

# Predict the held-out targets and score them with the mean squared error.
y_predict = regression.predict(x_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_predict)
print('mse:', mse)
计算得出
mse: 18.495420122448206
最后画出真实值与预测值的散点图，并以对角线（预测值 = 真实值）作为参照：
# End points of the reference diagonal: the smallest and largest true value.
value_range = [y_test.min(), y_test.max()]

# Scatter predictions against true values; a point lying on the diagonal is a
# prediction equal to its true value.
plt.scatter(x=y_test, y=y_predict, color='blue')
plt.plot(value_range, value_range)

# Axis labels and title (rendered with the font configured in rcParams).
plt.xlabel('真实值')
plt.ylabel('预测值')
plt.title('线性回归')
plt.show()