from sklearn.linear_model import LassoCV, Lasso
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV
# Load the source spreadsheet. `data_stars` is a second name bound to the same
# DataFrame object as `df_stars` (no copy is made).
data_stars = df_stars = pd.read_excel('C:/Users/galax/Desktop/Final_Result.xls')

# Labels the author assigns to the features and to the prediction target.
feature_names = [
    'KNN_derta_V', 'MOID', 'e', 'a', 'q', 'i', 'node', 'peri', 'M', 'tp',
    'period', 'n', 'Price',
]
target_name = ['Profit_numeric']

# Feature matrix: positional columns 1..13. Target: positional column 1.
# NOTE(review): column 1 is also the first column of X, so the target is
# included among the features. This looks like target leakage -- confirm which
# column is really meant to be the target before trusting any score below.
X = data_stars.iloc[:, 1:14]
y = data_stars.iloc[:, 1]

# Hold out 20% of the rows as a test set; the seed is fixed so the split is
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=19,
)
# Baseline Lasso created without an explicit alpha, fitted once on the
# training split.
lasso_0 = Lasso(max_iter=60000)
lasso_0.fit(X_train, y_train)

# Candidate regularisation strengths for the grid search.
param_grid = dict(
    alpha=[0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1, 1, 10, 100, 1000],
)

# Tune alpha with GridSearchCV (cv=7, n_jobs=-1).
lasso_cv = GridSearchCV(estimator=lasso_0, param_grid=param_grid, cv=7, n_jobs=-1)
lasso_cv.fit(X_train, y_train)

# Five additional Lasso models, each with a fixed alpha, fitted in order on the
# same training split. `fit` returns the estimator itself, so the fitted models
# can be unpacked straight into their names.
lasso_cv1, lasso_cv2, lasso_cv3, lasso_cv4, lasso_cv5 = (
    Lasso(alpha=strength, max_iter=10000).fit(X_train, y_train)
    for strength in (0.0001, 0.001, 0.01, 0.1, 1)
)

# Predict on the held-out rows with the grid-searched model and with each
# fixed-alpha model.
y_pred_cv = lasso_cv.predict(X_test)
y_pred_cv1, y_pred_cv2, y_pred_cv3, y_pred_cv4, y_pred_cv5 = (
    fitted.predict(X_test)
    for fitted in (lasso_cv1, lasso_cv2, lasso_cv3, lasso_cv4, lasso_cv5)
)

print("调参后,LassoCV模型的预测值为:", y_pred_cv)
print("调参后,LassoCV1模型的预测值为:", y_pred_cv1)

# Report MAE, MSE and R^2 on the test set, first for the grid-searched model
# and then for lasso_cv1. Both groups end by printing the grid search's best
# parameters (the printed labels are the same for both groups).
# (Original author's note next to these prints: alpha=0.0001 -- not verified here.)
for predicted in (y_pred_cv, y_pred_cv1):
    print("平均绝对误差MAE2=", mean_absolute_error(y_test, predicted))
    print("均方误差MSE2=", mean_squared_error(y_test, predicted))
    print("R平方值2=", r2_score(y_test, predicted))
    print("最佳的alpha=", lasso_cv.best_params_)
import matplotlib.pyplot as plt
# Compare the test-set R^2 of the grid-searched model and the five fixed-alpha
# Lasso models on a single line chart.
model_labels = ['Lasso_cv', 'Lasso_cv1', 'Lasso_cv2', 'Lasso_cv3', 'Lasso_cv4', 'Lasso_cv5']
test_predictions = [
    y_pred_cv,
    y_pred_cv1,
    y_pred_cv2,
    y_pred_cv3,
    y_pred_cv4,
    y_pred_cv5,
]
r2_values = [r2_score(y_test, predicted) for predicted in test_predictions]

plt.figure(figsize=(10, 6))
plt.plot(model_labels, r2_values, marker='o', label='R^2')
plt.xlabel('Models')
plt.ylabel('R^2 Score')
plt.title('Comparison of Model Performance - R^2 Score')
plt.legend()
plt.grid(True)
plt.show()
import numpy as np
from sklearn.linear_model import LassoCV, Lasso
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV
# Read the spreadsheet into a DataFrame; `data_stars` refers to the same
# object as `df_stars`.
df_stars = pd.read_excel('C:/Users/galax/Desktop/Final_Result.xls')
data_stars = df_stars

# Labels the author assigns to the features and to the prediction target.
feature_names = [
    'KNN_derta_V', 'MOID', 'e', 'a', 'q', 'i', 'node', 'peri', 'M', 'tp',
    'period', 'n', 'Price',
]
target_name = ['Profit']

# Positional selection: column 0 is the target, columns 1..13 are the features.
y = data_stars.iloc[:, 0]
X = data_stars.iloc[:, 1:14]

# 80/20 train/test split with a fixed seed so reruns produce the same
# partition.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=19,
)
# Baseline Lasso created without an explicit alpha, fitted once on the
# training split.
lasso_0 = Lasso(max_iter=60000)
lasso_0.fit(X_train, y_train)

# Candidate regularisation strengths for the grid search.
param_grid = dict(
    alpha=[0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1, 1, 10, 100, 1000],
)

# Tune alpha with GridSearchCV (cv=7, n_jobs=-1).
lasso_cv = GridSearchCV(estimator=lasso_0, param_grid=param_grid, cv=7, n_jobs=-1)
lasso_cv.fit(X_train, y_train)

# Five additional Lasso models, each with a fixed alpha, fitted in order on the
# same training split. `fit` returns the estimator itself, so the fitted models
# can be unpacked straight into their names.
lasso_cv1, lasso_cv2, lasso_cv3, lasso_cv4, lasso_cv5 = (
    Lasso(alpha=strength, max_iter=10000).fit(X_train, y_train)
    for strength in (0.0001, 0.001, 0.01, 0.1, 1)
)

# Predict on the held-out rows with the grid-searched model and with each
# fixed-alpha model.
y_pred_cv = lasso_cv.predict(X_test)
y_pred_cv1, y_pred_cv2, y_pred_cv3, y_pred_cv4, y_pred_cv5 = (
    fitted.predict(X_test)
    for fitted in (lasso_cv1, lasso_cv2, lasso_cv3, lasso_cv4, lasso_cv5)
)

print("调参后,LassoCV模型的预测值为:", y_pred_cv)
print("调参后,LassoCV1模型的预测值为:", y_pred_cv1)

# Report MAE, MSE and R^2 on the test set, first for the grid-searched model
# and then for lasso_cv1. Both groups end by printing the grid search's best
# parameters (the printed labels are the same for both groups).
# (Original author's note next to these prints: alpha=0.0001 -- not verified here.)
for predicted in (y_pred_cv, y_pred_cv1):
    print("平均绝对误差MAE2=", mean_absolute_error(y_test, predicted))
    print("均方误差MSE2=", mean_squared_error(y_test, predicted))
    print("R平方值2=", r2_score(y_test, predicted))
    print("最佳的alpha=", lasso_cv.best_params_)
import matplotlib.pyplot as plt
# Line chart of test-set R^2 for the grid-searched model and the five
# fixed-alpha Lasso models.
r2_by_model = {
    'Lasso_cv': r2_score(y_test, y_pred_cv),
    'Lasso_cv1': r2_score(y_test, y_pred_cv1),
    'Lasso_cv2': r2_score(y_test, y_pred_cv2),
    'Lasso_cv3': r2_score(y_test, y_pred_cv3),
    'Lasso_cv4': r2_score(y_test, y_pred_cv4),
    'Lasso_cv5': r2_score(y_test, y_pred_cv5),
}

plt.figure(figsize=(10, 6))
# Dicts preserve insertion order, so labels and scores stay aligned.
plt.plot(list(r2_by_model.keys()), list(r2_by_model.values()), marker='o', label='R^2')
plt.xlabel('Models')
plt.ylabel('R^2 Score')
plt.title('Comparison of Model Performance - R^2 Score')
plt.legend()
plt.grid(True)
plt.show()
# Lasso regularisation path: refit one estimator over a log-spaced range of
# alpha values and record the coefficient vector obtained at each alpha.
lasso_x = Lasso(alpha=0.1)
alphas = np.logspace(-3, 0, 100)  # 100 values from 1e-3 up to 1
coefs = []
for a in alphas:
    # The loop body must be indented; without it the file does not compile.
    lasso_x.set_params(alpha=a)
    lasso_x.fit(X_train, y_train)
    coefs.append(lasso_x.coef_)

# Plot the path with one line per feature. Each line is labelled with its
# feature name (the same name-to-column pairing the bar chart below uses) so
# the legend identifies which coefficient is which.
plt.figure(figsize=(10, 6))
ax = plt.gca()
for feature_label, coef_path in zip(feature_names, np.asarray(coefs).T):
    ax.plot(alphas, coef_path, marker='o', label=feature_label)
ax.set_xscale('log')
ax.set_xlim(ax.get_xlim()[::-1])  # reverse the x-axis: largest alpha on the left
plt.xlabel('alpha')
plt.ylabel('weights')
plt.axis('tight')
ax.legend()
plt.show()

# Bar chart of the coefficients currently held by lasso_x, i.e. those from the
# last loop iteration above (alpha == alphas[-1]).
plt.bar(range(len(lasso_x.coef_)), lasso_x.coef_)
plt.xticks(range(len(lasso_x.coef_)), feature_names)
plt.show()