在《深度学习&PyTorch 之 DNN-回归》中,我们使用HR数据集进行了实现,但是HR数据集中只有一个特征变量,这里我们使用多变量数据再进行一次模拟。
流程还是跟前面一样
1.1 数据导入
我们使用波士顿房价预测数据,这是个开源的数据集,所以通用性更强
# Load the Boston house-price table from a CSV file in the working directory.
data = pd.read_csv('./boston_house_prices.csv')
# Bare expression: presumably a notebook cell, where this displays the DataFrame.
data
1.2 数据拆分
from sklearn.model_selection import train_test_split

# Keep 70% of the rows for training; the remainder becomes the test split.
# No random_state is passed, so the split differs from run to run.
train, test = train_test_split(data, train_size=0.7)

# Feature matrices: the same 13 predictor columns from each split, as NumPy arrays.
train_x, test_x = (
    part[['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT']].values
    for part in (train, test)
)

# Targets: the MEDV column reshaped to a column vector of shape (n, 1).
train_y, test_y = (
    part.MEDV.values.reshape(-1, 1)
    for part in (train, test)
)
1.3 To Tensor
# Convert each NumPy array into a float32 tensor. from_numpy yields a CPU
# tensor, so .float() gives the same result as .type(torch.FloatTensor).
train_x, test_x, train_y, test_y = (
    torch.from_numpy(array).float()
    for array in (train_x, test_x, train_y, test_y)
)
1.4 数据重构
from torch.utils.data import DataLoader, TensorDataset

# NOTE(review): `batch` (the mini-batch size) is not defined in this section;
# it must be set before this point.

# Pair each feature row with its target so iteration yields (xb, yb) batches.
train_ds = TensorDataset(train_x, train_y)
# shuffle=True reorders the training rows on every pass over the loader.
train_dl = DataLoader(train_ds, batch_size=batch, shuffle=True)

test_ds = TensorDataset(test_x, test_y)
# The test loader uses batches twice the training size and keeps row order.
test_dl = DataLoader(test_ds, batch_size=batch * 2)
与之前是一样的
1.5 网络定义
class LinearModel(nn.Module):
    """Single-layer linear regression model: ``y = x @ W.T + b``.

    Args:
        in_features: Number of input feature columns. Defaults to 13, the
            number of predictor columns selected from the house-price table.
    """

    def __init__(self, in_features: int = 13):
        super().__init__()
        # One affine layer mapping the feature vector to a single output value.
        self.linear = nn.Linear(in_features, 1)

    def forward(self, inputs):
        """Return predictions of shape ``(batch, 1)`` for ``inputs`` of shape ``(batch, in_features)``."""
        logits = self.linear(inputs)
        return logits
我们这里有13个特征变量,因此线性层的输入维度为13,输出维度为1(即预测的房价 MEDV)。
1.6 训练
def _mean_loss(model, loss_fn, loader):
    """Return the per-sample mean loss of ``model`` over all batches in ``loader``.

    ``loss_fn`` is expected to return the mean over a batch (the default
    reduction of ``nn.MSELoss``). Each batch loss is weighted by its batch
    size so that a shorter final batch does not skew the result.

    Returns:
        The mean loss as a Python float, or ``nan`` if the loader is empty.
    """
    total = 0.0
    count = 0
    for xb, yb in loader:
        n = len(xb)
        total += loss_fn(model(xb), yb).item() * n
        count += n
    return total / count if count else float('nan')


model = LinearModel()
loss_fn = nn.MSELoss()
opt = torch.optim.SGD(model.parameters(), lr=lr)  # plain SGD over the model's parameters

# Loss history, one entry per logged checkpoint (every 10th epoch).
train_loss = []
train_acc = []  # not populated by this loop
test_loss = []
test_acc = []  # not populated by this loop

# range(epochs + 1) so that the final epoch index (== epochs) is also logged.
for epoch in range(epochs + 1):
    model.train()
    for xb, yb in train_dl:
        pred = model(xb)
        loss = loss_fn(pred, yb)
        loss.backward()
        opt.step()
        # Clear gradients after the update so they do not accumulate into the next batch.
        opt.zero_grad()

    if epoch % 10 == 0:
        model.eval()
        # Evaluation only: no gradients are needed for the loss report.
        with torch.no_grad():
            train_epoch_loss = _mean_loss(model, loss_fn, train_dl)
            test_epoch_loss = _mean_loss(model, loss_fn, test_dl)
        train_loss.append(train_epoch_loss)
        test_loss.append(test_epoch_loss)
        template = ("epoch:{:2d}, 训练损失:{:.5f}, 验证损失:{:.5f}")
        print(template.format(epoch, train_epoch_loss, test_epoch_loss))

print('训练完成')
epoch: 0, 训练损失:469.15608, 验证损失:440.95737
epoch:10, 训练损失:101.80890, 验证损失:109.48333
epoch:20, 训练损失:91.18239, 验证损失:100.17014
epoch:30, 训练损失:100.83169, 验证损失:97.70323
epoch:40, 训练损失:89.96843, 验证损失:97.37273
epoch:50, 训练损失:94.20027, 验证损失:96.82300
…
epoch:480, 训练损失:74.97700, 验证损失:81.29946
epoch:490, 训练损失:74.74702, 验证损失:80.76858
epoch:500, 训练损失:89.31947, 验证损失:83.06767
训练完成
1.7 结果展示
import matplotlib.pyplot as plt

# Each list holds one value per logged checkpoint (every 10th epoch). With no
# explicit x, matplotlib plots against the index 0..len-1, which matches the
# range(len(...)) x-axis.
plt.plot(train_loss, label='train_loss')
plt.plot(test_loss, label='test_loss')
plt.legend()