nn.LSTM() 函数详解
nn.LSTM 是 PyTorch 中用于创建长短期记忆(Long Short-Term Memory,LSTM)模型的类。LSTM 是一种循环神经网络(Recurrent Neural Network,RNN)的变体,用于处理序列数据,能够有效地捕捉长期依赖关系。
语法
torch.nn.LSTM(input_size, hidden_size, num_layers=1,
bias=True, batch_first=False,
dropout=0, bidirectional=False)
●input_size: 输入特征的维度。
●hidden_size: 隐藏状态的维度,也是输出特征的维度。
●num_layers(可选参数): LSTM 层的数量,默认为 1。
●bias(可选参数): 是否使用偏置,默认为 True。
●batch_first(可选参数): 如果为 True,则输入和输出张量的形状为 (batch_size, seq_len, feature_size),默认为 False。
●dropout(可选参数): 如果非零,将在除最后一层之外的每个 LSTM 层的输出上应用 dropout,防止过拟合;因此只有在 num_layers 大于 1 时才会生效。默认为 0。
●bidirectional(可选参数): 如果为 True,则使用双向 LSTM,输出维度将翻倍。默认为 False。
示例
import torch
import torch.nn as nn
# Hyper-parameters of a small unidirectional, two-layer LSTM demo.
input_size, hidden_size, num_layers = 10, 20, 2
batch_size, seq_len = 3, 5

lstm = nn.LSTM(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
)

# batch_first keeps its default (False), so the input is laid out as
# (seq_len, batch_size, input_size).
input_tensor = torch.randn(seq_len, batch_size, input_size)

# Random initial hidden and cell states; both share one shape.
state_shape = (num_layers, batch_size, hidden_size)
h0 = torch.randn(state_shape)
c0 = torch.randn(state_shape)

# Run the whole sequence through the LSTM.
output, final_state = lstm(input_tensor, (h0, c0))
hn, cn = final_state

print("Output shape:", output.shape)  # shape of the per-step output features
print("Hidden state shape:", hn.shape)  # shape of the hidden state after the last time step
print("Cell state shape:", cn.shape)  # shape of the cell state after the last time step
代码输出
Output shape: torch.Size([5, 3, 20])
Hidden state shape: torch.Size([2, 3, 20])
Cell state shape: torch.Size([2, 3, 20])
注意事项
●input_size 指定了输入数据的特征维度,hidden_size 指定了 LSTM 层的隐藏状态维度,num_layers 指定了 LSTM 的层数。
●LSTM 的输入张量的形状通常是 (seq_len, batch_size, input_size),但如果设置了 batch_first=True,则形状为 (batch_size, seq_len, input_size)。
●LSTM 的输出包括输出张量和最后一个时间步的隐藏状态和细胞状态。
●可以通过 bidirectional=True 参数创建双向 LSTM,它会将输入序列分别从前向和后向传播,并将两个方向的隐藏状态拼接在一起作为输出。
●在使用 LSTM 时,通常需要注意输入数据的序列长度,以及是否需要对输入数据进行填充或截断,以保证输入数据的长度是一致的。
要求:
1.了解LSTM是什么,并使用其构建一个完整的程序
2.R2达到0.83
拔高:
1.使用第1 ~ 8个时刻的数据预测第9 ~ 10个时刻的温度数据
我的环境:
●语言环境:Python3.8
●编译器:Jupyter Lab
●深度学习框架:torch 1.10.2 (cpu)
●数据:火灾温度数据集
一、前期准备工作
import torch.nn.functional as F
import numpy as np
import pandas as pd
import torch
from torch import nn
- 导入数据
# Load the fire-temperature dataset; the path is relative to the notebook's
# working directory.
data = pd.read_csv("./R2/woodpine2.csv")
# Bare expression: Jupyter renders the DataFrame as the cell output.
data
代码输出
Time | Tem1 | CO 1 | Soot 1 | |
---|---|---|---|---|
0 | 0.000 | 25.0 | 0.000000 | 0.000000 |
1 | 0.228 | 25.0 | 0.000000 | 0.000000 |
2 | 0.456 | 25.0 | 0.000000 | 0.000000 |
3 | 0.685 | 25.0 | 0.000000 | 0.000000 |
4 | 0.913 | 25.0 | 0.000000 | 0.000000 |
... | ... | ... | ... | ... |
5943 | 366.000 | 295.0 | 0.000077 | 0.000496 |
5944 | 366.000 | 294.0 | 0.000077 | 0.000494 |
5945 | 367.000 | 292.0 | 0.000077 | 0.000491 |
5946 | 367.000 | 291.0 | 0.000076 | 0.000489 |
5947 | 367.000 | 290.0 | 0.000076 | 0.000487 |
5948 rows × 4 columns
- 数据集可视化
import matplotlib.pyplot as plt
import seaborn as sns

# Use 500 dpi both for saved images and for on-screen figures.
for dpi_key in ('savefig.dpi', 'figure.dpi'):
    plt.rcParams[dpi_key] = 500

# One line plot per measured series, laid out side by side.
plot_columns = ("Tem1", "CO 1", "Soot 1")
fig, ax = plt.subplots(1, 3, constrained_layout=True, figsize=(14, 3))
for axis, column in zip(ax, plot_columns):
    sns.lineplot(data=data[column], ax=axis)
plt.show()
代码输出
# Keep every column except the first one (Time).
dataFrame = data.iloc[:,1:]
# Bare expression: Jupyter renders the DataFrame as the cell output.
dataFrame
代码输出
Tem1 | CO 1 | Soot 1 | |
---|---|---|---|
0 | 25.0 | 0.000000 | 0.000000 |
1 | 25.0 | 0.000000 | 0.000000 |
2 | 25.0 | 0.000000 | 0.000000 |
3 | 25.0 | 0.000000 | 0.000000 |
4 | 25.0 | 0.000000 | 0.000000 |
... | ... | ... | ... |
5943 | 295.0 | 0.000077 | 0.000496 |
5944 | 294.0 | 0.000077 | 0.000494 |
5945 | 292.0 | 0.000077 | 0.000491 |
5946 | 291.0 | 0.000076 | 0.000489 |
5947 | 290.0 | 0.000076 | 0.000487 |
5948 rows × 3 columns
二、构建数据集
- 数据集预处理
from sklearn.preprocessing import MinMaxScaler

dataFrame = data.iloc[:, 1:].copy()

# Scale every column to [0, 1] with its OWN scaler. Re-fitting a single shared
# scaler per column would leave it holding only the statistics of whichever
# column happened to be processed last.
# NOTE(review): the scalers are fit on all rows, i.e. before any train/test
# split, so statistics of the later (test) rows leak into the scaling.
scalers = {}
for column in ['CO 1', 'Soot 1', 'Tem1']:
    scalers[column] = MinMaxScaler(feature_range=(0, 1))
    dataFrame[column] = scalers[column].fit_transform(
        dataFrame[column].values.reshape(-1, 1)
    ).ravel()

# `sc` is the temperature scaler: it is the one needed to map predictions of
# Tem1 back to degrees with sc.inverse_transform(...).
sc = scalers['Tem1']

dataFrame.shape
代码输出
(5948, 3)
- 设置X、y
width_X = 8
width_y = 1

# Each sample uses `width_X` consecutive rows of (Tem1, CO 1, Soot 1) as input
# and the Tem1 value(s) of the following `width_y` row(s) as target.
# NOTE: a window is kept only while in_start + width_X + width_y is strictly
# smaller than the number of rows, so the very last complete window (whose
# target is the final row) is not generated.
n_windows = min(len(data), len(dataFrame) - width_X - width_y)
window_starts = range(n_windows)

X = np.array([
    dataFrame.iloc[start:start + width_X].to_numpy()
    for start in window_starts
])
y = np.array([
    dataFrame.iloc[start + width_X:start + width_X + width_y, 0].to_numpy()
    for start in window_starts
]).reshape(-1, 1, 1)

X.shape, y.shape
代码输出
((5939, 8, 3), (5939, 1, 1))
检查数据集中是否有空值
# Report, for the inputs and then the targets, whether any value is NaN.
for samples in (X, y):
    print(np.isnan(samples).any())
代码输出
False
False
- 划分数据集
# Chronological split: the first `train_size` windows are used for training,
# everything after them is held out for testing.
train_size = 5000

X_train, X_test = (
    torch.tensor(part, dtype=torch.float32)
    for part in (X[:train_size], X[train_size:])
)
y_train, y_test = (
    torch.tensor(part, dtype=torch.float32)
    for part in (y[:train_size], y[train_size:])
)

X_train.shape, y_train.shape
代码输出
(torch.Size([5000, 8, 3]), torch.Size([5000, 1, 1]))
from torch.utils.data import TensorDataset, DataLoader

# Both loaders use batches of 64 and keep the samples in their original order.
loader_options = dict(batch_size=64, shuffle=False)

train_dl = DataLoader(TensorDataset(X_train, y_train), **loader_options)
test_dl = DataLoader(TensorDataset(X_test, y_test), **loader_options)
三、模型训练
- 构建模型
class model_lstm(nn.Module):
    """Two stacked single-layer LSTMs followed by a linear head.

    The linear head maps every time step's hidden vector to one value; the
    model returns only the trailing ``out_steps`` of those per-step values.

    Args:
        input_size: number of features per time step.
        hidden_size: hidden width shared by both LSTM layers.
        out_steps: how many trailing time steps of the output to return
            (1 = single-step prediction).

    Raises:
        ValueError: if ``out_steps`` is smaller than 1.
    """

    def __init__(self, input_size: int = 3, hidden_size: int = 320,
                 out_steps: int = 1):
        super().__init__()
        # A value of 0 would turn the slice `-0:` into "keep every step",
        # silently returning the wrong shape, so reject it up front.
        if out_steps < 1:
            raise ValueError("out_steps must be >= 1")
        self.out_steps = out_steps
        self.lstm0 = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                             num_layers=1, batch_first=True)
        self.lstm1 = nn.LSTM(input_size=hidden_size, hidden_size=hidden_size,
                             num_layers=1, batch_first=True)
        self.fc0 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Map a (batch, seq_len, input_size) tensor to (batch, out_steps, 1)."""
        out, hidden1 = self.lstm0(x)
        # The second LSTM starts from the final (h, c) state of the first one.
        out, _ = self.lstm1(out, hidden1)
        out = self.fc0(out)
        # fc0 yields one value per input time step; keep only the last
        # `out_steps` of them as the prediction.
        return out[:, -self.out_steps:, :]


model = model_lstm()
model
代码输出
model_lstm(
(lstm0): LSTM(3, 320, batch_first=True)
(lstm1): LSTM(320, 320, batch_first=True)
(fc0): Linear(in_features=320, out_features=1, bias=True)
)
让我们看看模型的输出数据集格式是什么
# Smoke test: push a random batch of 30 windows (8 steps x 3 features) through
# the model; the bare expression shows the resulting output shape.
model(torch.rand(30,8,3)).shape
代码输出
torch.Size([30, 1, 1])
- 定义训练函数
# 训练循环
import copy
def train(train_dl, model, loss_fn, opt, lr_scheduler=None):
    """Run one training epoch and return the mean per-batch loss.

    Args:
        train_dl: sized iterable of ``(x, y)`` tensor batches.
        model: the network to optimise; batches are moved to its device.
        loss_fn: callable ``loss_fn(pred, y)`` returning a scalar tensor.
        opt: optimizer bound to ``model``'s parameters.
        lr_scheduler: optional scheduler; when given it is stepped once per
            call (i.e. once per epoch) and the new learning rate is printed.

    Returns:
        The sum of the batch losses divided by the number of batches.
    """
    # Follow the model's own device instead of depending on a module-level
    # `device` variable that has to be defined before the first call.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    num_batches = len(train_dl)
    train_loss = 0.0  # running sum of the batch losses

    for x, y in train_dl:
        x, y = x.to(device), y.to(device)

        # Forward pass and loss.
        pred = model(x)
        loss = loss_fn(pred, y)

        # Backward pass and parameter update.
        opt.zero_grad()
        loss.backward()
        opt.step()

        train_loss += loss.item()

    if lr_scheduler is not None:
        lr_scheduler.step()
        print("learning rate = {:.5f}".format(opt.param_groups[0]['lr']), end=" ")

    train_loss /= num_batches
    return train_loss
- 定义测试函数
def test (dataloader, model, loss_fn):
size = len(dataloader.dataset) # 测试集的大小
num_batches = len(dataloader) # 批次数目
test_loss = 0
# 当不进行训练时,停止梯度更新,节省计算内存消耗
with torch.no_grad():
for x, y in dataloader:
x, y = x.to(device), y.to(device)
# 计算loss
y_pred = model(x)
loss = loss_fn(y_pred, y)
test_loss += loss.item()
test_loss /= num_batches
return test_loss
- 正式训练模型
# Train on the GPU when CUDA is available, otherwise fall back to the CPU.
device=torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Bare expression: Jupyter shows the selected device as the cell output.
device
代码输出
device(type='cpu')
# Train the model.
learn_rate = 1e-1  # initial learning rate
epochs = 50

model = model_lstm().to(device)
loss_fn = nn.MSELoss()  # mean squared error between prediction and target
opt = torch.optim.SGD(model.parameters(), lr=learn_rate, weight_decay=1e-4)
# Cosine annealing over the whole run; train() steps it once per epoch.
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(opt, epochs, last_epoch=-1)

# Per-epoch loss history, used later for plotting.
train_loss, test_loss = [], []
template = ('Epoch:{:2d}, Train_loss:{:.5f}, Test_loss:{:.5f}')

for epoch in range(epochs):
    model.train()
    epoch_train_loss = train(train_dl, model, loss_fn, opt, lr_scheduler)

    model.eval()
    epoch_test_loss = test(test_dl, model, loss_fn)

    train_loss.append(epoch_train_loss)
    test_loss.append(epoch_test_loss)

    print(template.format(epoch+1, epoch_train_loss, epoch_test_loss))

print("="*20, 'Done', "="*20)
learning rate = 0.09990 Epoch: 1, Train_loss:0.00131, Test_loss:0.01248
learning rate = 0.09961 Epoch: 2, Train_loss:0.01448, Test_loss:0.01209
learning rate = 0.09911 Epoch: 3, Train_loss:0.01415, Test_loss:0.01168
learning rate = 0.09843 Epoch: 4, Train_loss:0.01377, Test_loss:0.01124
learning rate = 0.09755 Epoch: 5, Train_loss:0.01334, Test_loss:0.01074
learning rate = 0.09649 Epoch: 6, Train_loss:0.01283, Test_loss:0.01019
learning rate = 0.09524 Epoch: 7, Train_loss:0.01222, Test_loss:0.00956
learning rate = 0.09382 Epoch: 8, Train_loss:0.01150, Test_loss:0.00886
learning rate = 0.09222 Epoch: 9, Train_loss:0.01067, Test_loss:0.00810
learning rate = 0.09045 Epoch:10, Train_loss:0.00972, Test_loss:0.00727
learning rate = 0.08853 Epoch:11, Train_loss:0.00867, Test_loss:0.00640
learning rate = 0.08645 Epoch:12, Train_loss:0.00754, Test_loss:0.00552
learning rate = 0.08423 Epoch:13, Train_loss:0.00638, Test_loss:0.00467
learning rate = 0.08187 Epoch:14, Train_loss:0.00523, Test_loss:0.00387
learning rate = 0.07939 Epoch:15, Train_loss:0.00417, Test_loss:0.00316
learning rate = 0.07679 Epoch:16, Train_loss:0.00322, Test_loss:0.00255
learning rate = 0.07409 Epoch:17, Train_loss:0.00242, Test_loss:0.00205
learning rate = 0.07129 Epoch:18, Train_loss:0.00178, Test_loss:0.00166
learning rate = 0.06841 Epoch:19, Train_loss:0.00129, Test_loss:0.00136
learning rate = 0.06545 Epoch:20, Train_loss:0.00093, Test_loss:0.00113
learning rate = 0.06243 Epoch:21, Train_loss:0.00067, Test_loss:0.00097
learning rate = 0.05937 Epoch:22, Train_loss:0.00049, Test_loss:0.00085
learning rate = 0.05627 Epoch:23, Train_loss:0.00036, Test_loss:0.00077
learning rate = 0.05314 Epoch:24, Train_loss:0.00028, Test_loss:0.00071
learning rate = 0.05000 Epoch:25, Train_loss:0.00022, Test_loss:0.00066
learning rate = 0.04686 Epoch:26, Train_loss:0.00018, Test_loss:0.00063
learning rate = 0.04373 Epoch:27, Train_loss:0.00016, Test_loss:0.00060
learning rate = 0.04063 Epoch:28, Train_loss:0.00014, Test_loss:0.00058
learning rate = 0.03757 Epoch:29, Train_loss:0.00013, Test_loss:0.00057
learning rate = 0.03455 Epoch:30, Train_loss:0.00012, Test_loss:0.00056
learning rate = 0.03159 Epoch:31, Train_loss:0.00012, Test_loss:0.00055
learning rate = 0.02871 Epoch:32, Train_loss:0.00011, Test_loss:0.00054
learning rate = 0.02591 Epoch:33, Train_loss:0.00011, Test_loss:0.00054
learning rate = 0.02321 Epoch:34, Train_loss:0.00011, Test_loss:0.00053
learning rate = 0.02061 Epoch:35, Train_loss:0.00011, Test_loss:0.00053
learning rate = 0.01813 Epoch:36, Train_loss:0.00012, Test_loss:0.00053
learning rate = 0.01577 Epoch:37, Train_loss:0.00012, Test_loss:0.00053
learning rate = 0.01355 Epoch:38, Train_loss:0.00012, Test_loss:0.00054
learning rate = 0.01147 Epoch:39, Train_loss:0.00012, Test_loss:0.00054
learning rate = 0.00955 Epoch:40, Train_loss:0.00013, Test_loss:0.00055
learning rate = 0.00778 Epoch:41, Train_loss:0.00013, Test_loss:0.00055
learning rate = 0.00618 Epoch:42, Train_loss:0.00013, Test_loss:0.00056
learning rate = 0.00476 Epoch:43, Train_loss:0.00014, Test_loss:0.00056
learning rate = 0.00351 Epoch:44, Train_loss:0.00014, Test_loss:0.00057
learning rate = 0.00245 Epoch:45, Train_loss:0.00014, Test_loss:0.00057
learning rate = 0.00157 Epoch:46, Train_loss:0.00014, Test_loss:0.00057
learning rate = 0.00089 Epoch:47, Train_loss:0.00014, Test_loss:0.00057
learning rate = 0.00039 Epoch:48, Train_loss:0.00014, Test_loss:0.00057
learning rate = 0.00010 Epoch:49, Train_loss:0.00014, Test_loss:0.00057
learning rate = 0.00000 Epoch:50, Train_loss:0.00014, Test_loss:0.00057
==================== Done ====================
四、模型评估
- LOSS图
import matplotlib.pyplot as plt

# Draw the per-epoch training and validation loss on one figure.
plt.figure(figsize=(5, 3), dpi=120)
loss_curves = (
    (train_loss, 'LSTM Training Loss'),
    (test_loss, 'LSTM Validation Loss'),
)
for losses, curve_label in loss_curves:
    plt.plot(losses, label=curve_label)
plt.title('Training and Validation Loss')
plt.legend()
plt.show()
代码输出
- 调用模型进行预测
# Predict on the held-out windows.
# - The input has to live on the same device as the model, otherwise the
#   forward pass fails when the model was moved to CUDA.
# - Inference needs no autograd graph, so it runs under no_grad().
# - The result is copied back to the CPU before .numpy(), which is not
#   available on CUDA tensors.
model_device = next(model.parameters()).device
with torch.no_grad():
    scaled_pred = model(X_test.to(model_device)).cpu().numpy().reshape(-1, 1)

# Undo the [0, 1] scaling so both series are in temperature units again.
predicted_y_lstm = sc.inverse_transform(scaled_pred)
y_test_1 = sc.inverse_transform(y_test.numpy().reshape(-1, 1))

# Flatten the (n, 1) arrays into plain lists of scalars.
y_test_one = [i[0] for i in y_test_1]
predicted_y_lstm_one = [i[0] for i in predicted_y_lstm]

plt.figure(figsize=(5, 3),dpi=120)
# Compare the real series with the predicted one.
plt.plot(y_test_one[:2000], color='red', label='real_temp')
plt.plot(predicted_y_lstm_one[:2000], color='blue', label='prediction')
plt.title('Title')
plt.xlabel('X')
plt.ylabel('Y')
plt.legend()
plt.show()
代码输出
- R2值评估
from sklearn import metrics

# RMSE: root mean squared error, i.e. the square root of the MSE.
# R2:   coefficient of determination, a measure of goodness of fit.
#
# scikit-learn metrics take their arguments as (y_true, y_pred). The order does
# not matter for the squared error, but r2_score is NOT symmetric: with the
# arguments swapped it normalises by the variance of the predictions instead of
# the variance of the real values. R2 figures recorded with the swapped order
# will therefore differ from the value printed here.
RMSE_lstm = metrics.mean_squared_error(y_test_1, predicted_y_lstm_one)**0.5
R2_lstm = metrics.r2_score(y_test_1, predicted_y_lstm_one)
print('均方根误差: %.5f' % RMSE_lstm)
print('R2: %.5f' % R2_lstm)
代码输出
均方根误差: 6.77961
R2: 0.84096
五、拔高尝试
使用第1 ~ 8个时刻的数据预测第 9 ~10个时刻的温度数据,实际上也是把原来的单步预测修改为多步预测,主要改动部分在模型输出部分,再添加预测代码。
将model_lstm的“return out[:, -1:, :]”修改为“return out[:, -2:, :]”,就修改一个地方。
class model_lstm(nn.Module):
    """Two stacked single-layer LSTMs followed by a linear head (multi-step).

    The linear head maps every time step's hidden vector to one value; the
    model returns only the trailing ``out_steps`` of those per-step values.
    This variant defaults to two steps.

    Args:
        input_size: number of features per time step.
        hidden_size: hidden width shared by both LSTM layers.
        out_steps: how many trailing time steps of the output to return.

    Raises:
        ValueError: if ``out_steps`` is smaller than 1.
    """

    def __init__(self, input_size: int = 3, hidden_size: int = 320,
                 out_steps: int = 2):
        super().__init__()
        # A value of 0 would turn the slice `-0:` into "keep every step",
        # silently returning the wrong shape, so reject it up front.
        if out_steps < 1:
            raise ValueError("out_steps must be >= 1")
        self.out_steps = out_steps
        self.lstm0 = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                             num_layers=1, batch_first=True)
        self.lstm1 = nn.LSTM(input_size=hidden_size, hidden_size=hidden_size,
                             num_layers=1, batch_first=True)
        self.fc0 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Map a (batch, seq_len, input_size) tensor to (batch, out_steps, 1)."""
        out, hidden1 = self.lstm0(x)
        # The second LSTM starts from the final (h, c) state of the first one.
        out, _ = self.lstm1(out, hidden1)
        out = self.fc0(out)
        # Only change versus the single-step model: keep the last `out_steps`
        # (2 by default) per-step values instead of the last one.
        return out[:, -self.out_steps:, :]


# NOTE(review): this rebinds `model` to a freshly initialised network. No
# retraining step is shown after this point; the weights stay random until the
# model is trained again on two-step targets.
model = model_lstm()
model
添加预测代码
# Extra-credit exercise: predict time steps 9-10 from time steps 1-8.
window = 8   # number of input time steps (rows 0..7)
horizon = 2  # number of predicted time steps (rows 8..9)

model_device = next(model.parameters()).device
test_1 = torch.tensor(dataFrame.iloc[:window].values, dtype=torch.float32).reshape(1, -1, 3)

# Inference only: no autograd graph, input on the model's device, result back
# on the CPU before converting to NumPy.
with torch.no_grad():
    pred_ = model(test_1.to(model_device)).cpu()

# The scaler works on a single feature, so feed it an (n, 1) column, then
# flatten and round to two decimals.
pred_ = np.round(sc.inverse_transform(pred_.numpy().reshape(-1, 1)).reshape(-1), 2)

# Ground truth for steps 9-10 is rows 8 and 9 (0-based), i.e. the rows right
# after the input window - not the first two rows of the series.
real_tem = data.Tem1.iloc[window:window + horizon].values

print("第9~10时刻的温度预测:", pred_)
print("第9~10时刻的真实温度:", real_tem)
代码输出
第9~10时刻的温度预测: [31.4 29.51]
第9~10时刻的真实温度: [25. 25.]