RNN Cell:
h0 and x1 produce h1; h1 serves both as this step's output and as the hidden input to the next invocation of the same RNN Cell (h1 = linear(h0, x1)).
RNN computation:
The input first goes through a linear transformation; the activation function commonly used in recurrent neural networks is tanh (output in the ±1 range).
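Concretely, one step computes h1 = tanh(W_ih·x1 + b_ih + W_hh·h0 + b_hh). A minimal sketch (my own check, not from the original notes) that reproduces torch.nn.RNNCell by hand using its weight_ih / weight_hh / bias_ih / bias_hh parameters:
import torch
cell = torch.nn.RNNCell(input_size=4, hidden_size=2)
x = torch.randn(1, 4)  # one time step, batch_size = 1
h = torch.zeros(1, 2)  # previous hidden state
# the cell is just a linear map of x and h followed by tanh
h_manual = torch.tanh(x @ cell.weight_ih.T + cell.bias_ih
                      + h @ cell.weight_hh.T + cell.bias_hh)
print(torch.allclose(cell(x, h), h_manual))  # expected: True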
Constructing an RNN Cell:
Code:
import torch
batch_size = 1
seq_len = 3
input_size = 4
hidden_size = 2
# Construction of RNNCell
cell = torch.nn.RNNCell(input_size=input_size, hidden_size=hidden_size)
# Wrapping the sequence into: (seqLen, batchSize, inputSize)
dataset = torch.randn(seq_len, batch_size, input_size)  # (3, 1, 4); the sequence data
# Initializing the hidden state to zero
hidden = torch.zeros(batch_size, hidden_size)  # (1, 2); hidden state, all zeros
for idx, input in enumerate(dataset):
    print('=' * 20, idx, '=' * 20)  # separator line of 20 '=' signs
    print('Input size:', input.shape)  # (batch_size, input_size)
    # Feed the sequence into the cell one step at a time; seq_len = 3, so the loop runs 3 times
    hidden = cell(input, hidden)  # the same cell is reused: this step's input + the previous hidden state; the returned hidden is fed to the next step
    # The hidden state has shape batch_size x hidden_size, i.e. torch.Size([1, 2])
    print('output size:', hidden.shape)  # (batch_size, hidden_size)
    print(hidden)
At its core an RNN Cell is just a linear layer (followed by a tanh).
The first thing to get straight when using an RNN is the dimensions: the dataset has one extra dimension, the sequence dimension.
Every layer has its own output. RNN Cells drawn in the same color are one and the same linear layer, so in the unrolled diagram there are only 3 distinct linear layers in total.
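To make the weight sharing concrete, the following quick check (my addition, not in the original notes; num_layers=3 is chosen to match the three layers in the figure) lists the parameters of an nn.RNN. There is only one set of weights per layer, however long the sequence is, because every time step reuses it:
import torch
rnn = torch.nn.RNN(input_size=4, hidden_size=2, num_layers=3)
for name, p in rnn.named_parameters():
    print(name, tuple(p.shape))
# prints weight_ih_l0/l1/l2, weight_hh_l0/l1/l2 and their biases:
# three layers' worth of weights, reused at every time step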
Code:
import torch
batch_size = 1  # parameters
seq_len = 3
input_size = 4
hidden_size = 2
num_layers = 1  # number of RNN layers
# Construction of RNN
rnn = torch.nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers)
# Wrapping the sequence into: (seqLen, batchSize, inputSize)
inputs = torch.randn(seq_len, batch_size, input_size)  # (3, 1, 4)
# Initializing the hidden state to zero
hidden = torch.zeros(num_layers, batch_size, hidden_size)  # (1, 1, 2), hidden state dimensions
output, hidden = rnn(inputs, hidden)  # nn.RNN runs the loop internally, so the whole sequence can be fed in at once
print('Output size:', output.shape) # (seq_len, batch_size, hidden_size)
print('Output:', output)
print('Hidden size:', hidden.shape) # (num_layers, batch_size, hidden_size)
print('Hidden:', hidden)
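nn.RNN also accepts batch_first=True (used in the embedding example further below): the input and output are then laid out as (batchSize, seqLen, ...), while the hidden state keeps the shape (numLayers, batchSize, hiddenSize). A minimal sketch of the same toy example with batch_first (my addition):
import torch
rnn = torch.nn.RNN(input_size=4, hidden_size=2, num_layers=1, batch_first=True)
inputs = torch.randn(1, 3, 4)  # (batchSize, seqLen, inputSize) instead of (seqLen, batchSize, inputSize)
hidden = torch.zeros(1, 1, 2)  # hidden state stays (numLayers, batchSize, hiddenSize)
output, hidden = rnn(inputs, hidden)
print('Output size:', output.shape)  # torch.Size([1, 3, 2]) -> (batchSize, seqLen, hiddenSize)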
Predicting a string (RNN Cell):
import torch
# 1. Parameters
input_size = 4
hidden_size = 4
batch_size = 1
# 2. Prepare the data
index2char = ['e', 'h', 'l', 'o']  # dictionary
x_data = [1, 0, 2, 2, 3]  # 'hello' represented by indices into the dictionary
y_data = [3, 1, 2, 3, 2]  # target: 'ohlol'
one_hot_lookup = [[1, 0, 0, 0],  # lookup table used to turn x_data into one-hot vectors
                  [0, 1, 0, 0],
                  [0, 0, 1, 0],
                  [0, 0, 0, 1]]
x_one_hot = [one_hot_lookup[x] for x in x_data]  # convert x_data to one-hot vectors
inputs = torch.Tensor(x_one_hot).view(-1, batch_size, input_size)  # (seqLen, batchSize, inputSize)
labels = torch.LongTensor(y_data).view(-1, 1)  # (seqLen * batchSize, 1); CrossEntropyLoss does not need one-hot labels, it handles the class indices internally
# 3. Build the model
class Model(torch.nn.Module):
    def __init__(self, input_size, hidden_size, batch_size):
        super(Model, self).__init__()
        self.batch_size = batch_size
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.rnncell = torch.nn.RNNCell(input_size=self.input_size, hidden_size=self.hidden_size)

    def forward(self, input, hidden):
        hidden = self.rnncell(input, hidden)
        return hidden

    def init_hidden(self):  # create the initial hidden state; this needs batch_size
        return torch.zeros(self.batch_size, self.hidden_size)

net = Model(input_size, hidden_size, batch_size)
# 4. Loss and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.1)  # Adam optimizer
# 5. Training
for epoch in range(15):
    loss = 0
    optimizer.zero_grad()  # zero the gradients
    hidden = net.init_hidden()  # initialize the hidden state
    print('Predicted string:', end='')
    for input, label in zip(inputs, labels):  # feed one character per step, looping over the sequence; inputs is (seqLen, batchSize, inputSize), so each input is (batchSize, inputSize)
        hidden = net(input, hidden)
        loss += criterion(hidden, label)  # accumulate the loss; do not call item(), because we still need to backpropagate through it
        _, idx = hidden.max(dim=1)  # index of the largest logit
        print(index2char[idx.item()], end='')  # print the predicted character
    loss.backward()  # backpropagation
    optimizer.step()  # update the parameters
    print(', Epoch [%d/15] loss: %.4f' % (epoch + 1, loss.item()))
Predicting a string (RNN):
import torch
# 1. Parameters
seq_len = 5
input_size = 4
hidden_size = 4
batch_size = 1
# 2. Prepare the data
index2char = ['e', 'h', 'l', 'o']  # dictionary; used later to turn indices back into characters
x_data = [1, 0, 2, 2, 3]  # 'hello' represented by indices into the dictionary
y_data = [3, 1, 2, 3, 2]  # target: 'ohlol'
one_hot_lookup = [[1, 0, 0, 0],  # lookup table used to turn x_data into one-hot vectors
                  [0, 1, 0, 0],
                  [0, 0, 1, 0],
                  [0, 0, 0, 1]]
x_one_hot = [one_hot_lookup[x] for x in x_data]  # convert x_data to one-hot vectors
inputs = torch.Tensor(x_one_hot).view(seq_len, batch_size, input_size)  # (seqLen, batchSize, inputSize)
labels = torch.LongTensor(y_data)
# 3. Build the model
class Model(torch.nn.Module):
    def __init__(self, input_size, hidden_size, batch_size, num_layers=1):
        super(Model, self).__init__()
        self.num_layers = num_layers  # 1
        self.batch_size = batch_size  # 1
        self.input_size = input_size  # 4
        self.hidden_size = hidden_size  # 4
        self.rnn = torch.nn.RNN(input_size=self.input_size, hidden_size=self.hidden_size, num_layers=num_layers)

    def forward(self, input):
        hidden = torch.zeros(self.num_layers, self.batch_size, self.hidden_size)
        out, _ = self.rnn(input, hidden)  # out: tensor of shape (seq_len, batch, hidden_size)
        return out.view(-1, self.hidden_size)  # reshape the 3-D output to 2-D: (seqLen * batchSize, hiddenSize)

    def init_hidden(self):  # initial hidden state (not used here; forward creates its own)
        return torch.zeros(self.num_layers, self.batch_size, self.hidden_size)

net = Model(input_size, hidden_size, batch_size)
# 4. Loss and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.05)  # Adam optimizer
# 5. Training
for epoch in range(15):
    optimizer.zero_grad()
    outputs = net(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    _, idx = outputs.max(dim=1)
    idx = idx.data.numpy()
    print('Predicted string: ', ''.join([index2char[x] for x in idx]), end='')
    print(', Epoch [%d/15] loss: %.4f' % (epoch + 1, loss.item()))
Using an embedding and a linear layer:
import torch
# 1. Parameters
num_class = 4
input_size = 4
hidden_size = 8
embedding_size = 10
num_layers = 2
batch_size = 1
seq_len = 5
# 2. Prepare the data
index2char = ['e', 'h', 'l', 'o']  # dictionary
x_data = [[1, 0, 2, 2, 3]]  # (batch_size, seq_len); 'hello' represented by indices into the dictionary
y_data = [3, 1, 2, 3, 2]  # (batch_size * seq_len); target: 'ohlol'
inputs = torch.LongTensor(x_data)  # (batch_size, seq_len)
labels = torch.LongTensor(y_data)  # (batch_size * seq_len)
# 3. Build the model
class Model(torch.nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.emb = torch.nn.Embedding(num_class, embedding_size)
        self.rnn = torch.nn.RNN(input_size=embedding_size, hidden_size=hidden_size, num_layers=num_layers,
                                batch_first=True)
        self.fc = torch.nn.Linear(hidden_size, num_class)

    def forward(self, x):
        hidden = torch.zeros(num_layers, x.size(0), hidden_size)  # (num_layers, batch_size, hidden_size)
        x = self.emb(x)  # returns (batch_size, seq_len, embedding_size)
        x, _ = self.rnn(x, hidden)  # returns (batch_size, seq_len, hidden_size)
        x = self.fc(x)  # returns (batch_size, seq_len, num_class)
        return x.view(-1, num_class)  # (batch_size * seq_len, num_class)

net = Model()
# 4. Loss and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.05)  # Adam optimizer
# 5. Training
for epoch in range(15):
    optimizer.zero_grad()
    outputs = net(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    _, idx = outputs.max(dim=1)
    idx = idx.data.numpy()
    print('Predicted string: ', ''.join([index2char[x] for x in idx]), end='')
    print(', Epoch [%d/15] loss: %.4f' % (epoch + 1, loss.item()))
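Once training has run, the same net can be reused for a quick prediction; this small inference check is my addition (not part of the original notes) and reuses the variables defined above:
with torch.no_grad():  # no gradients needed for inference
    test = torch.LongTensor([[1, 0, 2, 2, 3]])  # 'hello' as indices, shape (batch_size, seq_len)
    pred = net(test).argmax(dim=1).tolist()  # (batch_size * seq_len) predicted class indices
    print('Predicted string:', ''.join(index2char[i] for i in pred))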
Extra: LSTM