声明
本文章为个人学习使用,版面观感若有不适请谅解,文中知识仅代表个人观点,若出现错误,欢迎各位批评指正。
二十一、池化层
1、 最大池化层和平均池化层
与互相关运算符一样,汇聚窗口从输入张量的左上角开始,从左往右、从上往下的在输入张量内滑动。在汇聚窗口到达的每个位置,它计算该窗口中输入子张量的最大值或平均值。计算最大值或平均值是取决于使用了最大汇聚层还是平均汇聚层。
2、 torch.stack 和 torch.cat 的区别
-
维度创建:
torch.stack 会在堆叠时创建一个新的维度,将输入张量序列沿着这个新维度进行堆叠。这意味着堆叠后的张量的维度比输入张量序列的维度多一。
torch.cat 不会引入新的维度,只在现有的某个维度上对输入张量进行拼接。 -
拼接方式:
torch.stack 会将输入张量序列按照指定维度进行逐个元素的堆叠,生成一个新的张量。这要求所有输入张量的形状必须相同。
torch.cat 则会对输入张量进行连接,不关心元素的位置,只要各个张量的拼接维度匹配即可。这种连接方式更加灵活,因为它不要求所有输入张量的形状完全相同,只要在拼接的维度上尺寸一致即可。
3、 代码演示
import torch
from torch import nn
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
def pool2d(X, pool_size, mode='max'):
p_h, p_w = pool_size
Y = torch.zeros((X.shape[0] - p_h + 1, X.shape[1] - p_w + 1)).to(device)
for i in range(Y.shape[0]):
for j in range(Y.shape[1]):
if mode == 'max':
Y[i, j] = X[i: i + p_h, j: j + p_w].max().to(device)
elif mode == 'avg':
Y[i, j] = X[i: i + p_h, j: j + p_w].mean().to(device)
return Y
X = torch.tensor([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]])
print("pool_size = (2, 2) 时 max : ", pool2d(X, (2, 2)))
print("pool_size = (2, 2) 时 avg : ", pool2d(X, (2, 2), 'avg'))
X = torch.arange(16, dtype=torch.float32).reshape((1, 1, 4, 4)).to(device)
print(f'X : {X}')
pool2d = nn.MaxPool2d(3)
print("pool = (3, 3) : ", pool2d(X))
pool2d = nn.MaxPool2d(3, padding=1, stride=2)
print("pool = (3, 3), padding = 1, stride = 2 : ", pool2d(X))
pool2d = nn.MaxPool2d((2, 3), stride=(2, 3), padding=(0, 1))
print("pool = (2, 3), padding = (2, 3), stride = (0, 1) : ", pool2d(X))
##### 多个通道 #####
X = torch.cat((X, X + 1), 1)
print(f'X : {X}')
pool2d = nn.MaxPool2d(3, padding=1, stride=2)
print("pool = (3, 3), padding = 1, stride = 2 : ", pool2d(X))
二十二、卷积神经网络(LeNet)
import torch
import torchvision
import time
from torch import nn
from IPython import display
import matplotlib.pyplot as plt
from matplotlib_inline import backend_inline
from torchvision import transforms
from torch.utils import data
mydevice = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
def accuracy(y_hat, y): # 定义一个函数来为预测正确的数量计数
"""计算预测正确的数量"""
if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
y_hat = y_hat.argmax(axis=1)
cmp = y_hat.type(y.dtype) == y # bool 类型,若预测结果与实际结果一致,则为 True
return float(cmp.type(y.dtype).sum())
def evaluate_accuracy_gpu(net, data_iter, device=None):
"""使用GPU计算模型在数据集上的精度"""
if isinstance(net, nn.Module):
net.eval() # 设置为评估模式
if not device:
device = next(iter(net.parameters())).device
# 正确预测的数量,总预测的数量
metric = Accumulator(2)
with torch.no_grad():
for X, y in data_iter:
if isinstance(X, list):
# BERT微调所需的(之后将介绍)
X = [x.to(device) for x in X]
else:
X = X.to(device)
y = y.to(device)
metric.add(accuracy(net(X), y), y.numel())
return metric[0] / metric[1]
def set_axes(axes, xlabel, ylabel, xlim, ylim, xscale, yscale, legend):
axes.set_xlabel(xlabel), axes.set_ylabel(ylabel)
axes.set_xscale(xscale), axes.set_yscale(yscale)
axes.set_xlim(xlim), axes.set_ylim(ylim)
if legend:
axes.legend(legend)
axes.grid()
class Accumulator: # 定义一个实用程序类 Accumulator,用于对多个变量进行累加
"""在n个变量上累加"""
def __init__(self, n):
self.data = [0.0] * n
def add(self, *args):
self.data = [a + float(b) for a, b in zip(self.data, args)]
def reset(self):
self.data = [0.0] * len(self.data)
def __getitem__(self, idx):
return self.data[idx]
class Animator: # 定义一个在动画中绘制数据的实用程序类 Animator
"""在动画中绘制数据"""
def __init__(self, xlabel=None, ylabel=None, legend=None, xlim=None,
ylim=None, xscale='linear', yscale='linear',
fmts=('-', 'm--', 'g-.', 'r:'), nrows=1, ncols=1,
figsize=(3.5, 2.5)):
# 增量地绘制多条线
if legend is None:
legend = []
backend_inline.set_matplotlib_formats('svg')
self.fig, self.axes = plt.subplots(nrows, ncols, figsize=figsize)
if nrows * ncols == 1:
self.axes = [self.axes, ]
# 使用lambda函数捕获参数
self.config_axes = lambda: set_axes(
self.axes[0], xlabel, ylabel, xlim, ylim, xscale, yscale, legend)
self.X, self.Y, self.fmts = None, None, fmts
def add(self, x, y):
# Add multiple data points into the figure
if not hasattr(y, "__len__"):
y = [y]
n = len(y)
if not hasattr(x, "__len__"):
x = [x] * n
if not self.X:
self.X = [[] for _ in range(n)]
if not self.Y:
self.Y = [[] for _ in range(n)]
for i, (a, b) in enumerate(zip(x, y)):
if a is not None and b is not None:
self.X[i].append(a)
self.Y[i].append(b)
self.axes[0].cla()
for x, y, fmt in zip(self.X, self.Y, self.fmts):
self.axes[0].plot(x, y, fmt)
self.config_axes()
display.display(self.fig)
# 通过以下两行代码实现了在PyCharm中显示动图
plt.draw()
plt.pause(interval=0.001)
display.clear_output(wait=True)
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']
class Timer:
def __init__(self):
self.times = []
self.start()
def start(self):
self.tik = time.time()
def stop(self):
self.times.append(time.time() - self.tik)
return self.times[-1]
def sum(self):
"""Return the sum of time."""
return sum(self.times)
def load_data_fashion_mnist(batch_size, resize=None):
"""下载 Fashion-MNIST 数据集,然后将其加载到内存中"""
trans = [transforms.ToTensor()]
if resize:
trans.insert(0, transforms.Resize(resize))
trans = transforms.Compose(trans)
mnist_train = torchvision.datasets.FashionMNIST(
root="../data", train=True, transform=trans, download=False)
mnist_test = torchvision.datasets.FashionMNIST(
root="../data", train=False, transform=trans, download=False)
return (data.DataLoader(mnist_train, batch_size, shuffle=True,
num_workers=4),
data.DataLoader(mnist_test, batch_size, shuffle=False,
num_workers=4))
net = nn.Sequential(
nn.Conv2d(1, 6, kernel_size=5, padding=2), nn.Sigmoid(),
nn.AvgPool2d(kernel_size=2, stride=2),
nn.Conv2d(6, 16, kernel_size=5), nn.Sigmoid(),
nn.AvgPool2d(kernel_size=2, stride=2),
nn.Flatten(),
nn.Linear(16 * 5 * 5, 120), nn.Sigmoid(),
nn.Linear(120, 84), nn.Sigmoid(),
nn.Linear(84, 10))
""" 尝试更换不同的激活函数及将平均池化层替换为最大池化层 """
net_demo = nn.Sequential(
nn.Conv2d(1, 8, kernel_size=5, padding=2), nn.Tanh(),
nn.MaxPool2d(kernel_size=2, stride=2),
nn.Conv2d(8, 32, kernel_size=5), nn.Tanh(),
nn.MaxPool2d(kernel_size=2, stride=2),
nn.Flatten(),
nn.Linear(32 * 5 * 5, 128), nn.Tanh(),
nn.Linear(128, 84), nn.Tanh(),
nn.Linear(84, 10))
X = torch.rand(size=(1, 1, 28, 28), dtype=torch.float32)
for layer in net:
X = layer(X)
print(layer.__class__.__name__,'output shape: \t\t',X.shape)
batch_size = 256
train_iter, test_iter = load_data_fashion_mnist(batch_size=batch_size)
def train(net, train_iter, test_iter, num_epochs, lr, device):
def init_weights(m):
if type(m) == nn.Linear or type(m) == nn.Conv2d:
nn.init.xavier_uniform_(m.weight)
net.apply(init_weights)
print('training on', torch.cuda.get_device_name(device))
net.to(device)
optimizer = torch.optim.SGD(net.parameters(), lr=lr)
loss = nn.CrossEntropyLoss()
animator = Animator(xlabel='epoch', xlim=[1, num_epochs],
legend=['train loss', 'train acc', 'test acc'])
timer, num_batches = Timer(), len(train_iter)
for epoch in range(num_epochs):
# 训练损失之和,训练准确率之和,样本数
metric = Accumulator(3)
net.train()
for i, (X, y) in enumerate(train_iter):
timer.start()
optimizer.zero_grad()
X, y = X.to(device), y.to(device)
y_hat = net(X)
l = loss(y_hat, y)
l.backward()
optimizer.step()
with torch.no_grad():
metric.add(l * X.shape[0], accuracy(y_hat, y), X.shape[0])
timer.stop()
train_l = metric[0] / metric[2]
train_acc = metric[1] / metric[2]
if (i + 1) % (num_batches // 5) == 0 or i == num_batches - 1:
animator.add(epoch + (i + 1) / num_batches,
(train_l, train_acc, None))
test_acc = evaluate_accuracy_gpu(net, test_iter)
animator.add(epoch + 1, (None, None, test_acc))
plt.title(f'loss {train_l:.3f}, train acc {train_acc:.3f}, test acc {test_acc:.3f}\n'
f'{metric[2] * num_epochs / timer.sum():.1f} examples/sec on {str(device)}')
plt.show()
lr, num_epochs = 0.9, 10
train(net, train_iter, test_iter, num_epochs, lr, mydevice)
def show_images(imgs, num_rows, num_cols, titles=None, scale=1.5):
figsize = (num_cols * scale, num_rows * scale)
_, axes = plt.subplots(num_rows, num_cols, figsize=figsize)
axes = axes.flatten()
for i, (ax, img) in enumerate(zip(axes, imgs)):
try:
numpy = lambda x, *args, **kwargs: x.detach().numpy(*args, **kwargs)
img = numpy(img)
except:
pass
ax.imshow(img)
ax.axes.get_xaxis().set_visible(False)
ax.axes.get_yaxis().set_visible(False)
if titles:
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']
plt.title(titles)
plt.show()
return axes
def show_activate(net, train_iter, num_epochs, lr, device):
def init_weights(m):
if type(m) == nn.Linear or type(m) == nn.Conv2d:
nn.init.xavier_uniform_(m.weight)
net.apply(init_weights)
print('training on', torch.cuda.get_device_name(device))
net.to(device)
optimizer = torch.optim.SGD(net.parameters(), lr=lr)
loss = nn.CrossEntropyLoss()
Animator(xlabel='epoch', xlim=[1, num_epochs], legend=['train loss', 'train acc', 'test acc'])
for epoch in range(num_epochs):
net.train()
for i, (X, y) in enumerate(train_iter):
optimizer.zero_grad()
X, y = X.to(device), y.to(device)
y_hat = net(X)
l = loss(y_hat, y)
l.backward()
optimizer.step()
torch.no_grad()
x_first_Sigmoid_layer = net[0:2](X)[0:9, 1, :, :]
show_images(x_first_Sigmoid_layer.reshape(9, 28, 28).cpu().detach(), 1, 9, titles=f'第一次 {net[1]}')
x_second_Sigmoid_layer = net[0:5](X)[0:9, 1, :, :]
show_images(x_second_Sigmoid_layer.reshape(9, 10, 10).cpu().detach(), 1, 9, titles=f'第二次 {net[4]}')
""" 可多尝试不同情况 """
show_activate(net_demo, train_iter, num_epochs, lr, mydevice)
net = nn.Sequential( nn.Conv2d(1, 6, kernel_size=5, padding=2), nn.Sigmoid(), nn.AvgPool2d(kernel_size=2, stride=2), nn.Conv2d(6, 16, kernel_size=5), nn.Sigmoid(), nn.AvgPool2d(kernel_size=2, stride=2), nn.Flatten(), nn.Linear(16 * 5 * 5, 120), nn.Sigmoid(), nn.Linear(120, 84), nn.Sigmoid(), nn.Linear(84, 10))
net_demo = nn.Sequential( nn.Conv2d(1, 8, kernel_size=5, padding=2), nn.Sigmoid(), nn.MaxPool2d(kernel_size=2, stride=2), nn.Conv2d(8, 32, kernel_size=5), nn.Sigmoid(), nn.MaxPool2d(kernel_size=2, stride=2), nn.Flatten(), nn.Linear(32 * 5 * 5, 128), nn.Sigmoid(), nn.Linear(128, 84), nn.Sigmoid(), nn.Linear(84, 10))
net_demo = nn.Sequential( nn.Conv2d(1, 8, kernel_size=5, padding=2), nn.Tanh(), nn.MaxPool2d(kernel_size=2, stride=2), nn.Conv2d(8, 32, kernel_size=5), nn.Sigmoid(), nn.MaxPool2d(kernel_size=2, stride=2), nn.Flatten(), nn.Linear(32 * 5 * 5, 128), nn.Sigmoid(), nn.Linear(128, 84), nn.Sigmoid(), nn.Linear(84, 10))
文中部分知识参考:B 站 —— 跟李沐学AI;百度百科