本文主要参考沐神的视频教程
https://www.bilibili.com/video/BV1Ao4y117Pd/?spm_id_from=autoNext&vd_source=c7bfc6ce0ea0cbe43aa288ba2713e56d
文档教程 https://zh-v2.d2l.ai/
本文主要对沐神提供的代码中个人不太理解的内容进行笔记记录,内容不会特别严谨,仅供参考。
1.函数目录
1.1 python
| python | 位置 |
|---|---|
| 可变参数解包 | 3.1.1 |
2. VGG
- AlexNet比LeNet更深更大来得到更好的精度
- 能不能更深和更大呢?
- 选项
  - 更多的全连接层(太贵)
  - 更多的卷积层
  - 将卷积层组合成块
2.1 VGG块
- 深VS宽?
  - 5x5卷积
  - 3x3卷积
  - 深但窄效果更好
- VGG块
  - 3x3卷积(填充为1)(n层,m通道)
  - 2x2最大池化层(步幅2)
2.2 VGG架构
- 多个VGG块后面接全连接层
- 不同次数的重复块得到不同的架构,例如VGG-16、VGG-19
从AlexNet到VGG,它们本质上都是块设计。
3 代码实现
3.1 VGG块
3.1.1 可变参数解包
- 函数定义时使用 *
当在函数定义中使用 * 时,表示函数可以接受任意数量的位置参数,并将这些参数存储为一个元组。例如:
def foo(*args):
    """Print every positional argument on its own line.

    All positional arguments passed by the caller are collected into the
    tuple ``args``.
    """
    for arg in args:
        print(arg)


foo(1, 2, 3)  # Output: 1, 2 and 3, each printed on its own line
- 函数调用时使用 *
在函数调用时使用 * 可以将一个可迭代对象解包为单独的参数传递给函数。例如:
def foo(a, b, c):
    """Print the three arguments on one line, separated by spaces."""
    print(a, b, c)


# A tuple is unpacked into three positional arguments.
values = (1, 2, 3)
foo(*values)  # Output: 1 2 3

# A list is unpacked the same way.
values = [1, 2, 3]
foo(*values)  # Output: 1 2 3
*values 将列表 values 解包为三个单独的参数传递给函数 foo。
def vgg_block(num_convs, in_channels, out_channels):
    """Build one VGG block: ``num_convs`` conv+ReLU pairs, then a max-pool.

    Args:
        num_convs: Number of 3x3 convolution layers in the block.
        in_channels: Channel count of the block's input.
        out_channels: Channel count produced by every convolution in the block.

    Returns:
        An ``nn.Sequential`` holding the convolution/ReLU pairs followed by a
        2x2 max-pooling layer with stride 2.
    """
    layers = []
    for _ in range(num_convs):
        # kernel_size=3 with padding=1 keeps the spatial size unchanged.
        layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1))
        layers.append(nn.ReLU())
        # Every convolution after the first one consumes `out_channels`.
        in_channels = out_channels
    layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
    # Unpack the list so each layer becomes a separate positional argument.
    return nn.Sequential(*layers)
3.2 VGG网络
原始VGG网络有5个卷积块,其中前两个块各有一个卷积层,后三个块各包含两个卷积层。 第一个模块有64个输出通道,每个后续模块将输出通道数量翻倍,直到该数字达到512。由于该网络使用8个卷积层和3个全连接层,因此它通常被称为VGG-11。
import torch
from torch import nn
def vgg_block(num_convs, in_channels, out_channels):
    """Build one VGG block: ``num_convs`` conv+ReLU pairs, then a max-pool.

    Args:
        num_convs: Number of 3x3 convolution layers in the block.
        in_channels: Channel count of the block's input.
        out_channels: Channel count produced by every convolution in the block.

    Returns:
        An ``nn.Sequential`` holding the convolution/ReLU pairs followed by a
        2x2 max-pooling layer with stride 2.
    """
    layers = []
    for _ in range(num_convs):
        # kernel_size=3 with padding=1 keeps the spatial size unchanged.
        layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1))
        layers.append(nn.ReLU())
        # Every convolution after the first one consumes `out_channels`.
        in_channels = out_channels
    layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
    # Unpack the list so each layer becomes a separate positional argument.
    return nn.Sequential(*layers)
# One (num_convs, out_channels) pair per VGG block.
conv_arch = ((1, 64), (1, 128), (2, 256), (2, 512), (2, 512))


def vgg(conv_arch, in_channels=1, num_classes=10):
    """Assemble a VGG network from a block specification.

    Args:
        conv_arch: Iterable of ``(num_convs, out_channels)`` pairs, one per
            VGG block.
        in_channels: Channel count of the input images (default 1).
        num_classes: Size of the final output layer (default 10).

    Returns:
        An ``nn.Sequential`` with the convolutional blocks, a flatten layer
        and three fully connected layers.

    Raises:
        ValueError: If ``conv_arch`` contains no blocks.
    """
    conv_arch = list(conv_arch)
    if not conv_arch:
        raise ValueError("conv_arch must describe at least one VGG block")

    conv_blks = []
    for num_convs, out_channels in conv_arch:
        conv_blks.append(vgg_block(num_convs, in_channels, out_channels))
        # The next block consumes what this block produced.
        in_channels = out_channels

    return nn.Sequential(
        *conv_blks, nn.Flatten(),
        # The first linear layer hard-codes a 7x7 final feature map; that is
        # what five stride-2 poolings leave of a 224x224 input.
        nn.Linear(out_channels * 7 * 7, 4096), nn.ReLU(), nn.Dropout(p=0.5),
        nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(p=0.5),
        nn.Linear(4096, num_classes)
    )
# net = vgg(conv_arch)
# X = torch.rand((1,1,224,224), dtype=torch.float32)
# for layer in net:
# X = layer(X)
# print(layer.__class__.__name__, 'output shape:\t',X.shape)
3.3 训练
import torch
from torch import nn
import model
import tools
from model import vgg
from d2l import torch as d2l
import pandas as pd
from tools import *
if __name__ == "__main__":
    batch_size = 128
    # Fashion-MNIST images are resized to 224x224 when loaded.
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=batch_size, resize=224)

    conv_arch = ((1, 64), (1, 128), (2, 256), (2, 512), (2, 512))
    # Divide every block's output channel count by `ratio`
    # (presumably to make training cheaper; confirm against the course notes).
    ratio = 4
    small_conv_arch = [(num_convs, channels // ratio) for num_convs, channels in conv_arch]
    net = vgg(small_conv_arch)

    lr, num_epochs = 0.05, 10
    train_process = tools.train_ch6(net, train_iter, test_iter, num_epochs, lr, tools.try_gpu())
    tools.matplot_acc_loss(train_process)
tools模块
import pandas as pd
import torch
import matplotlib.pyplot as plt
from torch import nn
import time
import numpy as np
class Timer:  #@save
    """Record the durations of multiple timed runs."""

    def __init__(self):
        # `times` holds one elapsed duration (in seconds) per `stop()` call.
        self.times = []
        self.start()

    def start(self):
        """Start (or restart) the timer."""
        self.tik = time.time()

    def stop(self):
        """Record the time elapsed since the last `start()` and return it."""
        self.times.append(time.time() - self.tik)
        return self.times[-1]

    def avg(self):
        """Return the mean recorded time, or 0.0 if nothing was recorded."""
        if not self.times:
            return 0.0
        # Inside a method body `sum` is the builtin, not the method below.
        return sum(self.times) / len(self.times)

    def sum(self):
        """Return the total of all recorded times."""
        return sum(self.times)

    def cumsum(self):
        """Return the running total of the recorded times as a list."""
        return np.array(self.times).cumsum().tolist()
def argmax(x, *args, **kwargs):
    """Return ``x.argmax(...)``: the index of the maximum value."""
    return x.argmax(*args, **kwargs)


def astype(x, *args, **kwargs):
    """Return ``x.type(...)``: ``x`` converted to another data type."""
    return x.type(*args, **kwargs)


def reduce_sum(x, *args, **kwargs):
    """Return ``x.sum(...)``: the sum of ``x``."""
    return x.sum(*args, **kwargs)
# Accumulate running sums over several variables.
class Accumulator:
    """For accumulating sums over `n` variables."""

    def __init__(self, n):
        """Defined in :numref:`sec_utils`"""
        self.data = [0.0] * n

    def add(self, *args):
        """Add one value to each accumulated variable, position by position.

        Raises:
            ValueError: If the number of values differs from the number of
                accumulated variables. Without this check ``zip`` would
                silently truncate and change the length of ``data``.
        """
        if len(args) != len(self.data):
            raise ValueError(
                f"expected {len(self.data)} values, got {len(args)}"
            )
        self.data = [a + float(b) for a, b in zip(self.data, args)]

    def reset(self):
        """Set every accumulated sum back to 0.0."""
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]
# Count the number of correct predictions.
def accuracy(y_hat, y):
    """Compute the number of correct predictions.

    Defined in :numref:`sec_utils`

    Args:
        y_hat: Predicted labels, or a 2-D tensor with more than one column
            (one column per class) that is reduced with ``argmax`` over
            axis 1.
        y: Ground-truth labels.

    Returns:
        The count of positions where prediction and label agree, as a float.
    """
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        y_hat = y_hat.argmax(axis=1)
    # Cast predictions to the label dtype before comparing.
    cmp = y_hat.type(y.dtype) == y
    return float(cmp.type(y.dtype).sum())
# Train for a single epoch.
def train_epoch(net, train_iter, loss, trainer):
    """Run one training epoch and return the average loss and accuracy.

    Args:
        net: Model to train; switched to training mode when it is an
            ``nn.Module``.
        train_iter: Iterable yielding ``(X, y)`` batches.
        loss: Callable ``loss(y_hat, y)``. The summed value is accumulated,
            so this presumably expects per-sample losses
            (``reduction='none'``); TODO confirm against the caller.
        trainer: Either a ``torch.optim.Optimizer`` or a callable that takes
            the batch size and performs the parameter update.

    Returns:
        ``(train_loss, train_acc)``: accumulated loss and number of correct
        predictions, each divided by the accumulated sample count.
    """
    if isinstance(net, nn.Module):
        net.train()
    # Running totals: summed loss, correct predictions, sample count.
    metric_train = Accumulator(3)
    for X, y in train_iter:
        y_hat = net(X)
        l = loss(y_hat, y)
        if isinstance(trainer, torch.optim.Optimizer):
            # Built-in optimizer: back-propagate the mean loss.
            trainer.zero_grad()
            l.mean().backward()
            trainer.step()
        else:
            # Custom updater: back-propagate the summed loss and hand the
            # batch size to the updater.
            l.sum().backward()
            trainer(X.shape[0])
        metric_train.add(float(l.sum()), accuracy(y_hat, y), y.numel())
    # Return the training loss and the training accuracy.
    return metric_train[0] / metric_train[2], metric_train[1] / metric_train[2]
# Train for a single epoch, moving each batch to `device` first.
def train_epoch_gpu(net, train_iter, loss, trainer, device):
    """Run one training epoch on ``device``; return average loss and accuracy.

    Args:
        net: Model to train; switched to training mode when it is an
            ``nn.Module``.
        train_iter: Iterable yielding ``(X, y)`` batches.
        loss: Callable ``loss(y_hat, y)``.
        trainer: Either a ``torch.optim.Optimizer`` or a callable that takes
            the batch size and performs the parameter update.
        device: Device each batch is moved to before the forward pass.

    Returns:
        ``(train_loss, train_acc)``: accumulated loss and number of correct
        predictions, each divided by the accumulated sample count.
    """
    if isinstance(net, nn.Module):
        net.train()
    # Running totals: summed loss, correct predictions, sample count.
    metric_train = Accumulator(3)
    for X, y in train_iter:
        X, y = X.to(device), y.to(device)
        y_hat = net(X)
        l = loss(y_hat, y)
        if isinstance(trainer, torch.optim.Optimizer):
            trainer.zero_grad()
            l.backward()
            trainer.step()
        else:
            l.sum().backward()
            trainer(X.shape[0])
        # NOTE(review): scaling by the batch size assumes `l` is the batch
        # mean (a scalar); confirm the loss passed in uses mean reduction.
        metric_train.add(l * X.shape[0], accuracy(y_hat, y), X.shape[0])
    # Return the training loss and the training accuracy.
    return metric_train[0] / metric_train[2], metric_train[1] / metric_train[2]
# Compute the loss and accuracy on a validation set.
def evalution_loss_accuracy(net, data_iter, loss):
    """Evaluate ``net`` on ``data_iter``; return average loss and accuracy.

    Args:
        net: Model to evaluate; switched to evaluation mode when it is a
            ``torch.nn.Module``.
        data_iter: Iterable yielding ``(X, y)`` batches.
        loss: Callable ``loss(y_hat, y)``.

    Returns:
        ``(loss, accuracy)``: accumulated loss and number of correct
        predictions, each divided by the accumulated sample count.
    """
    if isinstance(net, torch.nn.Module):
        net.eval()
    # Running totals: summed loss, correct predictions, sample count.
    metric = Accumulator(3)
    with torch.no_grad():
        for X, y in data_iter:
            # One forward pass serves both the loss and the accuracy.
            y_hat = net(X)
            l = loss(y_hat, y)
            # NOTE(review): scaling by the batch size assumes `l` is the
            # batch mean; confirm the loss passed in uses mean reduction.
            metric.add(float(l.sum()) * X.shape[0], accuracy(y_hat, y), X.shape[0])
    return metric[0] / metric[2], metric[1] / metric[2]
# Compute the loss and accuracy on a validation set, on a given device.
def evalution_loss_accuracy_gpu(net, data_iter, loss, device=None):
    """Evaluate ``net`` on ``data_iter``; return average loss and accuracy.

    Args:
        net: Model to evaluate; switched to evaluation mode when it is a
            ``torch.nn.Module``.
        data_iter: Iterable yielding ``(X, y)`` batches; ``X`` may be a
            tensor or a list of tensors.
        loss: Callable ``loss(y_hat, y)``.
        device: Device the batches are moved to. When omitted, the device of
            the model's first parameter is used.

    Returns:
        ``(loss, accuracy)``: accumulated loss and number of correct
        predictions, each divided by the accumulated sample count.
    """
    # The old default was the string 'None'; keep accepting it as "not given".
    if isinstance(device, str) and device == "None":
        device = None
    if isinstance(net, torch.nn.Module):
        net.eval()
        if device is None:
            # Look at the first parameter to see which device the model is on.
            device = next(iter(net.parameters())).device
    # Running totals: summed loss, correct predictions, sample count.
    metric = Accumulator(3)
    with torch.no_grad():
        for X, y in data_iter:
            if device is not None:
                if isinstance(X, list):
                    X = [x.to(device) for x in X]
                else:
                    X = X.to(device)
                y = y.to(device)
            # A list has no `.shape`; assumes every tensor in the list shares
            # the same leading batch dimension (TODO confirm).
            batch = X[0].shape[0] if isinstance(X, list) else X.shape[0]
            # One forward pass serves both the loss and the accuracy.
            y_hat = net(X)
            l = loss(y_hat, y)
            # NOTE(review): scaling by the batch size assumes `l` is the
            # batch mean; confirm the loss passed in uses mean reduction.
            metric.add(l * batch, accuracy(y_hat, y), batch)
    return metric[0] / metric[2], metric[1] / metric[2]
def matplot_acc_loss(train_process):
    """Plot training/validation loss and accuracy curves side by side.

    Args:
        train_process: Table providing an ``epoch`` column plus
            ``train_loss_all``, ``val_loss_all``, ``train_acc_all`` and
            ``val_acc_all`` attributes (one value per epoch).
    """
    epochs = train_process['epoch']
    plt.figure(figsize=(12, 4))

    # Left panel: loss after every epoch.
    plt.subplot(1, 2, 1)
    plt.plot(epochs, train_process.train_loss_all, "ro-", label="Train loss")
    plt.plot(epochs, train_process.val_loss_all, "bs-", label="Val loss")
    plt.legend()
    plt.xlabel("epoch")
    plt.ylabel("Loss")

    # Right panel: accuracy after every epoch.
    plt.subplot(1, 2, 2)
    plt.plot(epochs, train_process.train_acc_all, "ro-", label="Train acc")
    plt.plot(epochs, train_process.val_acc_all, "bs-", label="Val acc")
    plt.xlabel("epoch")
    plt.ylabel("acc")
    plt.legend()

    plt.show()
def gpu(i=0):
    """Get a GPU device.

    Defined in :numref:`sec_use_gpu`

    Args:
        i: Index of the CUDA device (default 0).

    Returns:
        ``torch.device('cuda:<i>')``. Availability of the device is not
        checked here.
    """
    return torch.device(f'cuda:{i}')
def cpu():
    """Get the CPU device.

    Defined in :numref:`sec_use_gpu`
    """
    return torch.device('cpu')
def num_gpus():
    """Get the number of available GPUs.

    Defined in :numref:`sec_use_gpu`
    """
    return torch.cuda.device_count()
def try_gpu(i=0):
    """Return gpu(i) if exists, otherwise return cpu().

    Defined in :numref:`sec_use_gpu`

    Args:
        i: Index of the requested CUDA device (default 0).

    Returns:
        ``torch.device('cuda:<i>')`` when at least ``i + 1`` CUDA devices are
        visible, otherwise ``torch.device('cpu')``. A negative index also
        falls back to the CPU.
    """
    # The lower bound keeps a negative index from building an invalid device.
    if 0 <= i < torch.cuda.device_count():
        return torch.device(f'cuda:{i}')
    return torch.device('cpu')
def train_ch6(net, train_iter, test_iter, num_epochs, lr, device):
    """Train a model on ``device`` (defined in chapter 6).

    Args:
        net: Model to train. Its ``nn.Linear`` and ``nn.Conv2d`` weights are
            re-initialised with Xavier-uniform before training.
        train_iter: Iterable yielding training ``(X, y)`` batches.
        test_iter: Iterable yielding validation ``(X, y)`` batches.
        num_epochs: Number of epochs to run.
        lr: Learning rate for SGD.
        device: Device the model and the batches are moved to.

    Returns:
        A ``pandas.DataFrame`` with one row per epoch and the columns
        ``epoch``, ``train_loss_all``, ``val_loss_all``, ``train_acc_all``
        and ``val_acc_all``.
    """

    # Initialise the model parameters.
    def init_weights(m):
        # Exact type match on purpose: subclasses of these layers are left
        # untouched, exactly as before.
        if type(m) in (nn.Linear, nn.Conv2d):
            nn.init.xavier_uniform_(m.weight)

    net.apply(init_weights)
    print("training on", device)
    net.to(device)
    # Optimizer.
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    # Loss function.
    loss = nn.CrossEntropyLoss()
    # Per-epoch training metrics.
    train_loss_all = []
    train_acc_all = []
    # Per-epoch validation metrics.
    val_loss_all = []
    val_acc_all = []
    # Timer() starts timing on construction.
    timer = Timer()
    for epoch in range(num_epochs):
        train_loss, train_acc = train_epoch_gpu(net, train_iter, loss, optimizer, device)
        val_loss, val_acc = evalution_loss_accuracy_gpu(net, test_iter, loss, device)
        train_loss_all.append(train_loss)
        train_acc_all.append(train_acc)
        val_loss_all.append(val_loss)
        val_acc_all.append(val_acc)
        print("{} train loss:{:.4f} train acc: {:.4f}".format(epoch, train_loss_all[-1], train_acc_all[-1]))
        print("{} val loss:{:.4f} val acc: {:.4f}".format(epoch, val_loss_all[-1], val_acc_all[-1]))
        # Read the clock once so minutes and seconds come from the same
        # measurement. The timer is never restarted, so this is the time
        # elapsed since training began.
        elapsed = timer.stop()
        print("训练和验证耗费的时间{:.0f}m{:.0f}s".format(elapsed // 60, elapsed % 60))
    train_process = pd.DataFrame(data={"epoch": range(num_epochs),
                                       "train_loss_all": train_loss_all,
                                       "val_loss_all": val_loss_all,
                                       "train_acc_all": train_acc_all,
                                       "val_acc_all": val_acc_all, })
    return train_process
4 QA
问题20:在视觉领域人工特征的研究还没有进展?研究如何设计更好的特征是不是也还有意义?尤其是提升研究能力方面
目前还是很少有论文去做人工特征的研究,特别是在主流的视觉领域期刊上。
问题23:训练loss一直下降,测试loss从一开始就一点不降、呈水平状,是什么原因呢?
可能原因1:代码写错了
可能原因2:过拟合了