文章目录
- 什么是LeNet
- 代码实现网络架构
什么是LeNet
LeNet是一种经典的卷积神经网络,由Yann LeCun等人在1998年提出。它是深度学习中第一个成功应用于手写数字识别的卷积神经网络,并且被认为是现代卷积神经网络的基础。
LeNet模型包含了多个卷积层和池化层,以及最后的全连接层用于分类。其中,每个卷积层都包含了一个卷积操作和一个非线性激活函数,用于提取输入图像的特征。池化层则用于缩小特征图的尺寸,减少模型参数和计算量。全连接层则将特征向量映射到类别概率上。
代码实现网络架构
如何搭建网络模型参考博客:Pytorch学习笔记(模型训练)
我们采用CIFAR-10数据集进行训练和测试。原始LeNet的输入是1个channel的32x32图像,而CIFAR-10的图像是3个channel的32x32,因此模型结构保持不变,只需调整各层的输入输出通道数和大小。
model.py:
import torch
from torch import nn
# 搭建网络模型
class LeNet(nn.Module):
    """LeNet-style CNN for 3-channel 32x32 images with 10 output classes.

    The network is two (conv 5x5 -> ReLU -> 2x2 max-pool) stages followed by
    three fully connected layers. ``forward`` returns the raw 10-way logits;
    no softmax is applied here.
    """

    def __init__(self):
        super().__init__()
        # Shape comments assume a (N, 3, 32, 32) input, which is what the
        # 32 * 5 * 5 input size of the first Linear layer requires.
        self.model = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=5, stride=1, padding=0),   # -> (N, 16, 28, 28)
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),                            # -> (N, 16, 14, 14)
            nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=0),  # -> (N, 32, 10, 10)
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),                            # -> (N, 32, 5, 5)
            nn.Flatten(),                                           # -> (N, 800)
            nn.Linear(32 * 5 * 5, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, 10),
        )

    def forward(self, x):
        """Run ``x`` through the sequential stack and return the logits."""
        return self.model(x)
# 测试
if __name__ == '__main__':
    # Smoke test: push a dummy batch of 64 three-channel 32x32 images through
    # the network and print the output shape.
    leNet = LeNet()
    # Named dummy_batch rather than `input` to avoid shadowing the builtin.
    dummy_batch = torch.ones((64, 3, 32, 32))
    output = leNet(dummy_batch)
    print(output.shape)
train.py
import torch.optim
import torchvision.datasets
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from learning.lenet.model import LeNet
# 1. Datasets: CIFAR-10 images converted to tensors.
dataset_train = torchvision.datasets.CIFAR10(
    "./data", train=True, transform=torchvision.transforms.ToTensor(), download=True
)
# Evaluate on the held-out split (train=False); evaluating on the training
# split would report training accuracy as if it were test accuracy.
dataset_test = torchvision.datasets.CIFAR10(
    "./data", train=False, transform=torchvision.transforms.ToTensor(), download=True
)
train_data_size = len(dataset_train)
test_data_size = len(dataset_test)

# 2. Data loaders. The training loader reshuffles every epoch so SGD does not
# see the samples in the same fixed order each time.
dataloader_train = DataLoader(dataset_train, batch_size=64, shuffle=True)
dataloader_test = DataLoader(dataset_test, batch_size=64)

# 3. Model, placed on the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
leNet = LeNet().to(device)

# 4. Loss function.
loss_fn = nn.CrossEntropyLoss().to(device)

# 5. Optimizer: plain stochastic gradient descent.
learning_rate = 0.1
optimizer = torch.optim.SGD(leNet.parameters(), lr=learning_rate)

# 6. Training bookkeeping.
total_train_step = 0  # number of optimizer steps taken so far
total_test_step = 0   # number of evaluation passes completed so far
epoch = 5             # number of training epochs

# TensorBoard logging.
writer = SummaryWriter("../../logs")

for i in range(epoch):
    print(f"--------第{i+1}轮训练开始--------")

    # Training pass.
    leNet.train()
    for imgs, targets in dataloader_train:
        imgs = imgs.to(device)
        targets = targets.to(device)

        outputs = leNet(imgs)
        loss = loss_fn(outputs, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_train_step += 1
        # Log every 100 steps to keep console and TensorBoard output manageable.
        if total_train_step % 100 == 0:
            print(f"训练次数:{total_train_step}---loss:{loss.item()}")
            writer.add_scalar("train_loss", loss.item(), total_train_step)

    # Evaluation pass (no gradient tracking).
    leNet.eval()
    total_test_loss = 0  # sum of per-batch losses over the test loader
    total_accuracy = 0   # count of correctly classified test samples
    with torch.no_grad():
        for imgs, targets in dataloader_test:
            imgs = imgs.to(device)
            targets = targets.to(device)

            outputs = leNet(imgs)
            loss = loss_fn(outputs, targets)
            total_test_loss += loss.item()
            # .item() keeps the running count a plain Python int rather than
            # a (possibly CUDA) tensor, so it prints and logs as a number.
            total_accuracy += (outputs.argmax(1) == targets).sum().item()

    print(f"整体测试集上的loss:{total_test_loss}")
    print(f"整体测试集上的准确率:{total_accuracy/test_data_size}")
    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    writer.add_scalar("total_accuracy", total_accuracy/test_data_size, total_test_step)
    total_test_step += 1

    # Save the whole model object after every epoch.
    torch.save(leNet, f"leNet_{i+1}.pth")
    print("模式已保存")

writer.close()
5轮训练中,第5轮的准确率是最高的,采用第5轮的模型进行测试:
test.py
import torch
import torchvision.transforms
from PIL import Image
from learning.lenet.model import LeNet
# Image to classify.
image_path = "../../imgs/airplane.png"
image = Image.open(image_path)
image = image.convert('RGB')  # PNG files may carry an alpha channel; force three RGB channels
transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize((32, 32)),
    torchvision.transforms.ToTensor(),
])
image = transform(image)
print(image.shape)

# Load the saved model. map_location maps the stored tensors onto the CPU,
# which is needed when the checkpoint was saved from a GPU run.
# NOTE(review): this unpickles a whole model object, so only load files you
# trust. Newer PyTorch releases default torch.load to weights_only=True,
# which rejects whole-model pickles unless weights_only=False is passed;
# confirm against the installed version.
model_load = torch.load("leNet_5.pth", map_location=torch.device("cpu"))

# The transformed image has no batch dimension, but the model expects one.
image = torch.reshape(image, (1, 3, 32, 32))

model_load.eval()
with torch.no_grad():
    outputs = model_load(image)
print(outputs)

# Human-readable class names, presumably in CIFAR-10 label-index order
# (verify against the dataset's class_to_idx).
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
# Convert the predicted index to a Python int before indexing the tuple.
print(classes[outputs.argmax(1).item()])