案例-图像分类
网络结构: 卷积+BN+激活+池化
数据集介绍
CIFAR-10数据集包含5万张训练图像和1万张测试图像, 共10个类别, 每个类别有6000张图像(训练集5000张、测试集1000张), 图像大小为32×32×3。下图列举了10个类, 每一类随机展示了10张图片:
特征图计算
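在卷积层和池化层结束后, 需要先将特征图展平成一行n列的数据, 再送入全连接层。为此要逐层计算特征图尺寸的变化: 输出尺寸 = ⌊(输入尺寸 + 2×padding − kernel_size) / stride⌋ + 1。全连接层的输入特征数等于最后一层卷积层经池化后特征图各维度的乘积(通道数×高×宽), 本例中为 256×2×2 = 1024。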
具体流程-# Acc: 0.728
# Imports
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchsummary import summary
from torchvision.datasets import CIFAR10
from torchvision.transforms import Compose, ToTensor  # Compose chains transforms into one callable

# Mini-batch size used by train(); rebound in the __main__ section below.
batch_size = 16

# Where the trained weights are written by train() and read back before evaluation.
MODEL_PATH = 'model/img_cls_model.pth'


# Dataset construction
def create_dataset():
    """Build the CIFAR-10 training and test datasets.

    Images are only converted to tensors (no augmentation). The data is
    looked up under ``data/`` and downloaded there when it is missing.

    Returns:
        A ``(train, test)`` tuple of ``CIFAR10`` datasets.
    """
    torch.manual_seed(21)
    transform = Compose([ToTensor()])
    # download=True only fetches the archive when it is not already on disk.
    train = CIFAR10(root='data', train=True, transform=transform, download=True)
    test = CIFAR10(root='data', train=False, transform=transform, download=True)
    return train, test


# Model definition
class ImgCls(nn.Module):
    """CNN classifier: five conv + BN + activation + max-pool stages, then an MLP head.

    For 3x32x32 inputs the spatial size evolves as
    32 -> 30 -> 15 -> 13 -> 12 -> 10 -> 9 -> 8 -> 4 -> 3 -> 2, so the flattened
    feature vector has 256 * 2 * 2 = 1024 entries, which is the input width of
    ``linear1``. Attribute names are kept stable because they are the keys of
    the saved ``state_dict``.
    """

    def __init__(self):
        super().__init__()
        # Convolutional stages (no padding, so every conv shrinks the map).
        self.conv1 = nn.Conv2d(3, 16, stride=1, kernel_size=3)
        self.batch_norm_layer1 = nn.BatchNorm2d(num_features=16)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv2 = nn.Conv2d(16, 32, stride=1, kernel_size=3)
        self.batch_norm_layer2 = nn.BatchNorm2d(num_features=32)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=1)

        self.conv3 = nn.Conv2d(32, 64, stride=1, kernel_size=3)
        self.batch_norm_layer3 = nn.BatchNorm2d(num_features=64)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=1)

        self.conv4 = nn.Conv2d(64, 128, stride=1, kernel_size=2)
        self.batch_norm_layer4 = nn.BatchNorm2d(num_features=128)
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv5 = nn.Conv2d(128, 256, stride=1, kernel_size=2)
        self.batch_norm_layer5 = nn.BatchNorm2d(num_features=256)
        self.pool5 = nn.MaxPool2d(kernel_size=2, stride=1)

        # Fully connected head.
        self.linear1 = nn.Linear(1024, 2048)
        self.linear2 = nn.Linear(2048, 1024)
        self.linear3 = nn.Linear(1024, 512)
        self.linear4 = nn.Linear(512, 256)
        self.linear5 = nn.Linear(256, 128)
        self.out = nn.Linear(128, 10)

    def forward(self, x):
        """Return the class logits for a batch of images.

        Args:
            x: Image batch; the layer sizes assume shape (batch, 3, 32, 32).

        Returns:
            Tensor of shape (batch, 10) with unnormalised class scores.
        """
        # NOTE(review): torch.rrelu is called without training=True, so it
        # presumably applies a fixed negative slope rather than a random one
        # -- confirm against the torch documentation before relying on it.
        stages = (
            (self.conv1, self.batch_norm_layer1, self.pool1),
            (self.conv2, self.batch_norm_layer2, self.pool2),
            (self.conv3, self.batch_norm_layer3, self.pool3),
            (self.conv4, self.batch_norm_layer4, self.pool4),
            (self.conv5, self.batch_norm_layer5, self.pool5),
        )
        # Each stage: convolution -> batch norm -> activation -> pooling.
        for conv, batch_norm, pool in stages:
            x = pool(torch.rrelu(batch_norm(conv(x))))

        # Flatten every sample's feature maps into one vector: (batch, n).
        x = x.reshape(x.size(0), -1)

        # Fully connected layers.
        for linear in (self.linear1, self.linear2, self.linear3,
                       self.linear4, self.linear5):
            x = torch.rrelu(linear(x))

        return self.out(x)


# Training
def train(model, train_dataset, epochs):
    """Train ``model`` with Adam and cross-entropy, then save its weights.

    The mini-batch size is read from the module-level ``batch_size``. Batches
    are moved to whatever device the model's parameters live on, so the
    function does not depend on a global ``device``.

    Args:
        model: Network to optimise in place.
        train_dataset: Dataset yielding ``(image, label)`` pairs.
        epochs: Number of passes over ``train_dataset``.
    """
    torch.manual_seed(21)
    device = next(model.parameters()).device
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    # One loader is enough: shuffle=True reshuffles on every new iteration.
    dataloader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)

    model.train()
    for epoch in range(epochs):
        loss_total = 0.0
        num_batches = 0
        start_time = time.time()
        for x, y in dataloader:
            output = model(x.to(device))
            loss_value = criterion(output, y.to(device))
            optimizer.zero_grad()
            loss_value.backward()
            optimizer.step()
            loss_total += loss_value.item()
            num_batches += 1
        # max(..., 1) keeps an empty dataset from raising ZeroDivisionError.
        print(f'epoch:{epoch + 1:4d}, loss:{loss_total / max(num_batches, 1):6.4f}, '
              f'time:{time.time() - start_time:.2f}s')

    # torch.save does not create missing parent directories.
    os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
    torch.save(model.state_dict(), MODEL_PATH)


# Evaluation
def test(valid_dataset, model, batch_size):
    """Print and return the classification accuracy of ``model``.

    The caller is expected to have switched the model to eval mode.

    Args:
        valid_dataset: Dataset yielding ``(image, label)`` pairs.
        model: Trained network.
        batch_size: Number of samples per evaluation batch.

    Returns:
        Fraction of correctly classified samples (0.0 for an empty dataset).
    """
    device = next(model.parameters()).device
    dataloader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

    total_correct = 0
    # Gradients are not needed for evaluation; skipping them saves memory.
    with torch.no_grad():
        for x, y in dataloader:
            output = model(x.to(device))
            y_pred = torch.argmax(output, dim=-1)
            total_correct += (y_pred == y.to(device)).sum().item()

    num_samples = len(valid_dataset)
    accuracy = total_correct / num_samples if num_samples else 0.0
    print(f'Acc: {accuracy}')
    return accuracy


if __name__ == '__main__':
    batch_size = 16
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load the datasets.
    train_data, test_data = create_dataset()

    # Optional dataset inspection (uncomment to use):
    # print(f'数据集类别: {train_data.class_to_idx}')
    # print(f'训练集: {train_data.data.shape}')
    # print(f'验证集: {test_data.data.shape}')
    # print(f'类别数量: {len(np.unique(train_data.targets))}')
    # plt.figure(figsize=(8, 8))
    # plt.imshow(train_data.data[0])
    # plt.title(train_data.classes[train_data.targets[0]])
    # plt.show()

    # Instantiate the model.
    model = ImgCls().to(device)

    # Print the network structure on the device actually in use, so the
    # script also runs on CPU-only machines.
    summary(model, (3, 32, 32), device=device.type, batch_size=batch_size)

    # Train the model.
    train(model, train_data, epochs=60)

    # Reload the trained weights; map_location keeps this working when the
    # checkpoint was written on a different device.
    model.load_state_dict(torch.load(MODEL_PATH, map_location=device, weights_only=True))
    model.eval()

    # Evaluate the model.
    test(test_data, model, batch_size=16)  # Acc: 0.728
调整网络结构
第一次调整: 训练50轮, Acc: 0.71
第二次调整: 训练30轮, Acc:0.7351
第三次调整: batch_size=8, epoch=50 => Acc: 0.7644
# Imports
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchsummary import summary
from torchvision.datasets import CIFAR10
from torchvision.transforms import Compose, ToTensor  # Compose chains transforms into one callable

# Mini-batch size used by train(); rebound in the __main__ section below.
batch_size = 16

# Where the trained weights are written by train() and read back before evaluation.
MODEL_PATH = 'model/img_cls_model1.pth'


# Dataset construction
def create_dataset():
    """Build the CIFAR-10 training and test datasets.

    Images are only converted to tensors (no augmentation). The data is
    looked up under ``data/`` and downloaded there when it is missing.

    Returns:
        A ``(train, test)`` tuple of ``CIFAR10`` datasets.
    """
    torch.manual_seed(21)
    transform = Compose([ToTensor()])
    # download=True only fetches the archive when it is not already on disk.
    train = CIFAR10(root='data', train=True, transform=transform, download=True)
    test = CIFAR10(root='data', train=False, transform=transform, download=True)
    return train, test


# Model definition
class ImgCls(nn.Module):
    """CNN classifier: five conv + BN + activation + max-pool stages, then an MLP head.

    The first four convolutions use padding=1 and keep the spatial size. For
    3x32x32 inputs the size evolves as
    32 -> 32 -> 16 -> 16 -> 8 -> 8 -> 7 -> 7 -> 6 -> 4 -> 2, so the flattened
    feature vector has 256 * 2 * 2 = 1024 entries, which is the input width of
    ``linear1``. Attribute names are kept stable because they are the keys of
    the saved ``state_dict``.
    """

    def __init__(self):
        super().__init__()
        # Convolutional stages.
        self.conv1 = nn.Conv2d(3, 16, stride=1, kernel_size=3, padding=1)
        self.batch_norm_layer1 = nn.BatchNorm2d(num_features=16)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv2 = nn.Conv2d(16, 32, stride=1, kernel_size=3, padding=1)
        self.batch_norm_layer2 = nn.BatchNorm2d(num_features=32)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv3 = nn.Conv2d(32, 64, stride=1, kernel_size=3, padding=1)
        self.batch_norm_layer3 = nn.BatchNorm2d(num_features=64)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=1)

        self.conv4 = nn.Conv2d(64, 128, stride=1, kernel_size=3, padding=1)
        self.batch_norm_layer4 = nn.BatchNorm2d(num_features=128)
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=1)

        self.conv5 = nn.Conv2d(128, 256, stride=1, kernel_size=3)
        self.batch_norm_layer5 = nn.BatchNorm2d(num_features=256)
        self.pool5 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Fully connected head.
        self.linear1 = nn.Linear(1024, 2048)
        self.linear2 = nn.Linear(2048, 1024)
        self.linear3 = nn.Linear(1024, 512)
        self.linear4 = nn.Linear(512, 256)
        self.linear5 = nn.Linear(256, 128)
        self.out = nn.Linear(128, 10)

    def forward(self, x):
        """Return the class logits for a batch of images.

        Args:
            x: Image batch; the layer sizes assume shape (batch, 3, 32, 32).

        Returns:
            Tensor of shape (batch, 10) with unnormalised class scores.
        """
        # Stages 1-4: convolution -> batch norm -> ReLU -> pooling.
        relu_stages = (
            (self.conv1, self.batch_norm_layer1, self.pool1),
            (self.conv2, self.batch_norm_layer2, self.pool2),
            (self.conv3, self.batch_norm_layer3, self.pool3),
            (self.conv4, self.batch_norm_layer4, self.pool4),
        )
        for conv, batch_norm, pool in relu_stages:
            x = pool(torch.relu(batch_norm(conv(x))))

        # NOTE(review): stage 5 and linear5 still use rrelu while everything
        # else uses relu. Kept as-is because the reported accuracy was
        # obtained with this mix -- confirm whether it is intentional.
        x = self.pool5(torch.rrelu(self.batch_norm_layer5(self.conv5(x))))

        # Flatten every sample's feature maps into one vector: (batch, n).
        x = x.reshape(x.size(0), -1)

        # Fully connected layers.
        for linear in (self.linear1, self.linear2, self.linear3, self.linear4):
            x = torch.relu(linear(x))
        x = torch.rrelu(self.linear5(x))

        return self.out(x)


# Training
def train(model, train_dataset, epochs):
    """Train ``model`` with Adam and cross-entropy, then save its weights.

    The mini-batch size is read from the module-level ``batch_size``. Batches
    are moved to whatever device the model's parameters live on, so the
    function does not depend on a global ``device``.

    Args:
        model: Network to optimise in place.
        train_dataset: Dataset yielding ``(image, label)`` pairs.
        epochs: Number of passes over ``train_dataset``.
    """
    torch.manual_seed(21)
    device = next(model.parameters()).device
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    # One loader is enough: shuffle=True reshuffles on every new iteration.
    dataloader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)

    model.train()
    for epoch in range(epochs):
        loss_total = 0.0
        num_batches = 0
        start_time = time.time()
        for x, y in dataloader:
            output = model(x.to(device))
            loss_value = criterion(output, y.to(device))
            optimizer.zero_grad()
            loss_value.backward()
            optimizer.step()
            loss_total += loss_value.item()
            num_batches += 1
        # max(..., 1) keeps an empty dataset from raising ZeroDivisionError.
        print(f'epoch:{epoch + 1:4d}, loss:{loss_total / max(num_batches, 1):6.4f}, '
              f'time:{time.time() - start_time:.2f}s')

    # torch.save does not create missing parent directories.
    os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
    torch.save(model.state_dict(), MODEL_PATH)


# Evaluation
def test(valid_dataset, model, batch_size):
    """Print and return the classification accuracy of ``model``.

    The caller is expected to have switched the model to eval mode.

    Args:
        valid_dataset: Dataset yielding ``(image, label)`` pairs.
        model: Trained network.
        batch_size: Number of samples per evaluation batch.

    Returns:
        Fraction of correctly classified samples (0.0 for an empty dataset).
    """
    device = next(model.parameters()).device
    dataloader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

    total_correct = 0
    # Gradients are not needed for evaluation; skipping them saves memory.
    with torch.no_grad():
        for x, y in dataloader:
            output = model(x.to(device))
            y_pred = torch.argmax(output, dim=-1)
            total_correct += (y_pred == y.to(device)).sum().item()

    num_samples = len(valid_dataset)
    accuracy = total_correct / num_samples if num_samples else 0.0
    print(f'Acc: {accuracy}')
    return accuracy


if __name__ == '__main__':
    batch_size = 8
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load the datasets.
    train_data, test_data = create_dataset()

    # Optional dataset inspection (uncomment to use):
    # print(f'数据集类别: {train_data.class_to_idx}')
    # print(f'训练集: {train_data.data.shape}')
    # print(f'验证集: {test_data.data.shape}')
    # print(f'类别数量: {len(np.unique(train_data.targets))}')
    # plt.figure(figsize=(8, 8))
    # plt.imshow(train_data.data[0])
    # plt.title(train_data.classes[train_data.targets[0]])
    # plt.show()

    # Instantiate the model.
    model = ImgCls().to(device)

    # Print the network structure on the device actually in use, so the
    # script also runs on CPU-only machines.
    summary(model, (3, 32, 32), device=device.type, batch_size=batch_size)

    # Train the model.
    train(model, train_data, epochs=50)

    # Reload the trained weights; map_location keeps this working when the
    # checkpoint was written on a different device.
    model.load_state_dict(torch.load(MODEL_PATH, map_location=device, weights_only=True))
    model.eval()

    # Evaluate the model.
    test(test_data, model, batch_size=16)  # Acc: 0.7644