AlexNet是由Alex Krizhevsky、Ilya Sutskever和Geoffrey Hinton在2012年ImageNet图像分类竞赛中提出的一种经典的卷积神经网络。当时,AlexNet在 ImageNet 大规模视觉识别竞赛中取得了优异的成绩,把深度学习模型在比赛中的正确率提升到一个前所未有的高度。因此,它的出现对深度学习发展具有里程碑式的意义。
基本结构
AlexNet输入为RGB三通道的224 × 224 × 3大小的图像(也可填充为227 × 227 × 3 )。AlexNet 共包含5 个卷积层(包含3个池化)和 3 个全连接层。其中,每个卷积层都包含卷积核、偏置项、ReLU激活函数和局部响应归一化(LRN)模块。第1、2、5个卷积层后面都跟着一个最大池化层,后三个层为全连接层。最终输出层为softmax,将网络输出转化为概率值,用于预测图像的类别。
由于ImageNet数据集太大,本文以MNIST数据集进行代替,修改网络参数,输入通道为1,输出结果为10个。
代码实现
model.py
import torch
from torch import nn
class AlexNet(nn.Module):
def __init__(self, *args, **kwargs) -> None:
super().__init__(*args, **kwargs)
self.model = nn.Sequential(
nn.Conv2d(1,96,kernel_size=11,stride=4,padding=1),nn.ReLU(),
nn.MaxPool2d(kernel_size=3,stride=2),
nn.Conv2d(96,256,kernel_size=5,padding=2),nn.ReLU(),
nn.MaxPool2d(kernel_size=3,stride=2),
nn.Conv2d(256,384,kernel_size=3,padding=1),nn.ReLU(),
nn.Conv2d(384,384,kernel_size=3,padding=1),nn.ReLU(),
nn.Conv2d(384,256,kernel_size=3,padding=1),nn.ReLU(),
nn.MaxPool2d(kernel_size=3,stride=2),
nn.Flatten(),
nn.Linear(6400,4096),nn.ReLU(),
nn.Dropout(p=0.5),
nn.Linear(4096,4096),nn.ReLU(),
nn.Dropout(p=0.5),
nn.Linear(4096,10)
)
def forward(self,x):
return self.model(x)
# 验证网络正确性
if __name__ == '__main__':
net = AlexNet()
my_input = torch.ones((64,1,28,28))
my_output = net(my_input)
print(my_output.shape)
train.py
import torch
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets
from torchvision.transforms import transforms
from model import AlexNet
# 扫描数据次数
epochs = 10
# 分组大小
batch = 64
# 学习率
learning_rate = 0.01
# 训练次数
train_step = 0
# 测试次数
test_step = 0
# 定义图像转换
transform = transforms.Compose([
transforms.Resize(224),
transforms.ToTensor()
])
# 读取数据
train_dataset = datasets.MNIST(root="./dataset",train=True,transform=transform,download=True)
test_dataset = datasets.MNIST(root="./dataset",train=False,transform=transform,download=True)
# 加载数据
train_dataloader = DataLoader(train_dataset,batch_size=batch,shuffle=True,num_workers=0)
test_dataloader = DataLoader(test_dataset,batch_size=batch,shuffle=True,num_workers=0)
# 数据大小
train_size = len(train_dataset)
test_size = len(test_dataset)
print("训练集大小:{}".format(train_size))
print("验证集大小:{}".format(test_size))
# GPU
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print(device)
# 创建网络
net = AlexNet()
net = net.to(device)
# 定义损失函数
loss = nn.CrossEntropyLoss()
loss = loss.to(device)
# 定义优化器
optimizer = torch.optim.SGD(net.parameters(),lr=learning_rate)
writer = SummaryWriter("logs")
# 训练
for epoch in range(epochs):
print("-------------------第 {} 轮训练开始-------------------".format(epoch))
net.train()
for data in train_dataloader:
train_step = train_step + 1
images,targets = data
images = images.to(device)
targets = targets.to(device)
outputs = net(images)
loss_out = loss(outputs,targets)
optimizer.zero_grad()
loss_out.backward()
optimizer.step()
if train_step%100==0:
writer.add_scalar("Train Loss",scalar_value=loss_out.item(),global_step=train_step)
print("训练次数:{},Loss:{}".format(train_step,loss_out.item()))
# 测试
net.eval()
total_loss = 0
total_accuracy = 0
with torch.no_grad():
for data in test_dataloader:
test_step = test_step + 1
images, targets = data
images = images.to(device)
targets = targets.to(device)
outputs = net(images)
loss_out = loss(outputs, targets)
total_loss = total_loss + loss_out
accuracy = (targets == torch.argmax(outputs,dim=1)).sum()
total_accuracy = total_accuracy + accuracy
# 计算精确率
print(total_accuracy)
accuracy_rate = total_accuracy / test_size
print("第 {} 轮,验证集总损失为:{}".format(epoch+1,total_loss))
print("第 {} 轮,精确率为:{}".format(epoch+1,accuracy_rate))
writer.add_scalar("Test Total Loss",scalar_value=total_loss,global_step=epoch+1)
writer.add_scalar("Accuracy Rate",scalar_value=accuracy_rate,global_step=epoch+1)
torch.save(net,"./model/net_{}.pth".format(epoch+1))
print("模型net_{}.pth已保存".format(epoch+1))