Table of Contents
- Abstract
- 1 Complete Model Training and Validation Routines
- 1.1 Model and Training Code
- 1.2 Training the Model on the GPU
- 1.3 Complete Model Validation (Testing) Routine
- 2 MNIST Handwritten Digit Recognition with a CNN
- 2.1 Building the Network Model
- 2.2 Testing the Trained Model
- Summary
Abstract
By working through the training and validation routines of a CNN model, I gained a basic understanding of how models are trained and validated, and then put that understanding into practice by using a CNN to recognize MNIST handwritten digits.
1 Complete Model Training and Validation Routines
Complete model training routine (using the CIFAR10 dataset as an example).
1.1 Model and Training Code
model.py
import torch
import torch.nn as nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential

# Network model
class model(nn.Module):
    def __init__(self):
        super(model, self).__init__()
        self.m = Sequential(
            Conv2d(in_channels=3, out_channels=32, kernel_size=5, stride=1, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, 1, 2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, 1, 2),
            MaxPool2d(2),
            Flatten(),
            Linear(64*4*4, 64),
            Linear(64, 10)
        )

    def forward(self, x):
        return self.m(x)

# Quick sanity check when this module is run directly
if __name__ == '__main__':
    m = model()
    input = torch.ones([64, 3, 32, 32])
    output = m(input)
    print(output.shape)  # torch.Size([64, 10])
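The first Linear layer's in_features of 64*4*4 follows from the layer shapes: each Conv2d with kernel_size=5, stride=1, padding=2 preserves the spatial size, and each MaxPool2d(2) halves it, so a 32×32 input shrinks to 16×16, 8×8 and finally 4×4 with 64 channels, i.e. 64*4*4 = 1024 features after Flatten. A minimal standalone sketch to confirm this, assuming the model.py above is importable:

import torch
from model import model  # the CIFAR10 model defined above

m = model()
x = torch.ones([1, 3, 32, 32])
# Print the output shape after every layer of the Sequential
for layer in m.m:
    x = layer(x)
    print(layer.__class__.__name__, tuple(x.shape))
# The Flatten line shows (1, 1024), i.e. 64*4*4 features entering the first Linear layer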
Supplement:
- How argmax() works

import torch

output = torch.tensor([[0.1, 0.5],
                       [0.2, 0.4]])
# dim=1: index of the largest value along each row
print(output.argmax(dim=1))  # tensor([1, 1])
# dim=0: index of the largest value along each column
print(output.argmax(dim=0))  # tensor([1, 0])
train.py
import torch
import torchvision
from torch.utils.data import DataLoader
from torch.nn import CrossEntropyLoss
from torch.utils.tensorboard import SummaryWriter
# Import the network model
from model import *

# Training dataset
train_data = torchvision.datasets.CIFAR10("dataset2", train=True, transform=torchvision.transforms.ToTensor(), download=True)
# Test dataset
test_data = torchvision.datasets.CIFAR10("dataset2", train=False, transform=torchvision.transforms.ToTensor(), download=True)

# Dataset sizes
train_data_size = len(train_data)
test_data_size = len(test_data)
print("Training set size: {}".format(train_data_size))  # 50000
print("Test set size: {}".format(test_data_size))  # 10000

# Load the datasets with DataLoader
train_Dataloader = DataLoader(train_data, batch_size=64)
test_Dataloader = DataLoader(test_data, batch_size=64)

# Build the network model
mm = model()

# Loss function
loss_fn = CrossEntropyLoss()

# Optimizer
# Learning rate: learning_rate = 0.01
# 1e-2 = 1 × 10^(-2) = 1/100 = 0.01
learning_rate = 1e-2
optimizer = torch.optim.SGD(mm.parameters(), lr=learning_rate)

# Some bookkeeping for training
# Number of training steps so far
total_train_step = 0
# Number of test rounds so far
total_test_step = 0
# Number of training epochs
epoch = 20

# Add TensorBoard
writer = SummaryWriter("logs_train")

# Training loop
for i in range(epoch):
    print("-------- Epoch {} --------".format(i + 1))

    # Training phase
    mm.train()
    for data in train_Dataloader:
        imgs, target = data
        outputs = mm(imgs)
        loss = loss_fn(outputs, target)
        # Optimize the model
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_train_step = total_train_step + 1
        # Log every 100 training steps
        if total_train_step % 100 == 0:
            print("Training step: {}, Loss: {}".format(total_train_step, loss.item()))
            writer.add_scalar("train_loss", loss.item(), global_step=total_train_step)

    # Evaluation phase
    mm.eval()
    # Accumulated loss over the whole test set
    total_test_loss = 0
    # Number of correct predictions over the whole test set
    total_accuracy = 0
    with torch.no_grad():
        for data in test_Dataloader:
            imgs, target = data
            outputs = mm(imgs)
            loss = loss_fn(outputs, target)
            total_test_loss = total_test_loss + loss.item()
            accuracy = (outputs.argmax(1) == target).sum()
            total_accuracy = total_accuracy + accuracy

    print("Loss on the whole test set: {}".format(total_test_loss))
    print("Accuracy on the whole test set: {}".format(total_accuracy / test_data_size))
    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    writer.add_scalar("test_accuracy", total_accuracy / test_data_size, total_test_step)
    total_test_step = total_test_step + 1

    torch.save(mm, "model_{}.pth".format(i))
    print("Model saved")

writer.close()
Note: in train.py, model.train() is called before the training phase and model.eval() before the testing phase. These calls only change behavior when the model contains layers such as Dropout or BatchNorm.
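The effect of train()/eval() is easy to see with a Dropout layer in isolation; a minimal sketch (not part of the original scripts):

import torch
from torch import nn

drop = nn.Dropout(p=0.5)
x = torch.ones(1, 8)

drop.train()
print(drop(x))  # roughly half the entries are zeroed, the rest are scaled by 1/(1-p) = 2

drop.eval()
print(drop(x))  # identity: dropout is disabled at evaluation time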
1.2 Training the Model on the GPU
Method 1: xx = xx.cuda()

if torch.cuda.is_available():
    # Move the network model to the GPU
    module = module.cuda()
if torch.cuda.is_available():
    # Move the loss function to the GPU
    loss_func = loss_func.cuda()
# Move the training data to the GPU
if torch.cuda.is_available():
    imgs = imgs.cuda()
    targets = targets.cuda()
# Move the test data to the GPU
if torch.cuda.is_available():
    imgs = imgs.cuda()
    targets = targets.cuda()
Method 2: xx = xx.to(device)

# Define the training device
# device = torch.device("cuda:0")
# device = torch.device("cuda:1")
device = torch.device("cuda")
# Move the network model to the GPU
module = module.to(device)
# Move the loss function to the GPU
loss_func = loss_func.to(device)
# Move the training data to the GPU
imgs = imgs.to(device)
targets = targets.to(device)
# Move the test data to the GPU
imgs = imgs.to(device)
targets = targets.to(device)
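A common variant of the .to(device) approach (a minimal sketch, not taken from the original snippets) is to choose the device once with a CPU fallback, so the same script runs whether or not a GPU is available:

import torch
from model import model  # the CIFAR10 model defined in model.py above

# Pick the device once, falling back to the CPU when no GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

m = model().to(device)
imgs = torch.ones([64, 3, 32, 32]).to(device)
print(m(imgs).shape)                # torch.Size([64, 10])
print(next(m.parameters()).device)  # cuda:0 or cpu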
1.3 Complete Model Validation (Testing) Routine
Take an already trained model and feed it new input.
# Test the trained model
import torch
import torch.nn as nn
import torchvision
from PIL import Image
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential

# Read the test image
img_path = "./images/horse.png"
img = Image.open(img_path)
print(img)  # <PIL.PngImagePlugin.PngImageFile image mode=RGBA size=500x299 at 0x1DB5DC6E880>

transform = torchvision.transforms.Compose([torchvision.transforms.Resize((32, 32)),
                                            torchvision.transforms.ToTensor()])
# Convert the image to three channels (RGB)
img = img.convert('RGB')
img = transform(img)
print(img.shape)  # torch.Size([3, 32, 32])

# The network definition must be available when loading the full saved model
class model(nn.Module):
    def __init__(self):
        super(model, self).__init__()
        self.m = Sequential(
            Conv2d(in_channels=3, out_channels=32, kernel_size=5, stride=1, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, 1, 2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, 1, 2),
            MaxPool2d(2),
            Flatten(),
            Linear(64*4*4, 64),
            Linear(64, 10)
        )

    def forward(self, x):
        return self.m(x)

# Load the trained model
m = torch.load("model_19.pth", map_location=torch.device('cpu'))
print(m)

img = torch.reshape(img, (1, 3, 32, 32))

# Run inference
m.eval()
# Disable gradient tracking; no backpropagation is needed for inference
with torch.no_grad():
    result = m(img)
print(result)
print(result.argmax(1).item())
Test image:
In the output array, the value at index 7 is the largest, and index 7 corresponds to the horse class in CIFAR10.
# Load a model that was trained and saved on the GPU for use on the CPU
model = torch.load("XXXX.pth", map_location=torch.device("cpu"))
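The scripts above save and load the entire model object. An alternative that is often recommended (a hedged sketch, not what the original code does; the file name model_19_weights.pth is hypothetical) is to save only the parameters via state_dict and load them into a freshly built model:

import torch
from model import model  # the network class must still be importable when loading

# Saving (inside train.py): torch.save(mm.state_dict(), "model_19_weights.pth")

# Loading on any device
m = model()
m.load_state_dict(torch.load("model_19_weights.pth", map_location=torch.device("cpu")))
m.eval()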
2 MNIST Handwritten Digit Recognition with a CNN
The MNIST handwritten digit dataset originates from the National Institute of Standards and Technology (NIST) in the United States and is one of the best-known public datasets. The digit images were handwritten by 250 people from different occupations.
The MNIST dataset contains 70,000 images in total: 60,000 for training and 10,000 for testing.
2.1 Building the Network Model
CNN model architecture (network structure diagram borrowed from the reference article).
model.py
import torch.nn as nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential

class model(nn.Module):
    def __init__(self):
        super(model, self).__init__()
        self.m = Sequential(
            Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=0),
            MaxPool2d(2),
            Conv2d(32, 64, kernel_size=3, stride=1, padding=0),
            MaxPool2d(2),
            Flatten(),
            Linear(64*5*5, 64),
            Linear(64, 10)
        )

    def forward(self, input):
        return self.m(input)

# Quick sanity check when this module is run directly
if __name__ == '__main__':
    m = model()
    print(m)
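For a 28×28 MNIST input, the flattened feature count is 64*5*5: the first Conv2d (kernel 3, no padding) gives 26×26, pooling gives 13×13, the second Conv2d gives 11×11, and pooling (with floor division) gives 5×5 with 64 channels. A quick standalone check, assuming the model.py above is importable:

import torch
from model import model  # the MNIST model defined above

m = model()
x = torch.ones([1, 1, 28, 28])
flattened = m.m[:5](x)  # Conv2d, MaxPool2d, Conv2d, MaxPool2d, Flatten
print(flattened.shape)  # torch.Size([1, 1600]), i.e. 64*5*5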
train.py: training the network model
import torch
import torchvision
from torch.utils.data import DataLoader
from torch.nn import CrossEntropyLoss
from torch.utils.tensorboard import SummaryWriter
from model import *

transforms = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.1307,), (0.3081,))  # normalize with the standard MNIST mean and std
])

# Training dataset
train_datas = torchvision.datasets.MNIST(root="dataset", train=True, transform=transforms, download=True)
# Test dataset
test_datas = torchvision.datasets.MNIST(root="dataset", train=False, transform=transforms, download=True)

'''
img, target = train_datas[0]
print(img.shape)  # torch.Size([1, 28, 28])
'''

# Dataset sizes
train_datas_size = len(train_datas)
test_datas_size = len(test_datas)
print("Training set size: {}".format(train_datas_size))  # 60000
print("Test set size: {}".format(test_datas_size))  # 10000

# Load the datasets with DataLoader
train_DataLoader = DataLoader(train_datas, batch_size=64)
test_DataLoader = DataLoader(test_datas, batch_size=64)

# Build the network model
m = model()
if torch.cuda.is_available():
    m = m.cuda()

# Loss function
loss_fn = CrossEntropyLoss()
if torch.cuda.is_available():
    loss_fn = loss_fn.cuda()

# Optimizer
# Learning rate: learning_rate = 0.001
learning_rate = 1e-3
optimizer = torch.optim.SGD(m.parameters(), lr=learning_rate)

# Some bookkeeping for training
# Number of training steps so far
total_train_step = 0
# Number of test rounds so far
total_test_step = 0
# Number of training epochs
epoch = 60

# Add TensorBoard
writer = SummaryWriter("logs_train")

# Training loop
for i in range(epoch):
    print("------- Epoch {} -------".format(i + 1))

    # Training phase
    m.train()
    for data in train_DataLoader:
        img, target = data
        if torch.cuda.is_available():
            img = img.cuda()
            target = target.cuda()
        output = m(img)
        loss = loss_fn(output, target)
        # Optimize the model
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_train_step = total_train_step + 1
        # Log every 100 training steps
        if total_train_step % 100 == 0:
            print("Training step: {}, Loss: {}".format(total_train_step, loss.item()))
            writer.add_scalar("train_loss", loss.item(), global_step=total_train_step)

    # Evaluation phase
    m.eval()
    # Accumulated loss over the whole test set
    total_test_loss = 0
    # Number of correct predictions over the whole test set
    total_accuracy = 0
    with torch.no_grad():
        for data in test_DataLoader:
            img, target = data
            if torch.cuda.is_available():
                img = img.cuda()
                target = target.cuda()
            output = m(img)
            loss = loss_fn(output, target)
            total_test_loss = total_test_loss + loss.item()
            accuracy = (output.argmax(1) == target).sum()
            total_accuracy = total_accuracy + accuracy

    print("Loss on the whole test set: {}".format(total_test_loss))
    print("Accuracy on the whole test set: {}".format(total_accuracy / test_datas_size))
    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    writer.add_scalar("test_accuracy", total_accuracy / test_datas_size, total_test_step)
    total_test_step = total_test_step + 1

    torch.save(m, "model_{}.pth".format(i))
    print("Model saved")

writer.close()
2.2 Testing the Trained Model
import torch
import torchvision
from PIL import Image
from model import *

imgPath = "images/test.png"
image = Image.open(imgPath)
print(image)
print(image.mode)  # RGBA, four channels

transform = torchvision.transforms.ToTensor()
image = image.convert('RGB')
print(image.mode)  # RGB, three channels

# Channel conversion
def change_image_channels(image):
    # Split a 3-channel RGB image into three single-channel images
    if image.mode == 'RGB':
        r, g, b = image.split()
        return r, g, b

image1, image2, image3 = change_image_channels(image)
print(image1)  # <PIL.Image.Image image mode=L size=28x28 at 0x1EC38C14DF0>
print(image2)  # <PIL.Image.Image image mode=L size=28x28 at 0x1EC38B76B80>
print(image3)  # <PIL.Image.Image image mode=L size=28x28 at 0x1EC478718E0>
# image1.show()

image1 = transform(image1)
print(image1.shape)  # torch.Size([1, 28, 28])
image1 = torch.reshape(image1, [-1, 1, 28, 28])
print(image1.shape)  # torch.Size([1, 1, 28, 28])

model = torch.load("model_59.pth", map_location=torch.device('cpu'))
model.eval()
with torch.no_grad():
    output = model(image1)
print(output)
print(output.argmax().item())
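Instead of converting to RGB and keeping only one of the split channels, the image can also be converted directly to a single-channel grayscale image; a minimal alternative sketch (the path images/test.png is the same assumption as above):

import torch
import torchvision
from PIL import Image

# Convert straight to single-channel grayscale ('L' mode) instead of splitting RGB
image = Image.open("images/test.png").convert('L')
image = torchvision.transforms.ToTensor()(image)  # shape: [1, H, W]
image = torch.reshape(image, [-1, 1, 28, 28])     # assumes the image is already 28x28
print(image.shape)  # torch.Size([1, 1, 28, 28])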
Summary
Working through the basic training and validation routines gave me a basic grasp of how this code is used, but I still need more practice with it.