基于卷积神经网络实现手写数字识别
基于卷积神经网络实现手写数字识别。具体过程如下:
(1) 定义ConvNet结构类及其前向传播方式。
(2) 设置超参数以及导入相关的包。
(3) 定义训练网络函数和绘图函数,并在main函数中完成调用过程。
程序
import os
import numpy as np
#from sklearn.datasets import fetch_openml # 引入openml数据源
from matplotlib import pyplot as plt # 引入绘图工具
import torch
from torchvision.datasets import mnist
#from mnist_models import AlexNet, ConvNet
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.autograd import Variable
# Directory containing this script. abspath() keeps the value absolute even
# when the script is launched through a relative path, so paths derived from
# it do not depend on the current working directory.
BASE_PATH = os.path.dirname(os.path.abspath(__file__))
# Model hyper-parameters
EPOCHS = 50  # number of passes over the training set
SAVE_PATH = './models'  # checkpoint directory, joined onto BASE_PATH when saving
# Disabled code: an earlier `load_mnist` helper kept inside a bare string
# literal, so it is evaluated as a string expression and discarded at import.
# The text inside calls `fetch_openml`, whose sklearn import is commented out
# at the top of the file, and previews 25 samples as 28x28 grayscale images
# with matplotlib before returning the data and target columns.
'''
# 载入MNIST数据集并显示部分样本
def load_mnist():
# 从openml源载入MNIST数据集
mnist = fetch_openml('mnist_784', version=1, data_home=os.path.join(BASE_PATH, './dataset'))
X, y = mnist['data'], mnist['target']
#X = mnist['data']#.astype(np.float32)
#y = mnist['target']#.astype(np.int32)
print('MNIST数据集大小:{}'.format(X.shape))
# 显示其中25张样本图片
for i in range(25):
#print(i)
digit = X.iloc[i * 2500]
# 将图片恢复到28*28大小
digit_image = digit.values.reshape(28, 28)
# 绘制图片
plt.subplot(5, 5, i + 1)
# 隐藏坐标轴
plt.axis('off')
# 按灰度图绘制图片
plt.imshow(digit_image, cmap='gray')
# 显示图片
plt.show()
return X, y
'''
# Small convolutional network
class ConvNet(torch.nn.Module):
    """Two conv stages followed by three fully connected layers.

    Each conv stage is Conv2d(kernel 5, stride 1, padding 1) -> MaxPool2d(2)
    -> ReLU -> BatchNorm2d. For a 1x28x28 input the stages produce 10x13x13
    and then 20x5x5 feature maps, i.e. 500 features per sample, which is the
    input width of ``fc1``. The final layer emits 10 raw scores (no softmax).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Sequential(
            torch.nn.Conv2d(1, 10, kernel_size=5, stride=1, padding=1),
            torch.nn.MaxPool2d(2),
            torch.nn.ReLU(),
            torch.nn.BatchNorm2d(10),
        )
        self.conv2 = torch.nn.Sequential(
            torch.nn.Conv2d(10, 20, kernel_size=5, stride=1, padding=1),
            torch.nn.MaxPool2d(2),
            torch.nn.ReLU(),
            torch.nn.BatchNorm2d(20),
        )
        self.fc1 = torch.nn.Sequential(
            torch.nn.Linear(500, 60),
            torch.nn.Dropout(0.5),
            torch.nn.ReLU(),
        )
        self.fc2 = torch.nn.Sequential(
            torch.nn.Linear(60, 20),
            torch.nn.Dropout(0.5),
            torch.nn.ReLU(),
        )
        self.fc3 = torch.nn.Linear(20, 10)

    # Forward pass
    def forward(self, x):
        """Return the class scores for a batch of single-channel images."""
        x = self.conv1(x)
        x = self.conv2(x)
        # Flatten per sample instead of view(-1, 500): an input with the wrong
        # spatial size then fails in fc1 rather than being silently reshaped
        # into a different batch size.
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x
# AlexNet-style network
class AlexNet(torch.nn.Module):
    """AlexNet-style classifier for single-channel images.

    ``features`` stacks five convolutions with three max-pool layers; for a
    1x28x28 input it yields a 256x6x6 feature map, which is the input width
    hard-wired into the first linear layer of ``classifier``.

    Args:
        num_classes: Number of output scores produced by the last layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.features = torch.nn.Sequential(
            torch.nn.Conv2d(1, 64, kernel_size=5, stride=1, padding=2),
            torch.nn.ReLU(inplace=True),
            torch.nn.MaxPool2d(kernel_size=3, stride=1),
            torch.nn.Conv2d(64, 192, kernel_size=3, padding=2),
            torch.nn.ReLU(inplace=True),
            torch.nn.MaxPool2d(kernel_size=3, stride=2),
            torch.nn.Conv2d(192, 384, kernel_size=3, padding=1),
            torch.nn.ReLU(inplace=True),
            torch.nn.Conv2d(384, 256, kernel_size=3, padding=1),
            torch.nn.ReLU(inplace=True),
            torch.nn.Conv2d(256, 256, kernel_size=3, padding=1),
            torch.nn.ReLU(inplace=True),
            torch.nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.classifier = torch.nn.Sequential(
            torch.nn.Dropout(),
            torch.nn.Linear(256 * 6 * 6, 4096),
            torch.nn.ReLU(inplace=True),
            torch.nn.Dropout(),
            torch.nn.Linear(4096, 4096),
            torch.nn.ReLU(inplace=True),
            torch.nn.Linear(4096, num_classes),
        )

    # AlexNet forward pass
    def forward(self, x):
        """Return the raw class scores for a batch of images."""
        x = self.features(x)
        # One row per sample; a feature map that is not 256x6x6 is rejected
        # by the first linear layer.
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x
# Run one pass over a data loader
def _run_epoch(net, batches, criterion, optimizer=None):
    """Return (mean loss, accuracy) over ``batches``; train if ``optimizer`` is given.

    Both values are weighted per sample, so a shorter final batch does not
    skew the result.

    Raises:
        ValueError: If ``batches`` yields no samples.
    """
    total_loss = 0.0
    total_correct = 0
    total_samples = 0
    for image, label in batches:
        # Forward pass
        out = net(image)
        loss = criterion(out, label)
        if optimizer is not None:
            # Backward pass and parameter update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        batch_size = label.shape[0]
        # CrossEntropyLoss averages over the batch by default; scale back to
        # a per-batch sum so the epoch mean is per sample. .item() stores a
        # plain float instead of a tensor.
        total_loss += loss.item() * batch_size
        total_correct += (out.argmax(dim=1) == label).sum().item()
        total_samples += batch_size
    if total_samples == 0:
        raise ValueError('data loader yielded no samples')
    return total_loss / total_samples, total_correct / total_samples


# Train the network
def train_net(net, train_data, test_data, *, epochs=None, save_dir=None):
    """Train ``net`` with SGD and evaluate it on ``test_data`` after every epoch.

    Uses cross-entropy loss and SGD with a learning rate of 1e-2. After each
    epoch the metrics are printed and the model's ``state_dict`` is saved as
    ``Alex_model_epoch<e>.pkl`` in ``save_dir``. The file name prefix is kept
    as-is regardless of which network class is trained.

    Args:
        net: The ``torch.nn.Module`` to train (modified in place).
        train_data: Iterable of ``(image, label)`` batches used for training.
        test_data: Iterable of ``(image, label)`` batches used for evaluation.
        epochs: Number of epochs; defaults to the module-level ``EPOCHS``.
        save_dir: Checkpoint directory; defaults to ``BASE_PATH``/``SAVE_PATH``.
            Created if it does not exist.

    Returns:
        ``(eval_losses, eval_acces)``: two lists of Python floats with one
        entry per epoch, holding the test-set mean loss and accuracy.

    Raises:
        ValueError: If either data iterable yields no samples.
    """
    if epochs is None:
        epochs = EPOCHS
    if save_dir is None:
        save_dir = os.path.join(BASE_PATH, SAVE_PATH)
    # torch.save does not create missing directories.
    os.makedirs(save_dir, exist_ok=True)

    # Cross-entropy loss
    criterion = torch.nn.CrossEntropyLoss()
    # SGD optimizer with an initial learning rate of 1e-2
    optimizer = torch.optim.SGD(net.parameters(), 1e-2)

    # Test-set loss and accuracy per epoch
    eval_losses = []
    eval_acces = []
    for e in range(epochs):
        # Training mode
        net.train()
        train_loss_rate, train_acc_rate = _run_epoch(net, train_data, criterion, optimizer)

        # Evaluation mode; no_grad avoids building the autograd graph.
        net.eval()
        with torch.no_grad():
            eval_loss_rate, eval_acc_rate = _run_epoch(net, test_data, criterion)
        eval_losses.append(eval_loss_rate)
        eval_acces.append(eval_acc_rate)

        print('epoch:{}, Train Loss: {:.6f}, Train Acc:{:.6f}, Eval Loss:{:.6f}, Eval Acc:{:.6f}'.format(e, train_loss_rate, train_acc_rate, eval_loss_rate, eval_acc_rate))
        torch.save(net.state_dict(), os.path.join(save_dir, 'Alex_model_epoch' + str(e) + '.pkl'))
    return eval_losses, eval_acces
def draw_result(eval_losses, eval_acces):
    """Plot per-epoch test loss and accuracy on a shared x axis.

    Loss is drawn in red against the left y axis and accuracy in blue against
    the right y axis; the figure is shown with ``plt.show()``.

    Args:
        eval_losses: Per-epoch loss values (anything ``float()`` accepts).
        eval_acces: Per-epoch accuracy values, same length as ``eval_losses``.
    """
    # Plain floats plot reliably even if the caller passes 0-dim tensors or
    # numpy scalars.
    losses = [float(v) for v in eval_losses]
    acces = [float(v) for v in eval_acces]
    # Epoch numbers follow the data, not the global EPOCHS setting.
    x = range(1, len(losses) + 1)

    _, left_axis = plt.subplots()
    left_axis.plot(x, losses, 'ro-')
    right_axis = left_axis.twinx()
    right_axis.plot(x, acces, 'bo-')
    plt.xticks(x, rotation=0)

    # Range and tick spacing of the left and right y axes
    left_axis.set_ylim(0, 0.5)
    left_axis.set_yticks(np.arange(0, 0.5, 0.1))
    right_axis.set_ylim(0.9, 1.01)
    right_axis.set_yticks(np.arange(0.9, 1.01, 0.02))

    # Axis labels and colors; the x axis counts epochs (was labelled 'Labels').
    left_axis.set_xlabel('Epoch')
    left_axis.set_ylabel('Loss', color='r')
    left_axis.tick_params(axis='y', colors='r')
    right_axis.set_ylabel('Accuracy', color='b')
    right_axis.tick_params(axis='y', colors='b')
    plt.show()
# Script entry point: download MNIST, train ConvNet, then plot the test-set
# loss and accuracy curves.
if __name__ == '__main__':
    # x, y = load_mnist()  # disabled together with load_mnist itself
    print("基于卷积神经网络实现手写数字识别")
    # Samples must be converted to tensors before batching. The original
    # marked this note with `//`, which Python parses as floor division.
    to_tensor = transforms.ToTensor()
    train_set = mnist.MNIST('./data', train=True, download=True, transform=to_tensor)
    test_set = mnist.MNIST('./data', train=False, download=True, transform=to_tensor)
    train_data = DataLoader(train_set, batch_size=64, shuffle=True)
    test_data = DataLoader(test_set, batch_size=64, shuffle=False)
    # net = AlexNet()
    net = ConvNet()
    eval_losses, eval_acces = train_net(net, train_data, test_data)
    draw_result(eval_losses, eval_acces)
结果: