文章目录

前言
一、卷积神经网络CNN
二、Tensorflow1.x
- 1.加载数据集
- 2.数据处理
- 3.定义模型
- 4.训练模型
- 5.结果可视化
二、Tensorflow2.x
- 1.加载数据集
- 2.数据处理
- 3.定义模型
- 4.训练模型
- 5.结果可视化
总结

前言

记录在tf1.x与tf2.x中使用卷积神经网络完成CIFAR-10数据集识别多分类任务，并进行断点续训。

一、卷积神经网络CNN

1、全连接网络：参数增多，速度减慢，过拟合
2、卷积神经网络：每层由多个二维平面组成，每个平面由多个独立神经元组成
输入层→（卷积层+→池化层？）+→全连接层+
（1）卷积层：增强特征，降低噪音

#卷积
y = tf.nn.conv2d(x, w, strides, padding) + b
'''
x:输入4维张量[batch,height,weight,channel]
w:[height,weight,input_channel,output_channel]
b:output_channel
strides：每一维步长
'''
#卷积层
tf.keras.layers.Conv2D(filters,#卷积核数目
					   kernel_size,#卷积核大小
          			   activation,#激活函数
          			   padding#填充方式))

$图像\underset{权值矩阵}{\overset{卷积核}{\longrightarrow}}特征图$
局部连接，权值共享
步长：卷积核移动格数，得到不同尺寸输出（降采样）
$[N1,N1]\underset{S}{\overset{[N1,N1]}{\longrightarrow}}[(N1-N2)/S+1,(N1-N2)/S+1]$
0填充：用0填充边缘，使输入输出大小相同
多通道卷积：使用多个卷积核提取特征
（2）降采样层：减少参数，降低过拟合

#池化
y1 = tf.nn.max_pool(y, ksize, strides, padding)
'''
y:输入4维张量[batch,height,weight,channel]
ksize:池化窗口[1,height,weight,1]
'''
#池化层
tf.keras.layers.MaxPooling2D(pool_size)

池化：计算区域均值（背景特征）或最大值（纹理特征），合并特征

二、Tensorflow1.x

1.加载数据集

在tensorflow2.x中调用CIFAR-10数据集
10分类32x32RGB彩色图片，训练集50000张，测试集10000张
x_train：(50000, 32, 32, 3)
y_train：(10000, 1)
x_test：(10000, 32, 32, 3)
y_test：(10000, 1)

import tensorflow as tf2
import matplotlib.pyplot as plt
import numpy as np
cifar10 = tf2.keras.datasets.cifar10
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
#显示16张图片
def show(images, labels, preds):
    label_dict = {0:'plane', 1:'car', 2:'bird', 3:'cat',\
                  4:'deer', 5:'dog', 6:'frog', 7:'horse',\
                  8:'ship', 9:'trunk'}
    fig1 = plt.figure(1, figsize=(12, 12))
    for i in range(16):
        ax = fig1.add_subplot(4, 4, i+1)
        ax.imshow(images[i], cmap='binary')
        label = label_dict[np.argmax(labels[i])]
        pred = label_dict[np.argmax(preds[i])]
        title = 'label:%s,pred:%s' % (label, pred)
        ax.set_title(title)
        ax.set_xticks([])
        ax.set_yticks([])

2.数据处理

import tensorflow.compat.v1 as tf
from sklearn.preprocessing import OneHotEncoder
from sklearn.utils import shuffle
from time import time
import os
tf.disable_eager_execution()

#维度转换，灰度值归一化，标签独热编码
x_train = (x_train/255.0).astype(np.float32)
x_test = (x_test/255.0).astype(np.float32)
y_train = OneHotEncoder().fit_transform(y_train).toarray()
y_test = OneHotEncoder().fit_transform(y_test).toarray()
#训练集50000个样本,取5000个样本作为验证集;测试集10000个样本
x_valid, y_valid = x_train[45000:], y_train[45000:]
x_train, y_train = x_train[:45000], y_train[:45000]

3.定义模型

#输入层
#输入通道3，图像大小32*32
with tf.name_scope('Input'):
    x = tf.placeholder(tf.float32, [None, 32, 32, 3], name='X')
#卷积层1
#输出通道32，卷积核3*3，步长为1，图像大小32*32
with tf.name_scope('Conv_1'):
    w1 = tf.Variable(\
         tf.truncated_normal((3,3,3,32), stddev=0.1), name='W1')
    b1 = tf.Variable(tf.zeros(32), name='B1')
    y1 = tf.nn.conv2d(x, w1, strides=[1,1,1,1], padding='SAME') + b1
    y1 = tf.nn.relu(y1)
#池化层1
#输出通道32，最大值池化2*2，步长为2，图像大小16*16
with tf.name_scope('Pool_1'):
    y2 = tf.nn.max_pool(y1, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
#卷积层2
#输出通道64，卷积核3*3，步长为1，图像大小32*32
with tf.name_scope('Conv_1'):
    w3 = tf.Variable(\
         tf.truncated_normal((3,3,32,64), stddev=0.1), name='W3')
    b3 = tf.Variable(tf.zeros(64), name='B3')
    y3 = tf.nn.conv2d(y2, w3, strides=[1,1,1,1], padding='SAME') + b3
    y3 = tf.nn.relu(y3)
#池化层2
#输出通道64，最大值池化2*2，步长为2，图像大小8*8
with tf.name_scope('Pool_1'):
    y4 = tf.nn.max_pool(y3, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
#全连接层
#输入8*8*64，128个神经元
with tf.name_scope('Output'):
    w5 = tf.Variable(\
         tf.truncated_normal((8*8*64, 128), stddev=0.1), name='W5')
    b5 = tf.Variable(tf.zeros((128)), name='B5')
    y5 = tf.reshape(y4, (-1, 8*8*64))
    y5 = tf.matmul(y5, w5) + b5
    y5 = tf.nn.relu(y5)
    y5 = tf.nn.dropout(y5, keep_prob=0.8)
#输出层
#输出10个神经元
with tf.name_scope('Output'):
    w6 = tf.Variable(\
         tf.truncated_normal((128, 10), stddev=0.1), name='W6')
    b6 = tf.Variable(tf.zeros((10)), name='B6')
    pred = tf.matmul(y5, w6) + b6
    #pred = tf.nn.softmax(y6)

#优化器
with tf.name_scope('Optimizer'):
    y = tf.placeholder(tf.float32, [None, 10], name='Y')
    loss_function = tf.reduce_mean(\
                    tf.nn.softmax_cross_entropy_with_logits(\
                    logits=pred, labels=y))
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001)\
                .minimize(loss_function)
#准确率
equal = tf.equal(tf.argmax(y, axis=1), tf.argmax(pred, axis=1))
accuracy = tf.reduce_mean(tf.cast(equal, tf.float32))

4.训练模型

训练参数

train_epoch = 10
batch_size = 1000
batch_num = x_train.shape[0] // batch_size
#损失函数与准确率
step = 0
display_step = 10
loss_list = []
acc_list = []
epoch = tf.Variable(0, name='epoch', trainable=False)
#变量初始化
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

断点续训

ckpt_dir = './ckpt_dir/cifar10'
if not os.path.exists(ckpt_dir):
    os.makedirs(ckpt_dir)
vl = [v for v in tf.global_variables() if 'Adam' not in v.name]
saver = tf.train.Saver(var_list=vl, max_to_keep=1)
ckpt = tf.train.latest_checkpoint(ckpt_dir)
if ckpt != None:
    saver.restore(sess, ckpt)
start_ep = sess.run(epoch)

迭代训练

start_time = time()
for ep in range(start_ep, train_epoch):
    #打乱顺序
    x_train, y_train = shuffle(x_train, y_train)
    print('epoch:{}/{}'.format(ep+1, train_epoch))
    for batch in range(batch_num):
        xi = x_train[batch*batch_size:(batch+1)*batch_size]
        yi = y_train[batch*batch_size:(batch+1)*batch_size]
        sess.run(optimizer, feed_dict={x:xi, y:yi})
        step = step + 1
        if step % display_step == 0:
            loss, acc = sess.run([loss_function, accuracy],\
                                 feed_dict={x:x_valid, y:y_valid})
            loss_list.append(loss)
            acc_list.append(acc)
    #保存检查点
    sess.run(epoch.assign(ep+1))
    saver.save(sess, os.path.join(ckpt_dir,\
              'cifar10_model.ckpt'), global_step=ep+1)

5.结果可视化

end_time = time()
y_pred, acc = sess.run([pred, accuracy],\
                        feed_dict={x:x_test, y:y_test})
fig2 = plt.figure(2, figsize=(12, 6))
ax = fig2.add_subplot(1, 2, 1)
ax.plot(loss_list, 'r-')
ax.set_title('loss')
ax = fig2.add_subplot(1, 2, 2)
ax.plot(acc_list, 'b-')
ax.set_title('acc')
print('用时%.1fs' % (end_time - start_time))
print('Accuracy:{:.2%}'.format(acc))
show(x_test[0:16], y_test[0:16], y_pred[0:16])

在第8次训练后中断，再次运行，从第9次开始训练

验证集上的损失与准确率

前16张图片的标签与预测值

二、Tensorflow2.x

1.加载数据集

标签值采用整数，不进行独热编码

import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
from time import time
cifar10 = tf.keras.datasets.cifar10
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
#显示16张图片
def show(images, labels, preds):
    label_dict = {0:'plane', 1:'car', 2:'bird', 3:'cat',\
                  4:'deer', 5:'dog', 6:'frog', 7:'horse',\
                  8:'ship', 9:'trunk'}
    fig1 = plt.figure(1, figsize=(12, 12))
    for i in range(16):
        ax = fig1.add_subplot(4, 4, i+1)
        ax.imshow(images[i], cmap='binary')
        label = label_dict[labels[i]]
        pred = label_dict[preds[i]]
        title = 'label:%s,pred:%s' % (label, pred)
        ax.set_title(title)
        ax.set_xticks([])
        ax.set_yticks([])

2.数据处理

不进行验证集的划分

#维度转换，灰度值归一化，标签独热编码
x_train = (x_train/255.0).astype(np.float32)
x_test = (x_test/255.0).astype(np.float32)

3.定义模型

创建模型

model = tf.keras.models.Sequential()
#添加层
model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=(3,3),\
          input_shape=(32,32,3), activation='relu', padding='same'))
model.add(tf.keras.layers.Dropout(rate=0.3))
model.add(tf.keras.layers.MaxPooling2D(pool_size=(2,2)))
model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=(3,3),\
          activation='relu', padding='same'))
model.add(tf.keras.layers.Dropout(rate=0.3))
model.add(tf.keras.layers.MaxPooling2D(pool_size=(2,2)))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(units=10,\
          kernel_initializer='normal', activation='softmax'))
#模型摘要
model.summary()

加载模型

'''
#加载模型(整个模型)
mpath = './model/cifar10.h5'
model.load_weights(mpath)
'''
#加载模型(检查点)
cdir = './model/'
ckpt = tf.train.latest_checkpoint(cdir)
if ckpt != None:
    model.load_weights(ckpt)

训练模式

#整数类型作标签
model.compile(optimizer='adam',\
              loss='sparse_categorical_crossentropy',\
              metrics=['accuracy'])

4.训练模型

#回调参数设置检查点与早停
cpath = './model/cifar10.{epoch:02d}-{val_loss:.4f}.H5'
callbacks = [tf.keras.callbacks.ModelCheckpoint(filepath=cpath,
                                                save_weights_only=True,
                                                verbose=1,
                                                save_freq='epoch'),
             tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                              patience=3)]
#模型训练
start_time = time()
history = model.fit(x_train, y_train,\
                    validation_split=0.2, epochs=10, batch_size=1000,\
                    callbacks=callbacks, verbose=1)
end_time = time()
print('用时%.1fs' % (end_time-start_time))

5.结果可视化

history.history：字典类型数据
loss，accuracy，val_loss，val_accuracy

fig2 = plt.figure(2, figsize=(12, 6))
ax = fig2.add_subplot(1, 2, 1)
ax.plot(history.history['val_loss'], 'r-')
ax.set_title('loss')
ax = fig2.add_subplot(1, 2, 2)
ax.plot(history.history['val_accuracy'], 'b-')
ax.set_title('acc')

模型评估

test_loss, test_acc = model.evaluate(x_train, y_train, verbose=1)
print('Loss:%.2f' % test_loss)
print('Accuracy:{:.2%}'.format(test_acc))

模型预测

#分类预测
preds = model.predict_classes(x_test)
show(x_test[0:16], y_test[0:16].flatten(), preds[0:16])
#保存模型
#model.save_weights(mpath)

网络结构

训练后保存模型

验证集损失值与准确率

前16张图片的标签值与预测值

导入模型，准确率与之前相同

在第二批训练后停止程序

再次训练时准确率在之前的基础上提升
在这里插入图片描述

总结

卷积神经网络能够对数据进行特征提取，减少参数的数量，提高训练速度；
使用OneHotEncoder进行独热编码后需要使用toarray()转化为数组形式参与运算；
在训练时间较长时可定时保存进行断点续训；
在tf1.x中保存检查点时会保存多个同名的权重，可不保存名称中含有Adam的权重，保存前后需要重启控制台以避免加载时权重名称不对应。