This article is an internal article of the 🔗365天深度学习训练营 (365-day Deep Learning Training Camp).
Original author: K同学啊
一 Comparing ResNet and ResNetV2
Key differences: (a) "original" is the residual unit of the original ResNet, and (b) "proposed" is the new pre-activation residual unit used in ResNetV2. In (a), each convolution is followed by BN and the activation (Conv → BN → ReLU), and a final ReLU is applied after the addition. In (b), BN and ReLU are applied before each convolution (BN → ReLU → Conv), so the ReLU that used to follow the addition is absorbed into the residual branch and the output of the addition is passed on unchanged as a clean identity path.
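As a concrete illustration, here is a minimal sketch (not the official implementation) of the two residual units in Keras, using plain 3x3 identity blocks so that only the ordering of Conv/BN/ReLU and the post-addition activation differ:

from keras import layers

def original_block(x, filters):
    """(a) original ResNet unit: Conv -> BN -> ReLU inside the branch, ReLU after the addition."""
    shortcut = x
    y = layers.Conv2D(filters, 3, padding="same")(x)
    y = layers.BatchNormalization()(y)
    y = layers.Activation("relu")(y)
    y = layers.Conv2D(filters, 3, padding="same")(y)
    y = layers.BatchNormalization()(y)
    y = layers.Add()([shortcut, y])
    return layers.Activation("relu")(y)   # ReLU applied after the addition

def preact_block(x, filters):
    """(b) proposed (ResNetV2) unit: BN -> ReLU -> Conv inside the branch, identity after the addition."""
    shortcut = x
    y = layers.BatchNormalization()(x)
    y = layers.Activation("relu")(y)
    y = layers.Conv2D(filters, 3, padding="same")(y)
    y = layers.BatchNormalization()(y)
    y = layers.Activation("relu")(y)
    y = layers.Conv2D(filters, 3, padding="same")(y)
    return layers.Add()([shortcut, y])    # addition output flows to the next block unchanged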
二 ResNet50V2 Architecture Diagram
Calling the official Keras implementation:
from keras.applications.resnet_v2 import ResNet50V2
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
base_model = ResNet50V2(include_top=False, weights='imagenet', input_shape=(224, 224, 3))  # ImageNet-pretrained backbone without its classification head
x = base_model.output
x = GlobalAveragePooling2D()(x)  # pool the 7x7x2048 feature map into a 2048-dim vector
x = Dense(2, activation='sigmoid')(x)  # binary-classification head (2 output units)
model = Model(inputs=base_model.input, outputs=x)
model.summary()
Model: "model_2"  (summary abridged to the stage level)
input_3 (InputLayer)                        (None, 224, 224, 3)
conv1_pad / conv1_conv                      (None, 112, 112, 64)
pool1_pad / pool1_pool                      (None, 56, 56, 64)
conv2_block1-3 (pre-activation bottleneck)  (None, 28, 28, 256)
conv3_block1-4 (pre-activation bottleneck)  (None, 14, 14, 512)
conv4_block1-6 (pre-activation bottleneck)  (None, 7, 7, 1024)
conv5_block1-3 (pre-activation bottleneck)  (None, 7, 7, 2048)
post_bn / post_relu                         (None, 7, 7, 2048)
global_average_pooling2d_2                  (None, 2048)
dense_2 (Dense)                             (None, 2)
==================================================================================================
Total params: 23,568,898
Trainable params: 23,523,458
Non-trainable params: 45,440
__________________________________________________________________________________________________
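Note that in the summary above essentially the whole backbone is trainable (about 23.5 M trainable parameters), i.e. the pretrained weights are fully fine-tuned. A common alternative, shown here only as an option and not what the results below use, is to freeze the pretrained layers and train only the new head:

# Optional: freeze the pretrained ResNet50V2 backbone so only the new Dense head is trained.
# (Not used in the run below, where the whole network is fine-tuned.)
base_model.trainable = False
# Changes to trainable flags only take effect after (re)compiling the model.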
Training the model to evaluate its performance:
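The training snippet below assumes the model has already been compiled and that tf.data datasets named train_ds and val_ds exist; the original post builds them elsewhere. A minimal sketch of what those prerequisites might look like (directory paths, batch size, and the loss are assumptions):

# Hypothetical data pipeline and compile step; paths and hyperparameters are assumptions.
import tensorflow as tf
from keras.applications.resnet_v2 import preprocess_input

batch_size = 32  # assumed
train_ds = tf.keras.utils.image_dataset_from_directory(
    "data/train",                # hypothetical directory with one sub-folder per class
    image_size=(224, 224),
    batch_size=batch_size,
    label_mode="categorical")    # one-hot labels to match the 2-unit output layer
val_ds = tf.keras.utils.image_dataset_from_directory(
    "data/val",                  # hypothetical directory
    image_size=(224, 224),
    batch_size=batch_size,
    label_mode="categorical")

# The ResNet50V2 pretrained weights expect inputs scaled to [-1, 1]
train_ds = train_ds.map(lambda x, y: (preprocess_input(x), y))
val_ds = val_ds.map(lambda x, y: (preprocess_input(x), y))

model.compile(optimizer="adam",
              loss="binary_crossentropy",  # assumed; pairs with the sigmoid head above
              metrics=["accuracy"])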
from keras.callbacks import EarlyStopping
# Early stopping: stop when val_loss has not improved for 3 consecutive epochs and restore the best weights
early_stopping = EarlyStopping(
monitor='val_loss',
patience=3,
verbose=1,
restore_best_weights=True
)
epochs = 10
history = model.fit(
train_ds,
validation_data=val_ds,
epochs=epochs,
callbacks=[early_stopping]
)
Epoch 1/10  57/57 - 646s 11s/step - loss: 0.4862 - accuracy: 0.7794 - val_loss: 22.1072 - val_accuracy: 0.4913
Epoch 2/10  57/57 - 659s 12s/step - loss: 0.3044 - accuracy: 0.8715 - val_loss: 9.3026 - val_accuracy: 0.4913
Epoch 3/10  57/57 - 716s 13s/step - loss: 0.2008 - accuracy: 0.9175 - val_loss: 11.3780 - val_accuracy: 0.5157
Epoch 4/10  57/57 - 676s 12s/step - loss: 0.1516 - accuracy: 0.9431 - val_loss: 3.5149 - val_accuracy: 0.5973
Epoch 5/10  57/57 - 676s 12s/step - loss: 0.1438 - accuracy: 0.9436 - val_loss: 2.1436 - val_accuracy: 0.6705
Epoch 6/10  57/57 - 714s 13s/step - loss: 0.1471 - accuracy: 0.9444 - val_loss: 1.8244 - val_accuracy: 0.5768
Epoch 7/10  57/57 - 660s 12s/step - loss: 0.1071 - accuracy: 0.9552 - val_loss: 1.1629 - val_accuracy: 0.7604
Epoch 8/10  57/57 - 615s 11s/step - loss: 0.0637 - accuracy: 0.9774 - val_loss: 2.8120 - val_accuracy: 0.6249
Epoch 9/10  57/57 - 613s 11s/step - loss: 0.0715 - accuracy: 0.9739 - val_loss: 4.8831 - val_accuracy: 0.5523
Epoch 10/10 57/57 - 613s 11s/step - loss: 0.0651 - accuracy: 0.9766 - val_loss: 0.5954 - val_accuracy: 0.8619
import matplotlib.pyplot as plt

# Number of epochs actually trained (may be fewer than `epochs` if early stopping triggered)
actual_epochs = len(history.history['accuracy'])
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs_range = range(actual_epochs)
plt.figure(figsize=(12, 4))
# Plot accuracy
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')
# Plot loss
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()