Inception-ResNet-v1 and Inception-ResNet-v2 come from Google's 2016 paper "Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning". Paper links:
[1602.07261] Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning (arxiv.org)
https://arxiv.org/pdf/1602.07261.pdf (PDF download)
The paper proposes three models: Inception-v4, Inception-ResNet-v1, and Inception-ResNet-v2. Inception-v4 further refines the Inception modules on top of Inception-v3, improving accuracy and computational efficiency, but uses no residual connections. Inception-ResNet combines the Inception module with the deep residual network ResNet, introducing three Inception modules with residual connections; the residual connections markedly speed up training convergence. With model ensembling and multi-crop evaluation, the top-5 error rate drops to 3.1%.
① Inception-ResNet-v1 network structure (v1 and v2 share the same overall layout; the main difference is the number of channels):
Compared with the original Inception module, Inception-ResNet-v1 makes several improvements:
The figure above shows the initial version of the Inception architecture.
Advantages:
- Increases the width of the network
- Improves the network's adaptability to multiple scales
- Adds pooling, which reduces spatial size and mitigates overfitting
- Follows each convolution with a ReLU, adding non-linearity
Drawback: every convolution is applied to all outputs of the previous layer, so the feature maps coming out of the 5×5 convolution become excessively thick (too many channels).
Improvement: insert 1×1 convolutions for dimensionality reduction before the 3×3 and 5×5 convolutions and after the pooling layer.
The figure above shows the improved Inception block used in Inception-ResNet-v1.
Now let's walk through the concrete structure of Inception-ResNet-v1.
In the structure diagrams, "stride 2" means a stride of 2. A "V" marks TensorFlow's padding="VALID", i.e. padding=0 in PyTorch; a block without a "V" uses TensorFlow's padding="SAME" (zero padding), which for a 3×3 stride-1 convolution corresponds to padding=1 in PyTorch.
Note that in the implementation below, stride-2 convolutions use padding=0 (valid), while the stride-1 convolutions that must preserve the feature-map size use padding=1.
I also worked through the shapes in the diagrams: the asymmetric convolutions (e.g., the 1×7 and 7×1 convolutions in the IR-B module) leave H and W unchanged. By the raw kernel sizes the feature map ought to shrink, but zero padding of (0, 3) and (3, 0) keeps its size constant, as the quick check below shows.
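A minimal sketch (the 17×17 size and 128 channels are just an example, matching the IR-B module below):

import torch
import torch.nn as nn

x = torch.randn(1, 128, 17, 17)  # example 17x17 feature map
conv1x7 = nn.Conv2d(128, 128, kernel_size=(1, 7), padding=(0, 3))
conv7x1 = nn.Conv2d(128, 128, kernel_size=(7, 1), padding=(3, 0))
print(conv7x1(conv1x7(x)).shape)  # torch.Size([1, 128, 17, 17]) -- H and W unchanged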
Reduction-A structure
② Inception-ResNet-v2 network structure (v1 and v2 share the same overall layout; the main difference is the number of channels):
The parameters of the three networks differ slightly:
The 35×35→17×17 and 17×17→8×8 grid-reduction modules A and B of the Inception-ResNet-v2 network:
Reduction-A and Reduction-B of Inception-ResNet-v2
The computational cost of Inception-ResNet-v1 is close to that of Inception-v3, and the cost of Inception-ResNet-v2 is close to that of Inception-v4. The two variants have different stems; both use modules A, B, and C plus reduction blocks, and the only difference lies in the hyperparameter settings (mainly channel counts).
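To make the difference concrete, the per-stage output shapes in the implementations below are:
- Stem output: 35×35×256 (v1) vs. 35×35×384 (v2)
- After Reduction-A: 17×17×896 (v1) vs. 17×17×1152 (v2)
- After Reduction-B: 8×8×1792 (v1) vs. 8×8×2048 (v2)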
③ Code implementation:
(1) Inception-ResNet-v1:
import torch
import torch.nn as nn

# Define the 3x3 convolution block up front for reuse
class conv3x3(nn.Module):
def __init__(self, in_planes, out_channels, stride=1, padding=0):
super(conv3x3, self).__init__()
self.conv3x3 = nn.Sequential(
            nn.Conv2d(in_planes, out_channels, kernel_size=3, stride=stride, padding=padding),  # 3x3 convolution
            nn.BatchNorm2d(out_channels),  # BatchNorm layer stabilizes training
            nn.ReLU()  # activation
)
def forward(self, input):
return self.conv3x3(input)
class conv1x1(nn.Module):
def __init__(self, in_planes, out_channels, stride=1, padding=0):
super(conv1x1, self).__init__()
self.conv1x1 = nn.Sequential(
            nn.Conv2d(in_planes, out_channels, kernel_size=1, stride=stride, padding=padding),  # 1x1 convolution
nn.BatchNorm2d(out_channels),
nn.ReLU()
)
def forward(self, input):
return self.conv1x1(input)
Stem module: input 299×299×3, output 35×35×256.
class StemV1(nn.Module):
def __init__(self, in_planes):
super(StemV1, self).__init__()
        self.conv1 = conv3x3(in_planes=in_planes, out_channels=32, stride=2, padding=0)  # 299 -> 149
        self.conv2 = conv3x3(in_planes=32, out_channels=32, stride=1, padding=0)  # 149 -> 147
        self.conv3 = conv3x3(in_planes=32, out_channels=64, stride=1, padding=1)  # 147 -> 147
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0)  # 147 -> 73
        self.conv4 = conv3x3(in_planes=64, out_channels=64, stride=1, padding=1)  # 73 -> 73
        self.conv5 = conv1x1(in_planes=64, out_channels=80, stride=1, padding=0)  # 73 -> 73
        self.conv6 = conv3x3(in_planes=80, out_channels=192, stride=1, padding=0)  # 73 -> 71
        self.conv7 = conv3x3(in_planes=192, out_channels=256, stride=2, padding=0)  # 71 -> 35
def forward(self, x):
x = self.conv1(x)
x = self.conv2(x)
x = self.conv3(x)
x = self.maxpool(x)
x = self.conv4(x)
x = self.conv5(x)
x = self.conv6(x)
x = self.conv7(x)
return x
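A quick shape check (a minimal sketch, assuming the classes above are in scope):

stem = StemV1(in_planes=3)
out = stem(torch.randn(1, 3, 299, 299))
print(out.shape)  # expected: torch.Size([1, 256, 35, 35])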
IR-A module ×5: input 35×35×256, output 35×35×256.
class Inception_ResNet_A(nn.Module):
    def __init__(self, input):
        super(Inception_ResNet_A, self).__init__()
        # three parallel branches, each with its own (unshared) convolutions
        self.b1_conv1 = conv1x1(in_planes=input, out_channels=32, stride=1, padding=0)
        self.b2_conv1 = conv1x1(in_planes=input, out_channels=32, stride=1, padding=0)
        self.b2_conv2 = conv3x3(in_planes=32, out_channels=32, stride=1, padding=1)
        self.b3_conv1 = conv1x1(in_planes=input, out_channels=32, stride=1, padding=0)
        self.b3_conv2 = conv3x3(in_planes=32, out_channels=32, stride=1, padding=1)
        self.b3_conv3 = conv3x3(in_planes=32, out_channels=32, stride=1, padding=1)
        self.line = nn.Conv2d(96, 256, 1, stride=1, padding=0, bias=True)  # project 96 -> 256 for the residual sum
        self.relu = nn.ReLU()
    def forward(self, x):
        c1 = self.b1_conv1(x)                                # branch 1: 1x1
        c2 = self.b2_conv2(self.b2_conv1(x))                 # branch 2: 1x1 -> 3x3
        c3 = self.b3_conv3(self.b3_conv2(self.b3_conv1(x)))  # branch 3: 1x1 -> 3x3 -> 3x3
        cat = torch.cat([c1, c2, c3], dim=1)                 # 32 + 32 + 32 = 96 channels
        out = x + self.line(cat)                             # residual connection
        return self.relu(out)
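Because the residual block preserves both spatial size and channel count, it can be stacked; a quick check (a minimal sketch):

block = Inception_ResNet_A(input=256)
print(block(torch.randn(1, 256, 35, 35)).shape)  # torch.Size([1, 256, 35, 35]) -- unchanged, so 5 blocks chain cleanly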
Reduction-A module: input 35×35×256, output 17×17×896.
class Reduction_A(nn.Module):
    def __init__(self, input, n=384, k=192, l=224, m=256):
        super(Reduction_A, self).__init__()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0)  # 35 -> 17
        self.conv1 = conv3x3(in_planes=input, out_channels=n, stride=2, padding=0)  # 35 -> 17
        self.conv2 = conv1x1(in_planes=input, out_channels=k, padding=0)
        self.conv3 = conv3x3(in_planes=k, out_channels=l, padding=1)  # keeps 35x35
        self.conv4 = conv3x3(in_planes=l, out_channels=m, stride=2, padding=0)  # 35 -> 17
    def forward(self, x):
        c1 = self.maxpool(x)                        # branch 1: max pooling
        c2 = self.conv1(x)                          # branch 2: 3x3 stride-2 conv
        c3 = self.conv4(self.conv3(self.conv2(x)))  # branch 3: 1x1 -> 3x3 -> 3x3 stride 2
        cat = torch.cat([c1, c2, c3], dim=1)        # 256 + 384 + 256 = 896 channels
        return cat
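Shape check for the reduction (a minimal sketch, using the default n, k, l, m):

red_a = Reduction_A(input=256)
print(red_a(torch.randn(1, 256, 35, 35)).shape)  # torch.Size([1, 896, 17, 17])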
IR-B module ×10: input 17×17×896, output 17×17×896.
class Inception_ResNet_B(nn.Module):
    def __init__(self, input):
        super(Inception_ResNet_B, self).__init__()
        # separate 1x1 convs for the two branches (no weight sharing)
        self.b1_conv1 = conv1x1(in_planes=input, out_channels=128, stride=1, padding=0)
        self.b2_conv1 = conv1x1(in_planes=input, out_channels=128, stride=1, padding=0)
        self.conv1x7 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(1, 7), padding=(0, 3))
        self.conv7x1 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 1), padding=(3, 0))
        self.line = nn.Conv2d(256, 896, 1, stride=1, padding=0, bias=True)  # project 256 -> 896
        self.relu = nn.ReLU()
    def forward(self, x):
        c1 = self.b1_conv1(x)             # branch 1: 1x1
        c2 = self.b2_conv1(x)             # branch 2: 1x1 -> 1x7 -> 7x1
        c2 = self.relu(self.conv1x7(c2))
        c2 = self.relu(self.conv7x1(c2))
        cat = torch.cat([c1, c2], dim=1)  # 128 + 128 = 256 channels
        out = x + self.line(cat)          # residual connection
        return self.relu(out)
Reduction-B module: input 17×17×896, output 8×8×1792.
class Reduction_B(nn.Module):
    def __init__(self, input):
        super(Reduction_B, self).__init__()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0)  # 17 -> 8
        # separate 1x1 convs for the three conv branches (no weight sharing)
        self.b2_conv1 = conv1x1(in_planes=input, out_channels=256, padding=0)
        self.b2_conv2 = conv3x3(in_planes=256, out_channels=384, stride=2, padding=0)  # 17 -> 8
        self.b3_conv1 = conv1x1(in_planes=input, out_channels=256, padding=0)
        self.b3_conv2 = conv3x3(in_planes=256, out_channels=256, stride=2, padding=0)  # 17 -> 8
        self.b4_conv1 = conv1x1(in_planes=input, out_channels=256, padding=0)
        self.b4_conv2 = conv3x3(in_planes=256, out_channels=256, padding=1)  # keeps 17x17
        self.b4_conv3 = conv3x3(in_planes=256, out_channels=256, stride=2, padding=0)  # 17 -> 8
    def forward(self, x):
        c1 = self.maxpool(x)
        c2 = self.b2_conv2(self.b2_conv1(x))
        c3 = self.b3_conv2(self.b3_conv1(x))
        c4 = self.b4_conv3(self.b4_conv2(self.b4_conv1(x)))
        cat = torch.cat([c1, c2, c3, c4], dim=1)  # 896 + 384 + 256 + 256 = 1792 channels
        return cat
IR-C module ×5: input 8×8×1792, output 8×8×1792.
class Inception_ResNet_C(nn.Module):
    def __init__(self, input):
        super(Inception_ResNet_C, self).__init__()
        # separate 1x1 convs for the two branches (no weight sharing)
        self.b1_conv1 = conv1x1(in_planes=input, out_channels=192, stride=1, padding=0)
        self.b2_conv1 = conv1x1(in_planes=input, out_channels=192, stride=1, padding=0)
        self.conv1x3 = nn.Conv2d(in_channels=192, out_channels=192, kernel_size=(1, 3), padding=(0, 1))
        self.conv3x1 = nn.Conv2d(in_channels=192, out_channels=192, kernel_size=(3, 1), padding=(1, 0))
        self.line = nn.Conv2d(384, 1792, 1, stride=1, padding=0, bias=True)  # project 384 -> 1792
        self.relu = nn.ReLU()
    def forward(self, x):
        c1 = self.b1_conv1(x)             # branch 1: 1x1
        c2 = self.b2_conv1(x)             # branch 2: 1x1 -> 1x3 -> 3x1
        c2 = self.relu(self.conv1x3(c2))
        c2 = self.relu(self.conv3x1(c2))
        cat = torch.cat([c1, c2], dim=1)  # 192 + 192 = 384 channels
        out = x + self.line(cat)          # residual connection
        return self.relu(out)
class Inception_ResNet(nn.Module):
    def __init__(self, classes=2):
        super(Inception_ResNet, self).__init__()
        blocks = []
        blocks.append(StemV1(in_planes=3))
        for _ in range(5):
            blocks.append(Inception_ResNet_A(input=256))
        blocks.append(Reduction_A(input=256))
        for _ in range(10):
            blocks.append(Inception_ResNet_B(input=896))
        blocks.append(Reduction_B(input=896))
        for _ in range(5):
            blocks.append(Inception_ResNet_C(input=1792))
        self.features = nn.Sequential(*blocks)
        self.avepool = nn.AdaptiveAvgPool2d(1)  # global average pooling: 8x8x1792 -> 1x1x1792
        self.dropout = nn.Dropout(p=0.2)
        self.linear = nn.Linear(1792, classes)
    def forward(self, x):
        x = self.features(x)
        x = self.avepool(x)
        x = self.dropout(x)
        x = x.view(x.size(0), -1)  # flatten to (batch, 1792)
        x = self.linear(x)
        return x
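Finally, a smoke test for the whole network (a minimal sketch; with a 299×299 input the feature extractor ends at 8×8×1792 and global pooling feeds the classifier):

model = Inception_ResNet(classes=2)
logits = model(torch.randn(2, 3, 299, 299))
print(logits.shape)  # torch.Size([2, 2])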
The Inception-ResNet-v1 code above is adapted from the CSDN blog post: 基于PyTorch实现 Inception-ResNet-v1_NAND_LU的博客-CSDN博客
(2) Inception-ResNet-v2:
import torch
import torch.nn as nn
import torch.nn.functional as F
# Since they are used repeatedly below, define the 3x3 and 1x1 conv blocks up front
class conv3x3(nn.Module):
def __init__(self, in_planes, out_channels, stride=1, padding=0):
super(conv3x3, self).__init__()
self.conv3x3 = nn.Sequential(
            nn.Conv2d(in_planes, out_channels, kernel_size=3, stride=stride, padding=padding),  # 3x3 convolution
            nn.BatchNorm2d(out_channels),  # BatchNorm layer stabilizes training
            nn.ReLU()  # activation
)
def forward(self, input):
return self.conv3x3(input)
class conv1x1(nn.Module):
def __init__(self, in_planes, out_channels, stride=1, padding=0):
super(conv1x1, self).__init__()
self.conv1x1 = nn.Sequential(
            nn.Conv2d(in_planes, out_channels, kernel_size=1, stride=stride, padding=padding),  # 1x1 convolution
nn.BatchNorm2d(out_channels),
nn.ReLU()
)
def forward(self, input):
return self.conv1x1(input)
Stem module: input 299×299×3, output 35×35×384.
# 定义stem模块
class StemV2(nn.Module):
def __init__(self, in_planes=3):
super(StemV2, self).__init__()
        self.conv1 = conv3x3(in_planes=in_planes, out_channels=32, stride=2, padding=0)  # 299 -> 149
        self.conv2 = conv3x3(in_planes=32, out_channels=32, stride=1, padding=0)  # 149 -> 147
        self.conv3 = conv3x3(in_planes=32, out_channels=64, stride=1, padding=1)  # 147 -> 147
        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=0)  # 147 -> 73
        self.conv4 = conv3x3(in_planes=64, out_channels=96, stride=2, padding=0)  # 147 -> 73
        self.conv5 = conv1x1(in_planes=160, out_channels=64, stride=1, padding=0)  # 73 -> 73
        self.conv6 = conv3x3(in_planes=64, out_channels=96, stride=1, padding=0)  # 73 -> 71
        self.conv7 = conv1x1(in_planes=160, out_channels=64, stride=1, padding=0)  # 73 -> 73
        self.conv8 = nn.Conv2d(in_channels=64, out_channels=64, stride=1, kernel_size=(1, 7), padding=(0, 3))  # keeps 73x73
        self.conv9 = nn.Conv2d(in_channels=64, out_channels=64, stride=1, kernel_size=(7, 1), padding=(3, 0))  # keeps 73x73
        self.conv10 = conv3x3(in_planes=64, out_channels=96, stride=1, padding=0)  # 73 -> 71
        self.conv11 = conv3x3(in_planes=192, out_channels=192, stride=2, padding=0)  # 71 -> 35
        self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=0)  # 71 -> 35
self.relu = nn.ReLU()
def forward(self, x):
x = self.conv1(x)
x = self.conv2(x)
x = self.conv3(x)
x1 = self.maxpool1(x)
x2 = self.conv4(x)
x = torch.cat([x1, x2], dim=1)
x1 = self.conv5(x)
x1 = self.conv6(x1)
x2 = self.conv7(x)
x2 = self.conv8(x2)
x2 = self.relu(x2)
x2 = self.conv9(x2)
x2 = self.relu(x2)
x2 = self.conv10(x2)
x = torch.cat([x1, x2], dim=1)
x1 = self.conv11(x)
x2 = self.maxpool2(x)
x = torch.cat([x1, x2], dim=1)
return x
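Again a quick shape check (a minimal sketch):

stem = StemV2(in_planes=3)
print(stem(torch.randn(1, 3, 299, 299)).shape)  # torch.Size([1, 384, 35, 35])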
IR-A module ×5: input 35×35×384, output 35×35×384.
# 定义IR-A模块
class Inception_ResNet_A(nn.Module):
    def __init__(self, input, scale=0.3):
        super(Inception_ResNet_A, self).__init__()
        # separate 1x1 convs for the three branches (no weight sharing)
        self.b1_conv1 = conv1x1(in_planes=input, out_channels=32, stride=1, padding=0)
        self.b2_conv1 = conv1x1(in_planes=input, out_channels=32, stride=1, padding=0)
        self.b2_conv2 = conv3x3(in_planes=32, out_channels=32, stride=1, padding=1)
        self.b3_conv1 = conv1x1(in_planes=input, out_channels=32, stride=1, padding=0)
        self.b3_conv2 = conv3x3(in_planes=32, out_channels=48, stride=1, padding=1)
        self.b3_conv3 = conv3x3(in_planes=48, out_channels=64, stride=1, padding=1)
        self.line = nn.Conv2d(128, 384, 1, stride=1, padding=0, bias=True)  # project 128 -> 384
        self.scale = scale
        self.relu = nn.ReLU()
    def forward(self, x):
        c1 = self.b1_conv1(x)                                # branch 1: 1x1
        c2 = self.b2_conv2(self.b2_conv1(x))                 # branch 2: 1x1 -> 3x3
        c3 = self.b3_conv3(self.b3_conv2(self.b3_conv1(x)))  # branch 3: 1x1 -> 3x3 -> 3x3
        cat = torch.cat([c1, c2, c3], dim=1)                 # 32 + 32 + 64 = 128 channels
        out = x + self.scale * self.line(cat)                # scale the residual, not the identity path
        return self.relu(out)
Reduction-A module: input 35×35×384, output 17×17×1152.
# 定义Reduction-A模块
class Reduction_A(nn.Module):
    def __init__(self, input, n=384, k=256, l=256, m=384):
        super(Reduction_A, self).__init__()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0)  # 35 -> 17
        self.conv1 = conv3x3(in_planes=input, out_channels=n, stride=2, padding=0)  # 35 -> 17
        self.conv2 = conv1x1(in_planes=input, out_channels=k, padding=0)
        self.conv3 = conv3x3(in_planes=k, out_channels=l, padding=1)  # keeps 35x35
        self.conv4 = conv3x3(in_planes=l, out_channels=m, stride=2, padding=0)  # 35 -> 17
    def forward(self, x):
        c1 = self.maxpool(x)                        # branch 1: max pooling
        c2 = self.conv1(x)                          # branch 2: 3x3 stride-2 conv
        c3 = self.conv4(self.conv3(self.conv2(x)))  # branch 3: 1x1 -> 3x3 -> 3x3 stride 2
        cat = torch.cat([c1, c2, c3], dim=1)        # 384 + 384 + 384 = 1152 channels
        return cat
IR-B module ×10: input 17×17×1152, output 17×17×1152.
# 定义IR-B模块
class Inception_ResNet_B(nn.Module):
    def __init__(self, input, scale=0.3):
        super(Inception_ResNet_B, self).__init__()
        self.b1_conv1 = conv1x1(in_planes=input, out_channels=192, stride=1, padding=0)
        self.b2_conv1 = conv1x1(in_planes=input, out_channels=128, stride=1, padding=0)
        self.conv1x7 = nn.Conv2d(in_channels=128, out_channels=160, kernel_size=(1, 7), padding=(0, 3))
        self.conv7x1 = nn.Conv2d(in_channels=160, out_channels=192, kernel_size=(7, 1), padding=(3, 0))
        self.line = nn.Conv2d(384, 1152, 1, stride=1, padding=0, bias=True)  # project 384 -> 1152
        self.scale = scale
        self.relu = nn.ReLU()
    def forward(self, x):
        c1 = self.b1_conv1(x)                  # branch 1: 1x1
        c2 = self.b2_conv1(x)                  # branch 2: 1x1 -> 1x7 -> 7x1
        c2 = self.relu(self.conv1x7(c2))
        c2 = self.relu(self.conv7x1(c2))
        cat = torch.cat([c1, c2], dim=1)       # 192 + 192 = 384 channels
        out = x + self.scale * self.line(cat)  # scale the residual, not the identity path
        return self.relu(out)
Reduction-B module: input 17×17×1152, output 8×8×2048.
# 定义Reduction-B模块
class Reduction_B(nn.Module):
    def __init__(self, input):
        super(Reduction_B, self).__init__()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0)  # 17 -> 8
        # separate 1x1 convs for the three conv branches (no weight sharing)
        self.b2_conv1 = conv1x1(in_planes=input, out_channels=256, padding=0)
        self.b2_conv2 = conv3x3(in_planes=256, out_channels=288, stride=2, padding=0)  # 17 -> 8
        self.b3_conv1 = conv1x1(in_planes=input, out_channels=256, padding=0)
        self.b3_conv2 = conv3x3(in_planes=256, out_channels=288, stride=2, padding=0)  # 17 -> 8
        self.b4_conv1 = conv1x1(in_planes=input, out_channels=256, padding=0)
        self.b4_conv2 = conv3x3(in_planes=256, out_channels=288, padding=1)  # keeps 17x17
        self.b4_conv3 = conv3x3(in_planes=288, out_channels=320, stride=2, padding=0)  # 17 -> 8
    def forward(self, x):
        c1 = self.maxpool(x)
        c2 = self.b2_conv2(self.b2_conv1(x))
        c3 = self.b3_conv2(self.b3_conv1(x))
        c4 = self.b4_conv3(self.b4_conv2(self.b4_conv1(x)))
        cat = torch.cat([c1, c2, c3, c4], dim=1)  # 1152 + 288 + 288 + 320 = 2048 channels
        return cat
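Shape check (a minimal sketch):

red_b = Reduction_B(input=1152)
print(red_b(torch.randn(1, 1152, 17, 17)).shape)  # torch.Size([1, 2048, 8, 8])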
IR-C module ×5: input 8×8×2048, output 8×8×2048.
# 定义IR-C模块
class Inception_ResNet_C(nn.Module):
    def __init__(self, input, scale=0.3):
        super(Inception_ResNet_C, self).__init__()
        self.b1_conv1 = conv1x1(in_planes=input, out_channels=192, stride=1, padding=0)
        self.b2_conv1 = conv1x1(in_planes=input, out_channels=192, stride=1, padding=0)
        self.conv1x3 = nn.Conv2d(in_channels=192, out_channels=224, kernel_size=(1, 3), padding=(0, 1))
        self.conv3x1 = nn.Conv2d(in_channels=224, out_channels=256, kernel_size=(3, 1), padding=(1, 0))
        self.line = nn.Conv2d(448, 2048, 1, stride=1, padding=0, bias=True)  # project 448 -> 2048
        self.relu = nn.ReLU()
        self.scale = scale
    def forward(self, x):
        c1 = self.b1_conv1(x)                  # branch 1: 1x1
        c2 = self.b2_conv1(x)                  # branch 2: 1x1 -> 1x3 -> 3x1
        c2 = self.relu(self.conv1x3(c2))
        c2 = self.relu(self.conv3x1(c2))
        cat = torch.cat([c1, c2], dim=1)       # 192 + 256 = 448 channels
        out = x + self.scale * self.line(cat)  # scale the residual, not the identity path
        return self.relu(out)
# IR v2
class Inception_ResNet_v2(nn.Module):
    def __init__(self, classes=2):
        super(Inception_ResNet_v2, self).__init__()
        blocks = []
        blocks.append(StemV2(in_planes=3))
        for _ in range(5):
            blocks.append(Inception_ResNet_A(input=384))
        blocks.append(Reduction_A(input=384))
        for _ in range(10):
            blocks.append(Inception_ResNet_B(input=1152))
        blocks.append(Reduction_B(input=1152))
        for _ in range(5):
            blocks.append(Inception_ResNet_C(input=2048))
        self.features = nn.Sequential(*blocks)
        self.avepool = nn.AdaptiveAvgPool2d(1)  # global average pooling: 8x8x2048 -> 1x1x2048
        self.dropout = nn.Dropout(p=0.2)
        self.linear = nn.Linear(2048, classes)
    def forward(self, x):
        x = self.features(x)
        x = self.avepool(x)
        x = self.dropout(x)
        x = x.view(x.size(0), -1)  # flatten to (batch, 2048)
        x = self.linear(x)
        return x
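And a smoke test for the whole v2 network (a minimal sketch):

model = Inception_ResNet_v2(classes=2)
print(model(torch.randn(2, 3, 299, 299)).shape)  # torch.Size([2, 2])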