1. 直接使用

1.1 获取预训练权重

比如直接使用Pytorch版的预训练权重。先把权重保存下来，并打印分类类别（方便后面对比）

import torch
import torchvision.models as models
from PIL import Image
import numpy as np

# input
# Force RGB so the array always has exactly 3 channels (grayscale / RGBA files
# would otherwise break the permute below).
image = Image.open("E:\\code\\c++\\libtorch_models\\data\\cat.jpg").convert("RGB")  # the image was placed in the build folder
# Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same filter
# under its current name.
image = image.resize((224, 224), Image.LANCZOS)
image = np.asarray(image)
image = image / 255.0  # scale pixel values to [0, 1]
image = torch.Tensor(image).unsqueeze_(dim=0)  # (b,h,w,c)
image = image.permute((0, 3, 1, 2)).float()  # (b,h,w,c) -> (b,c,h,w)

# model
model = models.resnet18(pretrained=True)
model = model.eval()
# Trace with a dummy input of the same shape as the real input.
resnet = torch.jit.trace(model, torch.rand(1, 3, 224, 224))

# infer
# Inference only: no autograd graph is needed.
with torch.no_grad():
    output = resnet(image)
max_index = torch.max(output, 1)[1].item()
print(max_index)  # class index among the 1000 ImageNet classes
resnet.save('resnet.pt')

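运行后会保存权重文件resnet.pt，并打印分类索引号283，对应的类别是猫。注意：后面的C++示例读取的是weights目录下的resnet18.pt，因此需要把这里保存的resnet.pt重命名为resnet18.pt并放到对应目录（或者修改C++代码中的路径）。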

1.2 libtorch直接使用pt权重

使用接口torch::jit::load 即可载入权重并获取resnet18模型。

然后再使用std::vector<torch::jit::IValue>传送数据到模型中，即可得到类别。

打印结果是283，和前面PyTorch版的结果一致。

#include <exception>
#include <iostream>
#include <vector>

#include <opencv.hpp>
#include <torch/script.h>

int main()
{
	// load weights and model.
	auto resnet18 = torch::jit::load("E:\\code\\c++\\libtorch_models\\weights\\resnet18.pt");
	assert(module != nullptr);
	resnet18.to(torch::kCUDA);
	resnet18.eval();

	// pre
	cv::Mat image = cv::imread("E:\\code\\c++\\libtorch_models\\data\\cat.jpg");
	cv::resize(image, image, cv::Size(224, 224));
	torch::Tensor tensor_image = torch::from_blob(image.data, {224, 224,3 }, torch::kByte);
	tensor_image = torch::unsqueeze(tensor_image, 0).permute({ 0,3,1,2 }).to(torch::kCUDA).to(torch::kFloat).div(255.0);  // (b,h,w,c) -> (b,c,h,w)
	std::cout << tensor_image.options() << std::endl;
	std::vector<torch::jit::IValue> inputs;
	inputs.push_back(tensor_image);

	// infer
	auto output = resnet18.forward(inputs).toTensor();
	auto max_result = output.max(1, true);  
	auto max_index = std::get<1>(max_result).item<float>();
	std::cout << max_index << std::endl;

	return 0;
}

2. 间接使用

间接使用是指基于libtorch c++ 复现一遍resnet网络，再利用前面得到的权重，初始化模型。输出结果依然是283.

#include <iostream>
#include "resnet.h"  // libtorch实现的resnet
#include <opencv.hpp>

int main()
{
	// load weights and model.
	ResNet resnet = resnet18(1000);  // orig net
	torch::load(resnet, "E:\\code\\c++\\libtorch_models\\weights\\resnet18.pt");  // load weights.
	assert(resnet != nullptr);
	resnet->to(torch::kCUDA);
	resnet->eval();

	// pre
	cv::Mat image = cv::imread("E:\\code\\c++\\libtorch_models\\data\\cat.jpg");
	cv::resize(image, image, cv::Size(224, 224));
	torch::Tensor tensor_image = torch::from_blob(image.data, { 224, 224,3 }, torch::kByte);
	tensor_image = torch::unsqueeze(tensor_image, 0).permute({ 0,3,1,2 }).to(torch::kCUDA).to(torch::kFloat).div(255.0);  // (b,h,w,c) -> (b,c,h,w)
	std::cout << tensor_image.options() << std::endl;

	// infer
	auto output = resnet->forward(tensor_image);
	auto max_result = output.max(1, true);
	auto max_index = std::get<1>(max_result).item<float>();
	std::cout << max_index << std::endl;
	return 0;
}

接下来介绍resnet详细实现过程。

2.1 BasicBlock

先实现resnet最小单元BasicBlock，该单元是两次卷积组成的残差块。结构如下。

两种形式，如果第一个卷积stride等于2进行下采样，则跳层连接也需要下采样，维度才能一致，再进行对应相加。

// Residual block used by resnet18 and resnet34: two conv + batch-norm stages
// plus a skip connection.
class BasicBlockImpl : public torch::nn::Module {
public:
	// stride is given to the first convolution; downsample, when non-empty,
	// is applied to the skip-connection input.
	BasicBlockImpl(int64_t in_channels, int64_t out_channels, int64_t stride, torch::nn::Sequential downsample);

	// Runs the block on x and returns the activated sum of both paths.
	torch::Tensor forward(torch::Tensor x);

	// Projection for the skip path; publicly accessible.
	torch::nn::Sequential downsample{nullptr};

private:
	// Sub-modules start as null holders and are created in the constructor.
	torch::nn::Conv2d conv1{nullptr};
	torch::nn::BatchNorm2d bn1{nullptr};
	torch::nn::Conv2d conv2{nullptr};
	torch::nn::BatchNorm2d bn2{nullptr};
};
TORCH_MODULE(BasicBlock);

// Residual block used by the ResNet variants other than resnet18/resnet34:
// three conv + batch-norm stages plus a skip connection.
class BottleNeckImpl : public torch::nn::Module {
public:
	// downsample, when non-empty, is applied to the skip-connection input.
	BottleNeckImpl(int64_t in_channels, int64_t out_channels, int64_t stride,
		torch::nn::Sequential downsample, int groups, int base_width);

	// Runs the block on x and returns the activated sum of both paths.
	torch::Tensor forward(torch::Tensor x);

	// Projection for the skip path; publicly accessible.
	torch::nn::Sequential downsample{nullptr};

private:
	// Sub-modules start as null holders and are created in the constructor.
	torch::nn::Conv2d conv1{nullptr};
	torch::nn::BatchNorm2d bn1{nullptr};
	torch::nn::Conv2d conv2{nullptr};
	torch::nn::BatchNorm2d bn2{nullptr};
	torch::nn::Conv2d conv3{nullptr};
	torch::nn::BatchNorm2d bn3{nullptr};
};
TORCH_MODULE(BottleNeck);

// Layout: conv3x3 + bn + relu, then conv3x3 + bn.
// downsample: module used to down-sample the block's original input.
// stride: passed to the first convolution; with stride=2 the main path is
//         down-sampled and downsample is expected to do the same for the
//         original input.
BasicBlockImpl::BasicBlockImpl(int64_t in_channels, int64_t out_channels, int64_t stride, torch::nn::Sequential downsample) {
	const auto first_conv_opts = torch::nn::Conv2dOptions(in_channels, out_channels, 3).stride(stride).padding(1).bias(false);
	const auto second_conv_opts = torch::nn::Conv2dOptions(out_channels, out_channels, 3).stride(1).padding(1).bias(false);

	this->downsample = downsample;

	// Create and register each sub-module in one step; the registration order
	// (conv1, bn1, conv2, bn2, downsample) is kept.
	conv1 = register_module("conv1", torch::nn::Conv2d(first_conv_opts));
	bn1 = register_module("bn1", torch::nn::BatchNorm2d(torch::nn::BatchNorm2dOptions(out_channels)));
	conv2 = register_module("conv2", torch::nn::Conv2d(second_conv_opts));
	bn2 = register_module("bn2", torch::nn::BatchNorm2d(torch::nn::BatchNorm2dOptions(out_channels)));

	// An empty Sequential means "no projection"; it is left unregistered.
	const bool has_projection = !this->downsample->is_empty();
	if (has_projection) {
		register_module("downsample", this->downsample);
	}
}
// Forward pass of the basic residual block:
// relu(bn2(conv2(relu(bn1(conv1(x))))) + skip), where skip is x itself or
// downsample(x) when a non-empty downsample module was supplied.
torch::Tensor BasicBlockImpl::forward(torch::Tensor x) {
	// Keep a handle to the input for the skip path. No deep copy is needed:
	// every statement below rebinds `x` to a freshly produced tensor, and the
	// only in-place operation (+=) acts on the bn2 output, never on the input.
	torch::Tensor identity = x;

	x = conv1->forward(x);  // halves the spatial size when stride is 2, otherwise keeps it
	x = bn1->forward(x);
	x = torch::relu(x);

	x = conv2->forward(x);
	x = bn2->forward(x);

	// If the main path changed the shape of x, the original input has to be
	// transformed the same way before the two can be added.
	if (!downsample->is_empty()) identity = downsample->forward(identity);

	x += identity;
	x = torch::relu(x);
	return x;
}

2.2 实现ResNet

这里以resnet18为例。网络结构如下。

简单一句话，使用残差块多次卷积，最后接一个全连接层进行分类。

注意上图中的layer1到layer4是由BasicBlock0和BasicBlock1两种残差块组成。实现如下。

// Builds one stage (layer1..layer4) of the network.
// out_channels: base channel count of every block in the stage; the stage
//               emits out_channels * expansion channels.
// blocks: number of residual blocks in the stage.
// stride: stride of the first block; all following blocks use stride 1.
// Side effect: updates this->in_channels to the stage's output channel count
// so the next call starts from the right input width.
torch::nn::Sequential ResNetImpl::_make_layer(int64_t out_channels, int64_t blocks, int64_t stride) {
	const int64_t stage_out_channels = out_channels * this->expansion;

	// 1, downsample: the skip path needs a 1x1 conv + bn projection whenever the
	// first block changes the spatial size (stride != 1) or the channel count.
	torch::nn::Sequential downsample;
	if (stride != 1 || this->in_channels != stage_out_channels) {
		downsample = torch::nn::Sequential(
			torch::nn::Conv2d(torch::nn::Conv2dOptions(this->in_channels, stage_out_channels, 1).stride(stride).padding(0).groups(1).bias(false)),
			torch::nn::BatchNorm2d(stage_out_channels)
		);
	}

	// 2, layers: the first block may down-sample; the others use stride 1 and no
	// projection because their input already has stage_out_channels channels.
	torch::nn::Sequential layers;
	if (this->is_basic) {
		layers->push_back(BasicBlock(this->in_channels, out_channels, stride, downsample));
		this->in_channels = out_channels;  // input width for the following blocks / next stage
		for (int64_t i = 1; i < blocks; i++) {
			layers->push_back(BasicBlock(this->in_channels, out_channels, 1, torch::nn::Sequential()));
		}
	}
	else {
		layers->push_back(BottleNeck(this->in_channels, out_channels, stride, downsample, this->groups, this->base_width));
		this->in_channels = out_channels * this->expansion;  // input width for the following blocks / next stage
		for (int64_t i = 1; i < blocks; i++) {
			// The block's second argument is the BASE width (the block multiplies it
			// by 4 itself), so it must stay out_channels. Passing the already expanded
			// this->in_channels would yield 16x channels and break the residual add.
			layers->push_back(BottleNeck(this->in_channels, out_channels, 1, torch::nn::Sequential(), this->groups, this->base_width));
		}
	}
	return layers;
}

resnet实现。

// ResNet classifier: a stem (conv1 + bn1), four stages of residual blocks
// (layer1..layer4) and a final linear layer (fc).
class ResNetImpl : public torch::nn::Module {
public:
	// layers holds the number of blocks for each of the four stages.
	ResNetImpl(std::vector<int> layers, int num_classes, std::string model_type,
		int groups, int width_per_group);
	torch::Tensor forward(torch::Tensor x);
public:
	// Builds one stage. The first argument is the stage's base OUTPUT channel
	// count (it matches the out-of-class definition); the input channel count is
	// tracked in the in_channels member below.
	torch::nn::Sequential _make_layer(int64_t out_channels, int64_t blocks, int64_t stride = 1);
private:
	int expansion = 1;  // channel multiplier; set to 4 when BottleNeck blocks are used
	bool is_basic = true;  // true: BasicBlock, false: BottleNeck
	int in_channels = 64;  // input channel count of the next block to be built
	int groups = 1, base_width = 64;
	torch::nn::Conv2d conv1{ nullptr };
	torch::nn::BatchNorm2d bn1{ nullptr };
	torch::nn::Sequential layer1{ nullptr };
	torch::nn::Sequential layer2{ nullptr };
	torch::nn::Sequential layer3{ nullptr };
	torch::nn::Sequential layer4{ nullptr };
	torch::nn::Linear fc{ nullptr };
};
TORCH_MODULE(ResNet);

// Builds the network.
// layers: blocks per stage, e.g. resnet18: { 2, 2, 2, 2 }, resnet34: { 3, 4, 6, 3 }, resnet50: { 3, 4, 6, 3 }.
// num_classes: output size of the final linear layer.
// model_type: "resnet18" / "resnet34" select BasicBlock; any other value selects
//             BottleNeck with a channel expansion of 4.
// groups, width_per_group: stored and passed on to the BottleNeck blocks.
ResNetImpl::ResNetImpl(std::vector<int> layers, int num_classes = 1000, std::string model_type = "resnet18", int groups = 1, int width_per_group = 64) {
	// layers[0..3] are read below; fail with a clear message instead of
	// indexing out of bounds.
	TORCH_CHECK(layers.size() >= 4, "ResNet needs block counts for 4 stages, got ", layers.size());

	if (model_type != "resnet18" && model_type != "resnet34")  // not a BasicBlock model: use BottleNeck
	{
		this->expansion = 4;
		is_basic = false;
	}
	this->groups = groups;
	// The parameter is called width_per_group; assigning `base_width` here would
	// only copy the member onto itself and ignore the caller's value.
	this->base_width = width_per_group;

	this->conv1 = torch::nn::Conv2d(torch::nn::Conv2dOptions(3, 64, 7).stride(2).padding(3).groups(1).bias(false));  // stride 2: halves the spatial size
	this->bn1 = torch::nn::BatchNorm2d(torch::nn::BatchNorm2dOptions(64));
	this->layer1 = torch::nn::Sequential(_make_layer(64, layers[0]));  // stride=1: spatial size unchanged
	this->layer2 = torch::nn::Sequential(_make_layer(128, layers[1], 2)); // stride=2: spatial size halved
	this->layer3 = torch::nn::Sequential(_make_layer(256, layers[2], 2)); // stride=2: spatial size halved
	this->layer4 = torch::nn::Sequential(_make_layer(512, layers[3], 2)); // stride=2: spatial size halved

	this->fc = torch::nn::Linear(512 * this->expansion, num_classes);

	register_module("conv1", conv1);
	register_module("bn1", bn1);
	register_module("layer1", layer1);
	register_module("layer2", layer2);
	register_module("layer3", layer3);
	register_module("layer4", layer4);
	register_module("fc", fc);
}
// Forward pass: stem -> max-pool -> layer1..layer4 -> global average pool ->
// fc -> log-softmax. The shape notes assume a (b,3,224,224) input and the
// BasicBlock configuration (expansion == 1).
torch::Tensor ResNetImpl::forward(torch::Tensor x) {
	// 1, two down-sampling steps up front: (b,3,224,224) -> (b,64,56,56)
	x = torch::relu(bn1->forward(conv1->forward(x)));  // feature 1. (b,64,112,112)
	x = torch::max_pool2d(x, /*kernel_size=*/3, /*stride=*/2, /*padding=*/1);  // (b,64,56,56)

	// 2, the four residual stages
	x = layer1->forward(x);  // feature 2. (b,64,56,56)
	x = layer2->forward(x);  // feature 3. (b,128,28,28)
	x = layer3->forward(x);  // feature 4. (b,256,14,14)
	x = layer4->forward(x);  // feature 5. (b,512,7,7)

	// 3, classifier head: pool to 1x1 (on a 7x7 map this matches
	// avg_pool2d(x, 7, 1)), flatten, then the linear layer
	x = torch::adaptive_avg_pool2d(x, {1, 1});  // (b,512,1,1)
	const auto batch_size = x.sizes()[0];
	x = fc->forward(x.view({ batch_size, -1 }));  // (b,512) -> (b,1000)

	// log-probabilities, each in (-inf, 0]
	return torch::log_softmax(x, 1);
}

创建resnet18和resnet34。其中layers中的数字代表当前layer中包含的BasicBlock个数。


// Factory functions for the different ResNet classifiers.
// resnet18: BasicBlock network with 2 blocks in each of the four stages.
ResNet resnet18(int64_t num_classes) {
	const std::vector<int> blocks_per_stage{ 2, 2, 2, 2 };
	return ResNet(blocks_per_stage, num_classes, "resnet18");
}

// resnet34: BasicBlock network with 3, 4, 6 and 3 blocks in its four stages.
ResNet resnet34(int64_t num_classes) {
	const std::vector<int> blocks_per_stage{ 3, 4, 6, 3 };
	return ResNet(blocks_per_stage, num_classes, "resnet34");
}

2.3 BottleNeck

resnet系列框架是一样的，不同点是组件有差异。

resnet18和resnet34都是用BasicBlock组件，而resnet50及以上则使用BottleNeck结构。如下所示。

BottleNeck有三种形式：

（1）BottleNeck0: stride=1, only 4*channels；

（2）BottleNeck1: stride=1, only 4*channels；

（3）BottleNeck2: stride=2, 4*channels and scales/2

// Bottleneck residual block (used by the ResNet variants that do not use
// BasicBlock): three conv + batch-norm stages plus a skip connection.
class BottleNeckImpl : public torch::nn::Module {
public:
	// downsample, when non-empty, is applied to the skip-connection input;
	// groups and base_width determine the width of the inner convolutions.
	BottleNeckImpl(int64_t in_channels, int64_t out_channels, int64_t stride,
		torch::nn::Sequential downsample, int groups, int base_width);

	// Runs the block on x and returns the activated sum of both paths.
	torch::Tensor forward(torch::Tensor x);

	// Projection for the skip path; publicly accessible.
	torch::nn::Sequential downsample{nullptr};

private:
	// Sub-modules start as null holders and are created in the constructor.
	torch::nn::Conv2d conv1{nullptr};
	torch::nn::BatchNorm2d bn1{nullptr};
	torch::nn::Conv2d conv2{nullptr};
	torch::nn::BatchNorm2d bn2{nullptr};
	torch::nn::Conv2d conv3{nullptr};
	torch::nn::BatchNorm2d bn3{nullptr};
};
TORCH_MODULE(BottleNeck);

// Layout: conv1x1 + bn + relu, conv3x3 + bn + relu, conv1x1 + bn.
// stride: passed to the 3x3 convolution; with stride=2 the main path is
//         down-sampled and downsample is expected to do the same for the
//         original input.
BottleNeckImpl::BottleNeckImpl(int64_t in_channels, int64_t out_channels, int64_t stride,
	torch::nn::Sequential downsample, int groups, int base_width) {
	this->downsample = downsample;

	// Channel count of the two inner convolutions:
	// int(out_channels * base_width / 64) * groups, e.g. 64 for (64, 64, 1).
	const int width = static_cast<int>(out_channels * (base_width / 64.)) * groups;
	// The block emits four times its base channel count.
	const int64_t expanded_channels = out_channels * 4;

	const auto reduce_opts = torch::nn::Conv2dOptions(in_channels, width, 1).stride(1).padding(0).groups(1).bias(false);
	const auto spatial_opts = torch::nn::Conv2dOptions(width, width, 3).stride(stride).padding(1).groups(groups).bias(false);
	const auto expand_opts = torch::nn::Conv2dOptions(width, expanded_channels, 1).stride(1).padding(0).groups(1).bias(false);

	// Create and register each sub-module in one step; the registration order
	// (conv1, bn1, conv2, bn2, conv3, bn3, downsample) is kept.
	// 1x1 conv
	conv1 = register_module("conv1", torch::nn::Conv2d(reduce_opts));
	bn1 = register_module("bn1", torch::nn::BatchNorm2d(torch::nn::BatchNorm2dOptions(width)));
	// 3x3 conv
	conv2 = register_module("conv2", torch::nn::Conv2d(spatial_opts));
	bn2 = register_module("bn2", torch::nn::BatchNorm2d(torch::nn::BatchNorm2dOptions(width)));
	// 1x1 conv
	conv3 = register_module("conv3", torch::nn::Conv2d(expand_opts));
	bn3 = register_module("bn3", torch::nn::BatchNorm2d(torch::nn::BatchNorm2dOptions(expanded_channels)));

	// An empty Sequential means "no projection"; it is left unregistered.
	const bool has_projection = !this->downsample->is_empty();
	if (has_projection) {
		register_module("downsample", this->downsample);
	}
}
// Forward pass of the bottleneck block:
// relu(bn3(conv3(relu(bn2(conv2(relu(bn1(conv1(x)))))))) + skip), where skip is
// x itself or downsample(x) when a non-empty downsample module was supplied.
torch::Tensor BottleNeckImpl::forward(torch::Tensor x) {
	// Keep a handle to the input for the skip path. No deep copy is needed:
	// every statement below rebinds `x` to a freshly produced tensor, and the
	// only in-place operation (+=) acts on the bn3 output, never on the input.
	torch::Tensor identity = x;

	// conv1x1+bn+relu
	x = conv1->forward(x);
	x = bn1->forward(x);
	x = torch::relu(x);
	// conv3x3+bn+relu
	x = conv2->forward(x);  // halves the spatial size when stride is 2
	x = bn2->forward(x);
	x = torch::relu(x);
	// conv1x1+bn (the relu comes after the residual add below)
	x = conv3->forward(x);
	x = bn3->forward(x);

	// If the main path changed the shape of x, the original input has to be
	// transformed the same way before the two can be added.
	if (!downsample->is_empty()) identity = downsample->forward(identity);

	x += identity;
	x = torch::relu(x);

	return x;
}