[C++ 基于Eigen库实现CRN前向推理]
第二部分:Conv2d实现
- 前言:(Eigen库使用记录)
- 第一部分:WavFile.class (实现读取wav/pcm,实现STFT)
- 第二部分:Conv2d实现
- 第三部分:TransposedConv2d实现 (mimo,padding,stride,dilation,kernel,outpadding)
- 第四部分:NonLinearity (Sigmoid,Tanh,ReLU,ELU,Softplus)
- 第五部分:LSTM
- GITHUB仓库
1. Conv2d介绍
1.1 参数介绍
这是pytorch官方源码
def __init__(
self,
in_channels: int,
out_channels: int,
kernel_size: _size_2_t,
stride: _size_2_t = 1,
padding: _size_2_t = 0,
dilation: _size_2_t = 1,
groups: int = 1,
bias: bool = True,
padding_mode: str = 'zeros' # TODO: refine this type
):
-
in_channels:网络输入的通道数,简单理解为每个输入样本包含多个FeatureMap。
-
out_channels:网络输出的通道数。即卷积核的个数
-
kernel_size:卷积核的大小,如果该参数是一个整数q,那么卷积核的大小是qXq。
至此,一个简单的卷积如图
-
stride:步长。是卷积过程中移动的步长。默认情况下是1。一般卷积核在输入图像上的移动是自左至右,自上至下。如果参数是一个整数那么就默认在水平和垂直方向都是该整数。如果参数是stride=(2, 1),2代表着高(h)进行步长为2,1代表着宽(w)进行步长为1。
加入步长后,当步长为2时,卷积如图:
-
padding:填充,默认是0值填充。该参数指定的是在边缘填充多少行或列的0值
如padding为1时,卷积如图
-
dilation:扩张。一般情况下,卷积核与输入图像对应的位置之间的计算是相同尺寸的,也就是说卷积核的大小是3X3,那么它在输入图像上每次作用的区域是3X3,这种情况下dilation=1(PyTorch的默认值,即不扩张)。当dilation=2时,卷积核相邻元素在输入图像上间隔一个像素,表示的是下图这种情况。
1.2 python实现
2. 基于Eigen的C++实现
2.1 Layer_Conv2d.h
//
// Created by Koer on 2022/10/31.
//
#ifndef CRN_LAYER_CONV2D_H
#define CRN_LAYER_CONV2D_H
#include "vector"
#include "mat.h"
#include "Eigen"
#include "tuple"
#include "Eigen/CXX11/Tensor"
/**
 * 2-D convolution layer operating on rank-4 Eigen float tensors.
 *
 * Every std::pair parameter/member holds one value per spatial axis:
 * `first` applies to tensor axis 2 (height; forward() calls it "time") and
 * `second` applies to tensor axis 3 (width; forward() calls it "freq").
 */
class Layer_Conv2d {
public:
/// Builds a layer with 1 input channel, 1 output channel, a 1x1 kernel, stride 1 and no padding.
Layer_Conv2d();
/**
 * Builds a layer with the given geometry. No weights are loaded here.
 *
 * Note the parameter order: `dilation` comes BEFORE `padding`, which differs
 * from the PyTorch Conv2d signature (stride, padding, dilation).
 *
 * @param in_ch     number of input channels
 * @param out_ch    number of output channels (number of kernels)
 * @param kernel    kernel size per axis
 * @param stride    step of the sliding window per axis
 * @param dilation  spacing between kernel taps per axis (1 = dense kernel)
 * @param padding   number of zero rows/columns added on each side per axis
 */
Layer_Conv2d(int64_t in_ch, int64_t out_ch, std::pair<int64_t, int64_t> kernel = std::make_pair(1, 1),
std::pair<int64_t, int64_t> stride = std::make_pair(1, 1),
std::pair<int64_t, int64_t> dilation = std::make_pair(1, 1),
std::pair<int64_t, int64_t> padding = std::make_pair(0, 0));
/**
 * Loads weights and bias from an open MAT file. The variables read are
 * named `<state_preffix>_weight` and `<state_preffix>_bias`.
 */
void LoadState(MATFile *pmFile, const std::string &state_preffix);
/// Fills the layer with test parameters: every weight set to 1, every bias set to 0.
void LoadTestState();
/**
 * Runs the convolution.
 *
 * @param input  tensor indexed as (batch, channel, height, width)
 * @return       tensor indexed as (batch, out_channels, H_out, W_out)
 */
Eigen::Tensor<float_t, 4> forward(Eigen::Tensor<float_t, 4> &input);
private:
int64_t in_channels;
int64_t out_channels;
std::pair<int64_t, int64_t> kernel_size;
std::pair<int64_t, int64_t> stride;
std::pair<int64_t, int64_t> dilation;
std::pair<int64_t, int64_t> padding;
// Kernel weights, indexed as (out_channel, in_channel, kernel_h, kernel_w).
Eigen::Tensor<float_t, 4> weights;
// Bias values; forward() reads them as bias(0, out_channel).
Eigen::Tensor<float_t, 2> bias;
};
#endif //CRN_LAYER_CONV2D_H
2.2 Layer_Conv2d.cpp
//
// Created by Koer on 2022/10/31.
//
#include "iostream"
#include "../include/Layer_Conv2d.h"
#include <memory>
#include <stdexcept>
#include <string>
/**
 * Default constructor: a 1-in / 1-out layer with a 1x1 kernel, unit stride,
 * unit dilation and no padding.
 *
 * The dilation is initialized explicitly. Left alone, the std::pair member is
 * value-initialized to (0, 0), and forward() would then use 0 as the step of
 * its strided slice and compute a wrong receptive-field size.
 */
Layer_Conv2d::Layer_Conv2d()
        : in_channels(1),
          out_channels(1),
          kernel_size(1, 1),
          stride(1, 1),
          dilation(1, 1),
          padding(0, 0) {
}
/**
 * Builds a layer with the given geometry; weights and bias stay empty until
 * one of the Load* methods is called.
 *
 * @param in_ch     number of input channels
 * @param out_ch    number of output channels
 * @param kernel    kernel size (height, width)
 * @param stride    sliding-window step (height, width)
 * @param dilation  spacing between kernel taps (height, width)
 * @param padding   zero padding added on each side (height, width)
 */
Layer_Conv2d::Layer_Conv2d(int64_t in_ch, int64_t out_ch,
                           std::pair<int64_t, int64_t> kernel,
                           std::pair<int64_t, int64_t> stride,
                           std::pair<int64_t, int64_t> dilation,
                           std::pair<int64_t, int64_t> padding)
        : in_channels(in_ch),
          out_channels(out_ch),
          kernel_size(kernel),
          stride(stride),
          dilation(dilation),
          padding(padding) {
}
void Layer_Conv2d::LoadState(MATFile *pmFile, const std::string &state_preffix) {
std::string weight_name = state_preffix + "_weight";
std::string bias_name = state_preffix + "_bias";
// Read weight
mxArray *pa = matGetVariable(pmFile, weight_name.c_str());
auto *values = (float_t *) mxGetData(pa);
// First Dimension eg.(16,1,2,3) ===> M=16
long long dim1 = mxGetM(pa);
// Rest Total Dimension eg.(16,1,2,3) ===>N = 1 * 2 * 3 = 6
long long dim2 = mxGetN(pa);
dim2 = dim2 / this->kernel_size.first / this->kernel_size.second;
this->weights.resize(dim1, dim2, this->kernel_size.first, this->kernel_size.second);
int idx = 0;
for (int i = 0; i < this->kernel_size.second; i++) {
for (int j = 0; j < this->kernel_size.first; j++) {
for (int k = 0; k < dim2; k++) {
for (int l = 0; l < dim1; l++) {
this->weights(l, k, j, i) = values[idx++];
}
}
}
}
// std::cout << this->weights << std::endl;
// Read bias
pa = matGetVariable(pmFile, bias_name.c_str());
values = (float_t *) mxGetData(pa);
dim1 = mxGetM(pa);
dim2 = mxGetN(pa);
this->bias.resize(dim1, dim2);
idx = 0;
for (int i = 0; i < dim2; i++) {
for (int j = 0; j < dim1; j++) {
this->bias(j, i) = values[idx++];
}
}
// std::cout << this->bias << std::endl;
// std::cout << " Finish Loading State of " + state_preffix << std::endl;
}
/**
 * Installs deterministic parameters for testing: a weight tensor of shape
 * (out_channels, in_channels, kernel_h, kernel_w) filled with ones and a
 * bias tensor of shape (1, out_channels) filled with zeros.
 */
void Layer_Conv2d::LoadTestState() {
    // Size the members in place, then fill them.
    this->weights.resize(this->out_channels, this->in_channels,
                         this->kernel_size.first, this->kernel_size.second);
    this->weights.setConstant(1.0);

    this->bias.resize(1, this->out_channels);
    this->bias.setConstant(0.0);
}
/**
 * Applies the convolution to `input`.
 *
 * The input is zero-padded by `padding` on both sides of the two spatial
 * axes, then every output element is computed as the sum over all input
 * channels of (dilated window * kernel) plus the bias of the output channel.
 *
 * Output size per spatial axis:
 *   out = (in + 2 * padding - dilation * (kernel - 1) - 1) / stride + 1
 *
 * @param input  tensor indexed as (batch, in_channels, H_in, W_in)
 * @return       tensor indexed as (batch, out_channels, H_out, W_out)
 * @throws std::invalid_argument if the layer geometry is invalid (non-positive
 *         kernel/stride/dilation, negative padding), if the input channel count
 *         differs from `in_channels`, or if the dilated kernel does not fit
 *         into the padded input
 * @throws std::runtime_error    if weights/bias have not been loaded with a
 *         shape matching the layer configuration
 */
Eigen::Tensor<float_t, 4> Layer_Conv2d::forward(Eigen::Tensor<float_t, 4> &input) {
    const auto &dim_inp = input.dimensions();
    const int64_t batch = dim_inp[0], C_in = dim_inp[1], H_in = dim_inp[2], W_in = dim_inp[3];

    const int64_t kernel_h = this->kernel_size.first;
    const int64_t kernel_w = this->kernel_size.second;
    const int64_t stride_h = this->stride.first;
    const int64_t stride_w = this->stride.second;
    const int64_t dilation_h = this->dilation.first;
    const int64_t dilation_w = this->dilation.second;
    /* Sequence channel × T × F: first = time axis, second = frequency axis */
    const int64_t pad_size_time = this->padding.first;
    const int64_t pad_size_freq = this->padding.second;

    /* A zero stride would divide by zero / never advance; a zero dilation is an invalid slice step. */
    if (kernel_h <= 0 || kernel_w <= 0 || stride_h <= 0 || stride_w <= 0 ||
        dilation_h <= 0 || dilation_w <= 0 || pad_size_time < 0 || pad_size_freq < 0) {
        throw std::invalid_argument(
                "Layer_Conv2d::forward: kernel, stride and dilation must be positive and padding non-negative");
    }
    if (C_in != this->in_channels) {
        throw std::invalid_argument("Layer_Conv2d::forward: input channel count does not match in_channels");
    }
    if (this->weights.dimension(0) != this->out_channels || this->weights.dimension(1) != this->in_channels ||
        this->weights.dimension(2) != kernel_h || this->weights.dimension(3) != kernel_w) {
        throw std::runtime_error("Layer_Conv2d::forward: weights are not loaded or have the wrong shape");
    }
    if (this->bias.dimension(0) < 1 || this->bias.dimension(1) < this->out_channels) {
        throw std::runtime_error("Layer_Conv2d::forward: bias is not loaded or has the wrong shape");
    }

    const int64_t H_pad = H_in + pad_size_time * 2;
    const int64_t W_pad = W_in + pad_size_freq * 2;

    /* h_region / w_region: extent of the dilated kernel window minus 1 */
    const int64_t h_region = (kernel_h - 1) * dilation_h;
    const int64_t w_region = (kernel_w - 1) * dilation_w;
    if (H_pad <= h_region || W_pad <= w_region) {
        throw std::invalid_argument("Layer_Conv2d::forward: dilated kernel is larger than the padded input");
    }

    /* padding tensor: zeros everywhere, input copied into the centre */
    Eigen::Tensor<float_t, 4> padded_input(batch, C_in, H_pad, W_pad);
    padded_input.setZero();
    padded_input.slice(Eigen::array<int64_t, 4>{0, 0, pad_size_time, pad_size_freq}, dim_inp) = input;

    /* output shape */
    const int64_t H_out = (H_pad - h_region - 1) / stride_h + 1;
    const int64_t W_out = (W_pad - w_region - 1) / stride_w + 1;
    Eigen::Tensor<float_t, 4> output(batch, this->out_channels, H_out, W_out);
    output.setZero();

    /* params
     * region:  window of the padded input currently being convolved, shape (1, ic, k1, k2)
     * kernel:  weights of the current output channel, shape (1, ic, k1, k2)
     * tmp_res: result of summing region * kernel along axes 1, 2, 3
     */
    Eigen::Tensor<float_t, 4> region;
    Eigen::Tensor<float_t, 4> kernel;
    Eigen::Tensor<float_t, 1> tmp_res;
    Eigen::array<int, 3> dim_sum{1, 2, 3};

    for (int64_t idx_batch = 0; idx_batch < batch; idx_batch++) {
        for (int64_t idx_outc = 0; idx_outc < this->out_channels; idx_outc++) {
            kernel = this->weights.slice(Eigen::array<int64_t, 4>{idx_outc, 0, 0, 0},
                                         Eigen::array<int64_t, 4>{1, this->in_channels, kernel_h, kernel_w});
            /* the bias only depends on the output channel, so read it once */
            const float_t channel_bias = this->bias(0, idx_outc);

            for (int64_t out_h = 0; out_h < H_out; out_h++) {
                const int64_t idx_h = out_h * stride_h;
                for (int64_t out_w = 0; out_w < W_out; out_w++) {
                    const int64_t idx_w = out_w * stride_w;
                    /* pick every dilation-th element so the window has exactly k1 x k2 taps */
                    region = padded_input.stridedSlice(
                            Eigen::array<int64_t, 4>{idx_batch, 0, idx_h, idx_w},
                            Eigen::array<int64_t, 4>{idx_batch + 1, this->in_channels, idx_h + h_region + 1,
                                                     idx_w + w_region + 1},
                            Eigen::array<int64_t, 4>{1, 1, dilation_h, dilation_w});
                    tmp_res = (region * kernel).sum(dim_sum);
                    output(idx_batch, idx_outc, out_h, out_w) = tmp_res(0) + channel_bias;
                }
            }
        }
    }
    return output;
}
这是基于循环写的,效率十分十分低。后面要写成unfold形式进行并行运算。
参考链接
[1] Conv2d介绍