cuda小白
原始API链接 NPP
GPU架构近些年也有不少的变化,具体的可以参考别的博主的介绍,都比较详细。还有一些cuda中的专有名词的含义,可以参考《详解CUDA的Context、Stream、Warp、SM、SP、Kernel、Block、Grid》
常见的NppStatus,可以看这里。
7 是图像的傅里叶变换,还在学习中
本文主要讲述的是形态学变换
Dilation
膨胀操作(对二值化物体边界点进行扩充,将与物体接触的所有背景点合并到该物体中,使边界向外部扩张。如果两个物体间隔较近,会将两物体连通在一起。)
// Dilation with a user-supplied mask (8-bit unsigned, 3 channels, ROI variant).
// Each output pixel is the maximum source pixel value found under the mask;
// positions whose mask value is 0 do not take part in the maximum search.
// Parameter roles below follow the NPP naming convention (confirm details in the NPP docs):
//   pSrc / nSrcStep  - source image pointer and its line step
//   pDst / nDstStep  - destination image pointer and its line step
//   oSizeROI         - size of the region to process
//   pMask / oMaskSize / oAnchor - mask data, mask dimensions and anchor position inside the mask
NppStatus nppiDilate_8u_C3R(const Npp8u *pSrc,
Npp32s nSrcStep,
Npp8u *pDst,
Npp32s nDstStep,
NppiSize oSizeROI,
const Npp8u *pMask,
NppiSize oMaskSize,
NppiPoint oAnchor);
// Same as nppiDilate_8u_C3R, except that the caller additionally describes the
// full source image (oSrcSize), where pSrc sits inside it (oSrcOffset), and how
// pixels outside the image are treated (eBorderType).
/*
Border types declared by NPP (the wrap mode is spelled NPP_BORDER_WRAP in the NPP headers):
NppiBorderType {
NPP_BORDER_UNDEFINED,
NPP_BORDER_NONE,
NPP_BORDER_CONSTANT,
NPP_BORDER_REPLICATE,
NPP_BORDER_WRAP,
NPP_BORDER_MIRROR
};
*/
NppStatus nppiDilateBorder_8u_C3R(const Npp8u *pSrc,
Npp32s nSrcStep,
NppiSize oSrcSize,
NppiPoint oSrcOffset,
Npp8u *pDst,
Npp32s nDstStep,
NppiSize oSizeROI,
const Npp8u *pMask,
NppiSize oMaskSize,
NppiPoint oAnchor,
NppiBorderType eBorderType);
// Fixed-size variant: the 3x3 neighbourhood is implied by the function name,
// so no mask pointer, mask size or anchor is passed.
NppStatus nppiDilate3x3_8u_C3R(const Npp8u *pSrc,
Npp32s nSrcStep,
Npp8u *pDst,
Npp32s nDstStep,
NppiSize oSizeROI);
code
#include <iostream>
#include <cuda_runtime.h>
#include <npp.h>
#include <opencv2/opencv.hpp>
// Releases a device pointer with cudaFree when it is non-null, then resets it to
// nullptr so a repeated call is a no-op. The macro expands to a braced block
// (call sites in this file omit the trailing semicolon) and evaluates `ptr`
// several times, so pass a plain pointer variable only.
#define CUDA_FREE(ptr) { if (ptr != nullptr) { cudaFree(ptr); ptr = nullptr; } }
int main() {
std::string directory = "../";
cv::Mat image_dog = cv::imread(directory + "dog.png");
int image_width = image_dog.cols;
int image_height = image_dog.rows;
int image_size = image_width * image_height;
// =============== device memory ===============
// input
uint8_t *in_image;
cudaMalloc((void**)&in_image, image_size * 3 * sizeof(uint8_t));
cudaMemcpy(in_image, image_dog.data, image_size * 3 * sizeof(uint8_t), cudaMemcpyHostToDevice);
// output
uint8_t *out_ptr1, *out_ptr2;
cudaMalloc((void**)&out_ptr1, image_size * 3 * sizeof(uint8_t)); // 三通道
cudaMalloc((void**)&out_ptr2, image_size * 3 * sizeof(uint8_t)); // 三通道
NppiSize in_size;
in_size.width = image_width;
in_size.height = image_height;
NppiRect rc;
rc.x = 0;
rc.y = 0;
rc.width = image_width;
rc.height = image_height;
int mask_size = 10;
cv::Mat mat_mask = cv::Mat::ones(mask_size, mask_size, CV_8UC1);
uint8_t *mask;
cudaMalloc((void**)&mask, mask_size * mask_size * sizeof(uint8_t));
cudaMemcpy(mask, mat_mask.data, mask_size * mask_size * sizeof(uint8_t), cudaMemcpyHostToDevice);
cv::Mat out_image = cv::Mat::zeros(image_height, image_width, CV_8UC3);
NppStatus status;
NppiSize npp_mask_size;
npp_mask_size.width = mask_size;
npp_mask_size.height = mask_size;
NppiPoint pt;
pt.x = 0;
pt.y = 0;
// =============== nppiDilate_8u_C3R ===============
status = nppiDilate_8u_C3R(in_image, image_width * 3, out_ptr1, image_width * 3,
in_size, mask, npp_mask_size, pt);
if (status != NPP_SUCCESS) {
std::cout << "[GPU] ERROR nppiDilate_8u_C3R failed, status = " << status << std::endl;
return false;
}
cudaMemcpy(out_image.data, out_ptr1, image_size * 3, cudaMemcpyDeviceToHost);
cv::imwrite(directory + "dilate.jpg", out_image);
// =============== nppiDilateBorder_8u_C3R ===============
NppiPoint src_pt;
src_pt.x = 100;
src_pt.y = 100;
status = nppiDilateBorder_8u_C3R(in_image, image_width * 3, in_size, src_pt, out_ptr2,
image_width * 3, in_size, mask, npp_mask_size, pt,
NPP_BORDER_REPLICATE);
if (status != NPP_SUCCESS) {
std::cout << "[GPU] ERROR nppiDilateBorder_8u_C3R failed, status = " << status << std::endl;
return false;
}
cudaMemcpy(out_image.data, out_ptr2, image_size * 3, cudaMemcpyDeviceToHost);
cv::imwrite(directory + "dilate_border.jpg", out_image);
// free
CUDA_FREE(in_image)
CUDA_FREE(out_ptr1)
CUDA_FREE(out_ptr2)
}
make
cmake_minimum_required(VERSION 3.20)
project(test)

find_package(OpenCV REQUIRED)
# FindCUDAToolkit (CMake >= 3.17) replaces the deprecated FindCUDA module and
# provides imported targets, so no hard-coded /usr/local/cuda glob is needed.
find_package(CUDAToolkit REQUIRED)

add_executable(test test.cpp)
target_include_directories(test PRIVATE ${OpenCV_INCLUDE_DIRS})
# Link only what the demo uses: the CUDA runtime, the NPP core library and the
# NPP image-morphology library (nppiDilate* / nppiErode*).
target_link_libraries(test PRIVATE
    ${OpenCV_LIBS}
    CUDA::cudart
    CUDA::nppc
    CUDA::nppim
)
result
注意:
- nppiDilateBorder_8u_C3R 仅支持 border 模式 NPP_BORDER_REPLICATE,使用其他模式会报错,错误码为 -9999(NPP_NOT_SUPPORTED_MODE_ERROR)。
Erode
腐蚀操作
// Erosion with a user-supplied mask (8-bit unsigned, 3 channels, ROI variant).
// The parameter list is identical to nppiDilate_8u_C3R: source pointer/step,
// destination pointer/step, ROI size, mask data, mask size and mask anchor.
// Per the NPP documentation the output pixel is the minimum of the source
// pixels under the non-zero mask entries - confirm there for details.
NppStatus nppiErode_8u_C3R(const Npp8u *pSrc,
Npp32s nSrcStep,
Npp8u *pDst,
Npp32s nDstStep,
NppiSize oSizeROI,
const Npp8u *pMask,
NppiSize oMaskSize,
NppiPoint oAnchor);
// Border-aware erosion: compared with nppiErode_8u_C3R the caller also passes
// the full source image size (oSrcSize), the position of pSrc inside that image
// (oSrcOffset) and the border handling mode (eBorderType).
NppStatus nppiErodeBorder_8u_C3R(const Npp8u *pSrc,
Npp32s nSrcStep,
NppiSize oSrcSize,
NppiPoint oSrcOffset,
Npp8u *pDst,
Npp32s nDstStep,
NppiSize oSizeROI,
const Npp8u *pMask,
NppiSize oMaskSize,
NppiPoint oAnchor,
NppiBorderType eBorderType);
// Fixed-size erosion: the 3x3 neighbourhood is implied by the function name,
// so no mask pointer, mask size or anchor is passed.
NppStatus nppiErode3x3_8u_C3R(const Npp8u *pSrc,
Npp32s nSrcStep,
Npp8u *pDst,
Npp32s nDstStep,
NppiSize oSizeROI);
// nppiErode3x3Border_8u_C3R is not covered in detail here.
在此使用上一个实验中膨胀之后的图像(dilate.jpg)作为腐蚀的输入。
code
#include <iostream>
#include <cuda_runtime.h>
#include <npp.h>
#include <opencv2/opencv.hpp>
// Releases a device pointer with cudaFree when it is non-null, then resets it to
// nullptr so a repeated call is a no-op. The macro expands to a braced block
// (call sites in this file omit the trailing semicolon) and evaluates `ptr`
// several times, so pass a plain pointer variable only.
#define CUDA_FREE(ptr) { if (ptr != nullptr) { cudaFree(ptr); ptr = nullptr; } }
int main() {
std::string directory = "../";
cv::Mat image_dog = cv::imread(directory + "dilate.jpg");
int image_width = image_dog.cols;
int image_height = image_dog.rows;
int image_size = image_width * image_height;
// =============== device memory ===============
// input
uint8_t *in_image;
cudaMalloc((void**)&in_image, image_size * 3 * sizeof(uint8_t));
cudaMemcpy(in_image, image_dog.data, image_size * 3 * sizeof(uint8_t), cudaMemcpyHostToDevice);
// output
uint8_t *out_ptr1, *out_ptr2;
cudaMalloc((void**)&out_ptr1, image_size * 3 * sizeof(uint8_t)); // 三通道
cudaMalloc((void**)&out_ptr2, image_size * 3 * sizeof(uint8_t)); // 三通道
NppiSize in_size;
in_size.width = image_width;
in_size.height = image_height;
NppiRect rc;
rc.x = 0;
rc.y = 0;
rc.width = image_width;
rc.height = image_height;
int mask_size = 10;
cv::Mat mat_mask = cv::Mat::ones(mask_size, mask_size, CV_8UC1);
uint8_t *mask;
cudaMalloc((void**)&mask, mask_size * mask_size * sizeof(uint8_t));
cudaMemcpy(mask, mat_mask.data, mask_size * mask_size * sizeof(uint8_t), cudaMemcpyHostToDevice);
cv::Mat out_image = cv::Mat::zeros(image_height, image_width, CV_8UC3);
NppStatus status;
NppiSize npp_mask_size;
npp_mask_size.width = mask_size;
npp_mask_size.height = mask_size;
NppiPoint pt;
pt.x = 0;
pt.y = 0;
// =============== nppiErode_8u_C3R ===============
status = nppiErode_8u_C3R(in_image, image_width * 3, out_ptr1, image_width * 3,
in_size, mask, npp_mask_size, pt);
if (status != NPP_SUCCESS) {
std::cout << "[GPU] ERROR nppiErode_8u_C3R failed, status = " << status << std::endl;
return false;
}
cudaMemcpy(out_image.data, out_ptr1, image_size * 3, cudaMemcpyDeviceToHost);
cv::imwrite(directory + "erode.jpg", out_image);
// =============== nppiErodeBorder_8u_C3R ===============
NppiPoint src_pt;
src_pt.x = 100;
src_pt.y = 100;
status = nppiErodeBorder_8u_C3R(in_image, image_width * 3, in_size, src_pt, out_ptr2,
image_width * 3, in_size, mask, npp_mask_size, pt,
NPP_BORDER_REPLICATE);
if (status != NPP_SUCCESS) {
std::cout << "[GPU] ERROR nppiErodeBorder_8u_C3R failed, status = " << status << std::endl;
return false;
}
cudaMemcpy(out_image.data, out_ptr2, image_size * 3, cudaMemcpyDeviceToHost);
cv::imwrite(directory + "erode_border.jpg", out_image);
// free
CUDA_FREE(in_image)
CUDA_FREE(out_ptr1)
CUDA_FREE(out_ptr2)
}
make
cmake_minimum_required(VERSION 3.20)
project(test)

find_package(OpenCV REQUIRED)
# FindCUDAToolkit (CMake >= 3.17) replaces the deprecated FindCUDA module and
# provides imported targets, so no hard-coded /usr/local/cuda glob is needed.
find_package(CUDAToolkit REQUIRED)

add_executable(test test.cpp)
target_include_directories(test PRIVATE ${OpenCV_INCLUDE_DIRS})
# Link only what the demo uses: the CUDA runtime, the NPP core library and the
# NPP image-morphology library (nppiDilate* / nppiErode*).
target_link_libraries(test PRIVATE
    ${OpenCV_LIBS}
    CUDA::cudart
    CUDA::nppc
    CUDA::nppim
)
result
注意点:
- nppiErodeBorder_8u_C3R 仅支持 border 模式 NPP_BORDER_REPLICATE,使用其他模式会报错,错误码为 -9999(NPP_NOT_SUPPORTED_MODE_ERROR)。
ComplexImageMorphology
复杂图像形态学,暂时不做介绍,后续视情况而定
<<<链接>>>