cuda入门demo（2）——最基础的二方向sobel

news2026/2/9 4:52:52

⚠️主要是自己温习用，只保证代码正确性，不保证讲解的详细性。

今天继续总结cuda最基本的入门demo。很多教程会教你conv怎么写，而sobel本质上也是一种conv，并且conv本身在torch里已经有现成的实现。

之前在课题中尝试过sobel的变体。为了方便复习，我把原始sobel（不是变体）的代码备份到这里；相比变体，原始版本非常简单，很适合学习。

最简单的sobel

#include <opencv2/opencv.hpp>
#include <iostream>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>

using namespace std;
using namespace cv;


// Two-direction Sobel filter over a single-channel 8-bit image.
//
// Both buffers are indexed as row * Width + column, i.e. densely packed
// row-major with a row stride of exactly Width bytes.
//
// Launch layout: 2D grid of 2D blocks. The x dimension of the launch maps to
// image columns and the y dimension maps to image rows. The grid may
// over-cover the image; threads that fall outside it exit without touching
// memory. No shared memory is used.
//
// Parameters:
//   in     - device pointer to Height * Width input pixels
//   out    - device pointer to Height * Width output pixels
//   Height - number of image rows
//   Width  - number of image columns
//
// Output:
//   Border pixels (first/last row or column) are copied from `in` unchanged.
//   Every interior pixel receives (|gx| + |gy|) / 2 clamped to 255, where gx
//   and gy are the horizontal and vertical 3x3 Sobel responses.
__global__ void sobel_gpu(unsigned char* in, unsigned char* out, const int Height, const int Width) {
    const int row = blockDim.y * blockIdx.y + threadIdx.y;
    const int col = blockDim.x * blockIdx.x + threadIdx.x;

    // The grid is rounded up to whole blocks, so some threads lie outside the
    // image. They must not compute an index at all: row * Width + col would
    // alias a pixel in another row or point past the end of the buffers.
    if (row >= Height || col >= Width) {
        return;
    }

    const int idx = row * Width + col;

    // The 3x3 window does not fit on the image border; pass the pixel through.
    if (row == 0 || row == Height - 1 || col == 0 || col == Width - 1) {
        out[idx] = in[idx];
        return;
    }

    const int idx_up = idx - Width;
    const int idx_down = idx + Width;

    // Accumulate in int: each response lies in [-1020, 1020], which does not
    // fit in an 8-bit type.
    const int gx = in[idx_up - 1] + 2 * in[idx - 1] + in[idx_down - 1]
                 - in[idx_up + 1] - 2 * in[idx + 1] - in[idx_down + 1];
    const int gy = in[idx_up - 1] + 2 * in[idx_up] + in[idx_up + 1]
                 - in[idx_down - 1] - 2 * in[idx_down] - in[idx_down + 1];

    // Use magnitudes so opposite-signed responses do not cancel, then saturate
    // to the 8-bit output range instead of wrapping.
    const int abs_gx = gx < 0 ? -gx : gx;
    const int abs_gy = gy < 0 ? -gy : gy;
    const int magnitude = (abs_gx + abs_gy) / 2;
    out[idx] = static_cast<unsigned char>(magnitude > 255 ? 255 : magnitude);
}

// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char* what) {
    if (status == cudaSuccess) {
        return true;
    }
    cerr << what << " failed: " << cudaGetErrorString(status) << endl;
    return false;
}

// Loads "lena.jpg" as grayscale, applies a 3x3 Gaussian blur on the host,
// runs sobel_gpu on the blurred image and writes the result to "save.png".
// Returns 0 on success and 1 if the image cannot be read or written or if any
// CUDA call fails.
int main() {
    Mat img = imread("lena.jpg", 0);
    if (img.empty()) {
        cerr << "failed to read lena.jpg" << endl;
        return 1;
    }

    const int height = img.rows;
    const int width = img.cols;

    // Compute the byte count in size_t so large images cannot overflow int.
    const size_t memsize = static_cast<size_t>(height) * width * sizeof(unsigned char);

    Mat gaussImg;
    GaussianBlur(img, gaussImg, Size(3, 3), 0, 0, BORDER_DEFAULT);
    // The kernel indexes rows with a stride of exactly `width` bytes, so the
    // host buffer handed to cudaMemcpy must be densely packed.
    if (!gaussImg.isContinuous()) {
        gaussImg = gaussImg.clone();
    }

    Mat dst_gpu(height, width, CV_8UC1, Scalar(0));

    unsigned char* in_gpu = nullptr;
    unsigned char* out_gpu = nullptr;

    // x covers columns, y covers rows; round up so the grid covers the image.
    dim3 threadsPerBlock(32, 32);
    dim3 blocksPerGrid((width + threadsPerBlock.x - 1) / threadsPerBlock.x, (height + threadsPerBlock.y - 1) / threadsPerBlock.y);

    // Process the blurred image on the device. Short-circuiting stops at the
    // first failing call.
    bool ok = cudaOk(cudaMalloc((void**)&in_gpu, memsize), "cudaMalloc(in_gpu)")
           && cudaOk(cudaMalloc((void**)&out_gpu, memsize), "cudaMalloc(out_gpu)")
           && cudaOk(cudaMemcpy(in_gpu, gaussImg.data, memsize, cudaMemcpyHostToDevice), "cudaMemcpy(host to device)");

    if (ok) {
        sobel_gpu << < blocksPerGrid, threadsPerBlock > >> (in_gpu, out_gpu, height, width);

        // A launch does not return a status itself; query it explicitly. The
        // blocking copy that follows reports errors raised while the kernel ran.
        ok = cudaOk(cudaGetLastError(), "sobel_gpu launch")
          && cudaOk(cudaMemcpy(dst_gpu.data, out_gpu, memsize, cudaMemcpyDeviceToHost), "cudaMemcpy(device to host)");
    }

    // Release device memory on every path; freeing a null pointer is a no-op.
    cudaFree(in_gpu);
    cudaFree(out_gpu);

    if (!ok) {
        return 1;
    }

    if (!imwrite("save.png", dst_gpu)) {
        cerr << "failed to write save.png" << endl;
        return 1;
    }

    return 0;
}