工作中需要把 CUDA 代码移植到 Qt 项目中,并与 Qt 项目一起编译,这里记录一下配置过程。
前期准备
1.安装CUDA
电脑需要安装好 CUDA,可以看我的这篇文章安装:Windows和WSL安装CUDA-CSDN博客
使用命令 nvcc --version 查看安装的 CUDA 版本。
CUDA 的默认的安装路径是:
# CUDA_PATH 这个是安装cuda时自动配置到环境变量中的路径,也是cuda安装的路径
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.6
2.安装CUDA Samples
电脑需要安装CUDA Samples,CUDA Samples 是 CUDA 官方提供的示例程序,默认的安装路径是
C:\Program Files\NVIDIA Corporation\CUDA Samples\v12.6
# CUDA_SDK sdk包的位置
C:\Program Files\NVIDIA Corporation\CUDA Samples\v12.6\common
官方文档中说,从 CUDA 11.6 版本开始,CUDA Samples 不再随 CUDA 一起默认安装。所以如果你安装的是高于 CUDA 11.6 的版本,需要自己到 GitHub 上的 NVIDIA/cuda-samples 仓库下载 CUDA Samples(选择与所装 CUDA 版本相近的 tag,例如 v12.5)。
3. 新建文件夹
由于在配置Qt时,pro文件中的 CUDA 路径不能有空格,而 CUDA 和 CUDA Samples 的默认安装路径都有空格。所以新建一个文件夹,把上面两步 CUDA_PATH 和 CUDA_SDK 路径下的内容全部拷贝过去。
# 我新建的文件夹
E:\MyWorkSpace\Qt\cuda
Qt配置
首先新建一个带界面的Qt程序,
修改pro文件,里面的路径和部分配置需要改成匹配你自己电脑的:
# Qt modules used by the application; 'widgets' is a separate module from Qt 5 on.
QT += core gui
greaterThan(QT_MAJOR_VERSION, 4): QT += widgets
TARGET = TestCuda
CONFIG += c++17
# You can make your code fail to compile if it uses deprecated APIs.
# In order to do so, uncomment the following line.
#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0
# Regular C++ sources, headers and Designer forms handled by qmake itself.
# kernel.h is listed here because it is included from MainWindow.cpp.
SOURCES += \
main.cpp \
MainWindow.cpp
HEADERS += \
MainWindow.h \
kernel.h
FORMS += \
MainWindow.ui
# Default rules for deployment.
qnx: target.path = /tmp/$${TARGET}/bin
else: unix:!android: target.path = /opt/$${TARGET}/bin
!isEmpty(target.path): INSTALLS += target
#-------------------------------------------------
# CUDA
# This makes the .cu files appear in your project
# CUDA_SOURCES is a custom variable; it is used as the input of the nvcc
# extra compilers declared in the platform-specific scopes of this file.
CUDA_SOURCES += \
kernel.cu
# CUDA settings
SYSTEM_NAME = x64 # Depending on your system either 'Win32', 'x64', or 'Win64'
SYSTEM_TYPE = 64 # '32' or '64', depending on your system
# Pick the value that matches your GPU; query the compute capability with:
#   nvidia-smi --query-gpu=compute_cap --format=csv
CUDA_ARCH = sm_86 # Type of CUDA architecture
# Extra flags passed to every nvcc invocation.
# NOTE(review): --use_fast_math trades floating-point accuracy for speed for
# all device code; confirm this is acceptable for the project.
NVCC_OPTIONS = --use_fast_math
#-------------------------------------------------
# Windows build: links the CUDA libraries and registers nvcc as an extra
# compiler that turns every file in CUDA_SOURCES into an object file.
win32 {
# Define output directories
DESTDIR = ./bin
CUDA_OBJECTS_DIR = ./
# Note: the path must not contain spaces!
CUDA_DIR = "E:/MyWorkSpace/Qt/cuda" # Path to cuda toolkit install
# NOTE(review): -lcudart is linked here and cudart_static is added through
# CUDA_LIB_NAMES further down, while nvcc is invoked with '-cudart static';
# confirm which runtime flavour is actually intended.
LIBS += -L$$CUDA_DIR/lib/x64 \
-lcudart \
-lcublas \
-lcufft
# MSVCRT link option (static or dynamic, it must be the same with your Qt SDK link option)
MSVCRT_LINK_FLAG_DEBUG = "/MDd" # debug version of the DLL runtime
MSVCRT_LINK_FLAG_RELEASE = "/MD" # DLL version of the C/C++ runtime; see the Visual Studio project properties for details
# include paths
INCLUDEPATH += $$CUDA_DIR\include \
$$CUDA_DIR/Common
# library directories
QMAKE_LIBDIR += $$CUDA_DIR\lib\$$SYSTEM_NAME
# The following makes sure all path names (which often include spaces) are put between quotation marks
CUDA_INC = $$join(INCLUDEPATH,'" -I"','-I"','"')
# Add the necessary libraries
CUDA_LIB_NAMES = cudart_static
for(lib, CUDA_LIB_NAMES) {
CUDA_LIBS += -l$$lib
}
LIBS += $$CUDA_LIBS
# Configuration of the Cuda compiler
# Exactly one of the two extra compilers below is registered, depending on
# whether the active configuration is debug or release.
CONFIG(debug, debug|release) {
# Debug mode
# Host compiler flags are forwarded through -Xcompiler; the MSVC runtime flag
# must match the one used by the Qt build (see MSVCRT_LINK_FLAG_DEBUG).
cuda_d.input = CUDA_SOURCES
cuda_d.output = $$CUDA_OBJECTS_DIR\${QMAKE_FILE_BASE}_cuda.obj
cuda_d.commands = "$$CUDA_DIR\bin\nvcc.exe" -D_DEBUG $$NVCC_OPTIONS $$CUDA_INC $$CUDA_LIBS \
--machine $$SYSTEM_TYPE -arch=$$CUDA_ARCH \
--compile -cudart static -g -DWIN32 -D_MBCS \
-Xcompiler "/wd4819,/EHsc,/W3,/nologo,/Od,/Zi,/RTC1" \
-Xcompiler $$MSVCRT_LINK_FLAG_DEBUG \
-c -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
cuda_d.dependency_type = TYPE_C
QMAKE_EXTRA_COMPILERS += cuda_d
}
else {
# Release mode
# NOTE(review): unlike the debug rule, the nvcc path is not quoted here;
# this only works while CUDA_DIR contains no spaces.
cuda.input = CUDA_SOURCES
cuda.output = $$CUDA_OBJECTS_DIR\${QMAKE_FILE_BASE}_cuda.obj
cuda.commands = $$CUDA_DIR\bin\nvcc.exe $$NVCC_OPTIONS $$CUDA_INC $$CUDA_LIBS \
--machine $$SYSTEM_TYPE -arch=$$CUDA_ARCH \
--compile -cudart static -DWIN32 -D_MBCS \
-Xcompiler "/wd4819,/EHsc,/W3,/nologo,/O2,/Zi" \
-Xcompiler $$MSVCRT_LINK_FLAG_RELEASE \
-c -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
cuda.dependency_type = TYPE_C
QMAKE_EXTRA_COMPILERS += cuda
}
}
#-------------------------------------------------
# Unix/Linux build: links the CUDA runtime and registers nvcc as an extra
# compiler that turns every file in CUDA_SOURCES into an object file.
unix {
    # Define output directories
    DESTDIR = ./bin
    CUDA_OBJECTS_DIR = ./

    # Path to the CUDA toolkit install; adjust it to the local installation.
    # NOTE(review): CUDA_ARCH is set to sm_86 elsewhere in this file, which
    # needs CUDA 11.1 or newer; the 10.0 path below is only a placeholder.
    CUDA_DIR = "/usr/local/cuda-10.0"

    # Take the libraries from the same toolkit as the headers and nvcc, so a
    # different /usr/local/cuda symlink cannot mix toolkit versions.
    LIBS += -L"/usr/local/lib" \
            -L$$CUDA_DIR/lib64 \
            -lcudart

    # include paths
    INCLUDEPATH += $$CUDA_DIR/include \
                   $$CUDA_DIR/common/inc \
                   $$CUDA_DIR/../shared/inc

    # library directories (forward slashes: a backslash is not a path separator on Unix)
    QMAKE_LIBDIR += $$CUDA_DIR/lib64

    # The following makes sure all path names (which often include spaces) are put between quotation marks
    CUDA_INC = $$join(INCLUDEPATH,'" -I"','-I"','"')

    # Add the necessary libraries
    CUDA_LIB_NAMES = cudart_static
    for(lib, CUDA_LIB_NAMES) {
        CUDA_LIBS += -l$$lib
    }
    LIBS += $$CUDA_LIBS

    # Configuration of the Cuda compiler.
    # Exactly one of the two extra compilers below is registered, depending on
    # whether the active configuration is debug or release.
    # -Xcompiler -fPIC keeps the object linkable into position-independent
    # executables, which many current toolchains and Qt builds require.
    # The Windows-only defines (WIN32, _MBCS) are intentionally not passed here.
    CONFIG(debug, debug|release) {
        # Debug mode
        cuda_d.input = CUDA_SOURCES
        cuda_d.output = $$CUDA_OBJECTS_DIR/${QMAKE_FILE_BASE}_cuda.o
        cuda_d.commands = $$CUDA_DIR/bin/nvcc -D_DEBUG $$NVCC_OPTIONS $$CUDA_INC $$CUDA_LIBS \
                          --machine $$SYSTEM_TYPE -arch=$$CUDA_ARCH \
                          --compile -cudart static -g \
                          -Xcompiler -fPIC \
                          -c -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
        cuda_d.dependency_type = TYPE_C
        QMAKE_EXTRA_COMPILERS += cuda_d
    }
    else {
        # Release mode
        cuda.input = CUDA_SOURCES
        cuda.output = $$CUDA_OBJECTS_DIR/${QMAKE_FILE_BASE}_cuda.o
        cuda.commands = $$CUDA_DIR/bin/nvcc $$NVCC_OPTIONS $$CUDA_INC $$CUDA_LIBS \
                        --machine $$SYSTEM_TYPE -arch=$$CUDA_ARCH \
                        --compile -cudart static \
                        -Xcompiler -fPIC \
                        -c -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
        cuda.dependency_type = TYPE_C
        QMAKE_EXTRA_COMPILERS += cuda
    }
}
CUDA 文件的头文件,kernel.h
// Host-side interface of the CUDA demo; safe to include from plain C++
// translation units because it contains no CUDA-specific syntax.
#ifndef KERNEL_H
#define KERNEL_H
// NOTE(review): the macros below are not referenced by the code shown with
// this header; their intended meaning cannot be determined from here.
#define WIDTH 11
#define HEIGHT 10
#define X_INTER 3
#define Y_INTER 3
#define BLOCK_SIZE 8
// Runs the CUDA demo: multiplies two hard-coded matrices on the GPU and
// prints the result (plus timing information) to standard output.
void runCudaPart();
#endif // KERNEL_H
CUDA 源文件 kernel.cu,可以在 Qt Creator 中通过"添加新文件 -> 通用 -> 空文件"创建:
#include "kernel.h"
// CUDA-C includes
#include <cuda.h>
#include <cuda_runtime.h>
#include <stdio.h>
#include <iostream>
#include <chrono>
const int Width = 4;
// CUDA内核函数:计算矩阵乘积
// CUDA kernel: computes Pd = Md * Nd for square, row-major Width x Width
// float matrices stored in device memory.
//
// Launch layout: a 2D grid of 2D blocks in which each thread produces one
// output element, (row, col) = (global y index, global x index). Threads that
// fall outside the matrix do nothing, so the grid may over-cover the matrix.
// No shared memory is used.
//
// Debug aid: the first thread of every block prints the number of device
// clock cycles it spent in the kernel. clock64() counts cycles of a
// per-multiprocessor counter, so the value is NOT a wall-clock duration.
__global__ void MatrixMulKernel(const float* Md, const float* Nd, float* Pd) {
    // 64-bit cycle counter; avoids the wrap-around of the 32-bit clock().
    const long long startCycles = clock64();

    const int row = blockIdx.y * blockDim.y + threadIdx.y;
    const int col = blockIdx.x * blockDim.x + threadIdx.x;

    // Guard against launches whose grid is larger than the matrix.
    if (row < Width && col < Width) {
        float sum = 0.0f;
        for (int k = 0; k < Width; ++k) {
            sum += Md[row * Width + k] * Nd[k * Width + col];
        }
        Pd[row * Width + col] = sum;
    }

    const long long endCycles = clock64();

    // One report per block; blockIdx components are unsigned, hence %u.
    if (threadIdx.x == 0 && threadIdx.y == 0) {
        printf("Block [%u, %u] Execution Time: %lld clock cycles\n",
               blockIdx.x, blockIdx.y, endCycles - startCycles);
    }
}
// Multiplies two hard-coded Width x Width matrices on the GPU with
// MatrixMulKernel and prints the kernel time and the resulting matrix to
// standard output.
//
// Every CUDA call is checked. On the first failure a diagnostic is written
// to standard error, all device buffers are released and the function
// returns without printing a result.
void runCudaPart()
{
    float array1[Width][Width] = {
        {1.0f, 2.0f, 3.0f, 4.0f},
        {5.0f, 6.0f, 7.0f, 8.0f},
        {9.0f, 10.0f, 1.0f, 2.0f},
        {3.0f, 4.0f, 5.0f, 6.0f}
    };
    float array2[Width][Width] = {
        {7.0f, 8.0f, 9.0f, 10.0f},
        {1.0f, 2.0f, 3.0f, 4.0f},
        {5.0f, 6.0f, 7.0f, 8.0f},
        {9.0f, 10.0f, 1.0f, 2.0f}
    };

    const size_t matrixBytes = Width * Width * sizeof(float);

    // Initialised to nullptr so the cleanup helper can run at any point:
    // cudaFree(nullptr) is a no-op.
    float* Md = nullptr;
    float* Nd = nullptr;
    float* Pd = nullptr;

    // Reports a failed CUDA call on stderr; returns true when the call succeeded.
    auto succeeded = [](cudaError_t status, const char* what) -> bool {
        if (status == cudaSuccess) {
            return true;
        }
        std::cerr << "CUDA error in " << what << ": "
                  << cudaGetErrorString(status) << std::endl;
        return false;
    };

    // Releases all device buffers; used on every exit path.
    auto releaseDeviceBuffers = [&]() {
        cudaFree(Md);
        cudaFree(Nd);
        cudaFree(Pd);
    };

    // Allocate device memory and copy the inputs to the device.
    if (!succeeded(cudaMalloc(&Md, matrixBytes), "cudaMalloc(Md)") ||
        !succeeded(cudaMalloc(&Nd, matrixBytes), "cudaMalloc(Nd)") ||
        !succeeded(cudaMalloc(&Pd, matrixBytes), "cudaMalloc(Pd)") ||
        !succeeded(cudaMemcpy(Md, array1, matrixBytes, cudaMemcpyHostToDevice),
                   "cudaMemcpy(Md)") ||
        !succeeded(cudaMemcpy(Nd, array2, matrixBytes, cudaMemcpyHostToDevice),
                   "cudaMemcpy(Nd)")) {
        releaseDeviceBuffers();
        return;
    }

    const int tileSize = 2;
    // The launch below covers exactly tileSize * (Width / tileSize) threads
    // per dimension, so Width has to be a multiple of tileSize.
    static_assert(Width % tileSize == 0, "Width must be a multiple of tileSize");

    // Grid and block dimensions: tileSize x tileSize blocks, one thread per
    // output element.
    dim3 threadsPerBlock(Width / tileSize, Width / tileSize);
    dim3 numBlocks(tileSize, tileSize);

    // Start of the timed region.
    auto start_time = std::chrono::high_resolution_clock::now();

    // Launch the kernel. The launch is asynchronous, so wait for completion
    // before stopping the timer; otherwise only the launch overhead would be
    // measured. cudaGetLastError reports launch-configuration errors,
    // cudaDeviceSynchronize reports errors raised while the kernel runs.
    MatrixMulKernel<<<numBlocks, threadsPerBlock>>>(Md, Nd, Pd);
    const bool kernelOk =
        succeeded(cudaGetLastError(), "MatrixMulKernel launch") &&
        succeeded(cudaDeviceSynchronize(), "MatrixMulKernel execution");

    // End of the timed region.
    auto end_time = std::chrono::high_resolution_clock::now();

    if (!kernelOk) {
        releaseDeviceBuffers();
        return;
    }

    auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count();
    std::cout << "Execution Time: " << duration << " microseconds" << std::endl;

    // Copy the result back to the host.
    float result[Width][Width];
    if (!succeeded(cudaMemcpy(result, Pd, matrixBytes, cudaMemcpyDeviceToHost),
                   "cudaMemcpy(result)")) {
        releaseDeviceBuffers();
        return;
    }

    // Print the result, one matrix row per line.
    for (int i = 0; i < Width; ++i) {
        for (int j = 0; j < Width; ++j) {
            std::cout << result[i][j] << " ";
        }
        std::cout << std::endl;
    }

    releaseDeviceBuffers();
}
在 MainWindow 的 UI 文件中添加一个按钮(对象名为 pushButton,需与下面的槽函数 on_pushButton_clicked 对应),MainWindow.h 如下:
#ifndef MAINWINDOW_H
#define MAINWINDOW_H
#include <QMainWindow>
QT_BEGIN_NAMESPACE
namespace Ui { class MainWindow; }
QT_END_NAMESPACE
// Main application window. Owns the Designer-generated UI object and
// declares the slot that starts the CUDA demo.
class MainWindow : public QMainWindow
{
Q_OBJECT
public:
// Creates the window; 'parent' is forwarded to QMainWindow.
MainWindow(QWidget *parent = nullptr);
// Destroys the window together with the UI object it owns.
~MainWindow();
private slots:
// Click handler; the name follows Qt's on_<objectName>_<signal> pattern,
// so it is presumably auto-connected to a widget named 'pushButton'
// declared in MainWindow.ui (not visible here).
void on_pushButton_clicked();
private:
// Designer-generated UI; allocated in the constructor, deleted in the destructor.
Ui::MainWindow *ui;
};
#endif // MAINWINDOW_H
MainWindow.cpp
#include "MainWindow.h"
#include "ui_MainWindow.h"
#include "kernel.h"
#include <iostream>
// Constructs the window: forwards 'parent' to QMainWindow, allocates the
// generated UI object and lets it build its widgets on this window.
MainWindow::MainWindow(QWidget *parent)
: QMainWindow(parent)
, ui(new Ui::MainWindow)
{
ui->setupUi(this);
}
// Releases the UI object allocated in the constructor.
MainWindow::~MainWindow()
{
delete ui;
}
// Button click handler: runs the CUDA demo synchronously on the calling
// thread and brackets it with two marker lines on standard output, which
// makes the output produced by runCudaPart() easy to locate.
void MainWindow::on_pushButton_clicked()
{
std::cout << "11111111" << std::endl;
runCudaPart();
std::cout << "22222222" << std::endl;
}
运行程序可以看到打印,大功告成!