目录
- 前言
- 核函数
- 一维
- 二维
- 三维
- 结果分析
前言
所有的代码下载链接:code。以下代码展示了如何在 CUDA 中打印网格和线程的索引信息。代码包括一维、二维和三维的网格和块的设置,并定义了多个内核函数来输出当前的索引信息。
核函数
- 打印线程索引
// Debug kernel: every launched thread prints its block index and its thread
// index within the block. Both triples are printed in (z, y, x) order.
// Takes no arguments and writes no memory; output goes through device printf.
__global__ void print_idx_kernel() {
    // Copy the built-in unsigned components into ints to match the %d specifiers.
    const int bz = blockIdx.z;
    const int by = blockIdx.y;
    const int bx = blockIdx.x;
    const int tz = threadIdx.z;
    const int ty = threadIdx.y;
    const int tx = threadIdx.x;

    printf("block idx: (%3d, %3d, %3d), thread idx: (%3d, %3d, %3d)\n",
           bz, by, bx, tz, ty, tx);
}
- 打印网格和块的维度
// Debug kernel: prints the launch configuration as seen from the device --
// the grid dimensions followed by the block dimensions, each in (z, y, x)
// order. There is no thread guard, so every launched thread prints one line.
__global__ void print_dim_kernel() {
    // Copy the built-in unsigned components into ints to match the %d specifiers.
    const int gz = gridDim.z;
    const int gy = gridDim.y;
    const int gx = gridDim.x;
    const int bz = blockDim.z;
    const int by = blockDim.y;
    const int bx = blockDim.x;

    printf("grid dimension: (%3d, %3d, %3d), block dimension: (%3d, %3d, %3d)\n",
           gz, gy, gx, bz, by, bx);
}
- 打印每个块的线程索引
// Debug kernel: prints each thread's block index (z, y, x) together with the
// thread's flattened index inside its own block.
__global__ void print_thread_idx_per_block_kernel() {
    // Flatten (z, y, x) with x varying fastest:
    //   z * (dimX * dimY) + y * dimX + x  ==  (z * dimY + y) * dimX + x
    const int localId =
        (threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x + threadIdx.x;

    printf("block idx: (%3d, %3d, %3d), thread idx: %3d\n",
           blockIdx.z, blockIdx.y, blockIdx.x, localId);
}
- 打印每个网格中的线程索引
// Debug kernel: prints, for every thread, the flattened index of its block in
// the grid, the flattened index of the thread inside that block, and the
// resulting flattened index of the thread across the whole grid.
__global__ void print_thread_idx_per_grid_kernel() {
    // Number of threads in one block.
    const int threadsPerBlock = blockDim.x * blockDim.y * blockDim.z;

    // Flattened block index within the grid (x varies fastest).
    const int blockId =
        (blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x;

    // Flattened thread index within the block (x varies fastest).
    const int localId =
        (threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x + threadIdx.x;

    // All threads of the preceding blocks come first, then this block's threads.
    const int globalId = blockId * threadsPerBlock + localId;

    printf("block idx: %3d, thread idx in block: %3d, thread idx: %3d\n",
           blockId, localId, globalId);
}
- 打印坐标
// Debug kernel: prints each thread's block index (z, y, x), its flattened
// index inside the block, and its global coordinate in the launch.
// Note that the coordinate is printed in (x, y, z) order, unlike the block index.
__global__ void print_cord_kernel() {
    // Flattened thread index within the block (x varies fastest).
    const int localId =
        (threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x + threadIdx.x;

    // Global coordinate along each axis: block offset plus thread offset.
    const int gx = blockDim.x * blockIdx.x + threadIdx.x;
    const int gy = blockDim.y * blockIdx.y + threadIdx.y;
    const int gz = blockDim.z * blockIdx.z + threadIdx.z;

    printf("block idx: (%3d, %3d, %3d), thread idx: %3d, cord: (%3d, %3d, %3d)\n",
           blockIdx.z, blockIdx.y, blockIdx.x,
           localId, gx, gy, gz);
}
一维
cudaDeviceSynchronize()(使 CPU 与 GPU 端完成同步)不可缺少:核函数的启动是异步的,主函数在 CPU 上调用核函数之后不会等待 GPU 全部执行完毕并返回结果,而是直接继续往下执行。因此需要加上这个同步函数,否则程序可能在 GPU 的输出被刷新之前就已经退出,运行可执行文件时会得到空的结果。
代码
// Host driver for a 1-D launch: 8 elements processed by blocks of 4 threads.
// Prints the launch configuration on the host, launches one of the
// index-printing kernels, then waits for the device so that the kernel's
// printf output is flushed before the function returns.
void print_one_dim() {
    const int inputSize = 8;
    // Named differently from the device built-ins blockDim / gridDim to avoid confusion.
    const int threadsPerBlock = 4;
    // Ceil-div: still covers the input if it is not a multiple of the block size.
    const int blocksPerGrid = (inputSize + threadsPerBlock - 1) / threadsPerBlock;

    dim3 block(threadsPerBlock);
    dim3 grid(blocksPerGrid);

    printf("grid dimension: %d, block dimension: %d,\n", grid.x, block.x);

    // Enable whichever kernel you want to inspect; all of them take no arguments.
    // print_idx_kernel<<<grid, block>>>();
    // print_dim_kernel<<<grid, block>>>();
    // print_thread_idx_per_block_kernel<<<grid, block>>>();
    print_thread_idx_per_grid_kernel<<<grid, block>>>();

    // A launch reports configuration errors only through cudaGetLastError();
    // execution errors surface at the synchronizing call.
    cudaError_t err = cudaGetLastError();
    if (err == cudaSuccess) {
        err = cudaDeviceSynchronize();
    }
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error in print_one_dim: %s\n", cudaGetErrorString(err));
    }
}
二维
代码
// Host driver for a 2-D launch: a 4 x 4 input covered by 2 x 2 thread blocks.
// Prints the launch configuration on the host in (y, x) order, launches one of
// the index-printing kernels, then waits for the device so that the kernel's
// printf output is flushed before the function returns.
void print_two_dim() {
    const int inputWidth = 4;
    // Named differently from the device built-ins blockDim / gridDim to avoid confusion.
    const int threadsPerSide = 2;
    // Ceil-div: still covers the input if it is not a multiple of the block size.
    const int blocksPerSide = (inputWidth + threadsPerSide - 1) / threadsPerSide;

    dim3 block(threadsPerSide, threadsPerSide);
    dim3 grid(blocksPerSide, blocksPerSide);

    printf("grid dimension: (%d, %d), block dimension: (%d, %d)\n",
           grid.y, grid.x, block.y, block.x);

    // Enable whichever kernel you want to inspect; all of them take no arguments.
    // print_idx_kernel<<<grid, block>>>();
    // print_dim_kernel<<<grid, block>>>();
    // print_thread_idx_per_block_kernel<<<grid, block>>>();
    print_thread_idx_per_grid_kernel<<<grid, block>>>();

    // A launch reports configuration errors only through cudaGetLastError();
    // execution errors surface at the synchronizing call.
    cudaError_t err = cudaGetLastError();
    if (err == cudaSuccess) {
        err = cudaDeviceSynchronize();
    }
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error in print_two_dim: %s\n", cudaGetErrorString(err));
    }
}
三维
代码
// Host driver for a 3-D launch: a 3 x 3 x 3 volume covered by 2 x 2 x 2 thread
// blocks. Because 3 is not a multiple of 2, the grid is rounded up to
// 2 x 2 x 2 blocks, so some threads fall outside the volume (the kernels here
// only print, so no bounds check is needed).
// Prints the launch configuration on the host in (z, y, x) order, launches the
// coordinate-printing kernel, then waits for the device so that the kernel's
// printf output is flushed before the function returns.
void print_three_dim() {
    const int depth = 3;
    const int height = 3;
    const int width = 3;
    // Named differently from the device built-in blockDim to avoid confusion.
    const int threadsPerSide = 2;

    dim3 block(threadsPerSide, threadsPerSide, threadsPerSide);
    // Ceil-div along every axis so the whole volume is covered.
    dim3 grid((width + threadsPerSide - 1) / threadsPerSide,
              (height + threadsPerSide - 1) / threadsPerSide,
              (depth + threadsPerSide - 1) / threadsPerSide);

    printf("grid dimension: (%d, %d, %d), block dimension: (%d, %d, %d)\n",
           grid.z, grid.y, grid.x,
           block.z, block.y, block.x);

    // Enable whichever kernel you want to inspect; all of them take no arguments.
    // print_idx_kernel<<<grid, block>>>();
    // print_dim_kernel<<<grid, block>>>();
    // print_thread_idx_per_block_kernel<<<grid, block>>>();
    // print_thread_idx_per_grid_kernel<<<grid, block>>>();
    print_cord_kernel<<<grid, block>>>();

    // A launch reports configuration errors only through cudaGetLastError();
    // execution errors surface at the synchronizing call.
    cudaError_t err = cudaGetLastError();
    if (err == cudaSuccess) {
        err = cudaDeviceSynchronize();
    }
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error in print_three_dim: %s\n", cudaGetErrorString(err));
    }
}
主函数
这里可以自己使用 print_one_dim、print_two_dim、print_three_dim 来测试不同网格和块的情况,也可以自行组合上面定义的核函数来测试所有情况。
// Program entry point: runs exactly one of the dimension demos.
int main() {
    // Uncomment the driver for the launch dimensionality you want to print.
    // print_one_dim();
    // print_two_dim();
    print_three_dim();

    return 0;
}
结果分析
这只是一个很小的 .cu 代码,所以我直接使用了笔记(点击代码链接可以看到)中的第一种方法编译。
在当前代码目录下打开终端并运行
nvcc grid_block_123D.cu -o test
./test
得到结果
···txt
grid dimension: (2, 2, 2), block dimension: (2, 2, 2)
block idx: ( 1, 0, 1), thread idx: 0, cord: ( 2, 0, 2)
block idx: ( 1, 0, 1), thread idx: 1, cord: ( 3, 0, 2)
block idx: ( 1, 0, 1), thread idx: 2, cord: ( 2, 1, 2)
block idx: ( 1, 0, 1), thread idx: 3, cord: ( 3, 1, 2)
block idx: ( 1, 0, 1), thread idx: 4, cord: ( 2, 0, 3)
block idx: ( 1, 0, 1), thread idx: 5, cord: ( 3, 0, 3)
block idx: ( 1, 0, 1), thread idx: 6, cord: ( 2, 1, 3)
block idx: ( 1, 0, 1), thread idx: 7, cord: ( 3, 1, 3)
block idx: ( 0, 1, 0), thread idx: 0, cord: ( 0, 2, 0)
block idx: ( 0, 1, 0), thread idx: 1, cord: ( 1, 2, 0)
block idx: ( 0, 1, 0), thread idx: 2, cord: ( 0, 3, 0)
block idx: ( 0, 1, 0), thread idx: 3, cord: ( 1, 3, 0)
block idx: ( 0, 1, 0), thread idx: 4, cord: ( 0, 2, 1)
block idx: ( 0, 1, 0), thread idx: 5, cord: ( 1, 2, 1)
block idx: ( 0, 1, 0), thread idx: 6, cord: ( 0, 3, 1)
block idx: ( 0, 1, 0), thread idx: 7, cord: ( 1, 3, 1)
block idx: ( 1, 0, 0), thread idx: 0, cord: ( 0, 0, 2)
block idx: ( 1, 0, 0), thread idx: 1, cord: ( 1, 0, 2)
block idx: ( 1, 0, 0), thread idx: 2, cord: ( 0, 1, 2)
block idx: ( 1, 0, 0), thread idx: 3, cord: ( 1, 1, 2)
block idx: ( 1, 0, 0), thread idx: 4, cord: ( 0, 0, 3)
block idx: ( 1, 0, 0), thread idx: 5, cord: ( 1, 0, 3)
block idx: ( 1, 0, 0), thread idx: 6, cord: ( 0, 1, 3)
block idx: ( 1, 0, 0), thread idx: 7, cord: ( 1, 1, 3)
block idx: ( 0, 0, 1), thread idx: 0, cord: ( 2, 0, 0)
block idx: ( 0, 0, 1), thread idx: 1, cord: ( 3, 0, 0)
block idx: ( 0, 0, 1), thread idx: 2, cord: ( 2, 1, 0)
block idx: ( 0, 0, 1), thread idx: 3, cord: ( 3, 1, 0)
block idx: ( 0, 0, 1), thread idx: 4, cord: ( 2, 0, 1)
block idx: ( 0, 0, 1), thread idx: 5, cord: ( 3, 0, 1)
block idx: ( 0, 0, 1), thread idx: 6, cord: ( 2, 1, 1)
block idx: ( 0, 0, 1), thread idx: 7, cord: ( 3, 1, 1)
block idx: ( 1, 1, 1), thread idx: 0, cord: ( 2, 2, 2)
block idx: ( 1, 1, 1), thread idx: 1, cord: ( 3, 2, 2)
block idx: ( 1, 1, 1), thread idx: 2, cord: ( 2, 3, 2)
block idx: ( 1, 1, 1), thread idx: 3, cord: ( 3, 3, 2)
block idx: ( 1, 1, 1), thread idx: 4, cord: ( 2, 2, 3)
block idx: ( 1, 1, 1), thread idx: 5, cord: ( 3, 2, 3)
block idx: ( 1, 1, 1), thread idx: 6, cord: ( 2, 3, 3)
block idx: ( 1, 1, 1), thread idx: 7, cord: ( 3, 3, 3)
block idx: ( 0, 1, 1), thread idx: 0, cord: ( 2, 2, 0)
block idx: ( 0, 1, 1), thread idx: 1, cord: ( 3, 2, 0)
block idx: ( 0, 1, 1), thread idx: 2, cord: ( 2, 3, 0)
block idx: ( 0, 1, 1), thread idx: 3, cord: ( 3, 3, 0)
block idx: ( 0, 1, 1), thread idx: 4, cord: ( 2, 2, 1)
block idx: ( 0, 1, 1), thread idx: 5, cord: ( 3, 2, 1)
block idx: ( 0, 1, 1), thread idx: 6, cord: ( 2, 3, 1)
block idx: ( 0, 1, 1), thread idx: 7, cord: ( 3, 3, 1)
block idx: ( 0, 0, 0), thread idx: 0, cord: ( 0, 0, 0)
block idx: ( 0, 0, 0), thread idx: 1, cord: ( 1, 0, 0)
block idx: ( 0, 0, 0), thread idx: 2, cord: ( 0, 1, 0)
block idx: ( 0, 0, 0), thread idx: 3, cord: ( 1, 1, 0)
block idx: ( 0, 0, 0), thread idx: 4, cord: ( 0, 0, 1)
block idx: ( 0, 0, 0), thread idx: 5, cord: ( 1, 0, 1)
block idx: ( 0, 0, 0), thread idx: 6, cord: ( 0, 1, 1)
block idx: ( 0, 0, 0), thread idx: 7, cord: ( 1, 1, 1)
block idx: ( 1, 1, 0), thread idx: 0, cord: ( 0, 2, 2)
block idx: ( 1, 1, 0), thread idx: 1, cord: ( 1, 2, 2)
block idx: ( 1, 1, 0), thread idx: 2, cord: ( 0, 3, 2)
block idx: ( 1, 1, 0), thread idx: 3, cord: ( 1, 3, 2)
block idx: ( 1, 1, 0), thread idx: 4, cord: ( 0, 2, 3)
block idx: ( 1, 1, 0), thread idx: 5, cord: ( 1, 2, 3)
block idx: ( 1, 1, 0), thread idx: 6, cord: ( 0, 3, 3)
block idx: ( 1, 1, 0), thread idx: 7, cord: ( 1, 3, 3)