for(int i=0;i<image_height;i++){for(int j=0;j<image_width;j++){//Pixel Processing code for pixel located at(i,j)}}
将像素处理映射到CUDA地一批线程上:
int i = blockidx.y * blockDim.y + threadIdx.y
int j = blockidx.x * blockDim.x + threadIdx.x
1. 在GPU上通过CUDA进行直方图统计
首先介绍CPU版本的直方图统计,实现如下:
int h_a[1000]= Random values between 0and15//假设图像取值范围在【0-15】,定义数组并初始化int histogram[16];for(int i=0;i<16;i++){
histogram[i]=0;}//统计每个值的个数for(int i=0;i<1000;i++){
histogram[h_a[i]]+=1;}
intmain(){//定义设备变量并分配内存int h_a[SIZE];for(int i =0; i < SIZE; i++){
h_a[i]= i % NUM_BIN;}int h_b[NUM_BIN];for(int i =0; i < NUM_BIN; i++){
h_b[i]=0;}// 声明GPU指针变量int* d_a;int* d_b;// 分配GPU变量内存cudaMalloc((void**)&d_a, SIZE *sizeof(int));cudaMalloc((void**)&d_b, NUM_BIN *sizeof(int));// transfer the arrays to the GPUcudaMemcpy(d_a, h_a, SIZE *sizeof(int), cudaMemcpyHostToDevice);cudaMemcpy(d_b, h_b, NUM_BIN *sizeof(int), cudaMemcpyHostToDevice);// 进行直方图统计//histogram_without_atomic << <((SIZE + NUM_BIN - 1) / NUM_BIN), NUM_BIN >> > (d_b, d_a);
histogram_atomic <<<((SIZE+NUM_BIN-1)/ NUM_BIN), NUM_BIN >>>(d_b, d_a);// copy back the sum from GPUcudaMemcpy(h_b, d_b, NUM_BIN *sizeof(int), cudaMemcpyDeviceToHost);printf("Histogram using 16 bin without shared Memory is: \n");for(int i =0; i < NUM_BIN; i++){printf("bin %d: count %d\n", i, h_b[i]);}// free GPU memory allocationcudaFree(d_a);cudaFree(d_b);return0;}
intmain(){// generate the input array on the hostint h_a[SIZE];for(int i =0; i < SIZE; i++){//h_a[i] = bit_reverse(i, log2(SIZE));
h_a[i]= i % NUM_BIN;}int h_b[NUM_BIN];for(int i =0; i < NUM_BIN; i++){
h_b[i]=0;}// declare GPU memory pointersint* d_a;int* d_b;// allocate GPU memorycudaMalloc((void**)&d_a, SIZE *sizeof(int));cudaMalloc((void**)&d_b, NUM_BIN *sizeof(int));// transfer the arrays to the GPUcudaMemcpy(d_a, h_a, SIZE *sizeof(int), cudaMemcpyHostToDevice);cudaMemcpy(d_b, h_b, NUM_BIN *sizeof(int), cudaMemcpyHostToDevice);// launch the kernel
histogram_shared_memory <<<SIZE /256,256>>>(d_b, d_a);// copy back the result from GPUcudaMemcpy(h_b, d_b, NUM_BIN *sizeof(int), cudaMemcpyDeviceToHost);printf("Histogram using 16 bin is: ");for(int i =0; i < NUM_BIN; i++){printf("bin %d: count %d\n", i, h_b[i]);}// free GPU memory allocationcudaFree(d_a);cudaFree(d_b);return0;}
🌝博客主页:泥菩萨 💖专栏:Linux探索之旅 | 网络安全的神秘世界 | 专接本 磁盘空间告急?如何解决“no space left on device”的困扰
🙋♂️问题描述
错误信息 "write /var/lib/docker/tmp/GetIma…
打开origin官网,点击learning Edition,教育版只能维持六个月,但是过期之后可以在官网更新,能够免费使用六次,也就是三年。 OriginLab - Origin and OriginPro - Data Analysis and Graphing Software 填写学校信息&…