1,环境
ubuntu 18.04
x86_64
cuda 11.0.1
gpgpu-sim master
commit 90ec3399763d7c8512cfe7dc193473086c38ca38
2,预备环境
使用较新的 Ubuntu 18.04,以满足 CUDA 11.0.1 的版本要求。
安装如下软件:
sudo apt-get install -y xutils-dev bison zlib1g-dev flex libglu1-mesa-dev doxygen graphviz python-pmw python-ply python-numpy python-matplotlib python-pip libpng-dev
3,安装 CUDA SDK 11.0.1
下载:
wget https://developer.download.nvidia.com/compute/cuda/11.0.1/local_installers/cuda_11.0.1_450.36.06_linux.run
安装在目录 /home/hanmeimei/cuda/cuda
bash cuda_11.0.1_450.36.06_linux.run --silent --toolkit --toolkitpath=/home/hanmeimei/cuda/cuda
设置环境变量:
export CUDA_INSTALL_PATH=/home/hanmeimei/cuda/cuda
export PATH=$CUDA_INSTALL_PATH/bin:$PATH
4,下载编译 gpgpu-sim master
git clone https://github.com/gpgpu-sim/gpgpu-sim_distribution.git
cd gpgpu-sim_distribution/
设置环境:
. setup_environment
make -j
5,编译运行 cuda app
此时 nvcc 是刚才安装的 nvcc
vim vectorAdd.cu
#include <cstdlib>
#include <iostream>
#include <cuda_runtime.h>
#define N 16384
// Element-wise vector addition kernel: c[idx] = a[idx] + b[idx].
// Each thread derives one global index from its block and thread
// coordinates (x dimension only) and handles at most one element.
// Threads whose index is >= n return without touching memory.
__global__ void vecAdd(float *a, float *b, float *c, int n)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= n)
        return;
    c[idx] = a[idx] + b[idx];
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return true;
    std::cerr << "CUDA error in " << what << ": "
              << cudaGetErrorString(status) << std::endl;
    return false;
}

// Host driver for the vecAdd sample.
// Fills two N-element host arrays, copies them to the device, launches
// vecAdd with 256 threads per block, copies the result back and compares it
// against the host-side sum.
// Returns EXIT_SUCCESS only if every allocation and CUDA call succeeded and
// all N results match; otherwise prints a diagnostic and returns EXIT_FAILURE.
int main()
{
    // Byte count kept in size_t, the type malloc/cudaMalloc/cudaMemcpy expect.
    const size_t size = N * sizeof(float);

    float *a = nullptr, *b = nullptr, *c = nullptr;
    float *d_a = nullptr, *d_b = nullptr, *d_c = nullptr;

    // allocate space for host copies of a, b, c and setup input values
    a = (float *)malloc(size);
    b = (float *)malloc(size);
    c = (float *)malloc(size);
    bool ok = (a != nullptr) && (b != nullptr) && (c != nullptr);
    if (!ok)
    {
        std::cerr << "Error: host memory allocation failed" << std::endl;
    }
    else
    {
        for (int i = 0; i < N; i++)
        {
            a[i] = i;
            b[i] = i * i;
        }
    }

    // allocate space for device copies of a, b, c
    // (each step below is skipped once an earlier one has failed)
    ok = ok && cudaOk(cudaMalloc((void **)&d_a, size), "cudaMalloc(d_a)")
            && cudaOk(cudaMalloc((void **)&d_b, size), "cudaMalloc(d_b)")
            && cudaOk(cudaMalloc((void **)&d_c, size), "cudaMalloc(d_c)");

    // copy inputs to device
    ok = ok && cudaOk(cudaMemcpy(d_a, a, size, cudaMemcpyHostToDevice), "cudaMemcpy(a -> d_a)")
            && cudaOk(cudaMemcpy(d_b, b, size, cudaMemcpyHostToDevice), "cudaMemcpy(b -> d_b)");

    // launch vecAdd() kernel on GPU
    if (ok)
    {
        vecAdd<<<(N + 255) / 256, 256>>>(d_a, d_b, d_c, N);
        // A launch returns no status itself: cudaGetLastError() reports a bad
        // launch configuration, the synchronize reports faults during execution.
        ok = cudaOk(cudaGetLastError(), "vecAdd launch")
             && cudaOk(cudaDeviceSynchronize(), "vecAdd execution");
    }

    // copy result back to host
    ok = ok && cudaOk(cudaMemcpy(c, d_c, size, cudaMemcpyDeviceToHost), "cudaMemcpy(d_c -> c)");

    // verify result (stop at the first mismatch)
    if (ok)
    {
        for (int i = 0; i < N; i++)
        {
            if (a[i] + b[i] != c[i])
            {
                std::cout << "Error: " << a[i] << " + " << b[i] << " != " << c[i] << std::endl;
                ok = false;
                break;
            }
        }
    }

    // clean up; runs on every path. Pointers that were never allocated are
    // still null, which free() and cudaFree() accept as a no-op.
    free(a);
    free(b);
    free(c);
    cudaFree(d_a);
    cudaFree(d_b);
    cudaFree(d_c);

    if (!ok)
        return EXIT_FAILURE;

    std::cout << "Done!" << std::endl;
    return EXIT_SUCCESS;
}
编译(必须加 --cudart shared 动态链接 libcudart,这样运行时才会加载 GPGPU-Sim 提供的 libcudart.so,而不是静态链接进来的真实 CUDA 运行库):
nvcc vectorAdd.cu --cudart shared -o vectorAdd
拷贝配置文件到 vectorAdd 所在目录(以下路径假设 gpgpu-sim_distribution 是当前目录的子目录,否则请相应调整路径):
cp gpgpu-sim_distribution/configs/tested-cfgs/SM7_QV100/config_volta_islip.icnt ./
cp gpgpu-sim_distribution/configs/tested-cfgs/SM7_QV100/gpgpusim.config ./
运行 app:
./vectorAdd
运行结束: