cuda 官网文档名:CUDA_Runtime_API
运行时API查询GPU信息
调用
cudaDeviceProp prop;
cudaGetDeviceProperties(&prop, device_id);
定义
__host__ cudaError_t cudaGetDeviceProperties(cudaDeviceProp* prop, int device);
由此可见,该函数仅带有 __host__ 执行空间限定符,只能在主机上调用。
#include <cuda_runtime.h>
#include <iostream>
#include <stdio.h>
static void CheckCudaErrorAux(const char*, unsigned, const char*, cudaError_t);
#define CUDA_CHECK_RETURN(value) CheckCudaErrorAux(__FILE__,__LINE__, #value, value)
// Query the current CUDA device and print its key hardware properties
// (name, compute capability, memory sizes, launch limits, SM resources).
// Exits with code 1 via CUDA_CHECK_RETURN if any runtime call fails.
int main()
{
    int iDeviceId = 0;
    CUDA_CHECK_RETURN(cudaGetDevice(&iDeviceId));
    cudaDeviceProp prop;
    // Fix: this call was previously unchecked; a failure would leave `prop`
    // uninitialized and print garbage. Wrap it like every other runtime call.
    CUDA_CHECK_RETURN(cudaGetDeviceProperties(&prop, iDeviceId));
    std::cout << "device_id : " << iDeviceId << std::endl;
    std::cout << "device_name : " << prop.name << std::endl;
    std::cout << "compute capability : " << prop.major << "." << prop.minor << std::endl;
    // totalGlobalMem / totalConstMem are reported in bytes; convert for display.
    std::cout << "amount of global memory : " << prop.totalGlobalMem / (1024.0*1024*1024)<<"GB" << std::endl;
    std::cout << "amount of constant memory : " << prop.totalConstMem / 1024.0 << "KB" << std::endl;
    std::cout << "maximum grid size : " << prop.maxGridSize[0] <<"," << prop.maxGridSize[1] << "," << prop.maxGridSize[2] << std::endl;
    std::cout << "maximum block size : " << prop.maxThreadsDim[0] << "," << prop.maxThreadsDim[1] << "," << prop.maxThreadsDim[2] << std::endl;
    std::cout << "number of SMs : " << prop.multiProcessorCount << std::endl;
    std::cout << "maximum amount of shared memory per block : " << prop.sharedMemPerBlock / 1024.0 << "KB" << std::endl;
    std::cout << "maximum amount of shared memory per SM : " << prop.sharedMemPerMultiprocessor / 1024.0 << "KB" << std::endl;
    std::cout << "maximum number of registers per block : " << prop.regsPerBlock / 1024.0 << "K" << std::endl;
    std::cout << "maximum number of registers per SM : " << prop.regsPerMultiprocessor / 1024.0 << "K" << std::endl;
    std::cout << "maximum number of thread per block : " << prop.maxThreadsPerBlock << std::endl;
    std::cout << "maximum number of thread per SM : " << prop.maxThreadsPerMultiProcessor << std::endl;
    return 0;
}
// Helper behind CUDA_CHECK_RETURN: if `err` is not cudaSuccess, report the
// failing statement, the error name/description, and the source location to
// stderr, then terminate the process with exit code 1. No-op on success.
static void CheckCudaErrorAux(const char* file, unsigned line, const char* statement, cudaError_t err)
{
    if (err != cudaSuccess) {
        std::cerr << statement << " returned: " << cudaGetErrorName(err) << " \t : " << cudaGetErrorString(err) << "(" << err << ") at " << file << ":" << line << std::endl;
        exit(1);
    }
}
结果
查询GPU计算核心数量
#include "cuda_runtime_api.h"
#include <stdio.h>
#include <iostream>
static void CheckCudaErrorAux(const char*, unsigned, const char*, cudaError_t);
#define CUDA_CHECK_RETURN(value) CheckCudaErrorAux(__FILE__,__LINE__, #value, value)
// Return the total number of CUDA (FP32/SP) cores on the device described by
// `devProp`: cores-per-SM for its compute capability times the SM count.
// The per-SM counts follow NVIDIA's _ConvertSMVer2Cores table (CUDA samples,
// helper_cuda.h). Returns 0 and prints a diagnostic for unknown capabilities.
// Note: parameter is now taken by const reference (call sites unchanged) to
// avoid copying the large cudaDeviceProp struct.
int getSPcores(const cudaDeviceProp& devProp)
{
    int cores = 0;
    const int mp = devProp.multiProcessorCount;
    switch (devProp.major) {
    case 2: // Fermi: SM 2.1 has 48 cores/SM, SM 2.0 has 32
        if (devProp.minor == 1) cores = mp * 48;
        else cores = mp * 32;
        break;
    case 3: // Kepler: 192 cores/SM for all 3.x
        cores = mp * 192;
        break;
    case 5: // Maxwell: 128 cores/SM for all 5.x
        cores = mp * 128;
        break;
    case 6: // Pascal: GP100 (6.0) has 64 cores/SM; GP10x (6.1/6.2) have 128
        if ((devProp.minor == 1) || (devProp.minor == 2)) cores = mp * 128;
        else if (devProp.minor == 0) cores = mp * 64;
        else printf("Unknown device type\n");
        break;
    case 7: // Volta (7.0), Xavier (7.2) and Turing (7.5): 64 cores/SM
        // Fix: 7.2 (Jetson AGX Xavier) was missing from the table.
        if ((devProp.minor == 0) || (devProp.minor == 2) || (devProp.minor == 5)) cores = mp * 64;
        else printf("Unknown device type\n");
        break;
    case 8: // Ampere: GA100 (8.0) has 64 cores/SM; GA10x (8.6), Orin (8.7)
            // and Ada Lovelace (8.9) have 128 cores/SM.
        // Fix: 8.7 (Jetson Orin) was missing from the table.
        if (devProp.minor == 0) cores = mp * 64;
        else if ((devProp.minor == 6) || (devProp.minor == 7) || (devProp.minor == 9)) cores = mp * 128;
        else printf("Unknown device type\n");
        break;
    case 9: // Hopper: 128 cores/SM
        if (devProp.minor == 0) cores = mp * 128;
        else printf("Unknown device type\n");
        break;
    default:
        printf("Unknown device type\n");
        break;
    }
    return cores;
}
// Report the total CUDA core count of the currently selected device.
// All runtime calls are checked; a failure aborts via CUDA_CHECK_RETURN.
int main()
{
    int currentDevice = 0;
    CUDA_CHECK_RETURN(cudaGetDevice(&currentDevice));

    cudaDeviceProp properties;
    CUDA_CHECK_RETURN(cudaGetDeviceProperties(&properties, currentDevice));

    const int totalCores = getSPcores(properties);
    std::cout << "Compute cores is " << totalCores << std::endl;
    return 0;
}
// Helper behind CUDA_CHECK_RETURN: on any error, print the failing statement
// together with the CUDA error name, description and source location, then
// exit(1). Returns immediately when `err` is cudaSuccess.
static void CheckCudaErrorAux(const char* file, unsigned line, const char* statement, cudaError_t err)
{
    if (err == cudaSuccess)
        return;
    const char* errorName = cudaGetErrorName(err);
    const char* errorText = cudaGetErrorString(err);
    std::cerr << statement << " returned: " << errorName << " \t : " << errorText
              << "(" << err << ") at " << file << ":" << line << std::endl;
    exit(1);
}