官方文档:
GPGPU-Sim 3.x Manual
__cudaRegisterFatBinary(void*) 执行的代码逻辑如下:
// Assigns a numeric handle to the fat binary passed in and, on the cuobjdump
// path, registers that handle together with the source file name extracted
// from the binary. (Excerpt: the non-cuobjdump branch is elided below.)
//
// fatCubin: fat-binary handle; on the cuobjdump path it is reinterpreted as a
//           __fatDeviceText record.
// Returns (cuobjdump path): the integer handle (1, 2, ...) cast to void**. It
//           is an identifier, not a pointer that this code ever dereferences.
void** CUDARTAPI __cudaRegisterFatBinary( void *fatCubin )
{
// When built against a CUDA runtime older than 2.1, print an error and exit.
#if (CUDART_VERSION < 2010)
printf("GPGPU-Sim PTX: ERROR ** this version of GPGPU-Sim requires CUDA 2.1 or higher\n");
exit(1);
#endif
// Fetch the simulator context (GPGPUSim_Context is defined outside this excerpt).
CUctx_st *context = GPGPUSim_Context();
// Handle counter that persists across calls; the first registered binary gets 1.
static unsigned next_fat_bin_handle = 1;
// Take this path only when the GPU configuration reports use_cuobjdump().
if(context->get_device()->get_gpgpu()->get_config().use_cuobjdump()) {
// The following workaround has only been verified on 64-bit systems.
// On a 32-bit build this only prints a warning; execution continues.
if (sizeof(void*) == 4)
printf("GPGPU-Sim PTX: FatBin file name extraction has not been tested on 32-bit system.\n");
// FatBin handle from the .fatbin.c file (one of the intermediate files generated by NVCC)
// Local mirror of that handle's layout; only field d is read below.
typedef struct {int m; int v; const unsigned long long* d; char* f;} __fatDeviceText __attribute__ ((aligned (8)));
__fatDeviceText * fatDeviceText = (__fatDeviceText *) fatCubin;
// Extract the name of the source file that generated the given FatBin.
// - Obtains the pointer to the actual fatbin structure from the FatBin handle (fatCubin).
// - An integer inside the fatbin structure contains the relative offset to the source code file name.
// - This offset differs among different CUDA and GCC versions.
char * pfatbin = (char*) fatDeviceText->d;
// Read the int stored 48 bytes into the fatbin data ...
int offset = *((int*)(pfatbin+48));
// ... and treat byte (16 + offset) of the fatbin data as the start of the file name.
char * filename = (pfatbin+16+offset);
// The extracted file name is associated with a fat_cubin_handle passed
// into cudaLaunch(). Inside cudaLaunch(), the associated file name is
// used to find the PTX/SASS section from cuobjdump, which contains the
// PTX/SASS code for the launched kernel function.
// This allows us to work around the fact that cuobjdump only outputs the
// file name associated with each section.
// Take the current counter value as this binary's handle, then advance the counter.
unsigned long long fat_cubin_handle = next_fat_bin_handle;
next_fat_bin_handle++;
printf("GPGPU-Sim PTX: __cudaRegisterFatBinary, fat_cubin_handle = %llu, filename=%s\n", fat_cubin_handle, filename);
/*!
* This function extracts all data from all files in first call
* then for next calls, only returns the appropriate number
*/
// The counter starts at 1, so the first handle is 1.
assert(fat_cubin_handle >= 1);
// cuobjdumpInit() runs only for the first registered binary (handle 1).
if (fat_cubin_handle==1) cuobjdumpInit();
// Register the (handle, file name) pair on every call.
cuobjdumpRegisterFatBinary(fat_cubin_handle, filename);
// The numeric handle is handed back to the caller in pointer form.
return (void**)fat_cubin_handle;
}else{ ... }
}
1. 调用关系
初始阶段的调用关系如下:
为方便索引代码,此处按"被调用者在前、调用者在后"的顺序整理,即下面的函数会调用上面先列出的函数:
// Functional-simulation configuration (excerpt; other members are elided).
class gpgpu_functional_sim_config
{ ...
// Storage for the "-gpgpu_ptx_use_cuobjdump" option registered in reg_options().
// Presumably the value returned by use_cuobjdump() — accessor not shown here; confirm.
int m_ptx_use_cuobjdump;
...
}
// Registers this configuration's command-line options with the parser `opp`
// (excerpt; only the cuobjdump option is shown).
void gpgpu_functional_sim_config::reg_options(class OptionParser * opp)
{ ...
// Boolean option "-gpgpu_ptx_use_cuobjdump", stored into m_ptx_use_cuobjdump.
// NOTE(review): the trailing "1" looks like the default value (enabled) —
// confirm against option_parser_register's signature.
option_parser_register(opp,
"-gpgpu_ptx_use_cuobjdump", OPT_BOOL,
&m_ptx_use_cuobjdump,
"Use cuobjdump to extract ptx and sass from binaries",
"1");// annotation from the notes: CUDART_VERSION >= 4000 (presumably when this value applies — confirm)
...
}
// Simulator initialisation step that returns a gpgpu_sim* (excerpt; most of the
// body is elided).
gpgpu_sim *gpgpu_ptx_sim_init_perf()
{ ...
// Register the global GPU configuration's options on the parser `opp`; this is
// the call path that reaches gpgpu_functional_sim_config::reg_options per the notes.
g_the_gpu_config.reg_options(opp);
...
}
// Creates the simulated GPU and wraps it in a _cuda_device_id (excerpt; the
// surrounding code, including where `prop` is set up and the return, is elided).
class _cuda_device_id *GPGPUSim_Init()
{ ...
// Build the simulator instance; this is where the configuration options get registered.
gpgpu_sim *the_gpu = gpgpu_ptx_sim_init_perf();
// Attach the device properties `prop` (prepared in the elided part) to the simulator.
the_gpu->set_prop(prop);
// Wrap the simulator in the device object stored in `the_device`.
the_device = new _cuda_device_id(the_gpu);
// Start the simulation thread (start_sim_thread is defined elsewhere; argument meaning not shown).
start_sim_thread(1);
...
}
void** CUDARTAPI __cudaRegisterFatBinary( void *fatCubin )
{ ...
static CUctx_st* GPGPUSim_Context()
class _cuda_device_id *GPGPUSim_Init()
CUctx_st( _cuda_device_id *gpu ) { m_gpu = gpu; }//the_context = new CUctx_st(the_gpu);
cuobjdumpInit();
cuobjdumpRegisterFatBinary(fat_cubin_handle, filename);
...
}
2. GPGPUSim_Context() 做了什么
3. use_cuobjdump() 调用链表示什么含义
GPGPUSim_Context()->get_device()->get_gpgpu()->get_config().use_cuobjdump() 表示什么含义