1,找一只活麻雀,下载编译 ucx
git clone https://github.com/openucx/ucx.git
cd ucx/
git checkout v1.16.0
./autogen.sh
mkdir build
cd build
../contrib/configure-devel --with-cuda=/usr/local/cuda --without-rocm --without-java --prefix=${PWD}/../../local_d_nv
make -j
make install
2,运行 ucx 普通示例
ls build/test/apps/
示例解析:
test_tcmalloc
未完待续……
3,运行 ucx cuda 相关示例
$ ./test_cuda_hook_static
示例解析:
test_cuda_hook_static
未完待续……
4,挖掘更深入的 ucx cuda 功能
5,剖析 ucx cuda 功能的达成
6,ucx 使用了哪些 cuda API
cuda 开头的:
cudaEventQuery
cudaEventCreateWithFlags
cudaEventDestroy
cudaStreamCreateWithFlags
cudaStreamDestroy
cudaMemcpyDefault(注:这是 cudaMemcpyKind 的枚举值,不是 API 函数)
cudaMemcpyAsync
cudaFreeHost
cudaMallocFromPoolAsync
cudaGetErrorString
cudaSetDevice
cudaDeviceSynchronize
cudaGetDeviceCount
cudaFree
cudaMallocManaged
cudaMalloc
cudaMemcpy
cudaMemset
cudaFreeAsync
cudaMallocAsync
cudaMallocPitch
cudaEventDestroy
cudaHostUnregister
cudaHostRegister
cudaHostRegisterPortable(注:这是传给 cudaHostRegister 的标志位,不是 API 函数)
cudaStreamSynchronize
cudaEventRecord
cudaErrorUnsupportedPtxVersion(注:这是 cudaError_t 的错误码枚举值,不是 API 函数)
cu 开头的 driver API:
cuDeviceGetName
cuPointerGetAttribute
cuMemGetAddress
cuPointerGetAttributes
cuMemRangeGetAttribute
cuMemGetHandleForAddressRange
cuPointerSetAttribute
cuDeviceGetCount
cuEventQuery
cuLaunchHostFunc
cuStreamAddCallback
cuEventCreate
cuStreamCreate
cuStreamDestroy
cuMemcpyDtoDAsync(dst, src, iov[0].length, ...)
cuEventRecord(cuda_ipc_event->event, ...)
cuIpcOpenMemHandle(mapped_addr, key->ph, ...)
cuIpcCloseMemHandle((CUdeviceptr)region->mapped_addr)
cuIpcGetMemHandle(&key->ph, (CUdeviceptr)addr)
cuDeviceGetAttribute(&attrib, CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, ...)
cuGetErrorString(result, &error_str)
cuCtxGetDevice(&cuda_device)
cuMemGetAddressRange(&pbase, &length, ptr)
cuMemAlloc
cuMemAlloc_v2
cuMemAllocManaged
cuMemAllocPitch
cuMemAllocPitch_v2
cuMemAllocAsync
cuMemAllocFromPoolAsync
cuMemFree_v2
cuMemFreeHost_v2
cuDeviceTotalMem
cuDeviceTotalMem_v2
cuCtxDestroy(m_context)
cuMemAllocHost(&ptr, 64)
cuMemFreeHost(ptr)
cuMemAllocManaged(&dptr, 64, CU_MEM_ATTACH_GLOBAL)
cuMemFree(dptr)
cuMemAllocPitch(&dptr, &pitch, width, height, element_size)
cuMemAllocAsync(&dptr, 64, 0)
cuMemFreeAsync(dptr, 0);
cuDeviceGet(&device, 0)
cuCtxCreate(&context, 0, device)
cuMemAlloc(&dptr, 4096)
cuMemFree(dptr)
cuCtxDetach(context)
cuGetErrorString(_cu_result, &_error_string)
cuInit(0)
cuCtxGetCurrent(&cu_context)
cuDeviceGetUuid ();