1*1306a73bSElena Agostini /* SPDX-License-Identifier: BSD-3-Clause 2*1306a73bSElena Agostini * Copyright (c) 2021 NVIDIA Corporation & Affiliates 3*1306a73bSElena Agostini */ 4*1306a73bSElena Agostini 5*1306a73bSElena Agostini #include <dlfcn.h> 6*1306a73bSElena Agostini 7*1306a73bSElena Agostini #include <rte_common.h> 8*1306a73bSElena Agostini #include <rte_log.h> 9*1306a73bSElena Agostini #include <rte_malloc.h> 10*1306a73bSElena Agostini #include <rte_errno.h> 11*1306a73bSElena Agostini #include <rte_pci.h> 12*1306a73bSElena Agostini #include <rte_bus_pci.h> 13*1306a73bSElena Agostini #include <rte_byteorder.h> 14*1306a73bSElena Agostini #include <rte_dev.h> 15*1306a73bSElena Agostini 16*1306a73bSElena Agostini #include <gpudev_driver.h> 17*1306a73bSElena Agostini #include <cuda.h> 18*1306a73bSElena Agostini #include <cudaTypedefs.h> 19*1306a73bSElena Agostini 20*1306a73bSElena Agostini #define CUDA_DRIVER_MIN_VERSION 11040 21*1306a73bSElena Agostini #define CUDA_API_MIN_VERSION 3020 22*1306a73bSElena Agostini 23*1306a73bSElena Agostini /* CUDA Driver functions loaded with dlsym() */ 24*1306a73bSElena Agostini static CUresult CUDAAPI (*sym_cuInit)(unsigned int flags); 25*1306a73bSElena Agostini static CUresult CUDAAPI (*sym_cuDriverGetVersion)(int *driverVersion); 26*1306a73bSElena Agostini static CUresult CUDAAPI (*sym_cuGetProcAddress)(const char *symbol, 27*1306a73bSElena Agostini void **pfn, int cudaVersion, uint64_t flags); 28*1306a73bSElena Agostini 29*1306a73bSElena Agostini /* CUDA Driver functions loaded with cuGetProcAddress for versioning */ 30*1306a73bSElena Agostini static PFN_cuGetErrorString pfn_cuGetErrorString; 31*1306a73bSElena Agostini static PFN_cuGetErrorName pfn_cuGetErrorName; 32*1306a73bSElena Agostini static PFN_cuPointerSetAttribute pfn_cuPointerSetAttribute; 33*1306a73bSElena Agostini static PFN_cuDeviceGetAttribute pfn_cuDeviceGetAttribute; 34*1306a73bSElena Agostini static PFN_cuDeviceGetByPCIBusId pfn_cuDeviceGetByPCIBusId; 35*1306a73bSElena Agostini static PFN_cuDevicePrimaryCtxRetain pfn_cuDevicePrimaryCtxRetain; 36*1306a73bSElena Agostini static PFN_cuDevicePrimaryCtxRelease pfn_cuDevicePrimaryCtxRelease; 37*1306a73bSElena Agostini static PFN_cuDeviceTotalMem pfn_cuDeviceTotalMem; 38*1306a73bSElena Agostini static PFN_cuDeviceGetName pfn_cuDeviceGetName; 39*1306a73bSElena Agostini static PFN_cuCtxGetApiVersion pfn_cuCtxGetApiVersion; 40*1306a73bSElena Agostini static PFN_cuCtxSetCurrent pfn_cuCtxSetCurrent; 41*1306a73bSElena Agostini static PFN_cuCtxGetCurrent pfn_cuCtxGetCurrent; 42*1306a73bSElena Agostini static PFN_cuCtxGetDevice pfn_cuCtxGetDevice; 43*1306a73bSElena Agostini static PFN_cuCtxGetExecAffinity pfn_cuCtxGetExecAffinity; 44*1306a73bSElena Agostini static PFN_cuMemAlloc pfn_cuMemAlloc; 45*1306a73bSElena Agostini static PFN_cuMemFree pfn_cuMemFree; 46*1306a73bSElena Agostini static PFN_cuMemHostRegister pfn_cuMemHostRegister; 47*1306a73bSElena Agostini static PFN_cuMemHostUnregister pfn_cuMemHostUnregister; 48*1306a73bSElena Agostini static PFN_cuMemHostGetDevicePointer pfn_cuMemHostGetDevicePointer; 49*1306a73bSElena Agostini static PFN_cuFlushGPUDirectRDMAWrites pfn_cuFlushGPUDirectRDMAWrites; 50*1306a73bSElena Agostini 51*1306a73bSElena Agostini static void *cudalib; 52*1306a73bSElena Agostini static unsigned int cuda_api_version; 53*1306a73bSElena Agostini static int cuda_driver_version; 54*1306a73bSElena Agostini 55*1306a73bSElena Agostini /* NVIDIA GPU vendor */ 56*1306a73bSElena Agostini #define NVIDIA_GPU_VENDOR_ID (0x10de) 57*1306a73bSElena Agostini 58*1306a73bSElena Agostini /* NVIDIA GPU device IDs */ 59*1306a73bSElena Agostini #define NVIDIA_GPU_A100_40GB_DEVICE_ID (0x20f1) 60*1306a73bSElena Agostini #define NVIDIA_GPU_A100_80GB_DEVICE_ID (0x20b5) 61*1306a73bSElena Agostini 62*1306a73bSElena Agostini #define NVIDIA_GPU_A30_24GB_DEVICE_ID (0x20b7) 63*1306a73bSElena Agostini #define NVIDIA_GPU_A10_24GB_DEVICE_ID (0x2236) 64*1306a73bSElena Agostini 65*1306a73bSElena Agostini #define NVIDIA_GPU_V100_32GB_DEVICE_ID (0x1db6) 66*1306a73bSElena Agostini #define NVIDIA_GPU_V100_16GB_DEVICE_ID (0x1db4) 67*1306a73bSElena Agostini 68*1306a73bSElena Agostini #define NVIDIA_GPU_T4_16GB_DEVICE_ID (0x1eb8) 69*1306a73bSElena Agostini 70*1306a73bSElena Agostini #define CUDA_MAX_ALLOCATION_NUM 512 71*1306a73bSElena Agostini 72*1306a73bSElena Agostini #define GPU_PAGE_SHIFT 16 73*1306a73bSElena Agostini #define GPU_PAGE_SIZE (1UL << GPU_PAGE_SHIFT) 74*1306a73bSElena Agostini 75*1306a73bSElena Agostini static RTE_LOG_REGISTER_DEFAULT(cuda_logtype, NOTICE); 76*1306a73bSElena Agostini 77*1306a73bSElena Agostini /* Helper macro for logging */ 78*1306a73bSElena Agostini #define rte_cuda_log(level, fmt, ...) \ 79*1306a73bSElena Agostini rte_log(RTE_LOG_ ## level, cuda_logtype, fmt "\n", ##__VA_ARGS__) 80*1306a73bSElena Agostini 81*1306a73bSElena Agostini #define rte_cuda_debug(fmt, ...) \ 82*1306a73bSElena Agostini rte_cuda_log(DEBUG, RTE_STR(__LINE__) ":%s() " fmt, __func__, \ 83*1306a73bSElena Agostini ##__VA_ARGS__) 84*1306a73bSElena Agostini 85*1306a73bSElena Agostini /* NVIDIA GPU address map */ 86*1306a73bSElena Agostini static const struct rte_pci_id pci_id_cuda_map[] = { 87*1306a73bSElena Agostini { 88*1306a73bSElena Agostini RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 89*1306a73bSElena Agostini NVIDIA_GPU_A100_40GB_DEVICE_ID) 90*1306a73bSElena Agostini }, 91*1306a73bSElena Agostini { 92*1306a73bSElena Agostini RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 93*1306a73bSElena Agostini NVIDIA_GPU_A100_80GB_DEVICE_ID) 94*1306a73bSElena Agostini }, 95*1306a73bSElena Agostini { 96*1306a73bSElena Agostini RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 97*1306a73bSElena Agostini NVIDIA_GPU_A30_24GB_DEVICE_ID) 98*1306a73bSElena Agostini }, 99*1306a73bSElena Agostini { 100*1306a73bSElena Agostini RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 101*1306a73bSElena Agostini NVIDIA_GPU_A10_24GB_DEVICE_ID) 102*1306a73bSElena Agostini }, 103*1306a73bSElena Agostini { 104*1306a73bSElena Agostini RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 105*1306a73bSElena Agostini NVIDIA_GPU_V100_32GB_DEVICE_ID) 106*1306a73bSElena Agostini }, 107*1306a73bSElena Agostini { 108*1306a73bSElena Agostini RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 109*1306a73bSElena Agostini NVIDIA_GPU_V100_16GB_DEVICE_ID) 110*1306a73bSElena Agostini }, 111*1306a73bSElena Agostini { 112*1306a73bSElena Agostini RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 113*1306a73bSElena Agostini NVIDIA_GPU_T4_16GB_DEVICE_ID) 114*1306a73bSElena Agostini }, 115*1306a73bSElena Agostini { 116*1306a73bSElena Agostini .device_id = 0 117*1306a73bSElena Agostini } 118*1306a73bSElena Agostini }; 119*1306a73bSElena Agostini 120*1306a73bSElena Agostini /* Device private info */ 121*1306a73bSElena Agostini struct cuda_info { 122*1306a73bSElena Agostini char gpu_name[RTE_DEV_NAME_MAX_LEN]; 123*1306a73bSElena Agostini CUdevice cu_dev; 124*1306a73bSElena Agostini int gdr_supported; 125*1306a73bSElena Agostini int gdr_write_ordering; 126*1306a73bSElena Agostini int gdr_flush_type; 127*1306a73bSElena Agostini }; 128*1306a73bSElena Agostini 129*1306a73bSElena Agostini /* Type of memory allocated by CUDA driver */ 130*1306a73bSElena Agostini enum mem_type { 131*1306a73bSElena Agostini GPU_MEM = 0, 132*1306a73bSElena Agostini CPU_REGISTERED, 133*1306a73bSElena Agostini GPU_REGISTERED /* Not used yet */ 134*1306a73bSElena Agostini }; 135*1306a73bSElena Agostini 136*1306a73bSElena Agostini /* key associated to a memory address */ 137*1306a73bSElena Agostini typedef uintptr_t cuda_ptr_key; 138*1306a73bSElena Agostini 139*1306a73bSElena Agostini /* Single entry of the memory list */ 140*1306a73bSElena Agostini struct mem_entry { 141*1306a73bSElena Agostini CUdeviceptr ptr_d; 142*1306a73bSElena Agostini void *ptr_h; 143*1306a73bSElena Agostini size_t size; 144*1306a73bSElena Agostini struct rte_gpu *dev; 145*1306a73bSElena Agostini CUcontext ctx; 146*1306a73bSElena Agostini cuda_ptr_key pkey; 147*1306a73bSElena Agostini enum mem_type mtype; 148*1306a73bSElena Agostini struct mem_entry *prev; 149*1306a73bSElena Agostini struct mem_entry *next; 150*1306a73bSElena Agostini }; 151*1306a73bSElena Agostini 152*1306a73bSElena Agostini static struct mem_entry *mem_alloc_list_head; 153*1306a73bSElena Agostini static struct mem_entry *mem_alloc_list_tail; 154*1306a73bSElena Agostini static uint32_t mem_alloc_list_last_elem; 155*1306a73bSElena Agostini 156*1306a73bSElena Agostini /* Load the CUDA symbols */ 157*1306a73bSElena Agostini 158*1306a73bSElena Agostini static int 159*1306a73bSElena Agostini cuda_loader(void) 160*1306a73bSElena Agostini { 161*1306a73bSElena Agostini char cuda_path[1024]; 162*1306a73bSElena Agostini 163*1306a73bSElena Agostini if (getenv("CUDA_PATH_L") == NULL) 164*1306a73bSElena Agostini snprintf(cuda_path, 1024, "%s", "libcuda.so"); 165*1306a73bSElena Agostini else 166*1306a73bSElena Agostini snprintf(cuda_path, 1024, "%s%s", getenv("CUDA_PATH_L"), "libcuda.so"); 167*1306a73bSElena Agostini 168*1306a73bSElena Agostini cudalib = dlopen(cuda_path, RTLD_LAZY); 169*1306a73bSElena Agostini if (cudalib == NULL) { 170*1306a73bSElena Agostini rte_cuda_log(ERR, "Failed to find CUDA library in %s (CUDA_PATH_L=%s)", 171*1306a73bSElena Agostini cuda_path, getenv("CUDA_PATH_L")); 172*1306a73bSElena Agostini return -1; 173*1306a73bSElena Agostini } 174*1306a73bSElena Agostini 175*1306a73bSElena Agostini return 0; 176*1306a73bSElena Agostini } 177*1306a73bSElena Agostini 178*1306a73bSElena Agostini static int 179*1306a73bSElena Agostini cuda_sym_func_loader(void) 180*1306a73bSElena Agostini { 181*1306a73bSElena Agostini if (cudalib == NULL) 182*1306a73bSElena Agostini return -1; 183*1306a73bSElena Agostini 184*1306a73bSElena Agostini sym_cuInit = dlsym(cudalib, "cuInit"); 185*1306a73bSElena Agostini if (sym_cuInit == NULL) { 186*1306a73bSElena Agostini rte_cuda_log(ERR, "Failed to load CUDA missing symbol cuInit"); 187*1306a73bSElena Agostini return -1; 188*1306a73bSElena Agostini } 189*1306a73bSElena Agostini 190*1306a73bSElena Agostini sym_cuDriverGetVersion = dlsym(cudalib, "cuDriverGetVersion"); 191*1306a73bSElena Agostini if (sym_cuDriverGetVersion == NULL) { 192*1306a73bSElena Agostini rte_cuda_log(ERR, "Failed to load CUDA missing symbol cuDriverGetVersion"); 193*1306a73bSElena Agostini return -1; 194*1306a73bSElena Agostini } 195*1306a73bSElena Agostini 196*1306a73bSElena Agostini sym_cuGetProcAddress = dlsym(cudalib, "cuGetProcAddress"); 197*1306a73bSElena Agostini if (sym_cuGetProcAddress == NULL) { 198*1306a73bSElena Agostini rte_cuda_log(ERR, "Failed to load CUDA missing symbol cuGetProcAddress"); 199*1306a73bSElena Agostini return -1; 200*1306a73bSElena Agostini } 201*1306a73bSElena Agostini 202*1306a73bSElena Agostini return 0; 203*1306a73bSElena Agostini } 204*1306a73bSElena Agostini 205*1306a73bSElena Agostini static int 206*1306a73bSElena Agostini cuda_pfn_func_loader(void) 207*1306a73bSElena Agostini { 208*1306a73bSElena Agostini CUresult res; 209*1306a73bSElena Agostini 210*1306a73bSElena Agostini res = sym_cuGetProcAddress("cuGetErrorString", 211*1306a73bSElena Agostini (void **) (&pfn_cuGetErrorString), cuda_driver_version, 0); 212*1306a73bSElena Agostini if (res != 0) { 213*1306a73bSElena Agostini rte_cuda_log(ERR, "Retrieve pfn_cuGetErrorString failed with %d", res); 214*1306a73bSElena Agostini return -1; 215*1306a73bSElena Agostini } 216*1306a73bSElena Agostini 217*1306a73bSElena Agostini res = sym_cuGetProcAddress("cuGetErrorName", 218*1306a73bSElena Agostini (void **)(&pfn_cuGetErrorName), cuda_driver_version, 0); 219*1306a73bSElena Agostini if (res != 0) { 220*1306a73bSElena Agostini rte_cuda_log(ERR, "Retrieve pfn_cuGetErrorName failed with %d", res); 221*1306a73bSElena Agostini return -1; 222*1306a73bSElena Agostini } 223*1306a73bSElena Agostini 224*1306a73bSElena Agostini res = sym_cuGetProcAddress("cuPointerSetAttribute", 225*1306a73bSElena Agostini (void **)(&pfn_cuPointerSetAttribute), cuda_driver_version, 0); 226*1306a73bSElena Agostini if (res != 0) { 227*1306a73bSElena Agostini rte_cuda_log(ERR, "Retrieve pfn_cuPointerSetAttribute failed with %d", res); 228*1306a73bSElena Agostini return -1; 229*1306a73bSElena Agostini } 230*1306a73bSElena Agostini 231*1306a73bSElena Agostini res = sym_cuGetProcAddress("cuDeviceGetAttribute", 232*1306a73bSElena Agostini (void **)(&pfn_cuDeviceGetAttribute), cuda_driver_version, 0); 233*1306a73bSElena Agostini if (res != 0) { 234*1306a73bSElena Agostini rte_cuda_log(ERR, "Retrieve pfn_cuDeviceGetAttribute failed with %d", res); 235*1306a73bSElena Agostini return -1; 236*1306a73bSElena Agostini } 237*1306a73bSElena Agostini 238*1306a73bSElena Agostini res = sym_cuGetProcAddress("cuDeviceGetByPCIBusId", 239*1306a73bSElena Agostini (void **)(&pfn_cuDeviceGetByPCIBusId), cuda_driver_version, 0); 240*1306a73bSElena Agostini if (res != 0) { 241*1306a73bSElena Agostini rte_cuda_log(ERR, "Retrieve pfn_cuDeviceGetByPCIBusId failed with %d", res); 242*1306a73bSElena Agostini return -1; 243*1306a73bSElena Agostini } 244*1306a73bSElena Agostini 245*1306a73bSElena Agostini res = sym_cuGetProcAddress("cuDeviceGetName", 246*1306a73bSElena Agostini (void **)(&pfn_cuDeviceGetName), cuda_driver_version, 0); 247*1306a73bSElena Agostini if (res != 0) { 248*1306a73bSElena Agostini rte_cuda_log(ERR, "Retrieve pfn_cuDeviceGetName failed with %d", res); 249*1306a73bSElena Agostini return -1; 250*1306a73bSElena Agostini } 251*1306a73bSElena Agostini 252*1306a73bSElena Agostini res = sym_cuGetProcAddress("cuDevicePrimaryCtxRetain", 253*1306a73bSElena Agostini (void **)(&pfn_cuDevicePrimaryCtxRetain), cuda_driver_version, 0); 254*1306a73bSElena Agostini if (res != 0) { 255*1306a73bSElena Agostini rte_cuda_log(ERR, "Retrieve pfn_cuDevicePrimaryCtxRetain failed with %d", res); 256*1306a73bSElena Agostini return -1; 257*1306a73bSElena Agostini } 258*1306a73bSElena Agostini 259*1306a73bSElena Agostini res = sym_cuGetProcAddress("cuDevicePrimaryCtxRelease", 260*1306a73bSElena Agostini (void **)(&pfn_cuDevicePrimaryCtxRelease), cuda_driver_version, 0); 261*1306a73bSElena Agostini if (res != 0) { 262*1306a73bSElena Agostini rte_cuda_log(ERR, "Retrieve pfn_cuDevicePrimaryCtxRelease failed with %d", res); 263*1306a73bSElena Agostini return -1; 264*1306a73bSElena Agostini } 265*1306a73bSElena Agostini 266*1306a73bSElena Agostini res = sym_cuGetProcAddress("cuDeviceTotalMem", 267*1306a73bSElena Agostini (void **)(&pfn_cuDeviceTotalMem), cuda_driver_version, 0); 268*1306a73bSElena Agostini if (res != 0) { 269*1306a73bSElena Agostini rte_cuda_log(ERR, "Retrieve pfn_cuDeviceTotalMem failed with %d", res); 270*1306a73bSElena Agostini return -1; 271*1306a73bSElena Agostini } 272*1306a73bSElena Agostini 273*1306a73bSElena Agostini res = sym_cuGetProcAddress("cuCtxGetApiVersion", 274*1306a73bSElena Agostini (void **)(&pfn_cuCtxGetApiVersion), cuda_driver_version, 0); 275*1306a73bSElena Agostini if (res != 0) { 276*1306a73bSElena Agostini rte_cuda_log(ERR, "Retrieve pfn_cuCtxGetApiVersion failed with %d", res); 277*1306a73bSElena Agostini return -1; 278*1306a73bSElena Agostini } 279*1306a73bSElena Agostini 280*1306a73bSElena Agostini res = sym_cuGetProcAddress("cuCtxGetDevice", 281*1306a73bSElena Agostini (void **)(&pfn_cuCtxGetDevice), cuda_driver_version, 0); 282*1306a73bSElena Agostini if (res != 0) { 283*1306a73bSElena Agostini rte_cuda_log(ERR, "Retrieve pfn_cuCtxGetDevice failed with %d", res); 284*1306a73bSElena Agostini return -1; 285*1306a73bSElena Agostini } 286*1306a73bSElena Agostini 287*1306a73bSElena Agostini res = sym_cuGetProcAddress("cuCtxSetCurrent", 288*1306a73bSElena Agostini (void **)(&pfn_cuCtxSetCurrent), cuda_driver_version, 0); 289*1306a73bSElena Agostini if (res != 0) { 290*1306a73bSElena Agostini rte_cuda_log(ERR, "Retrieve pfn_cuCtxSetCurrent failed with %d", res); 291*1306a73bSElena Agostini return -1; 292*1306a73bSElena Agostini } 293*1306a73bSElena Agostini 294*1306a73bSElena Agostini res = sym_cuGetProcAddress("cuCtxGetCurrent", 295*1306a73bSElena Agostini (void **)(&pfn_cuCtxGetCurrent), cuda_driver_version, 0); 296*1306a73bSElena Agostini if (res != 0) { 297*1306a73bSElena Agostini rte_cuda_log(ERR, "Retrieve pfn_cuCtxGetCurrent failed with %d", res); 298*1306a73bSElena Agostini return -1; 299*1306a73bSElena Agostini } 300*1306a73bSElena Agostini 301*1306a73bSElena Agostini res = sym_cuGetProcAddress("cuCtxGetExecAffinity", 302*1306a73bSElena Agostini (void **)(&pfn_cuCtxGetExecAffinity), cuda_driver_version, 0); 303*1306a73bSElena Agostini if (res != 0) { 304*1306a73bSElena Agostini rte_cuda_log(ERR, "Retrieve pfn_cuCtxGetExecAffinity failed with %d", res); 305*1306a73bSElena Agostini return -1; 306*1306a73bSElena Agostini } 307*1306a73bSElena Agostini 308*1306a73bSElena Agostini res = sym_cuGetProcAddress("cuMemAlloc", 309*1306a73bSElena Agostini (void **)(&pfn_cuMemAlloc), cuda_driver_version, 0); 310*1306a73bSElena Agostini if (res != 0) { 311*1306a73bSElena Agostini rte_cuda_log(ERR, "Retrieve pfn_cuMemAlloc failed with %d", res); 312*1306a73bSElena Agostini return -1; 313*1306a73bSElena Agostini } 314*1306a73bSElena Agostini 315*1306a73bSElena Agostini res = sym_cuGetProcAddress("cuMemFree", 316*1306a73bSElena Agostini (void **)(&pfn_cuMemFree), cuda_driver_version, 0); 317*1306a73bSElena Agostini if (res != 0) { 318*1306a73bSElena Agostini rte_cuda_log(ERR, "Retrieve pfn_cuMemFree failed with %d", res); 319*1306a73bSElena Agostini return -1; 320*1306a73bSElena Agostini } 321*1306a73bSElena Agostini 322*1306a73bSElena Agostini res = sym_cuGetProcAddress("cuMemHostRegister", 323*1306a73bSElena Agostini (void **)(&pfn_cuMemHostRegister), cuda_driver_version, 0); 324*1306a73bSElena Agostini if (res != 0) { 325*1306a73bSElena Agostini rte_cuda_log(ERR, "Retrieve pfn_cuMemHostRegister failed with %d", res); 326*1306a73bSElena Agostini return -1; 327*1306a73bSElena Agostini } 328*1306a73bSElena Agostini 329*1306a73bSElena Agostini res = sym_cuGetProcAddress("cuMemHostUnregister", 330*1306a73bSElena Agostini (void **)(&pfn_cuMemHostUnregister), cuda_driver_version, 0); 331*1306a73bSElena Agostini if (res != 0) { 332*1306a73bSElena Agostini rte_cuda_log(ERR, "Retrieve pfn_cuMemHostUnregister failed with %d", res); 333*1306a73bSElena Agostini return -1; 334*1306a73bSElena Agostini } 335*1306a73bSElena Agostini 336*1306a73bSElena Agostini res = sym_cuGetProcAddress("cuMemHostGetDevicePointer", 337*1306a73bSElena Agostini (void **)(&pfn_cuMemHostGetDevicePointer), cuda_driver_version, 0); 338*1306a73bSElena Agostini if (res != 0) { 339*1306a73bSElena Agostini rte_cuda_log(ERR, "Retrieve pfn_cuMemHostGetDevicePointer failed with %d", res); 340*1306a73bSElena Agostini return -1; 341*1306a73bSElena Agostini } 342*1306a73bSElena Agostini 343*1306a73bSElena Agostini res = sym_cuGetProcAddress("cuFlushGPUDirectRDMAWrites", 344*1306a73bSElena Agostini (void **)(&pfn_cuFlushGPUDirectRDMAWrites), cuda_driver_version, 0); 345*1306a73bSElena Agostini if (res != 0) { 346*1306a73bSElena Agostini rte_cuda_log(ERR, "Retrieve cuFlushGPUDirectRDMAWrites failed with %d", res); 347*1306a73bSElena Agostini return -1; 348*1306a73bSElena Agostini } 349*1306a73bSElena Agostini 350*1306a73bSElena Agostini return 0; 351*1306a73bSElena Agostini } 352*1306a73bSElena Agostini 353*1306a73bSElena Agostini /* Generate a key from a memory pointer */ 354*1306a73bSElena Agostini static cuda_ptr_key 355*1306a73bSElena Agostini get_hash_from_ptr(void *ptr) 356*1306a73bSElena Agostini { 357*1306a73bSElena Agostini return (uintptr_t)ptr; 358*1306a73bSElena Agostini } 359*1306a73bSElena Agostini 360*1306a73bSElena Agostini static uint32_t 361*1306a73bSElena Agostini mem_list_count_item(void) 362*1306a73bSElena Agostini { 363*1306a73bSElena Agostini return mem_alloc_list_last_elem; 364*1306a73bSElena Agostini } 365*1306a73bSElena Agostini 366*1306a73bSElena Agostini /* Initiate list of memory allocations if not done yet */ 367*1306a73bSElena Agostini static struct mem_entry * 368*1306a73bSElena Agostini mem_list_add_item(void) 369*1306a73bSElena Agostini { 370*1306a73bSElena Agostini /* Initiate list of memory allocations if not done yet */ 371*1306a73bSElena Agostini if (mem_alloc_list_head == NULL) { 372*1306a73bSElena Agostini mem_alloc_list_head = rte_zmalloc(NULL, 373*1306a73bSElena Agostini sizeof(struct mem_entry), 374*1306a73bSElena Agostini RTE_CACHE_LINE_SIZE); 375*1306a73bSElena Agostini if (mem_alloc_list_head == NULL) { 376*1306a73bSElena Agostini rte_cuda_log(ERR, "Failed to allocate memory for memory list"); 377*1306a73bSElena Agostini return NULL; 378*1306a73bSElena Agostini } 379*1306a73bSElena Agostini 380*1306a73bSElena Agostini mem_alloc_list_head->next = NULL; 381*1306a73bSElena Agostini mem_alloc_list_head->prev = NULL; 382*1306a73bSElena Agostini mem_alloc_list_tail = mem_alloc_list_head; 383*1306a73bSElena Agostini } else { 384*1306a73bSElena Agostini struct mem_entry *mem_alloc_list_cur = rte_zmalloc(NULL, 385*1306a73bSElena Agostini sizeof(struct mem_entry), 386*1306a73bSElena Agostini RTE_CACHE_LINE_SIZE); 387*1306a73bSElena Agostini 388*1306a73bSElena Agostini if (mem_alloc_list_cur == NULL) { 389*1306a73bSElena Agostini rte_cuda_log(ERR, "Failed to allocate memory for memory list"); 390*1306a73bSElena Agostini return NULL; 391*1306a73bSElena Agostini } 392*1306a73bSElena Agostini 393*1306a73bSElena Agostini mem_alloc_list_tail->next = mem_alloc_list_cur; 394*1306a73bSElena Agostini mem_alloc_list_cur->prev = mem_alloc_list_tail; 395*1306a73bSElena Agostini mem_alloc_list_tail = mem_alloc_list_tail->next; 396*1306a73bSElena Agostini mem_alloc_list_tail->next = NULL; 397*1306a73bSElena Agostini } 398*1306a73bSElena Agostini 399*1306a73bSElena Agostini mem_alloc_list_last_elem++; 400*1306a73bSElena Agostini 401*1306a73bSElena Agostini return mem_alloc_list_tail; 402*1306a73bSElena Agostini } 403*1306a73bSElena Agostini 404*1306a73bSElena Agostini static struct mem_entry * 405*1306a73bSElena Agostini mem_list_find_item(cuda_ptr_key pk) 406*1306a73bSElena Agostini { 407*1306a73bSElena Agostini struct mem_entry *mem_alloc_list_cur = NULL; 408*1306a73bSElena Agostini 409*1306a73bSElena Agostini if (mem_alloc_list_head == NULL) { 410*1306a73bSElena Agostini rte_cuda_log(ERR, "Memory list doesn't exist"); 411*1306a73bSElena Agostini return NULL; 412*1306a73bSElena Agostini } 413*1306a73bSElena Agostini 414*1306a73bSElena Agostini if (mem_list_count_item() == 0) { 415*1306a73bSElena Agostini rte_cuda_log(ERR, "No items in memory list"); 416*1306a73bSElena Agostini return NULL; 417*1306a73bSElena Agostini } 418*1306a73bSElena Agostini 419*1306a73bSElena Agostini mem_alloc_list_cur = mem_alloc_list_head; 420*1306a73bSElena Agostini 421*1306a73bSElena Agostini while (mem_alloc_list_cur != NULL) { 422*1306a73bSElena Agostini if (mem_alloc_list_cur->pkey == pk) 423*1306a73bSElena Agostini return mem_alloc_list_cur; 424*1306a73bSElena Agostini mem_alloc_list_cur = mem_alloc_list_cur->next; 425*1306a73bSElena Agostini } 426*1306a73bSElena Agostini 427*1306a73bSElena Agostini return mem_alloc_list_cur; 428*1306a73bSElena Agostini } 429*1306a73bSElena Agostini 430*1306a73bSElena Agostini static int 431*1306a73bSElena Agostini mem_list_del_item(cuda_ptr_key pk) 432*1306a73bSElena Agostini { 433*1306a73bSElena Agostini struct mem_entry *mem_alloc_list_cur = NULL; 434*1306a73bSElena Agostini 435*1306a73bSElena Agostini mem_alloc_list_cur = mem_list_find_item(pk); 436*1306a73bSElena Agostini if (mem_alloc_list_cur == NULL) 437*1306a73bSElena Agostini return -EINVAL; 438*1306a73bSElena Agostini 439*1306a73bSElena Agostini /* if key is in head */ 440*1306a73bSElena Agostini if (mem_alloc_list_cur->prev == NULL) 441*1306a73bSElena Agostini mem_alloc_list_head = mem_alloc_list_cur->next; 442*1306a73bSElena Agostini else { 443*1306a73bSElena Agostini mem_alloc_list_cur->prev->next = mem_alloc_list_cur->next; 444*1306a73bSElena Agostini if (mem_alloc_list_cur->next != NULL) 445*1306a73bSElena Agostini mem_alloc_list_cur->next->prev = mem_alloc_list_cur->prev; 446*1306a73bSElena Agostini } 447*1306a73bSElena Agostini 448*1306a73bSElena Agostini rte_free(mem_alloc_list_cur); 449*1306a73bSElena Agostini 450*1306a73bSElena Agostini mem_alloc_list_last_elem--; 451*1306a73bSElena Agostini 452*1306a73bSElena Agostini return 0; 453*1306a73bSElena Agostini } 454*1306a73bSElena Agostini 455*1306a73bSElena Agostini static int 456*1306a73bSElena Agostini cuda_dev_info_get(struct rte_gpu *dev, struct rte_gpu_info *info) 457*1306a73bSElena Agostini { 458*1306a73bSElena Agostini int ret = 0; 459*1306a73bSElena Agostini CUresult res; 460*1306a73bSElena Agostini struct rte_gpu_info parent_info; 461*1306a73bSElena Agostini CUexecAffinityParam affinityPrm; 462*1306a73bSElena Agostini const char *err_string; 463*1306a73bSElena Agostini struct cuda_info *private; 464*1306a73bSElena Agostini CUcontext current_ctx; 465*1306a73bSElena Agostini CUcontext input_ctx; 466*1306a73bSElena Agostini 467*1306a73bSElena Agostini if (dev == NULL) 468*1306a73bSElena Agostini return -ENODEV; 469*1306a73bSElena Agostini 470*1306a73bSElena Agostini /* Child initialization time probably called by rte_gpu_add_child() */ 471*1306a73bSElena Agostini if (dev->mpshared->info.parent != RTE_GPU_ID_NONE && 472*1306a73bSElena Agostini dev->mpshared->dev_private == NULL) { 473*1306a73bSElena Agostini /* Store current ctx */ 474*1306a73bSElena Agostini res = pfn_cuCtxGetCurrent(¤t_ctx); 475*1306a73bSElena Agostini if (res != 0) { 476*1306a73bSElena Agostini pfn_cuGetErrorString(res, &(err_string)); 477*1306a73bSElena Agostini rte_cuda_log(ERR, "cuCtxGetCurrent failed with %s", 478*1306a73bSElena Agostini err_string); 479*1306a73bSElena Agostini return -EPERM; 480*1306a73bSElena Agostini } 481*1306a73bSElena Agostini 482*1306a73bSElena Agostini /* Set child ctx as current ctx */ 483*1306a73bSElena Agostini input_ctx = (CUcontext)((uintptr_t)dev->mpshared->info.context); 484*1306a73bSElena Agostini res = pfn_cuCtxSetCurrent(input_ctx); 485*1306a73bSElena Agostini if (res != 0) { 486*1306a73bSElena Agostini pfn_cuGetErrorString(res, &(err_string)); 487*1306a73bSElena Agostini rte_cuda_log(ERR, "cuCtxSetCurrent input failed with %s", 488*1306a73bSElena Agostini err_string); 489*1306a73bSElena Agostini return -EPERM; 490*1306a73bSElena Agostini } 491*1306a73bSElena Agostini 492*1306a73bSElena Agostini /* 493*1306a73bSElena Agostini * Ctx capacity info 494*1306a73bSElena Agostini */ 495*1306a73bSElena Agostini 496*1306a73bSElena Agostini /* MPS compatible */ 497*1306a73bSElena Agostini res = pfn_cuCtxGetExecAffinity(&affinityPrm, 498*1306a73bSElena Agostini CU_EXEC_AFFINITY_TYPE_SM_COUNT); 499*1306a73bSElena Agostini if (res != 0) { 500*1306a73bSElena Agostini pfn_cuGetErrorString(res, &(err_string)); 501*1306a73bSElena Agostini rte_cuda_log(ERR, "cuCtxGetExecAffinity failed with %s", 502*1306a73bSElena Agostini err_string); 503*1306a73bSElena Agostini } 504*1306a73bSElena Agostini dev->mpshared->info.processor_count = 505*1306a73bSElena Agostini (uint32_t)affinityPrm.param.smCount.val; 506*1306a73bSElena Agostini 507*1306a73bSElena Agostini ret = rte_gpu_info_get(dev->mpshared->info.parent, &parent_info); 508*1306a73bSElena Agostini if (ret) 509*1306a73bSElena Agostini return -ENODEV; 510*1306a73bSElena Agostini dev->mpshared->info.total_memory = parent_info.total_memory; 511*1306a73bSElena Agostini 512*1306a73bSElena Agostini /* 513*1306a73bSElena Agostini * GPU Device private info 514*1306a73bSElena Agostini */ 515*1306a73bSElena Agostini dev->mpshared->dev_private = rte_zmalloc(NULL, 516*1306a73bSElena Agostini sizeof(struct cuda_info), 517*1306a73bSElena Agostini RTE_CACHE_LINE_SIZE); 518*1306a73bSElena Agostini if (dev->mpshared->dev_private == NULL) { 519*1306a73bSElena Agostini rte_cuda_log(ERR, "Failed to allocate memory for GPU process private"); 520*1306a73bSElena Agostini return -EPERM; 521*1306a73bSElena Agostini } 522*1306a73bSElena Agostini 523*1306a73bSElena Agostini private = (struct cuda_info *)dev->mpshared->dev_private; 524*1306a73bSElena Agostini 525*1306a73bSElena Agostini res = pfn_cuCtxGetDevice(&(private->cu_dev)); 526*1306a73bSElena Agostini if (res != 0) { 527*1306a73bSElena Agostini pfn_cuGetErrorString(res, &(err_string)); 528*1306a73bSElena Agostini rte_cuda_log(ERR, "cuCtxGetDevice failed with %s", 529*1306a73bSElena Agostini err_string); 530*1306a73bSElena Agostini return -EPERM; 531*1306a73bSElena Agostini } 532*1306a73bSElena Agostini 533*1306a73bSElena Agostini res = pfn_cuDeviceGetName(private->gpu_name, 534*1306a73bSElena Agostini RTE_DEV_NAME_MAX_LEN, private->cu_dev); 535*1306a73bSElena Agostini if (res != 0) { 536*1306a73bSElena Agostini pfn_cuGetErrorString(res, &(err_string)); 537*1306a73bSElena Agostini rte_cuda_log(ERR, "cuDeviceGetName failed with %s", 538*1306a73bSElena Agostini err_string); 539*1306a73bSElena Agostini return -EPERM; 540*1306a73bSElena Agostini } 541*1306a73bSElena Agostini 542*1306a73bSElena Agostini /* Restore original ctx as current ctx */ 543*1306a73bSElena Agostini res = pfn_cuCtxSetCurrent(current_ctx); 544*1306a73bSElena Agostini if (res != 0) { 545*1306a73bSElena Agostini pfn_cuGetErrorString(res, &(err_string)); 546*1306a73bSElena Agostini rte_cuda_log(ERR, "cuCtxSetCurrent current failed with %s", 547*1306a73bSElena Agostini err_string); 548*1306a73bSElena Agostini return -EPERM; 549*1306a73bSElena Agostini } 550*1306a73bSElena Agostini } 551*1306a73bSElena Agostini 552*1306a73bSElena Agostini *info = dev->mpshared->info; 553*1306a73bSElena Agostini 554*1306a73bSElena Agostini return 0; 555*1306a73bSElena Agostini } 556*1306a73bSElena Agostini 557*1306a73bSElena Agostini /* 558*1306a73bSElena Agostini * GPU Memory 559*1306a73bSElena Agostini */ 560*1306a73bSElena Agostini 561*1306a73bSElena Agostini static int 562*1306a73bSElena Agostini cuda_mem_alloc(struct rte_gpu *dev, size_t size, void **ptr) 563*1306a73bSElena Agostini { 564*1306a73bSElena Agostini CUresult res; 565*1306a73bSElena Agostini const char *err_string; 566*1306a73bSElena Agostini CUcontext current_ctx; 567*1306a73bSElena Agostini CUcontext input_ctx; 568*1306a73bSElena Agostini unsigned int flag = 1; 569*1306a73bSElena Agostini 570*1306a73bSElena Agostini if (dev == NULL) 571*1306a73bSElena Agostini return -ENODEV; 572*1306a73bSElena Agostini if (size == 0) 573*1306a73bSElena Agostini return -EINVAL; 574*1306a73bSElena Agostini 575*1306a73bSElena Agostini /* Store current ctx */ 576*1306a73bSElena Agostini res = pfn_cuCtxGetCurrent(¤t_ctx); 577*1306a73bSElena Agostini if (res != 0) { 578*1306a73bSElena Agostini pfn_cuGetErrorString(res, &(err_string)); 579*1306a73bSElena Agostini rte_cuda_log(ERR, "cuCtxGetCurrent failed with %s", 580*1306a73bSElena Agostini err_string); 581*1306a73bSElena Agostini return -EPERM; 582*1306a73bSElena Agostini } 583*1306a73bSElena Agostini 584*1306a73bSElena Agostini /* Set child ctx as current ctx */ 585*1306a73bSElena Agostini input_ctx = (CUcontext)((uintptr_t)dev->mpshared->info.context); 586*1306a73bSElena Agostini res = pfn_cuCtxSetCurrent(input_ctx); 587*1306a73bSElena Agostini if (res != 0) { 588*1306a73bSElena Agostini pfn_cuGetErrorString(res, &(err_string)); 589*1306a73bSElena Agostini rte_cuda_log(ERR, "cuCtxSetCurrent input failed with %s", 590*1306a73bSElena Agostini err_string); 591*1306a73bSElena Agostini return -EPERM; 592*1306a73bSElena Agostini } 593*1306a73bSElena Agostini 594*1306a73bSElena Agostini /* Get next memory list item */ 595*1306a73bSElena Agostini mem_alloc_list_tail = mem_list_add_item(); 596*1306a73bSElena Agostini if (mem_alloc_list_tail == NULL) 597*1306a73bSElena Agostini return -ENOMEM; 598*1306a73bSElena Agostini 599*1306a73bSElena Agostini /* Allocate memory */ 600*1306a73bSElena Agostini mem_alloc_list_tail->size = size; 601*1306a73bSElena Agostini res = pfn_cuMemAlloc(&(mem_alloc_list_tail->ptr_d), 602*1306a73bSElena Agostini mem_alloc_list_tail->size); 603*1306a73bSElena Agostini if (res != 0) { 604*1306a73bSElena Agostini pfn_cuGetErrorString(res, &(err_string)); 605*1306a73bSElena Agostini rte_cuda_log(ERR, "cuCtxSetCurrent current failed with %s", 606*1306a73bSElena Agostini err_string); 607*1306a73bSElena Agostini return -EPERM; 608*1306a73bSElena Agostini } 609*1306a73bSElena Agostini 610*1306a73bSElena Agostini /* GPUDirect RDMA attribute required */ 611*1306a73bSElena Agostini res = pfn_cuPointerSetAttribute(&flag, 612*1306a73bSElena Agostini CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, 613*1306a73bSElena Agostini mem_alloc_list_tail->ptr_d); 614*1306a73bSElena Agostini if (res != 0) { 615*1306a73bSElena Agostini rte_cuda_log(ERR, "Could not set SYNC MEMOP attribute for " 616*1306a73bSElena Agostini "GPU memory at %"PRIu32", err %d", 617*1306a73bSElena Agostini (uint32_t)mem_alloc_list_tail->ptr_d, res); 618*1306a73bSElena Agostini return -EPERM; 619*1306a73bSElena Agostini } 620*1306a73bSElena Agostini 621*1306a73bSElena Agostini mem_alloc_list_tail->pkey = get_hash_from_ptr((void *)mem_alloc_list_tail->ptr_d); 622*1306a73bSElena Agostini mem_alloc_list_tail->ptr_h = NULL; 623*1306a73bSElena Agostini mem_alloc_list_tail->size = size; 624*1306a73bSElena Agostini mem_alloc_list_tail->dev = dev; 625*1306a73bSElena Agostini mem_alloc_list_tail->ctx = (CUcontext)((uintptr_t)dev->mpshared->info.context); 626*1306a73bSElena Agostini mem_alloc_list_tail->mtype = GPU_MEM; 627*1306a73bSElena Agostini 628*1306a73bSElena Agostini /* Restore original ctx as current ctx */ 629*1306a73bSElena Agostini res = pfn_cuCtxSetCurrent(current_ctx); 630*1306a73bSElena Agostini if (res != 0) { 631*1306a73bSElena Agostini pfn_cuGetErrorString(res, &(err_string)); 632*1306a73bSElena Agostini rte_cuda_log(ERR, "cuCtxSetCurrent current failed with %s", 633*1306a73bSElena Agostini err_string); 634*1306a73bSElena Agostini return -EPERM; 635*1306a73bSElena Agostini } 636*1306a73bSElena Agostini 637*1306a73bSElena Agostini *ptr = (void *)mem_alloc_list_tail->ptr_d; 638*1306a73bSElena Agostini 639*1306a73bSElena Agostini return 0; 640*1306a73bSElena Agostini } 641*1306a73bSElena Agostini 642*1306a73bSElena Agostini static int 643*1306a73bSElena Agostini cuda_mem_register(struct rte_gpu *dev, size_t size, void *ptr) 644*1306a73bSElena Agostini { 645*1306a73bSElena Agostini CUresult res; 646*1306a73bSElena Agostini const char *err_string; 647*1306a73bSElena Agostini CUcontext current_ctx; 648*1306a73bSElena Agostini CUcontext input_ctx; 649*1306a73bSElena Agostini unsigned int flag = 1; 650*1306a73bSElena Agostini int use_ptr_h = 0; 651*1306a73bSElena Agostini 652*1306a73bSElena Agostini if (dev == NULL) 653*1306a73bSElena Agostini return -ENODEV; 654*1306a73bSElena Agostini 655*1306a73bSElena Agostini if (size == 0 || ptr == NULL) 656*1306a73bSElena Agostini return -EINVAL; 657*1306a73bSElena Agostini 658*1306a73bSElena Agostini /* Store current ctx */ 659*1306a73bSElena Agostini res = pfn_cuCtxGetCurrent(¤t_ctx); 660*1306a73bSElena Agostini if (res != 0) { 661*1306a73bSElena Agostini pfn_cuGetErrorString(res, &(err_string)); 662*1306a73bSElena Agostini rte_cuda_log(ERR, "cuCtxGetCurrent failed with %s", 663*1306a73bSElena Agostini err_string); 664*1306a73bSElena Agostini return -EPERM; 665*1306a73bSElena Agostini } 666*1306a73bSElena Agostini 667*1306a73bSElena Agostini /* Set child ctx as current ctx */ 668*1306a73bSElena Agostini input_ctx = (CUcontext)((uintptr_t)dev->mpshared->info.context); 669*1306a73bSElena Agostini res = pfn_cuCtxSetCurrent(input_ctx); 670*1306a73bSElena Agostini if (res != 0) { 671*1306a73bSElena Agostini pfn_cuGetErrorString(res, &(err_string)); 672*1306a73bSElena Agostini rte_cuda_log(ERR, "cuCtxSetCurrent input failed with %s", 673*1306a73bSElena Agostini err_string); 674*1306a73bSElena Agostini return -EPERM; 675*1306a73bSElena Agostini } 676*1306a73bSElena Agostini 677*1306a73bSElena Agostini /* Get next memory list item */ 678*1306a73bSElena Agostini mem_alloc_list_tail = mem_list_add_item(); 679*1306a73bSElena Agostini if (mem_alloc_list_tail == NULL) 680*1306a73bSElena Agostini return -ENOMEM; 681*1306a73bSElena Agostini 682*1306a73bSElena Agostini /* Allocate memory */ 683*1306a73bSElena Agostini mem_alloc_list_tail->size = size; 684*1306a73bSElena Agostini mem_alloc_list_tail->ptr_h = ptr; 685*1306a73bSElena Agostini 686*1306a73bSElena Agostini res = pfn_cuMemHostRegister(mem_alloc_list_tail->ptr_h, 687*1306a73bSElena Agostini mem_alloc_list_tail->size, 688*1306a73bSElena Agostini CU_MEMHOSTREGISTER_PORTABLE | 689*1306a73bSElena Agostini CU_MEMHOSTREGISTER_DEVICEMAP); 690*1306a73bSElena Agostini if (res != 0) { 691*1306a73bSElena Agostini pfn_cuGetErrorString(res, &(err_string)); 692*1306a73bSElena Agostini rte_cuda_log(ERR, "cuMemHostRegister failed with %s ptr %p size %zd", 693*1306a73bSElena Agostini err_string, 694*1306a73bSElena Agostini mem_alloc_list_tail->ptr_h, 695*1306a73bSElena Agostini mem_alloc_list_tail->size); 696*1306a73bSElena Agostini return -EPERM; 697*1306a73bSElena Agostini } 698*1306a73bSElena Agostini 699*1306a73bSElena Agostini res = pfn_cuDeviceGetAttribute(&(use_ptr_h), 700*1306a73bSElena Agostini CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM, 701*1306a73bSElena Agostini ((struct cuda_info *)(dev->mpshared->dev_private))->cu_dev); 702*1306a73bSElena Agostini if (res != 0) { 703*1306a73bSElena Agostini pfn_cuGetErrorString(res, &(err_string)); 704*1306a73bSElena Agostini rte_cuda_log(ERR, "cuDeviceGetAttribute failed with %s", 705*1306a73bSElena Agostini err_string); 706*1306a73bSElena Agostini return -EPERM; 707*1306a73bSElena Agostini } 708*1306a73bSElena Agostini 709*1306a73bSElena Agostini if (use_ptr_h == 0) { 710*1306a73bSElena Agostini res = pfn_cuMemHostGetDevicePointer(&(mem_alloc_list_tail->ptr_d), 711*1306a73bSElena Agostini mem_alloc_list_tail->ptr_h, 0); 712*1306a73bSElena Agostini if (res != 0) { 713*1306a73bSElena Agostini pfn_cuGetErrorString(res, &(err_string)); 714*1306a73bSElena Agostini rte_cuda_log(ERR, "cuMemHostGetDevicePointer failed with %s", 715*1306a73bSElena Agostini err_string); 716*1306a73bSElena Agostini return -EPERM; 717*1306a73bSElena Agostini } 718*1306a73bSElena Agostini 719*1306a73bSElena Agostini if ((uintptr_t)mem_alloc_list_tail->ptr_d != 720*1306a73bSElena Agostini (uintptr_t)mem_alloc_list_tail->ptr_h) { 721*1306a73bSElena Agostini rte_cuda_log(ERR, "Host input pointer is different wrt GPU registered pointer"); 722*1306a73bSElena Agostini return -ENOTSUP; 723*1306a73bSElena Agostini } 724*1306a73bSElena Agostini } else { 725*1306a73bSElena Agostini mem_alloc_list_tail->ptr_d = (CUdeviceptr)mem_alloc_list_tail->ptr_h; 726*1306a73bSElena Agostini } 727*1306a73bSElena Agostini 728*1306a73bSElena Agostini /* GPUDirect RDMA attribute required */ 729*1306a73bSElena Agostini res = pfn_cuPointerSetAttribute(&flag, 730*1306a73bSElena Agostini CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, 731*1306a73bSElena Agostini mem_alloc_list_tail->ptr_d); 732*1306a73bSElena Agostini if (res != 0) { 733*1306a73bSElena Agostini rte_cuda_log(ERR, "Could not set SYNC MEMOP attribute for GPU memory at %"PRIu32 734*1306a73bSElena Agostini ", err %d", (uint32_t)mem_alloc_list_tail->ptr_d, res); 735*1306a73bSElena Agostini return -EPERM; 736*1306a73bSElena Agostini } 737*1306a73bSElena Agostini 738*1306a73bSElena Agostini mem_alloc_list_tail->pkey = get_hash_from_ptr((void *)mem_alloc_list_tail->ptr_h); 739*1306a73bSElena Agostini mem_alloc_list_tail->size = size; 740*1306a73bSElena Agostini mem_alloc_list_tail->dev = dev; 741*1306a73bSElena Agostini mem_alloc_list_tail->ctx = (CUcontext)((uintptr_t)dev->mpshared->info.context); 742*1306a73bSElena Agostini mem_alloc_list_tail->mtype = CPU_REGISTERED; 743*1306a73bSElena Agostini 744*1306a73bSElena Agostini /* Restore original ctx as current ctx */ 745*1306a73bSElena Agostini res = pfn_cuCtxSetCurrent(current_ctx); 746*1306a73bSElena Agostini if (res != 0) { 747*1306a73bSElena Agostini pfn_cuGetErrorString(res, &(err_string)); 748*1306a73bSElena Agostini rte_cuda_log(ERR, "cuCtxSetCurrent current failed with %s", 749*1306a73bSElena Agostini err_string); 750*1306a73bSElena Agostini return -EPERM; 751*1306a73bSElena Agostini } 752*1306a73bSElena Agostini 753*1306a73bSElena Agostini return 0; 754*1306a73bSElena Agostini } 755*1306a73bSElena Agostini 756*1306a73bSElena Agostini static int 757*1306a73bSElena Agostini cuda_mem_free(struct rte_gpu *dev, void *ptr) 758*1306a73bSElena Agostini { 759*1306a73bSElena Agostini CUresult res; 760*1306a73bSElena Agostini struct mem_entry *mem_item; 761*1306a73bSElena Agostini const char *err_string; 762*1306a73bSElena Agostini cuda_ptr_key hk; 763*1306a73bSElena Agostini 764*1306a73bSElena Agostini if (dev == NULL) 765*1306a73bSElena Agostini return -ENODEV; 766*1306a73bSElena Agostini 767*1306a73bSElena Agostini if (ptr == NULL) 768*1306a73bSElena Agostini return -EINVAL; 769*1306a73bSElena Agostini 770*1306a73bSElena Agostini hk = get_hash_from_ptr((void *)ptr); 771*1306a73bSElena Agostini 772*1306a73bSElena Agostini mem_item = mem_list_find_item(hk); 773*1306a73bSElena Agostini if (mem_item == NULL) { 774*1306a73bSElena Agostini rte_cuda_log(ERR, "Memory address 0x%p not found in driver memory", ptr); 775*1306a73bSElena Agostini return -EPERM; 776*1306a73bSElena Agostini } 777*1306a73bSElena Agostini 778*1306a73bSElena Agostini if (mem_item->mtype == GPU_MEM) { 779*1306a73bSElena Agostini res = pfn_cuMemFree(mem_item->ptr_d); 780*1306a73bSElena Agostini if (res != 0) { 781*1306a73bSElena Agostini pfn_cuGetErrorString(res, &(err_string)); 782*1306a73bSElena Agostini rte_cuda_log(ERR, "cuMemFree current failed with %s", 783*1306a73bSElena Agostini err_string); 784*1306a73bSElena Agostini return -EPERM; 785*1306a73bSElena Agostini } 786*1306a73bSElena Agostini 787*1306a73bSElena Agostini return mem_list_del_item(hk); 788*1306a73bSElena Agostini } 789*1306a73bSElena Agostini 790*1306a73bSElena Agostini rte_cuda_log(ERR, "Memory type %d not supported", mem_item->mtype); 791*1306a73bSElena Agostini 792*1306a73bSElena Agostini return -EPERM; 793*1306a73bSElena Agostini } 794*1306a73bSElena Agostini 795*1306a73bSElena Agostini static int 796*1306a73bSElena Agostini cuda_mem_unregister(struct rte_gpu *dev, void *ptr) 797*1306a73bSElena Agostini { 798*1306a73bSElena Agostini CUresult res; 799*1306a73bSElena Agostini struct mem_entry *mem_item; 800*1306a73bSElena Agostini const char *err_string; 801*1306a73bSElena Agostini cuda_ptr_key hk; 802*1306a73bSElena Agostini 803*1306a73bSElena Agostini if (dev == NULL) 804*1306a73bSElena Agostini return -ENODEV; 805*1306a73bSElena Agostini 806*1306a73bSElena Agostini if (ptr == NULL) 807*1306a73bSElena Agostini return -EINVAL; 808*1306a73bSElena Agostini 809*1306a73bSElena Agostini hk = get_hash_from_ptr((void *)ptr); 810*1306a73bSElena Agostini 811*1306a73bSElena Agostini mem_item = mem_list_find_item(hk); 812*1306a73bSElena Agostini if (mem_item == NULL) { 813*1306a73bSElena Agostini rte_cuda_log(ERR, "Memory address 0x%p not found in driver memory", ptr); 814*1306a73bSElena Agostini return -EPERM; 815*1306a73bSElena Agostini } 816*1306a73bSElena Agostini 817*1306a73bSElena Agostini if (mem_item->mtype == CPU_REGISTERED) { 818*1306a73bSElena Agostini res = pfn_cuMemHostUnregister(ptr); 819*1306a73bSElena Agostini if (res != 0) { 820*1306a73bSElena Agostini pfn_cuGetErrorString(res, &(err_string)); 821*1306a73bSElena Agostini rte_cuda_log(ERR, "cuMemHostUnregister current failed with %s", 822*1306a73bSElena Agostini err_string); 823*1306a73bSElena Agostini return -EPERM; 824*1306a73bSElena Agostini } 825*1306a73bSElena Agostini 826*1306a73bSElena Agostini return mem_list_del_item(hk); 827*1306a73bSElena Agostini } 828*1306a73bSElena Agostini 829*1306a73bSElena Agostini rte_cuda_log(ERR, "Memory type %d not supported", mem_item->mtype); 830*1306a73bSElena Agostini 831*1306a73bSElena Agostini return -EPERM; 832*1306a73bSElena Agostini } 833*1306a73bSElena Agostini 834*1306a73bSElena Agostini static int 835*1306a73bSElena Agostini cuda_dev_close(struct rte_gpu *dev) 836*1306a73bSElena Agostini { 837*1306a73bSElena Agostini if (dev == NULL) 838*1306a73bSElena Agostini return -EINVAL; 839*1306a73bSElena Agostini 840*1306a73bSElena Agostini rte_free(dev->mpshared->dev_private); 841*1306a73bSElena Agostini 842*1306a73bSElena Agostini return 0; 843*1306a73bSElena Agostini } 844*1306a73bSElena Agostini 845*1306a73bSElena Agostini static int 846*1306a73bSElena Agostini cuda_wmb(struct rte_gpu *dev) 847*1306a73bSElena Agostini { 848*1306a73bSElena Agostini CUresult res; 849*1306a73bSElena Agostini const char *err_string; 850*1306a73bSElena Agostini CUcontext current_ctx; 851*1306a73bSElena Agostini CUcontext input_ctx; 852*1306a73bSElena Agostini struct cuda_info *private; 853*1306a73bSElena Agostini 854*1306a73bSElena Agostini if (dev == NULL) 855*1306a73bSElena Agostini return -ENODEV; 856*1306a73bSElena Agostini 857*1306a73bSElena Agostini private = (struct cuda_info *)dev->mpshared->dev_private; 858*1306a73bSElena Agostini 859*1306a73bSElena Agostini if (private->gdr_write_ordering != CU_GPU_DIRECT_RDMA_WRITES_ORDERING_NONE) { 860*1306a73bSElena Agostini /* 861*1306a73bSElena Agostini * No need to explicitly force the write ordering because 862*1306a73bSElena Agostini * the device natively supports it 863*1306a73bSElena Agostini */ 864*1306a73bSElena Agostini return 0; 865*1306a73bSElena Agostini } 866*1306a73bSElena Agostini 867*1306a73bSElena Agostini if (private->gdr_flush_type != CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_HOST) { 868*1306a73bSElena Agostini /* 869*1306a73bSElena Agostini * Can't flush GDR writes with cuFlushGPUDirectRDMAWrites CUDA function. 870*1306a73bSElena Agostini * Application needs to use alternative methods. 871*1306a73bSElena Agostini */ 872*1306a73bSElena Agostini rte_cuda_log(WARNING, "Can't flush GDR writes with cuFlushGPUDirectRDMAWrites CUDA function." 873*1306a73bSElena Agostini "Application needs to use alternative methods."); 874*1306a73bSElena Agostini return -ENOTSUP; 875*1306a73bSElena Agostini } 876*1306a73bSElena Agostini 877*1306a73bSElena Agostini /* Store current ctx */ 878*1306a73bSElena Agostini res = pfn_cuCtxGetCurrent(¤t_ctx); 879*1306a73bSElena Agostini if (res != 0) { 880*1306a73bSElena Agostini pfn_cuGetErrorString(res, &(err_string)); 881*1306a73bSElena Agostini rte_cuda_log(ERR, "cuCtxGetCurrent failed with %s", 882*1306a73bSElena Agostini err_string); 883*1306a73bSElena Agostini return -EPERM; 884*1306a73bSElena Agostini } 885*1306a73bSElena Agostini 886*1306a73bSElena Agostini /* Set child ctx as current ctx */ 887*1306a73bSElena Agostini input_ctx = (CUcontext)((uintptr_t)dev->mpshared->info.context); 888*1306a73bSElena Agostini res = pfn_cuCtxSetCurrent(input_ctx); 889*1306a73bSElena Agostini if (res != 0) { 890*1306a73bSElena Agostini pfn_cuGetErrorString(res, &(err_string)); 891*1306a73bSElena Agostini rte_cuda_log(ERR, "cuCtxSetCurrent input failed with %s", 892*1306a73bSElena Agostini err_string); 893*1306a73bSElena Agostini return -EPERM; 894*1306a73bSElena Agostini } 895*1306a73bSElena Agostini 896*1306a73bSElena Agostini res = pfn_cuFlushGPUDirectRDMAWrites(CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TARGET_CURRENT_CTX, 897*1306a73bSElena Agostini CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_ALL_DEVICES); 898*1306a73bSElena Agostini if (res != 0) { 899*1306a73bSElena Agostini pfn_cuGetErrorString(res, &(err_string)); 900*1306a73bSElena Agostini rte_cuda_log(ERR, "cuFlushGPUDirectRDMAWrites current failed with %s", 901*1306a73bSElena Agostini err_string); 902*1306a73bSElena Agostini return -EPERM; 903*1306a73bSElena Agostini } 904*1306a73bSElena Agostini 905*1306a73bSElena Agostini /* Restore original ctx as current ctx */ 906*1306a73bSElena Agostini res = pfn_cuCtxSetCurrent(current_ctx); 907*1306a73bSElena Agostini if (res != 0) { 908*1306a73bSElena Agostini pfn_cuGetErrorString(res, &(err_string)); 909*1306a73bSElena Agostini rte_cuda_log(ERR, "cuCtxSetCurrent current failed with %s", 910*1306a73bSElena Agostini err_string); 911*1306a73bSElena Agostini return -EPERM; 912*1306a73bSElena Agostini } 913*1306a73bSElena Agostini 914*1306a73bSElena Agostini return 0; 915*1306a73bSElena Agostini } 916*1306a73bSElena Agostini 917*1306a73bSElena Agostini static int 918*1306a73bSElena Agostini cuda_gpu_probe(__rte_unused struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) 919*1306a73bSElena Agostini { 920*1306a73bSElena Agostini struct rte_gpu *dev = NULL; 921*1306a73bSElena Agostini CUresult res; 922*1306a73bSElena Agostini CUdevice cu_dev_id; 923*1306a73bSElena Agostini CUcontext pctx; 924*1306a73bSElena Agostini char dev_name[RTE_DEV_NAME_MAX_LEN]; 925*1306a73bSElena Agostini const char *err_string; 926*1306a73bSElena Agostini int processor_count = 0; 927*1306a73bSElena Agostini struct cuda_info *private; 928*1306a73bSElena Agostini 929*1306a73bSElena Agostini if (pci_dev == NULL) { 930*1306a73bSElena Agostini rte_cuda_log(ERR, "NULL PCI device"); 931*1306a73bSElena Agostini return -EINVAL; 932*1306a73bSElena Agostini } 933*1306a73bSElena Agostini 934*1306a73bSElena Agostini rte_pci_device_name(&pci_dev->addr, dev_name, sizeof(dev_name)); 935*1306a73bSElena Agostini 936*1306a73bSElena Agostini /* Allocate memory to be used privately by drivers */ 937*1306a73bSElena Agostini dev = rte_gpu_allocate(pci_dev->device.name); 938*1306a73bSElena Agostini if (dev == NULL) 939*1306a73bSElena Agostini return -ENODEV; 940*1306a73bSElena Agostini 941*1306a73bSElena Agostini /* Initialize values only for the first CUDA driver call */ 942*1306a73bSElena Agostini if (dev->mpshared->info.dev_id == 0) { 943*1306a73bSElena Agostini mem_alloc_list_head = NULL; 944*1306a73bSElena Agostini mem_alloc_list_tail = NULL; 945*1306a73bSElena Agostini mem_alloc_list_last_elem = 0; 946*1306a73bSElena Agostini 947*1306a73bSElena Agostini /* Load libcuda.so library */ 948*1306a73bSElena Agostini if (cuda_loader()) { 949*1306a73bSElena Agostini rte_cuda_log(ERR, "CUDA Driver library not found"); 950*1306a73bSElena Agostini return -ENOTSUP; 951*1306a73bSElena Agostini } 952*1306a73bSElena Agostini 953*1306a73bSElena Agostini /* Load initial CUDA functions */ 954*1306a73bSElena Agostini if (cuda_sym_func_loader()) { 955*1306a73bSElena Agostini rte_cuda_log(ERR, "CUDA functions not found in library"); 956*1306a73bSElena Agostini return -ENOTSUP; 957*1306a73bSElena Agostini } 958*1306a73bSElena Agostini 959*1306a73bSElena Agostini /* 960*1306a73bSElena Agostini * Required to initialize the CUDA Driver. 961*1306a73bSElena Agostini * Multiple calls of cuInit() will return immediately 962*1306a73bSElena Agostini * without making any relevant change 963*1306a73bSElena Agostini */ 964*1306a73bSElena Agostini sym_cuInit(0); 965*1306a73bSElena Agostini 966*1306a73bSElena Agostini res = sym_cuDriverGetVersion(&cuda_driver_version); 967*1306a73bSElena Agostini if (res != 0) { 968*1306a73bSElena Agostini rte_cuda_log(ERR, "cuDriverGetVersion failed with %d", res); 969*1306a73bSElena Agostini return -ENOTSUP; 970*1306a73bSElena Agostini } 971*1306a73bSElena Agostini 972*1306a73bSElena Agostini if (cuda_driver_version < CUDA_DRIVER_MIN_VERSION) { 973*1306a73bSElena Agostini rte_cuda_log(ERR, "CUDA Driver version found is %d. " 974*1306a73bSElena Agostini "Minimum requirement is %d", 975*1306a73bSElena Agostini cuda_driver_version, 976*1306a73bSElena Agostini CUDA_DRIVER_MIN_VERSION); 977*1306a73bSElena Agostini return -ENOTSUP; 978*1306a73bSElena Agostini } 979*1306a73bSElena Agostini 980*1306a73bSElena Agostini if (cuda_pfn_func_loader()) { 981*1306a73bSElena Agostini rte_cuda_log(ERR, "CUDA PFN functions not found in library"); 982*1306a73bSElena Agostini return -ENOTSUP; 983*1306a73bSElena Agostini } 984*1306a73bSElena Agostini } 985*1306a73bSElena Agostini 986*1306a73bSElena Agostini /* Fill HW specific part of device structure */ 987*1306a73bSElena Agostini dev->device = &pci_dev->device; 988*1306a73bSElena Agostini dev->mpshared->info.numa_node = pci_dev->device.numa_node; 989*1306a73bSElena Agostini 990*1306a73bSElena Agostini /* Get NVIDIA GPU Device descriptor */ 991*1306a73bSElena Agostini res = pfn_cuDeviceGetByPCIBusId(&cu_dev_id, dev->device->name); 992*1306a73bSElena Agostini if (res != 0) { 993*1306a73bSElena Agostini pfn_cuGetErrorString(res, &(err_string)); 994*1306a73bSElena Agostini rte_cuda_log(ERR, "cuDeviceGetByPCIBusId name %s failed with %d: %s", 995*1306a73bSElena Agostini dev->device->name, res, err_string); 996*1306a73bSElena Agostini return -EPERM; 997*1306a73bSElena Agostini } 998*1306a73bSElena Agostini 999*1306a73bSElena Agostini res = pfn_cuDevicePrimaryCtxRetain(&pctx, cu_dev_id); 1000*1306a73bSElena Agostini if (res != 0) { 1001*1306a73bSElena Agostini pfn_cuGetErrorString(res, &(err_string)); 1002*1306a73bSElena Agostini rte_cuda_log(ERR, "cuDevicePrimaryCtxRetain name %s failed with %d: %s", 1003*1306a73bSElena Agostini dev->device->name, res, err_string); 1004*1306a73bSElena Agostini return -EPERM; 1005*1306a73bSElena Agostini } 1006*1306a73bSElena Agostini 1007*1306a73bSElena Agostini res = pfn_cuCtxGetApiVersion(pctx, &cuda_api_version); 1008*1306a73bSElena Agostini if (res != 0) { 1009*1306a73bSElena Agostini rte_cuda_log(ERR, "cuCtxGetApiVersion failed with %d", res); 1010*1306a73bSElena Agostini return -ENOTSUP; 1011*1306a73bSElena Agostini } 1012*1306a73bSElena Agostini 1013*1306a73bSElena Agostini if (cuda_api_version < CUDA_API_MIN_VERSION) { 1014*1306a73bSElena Agostini rte_cuda_log(ERR, "CUDA API version found is %d Minimum requirement is %d", 1015*1306a73bSElena Agostini cuda_api_version, CUDA_API_MIN_VERSION); 1016*1306a73bSElena Agostini return -ENOTSUP; 1017*1306a73bSElena Agostini } 1018*1306a73bSElena Agostini 1019*1306a73bSElena Agostini dev->mpshared->info.context = (uint64_t)pctx; 1020*1306a73bSElena Agostini 1021*1306a73bSElena Agostini /* 1022*1306a73bSElena Agostini * GPU Device generic info 1023*1306a73bSElena Agostini */ 1024*1306a73bSElena Agostini 1025*1306a73bSElena Agostini /* Processor count */ 1026*1306a73bSElena Agostini res = pfn_cuDeviceGetAttribute(&(processor_count), 1027*1306a73bSElena Agostini CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, 1028*1306a73bSElena Agostini cu_dev_id); 1029*1306a73bSElena Agostini if (res != 0) { 1030*1306a73bSElena Agostini pfn_cuGetErrorString(res, &(err_string)); 1031*1306a73bSElena Agostini rte_cuda_log(ERR, "cuDeviceGetAttribute failed with %s", 1032*1306a73bSElena Agostini err_string); 1033*1306a73bSElena Agostini return -EPERM; 1034*1306a73bSElena Agostini } 1035*1306a73bSElena Agostini dev->mpshared->info.processor_count = (uint32_t)processor_count; 1036*1306a73bSElena Agostini 1037*1306a73bSElena Agostini /* Total memory */ 1038*1306a73bSElena Agostini res = pfn_cuDeviceTotalMem(&dev->mpshared->info.total_memory, cu_dev_id); 1039*1306a73bSElena Agostini if (res != 0) { 1040*1306a73bSElena Agostini pfn_cuGetErrorString(res, &(err_string)); 1041*1306a73bSElena Agostini rte_cuda_log(ERR, "cuDeviceTotalMem failed with %s", 1042*1306a73bSElena Agostini err_string); 1043*1306a73bSElena Agostini return -EPERM; 1044*1306a73bSElena Agostini } 1045*1306a73bSElena Agostini 1046*1306a73bSElena Agostini /* 1047*1306a73bSElena Agostini * GPU Device private info 1048*1306a73bSElena Agostini */ 1049*1306a73bSElena Agostini dev->mpshared->dev_private = rte_zmalloc(NULL, 1050*1306a73bSElena Agostini sizeof(struct cuda_info), 1051*1306a73bSElena Agostini RTE_CACHE_LINE_SIZE); 1052*1306a73bSElena Agostini if (dev->mpshared->dev_private == NULL) { 1053*1306a73bSElena Agostini rte_cuda_log(ERR, "Failed to allocate memory for GPU process private"); 1054*1306a73bSElena Agostini return -ENOMEM; 1055*1306a73bSElena Agostini } 1056*1306a73bSElena Agostini 1057*1306a73bSElena Agostini private = (struct cuda_info *)dev->mpshared->dev_private; 1058*1306a73bSElena Agostini private->cu_dev = cu_dev_id; 1059*1306a73bSElena Agostini res = pfn_cuDeviceGetName(private->gpu_name, 1060*1306a73bSElena Agostini RTE_DEV_NAME_MAX_LEN, 1061*1306a73bSElena Agostini cu_dev_id); 1062*1306a73bSElena Agostini if (res != 0) { 1063*1306a73bSElena Agostini pfn_cuGetErrorString(res, &(err_string)); 1064*1306a73bSElena Agostini rte_cuda_log(ERR, "cuDeviceGetName failed with %s", 1065*1306a73bSElena Agostini err_string); 1066*1306a73bSElena Agostini return -EPERM; 1067*1306a73bSElena Agostini } 1068*1306a73bSElena Agostini 1069*1306a73bSElena Agostini res = pfn_cuDeviceGetAttribute(&(private->gdr_supported), 1070*1306a73bSElena Agostini CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED, 1071*1306a73bSElena Agostini cu_dev_id); 1072*1306a73bSElena Agostini if (res != 0) { 1073*1306a73bSElena Agostini pfn_cuGetErrorString(res, &(err_string)); 1074*1306a73bSElena Agostini rte_cuda_log(ERR, "cuDeviceGetAttribute failed with %s", 1075*1306a73bSElena Agostini err_string); 1076*1306a73bSElena Agostini return -EPERM; 1077*1306a73bSElena Agostini } 1078*1306a73bSElena Agostini 1079*1306a73bSElena Agostini if (private->gdr_supported == 0) 1080*1306a73bSElena Agostini rte_cuda_log(WARNING, "GPU %s doesn't support GPUDirect RDMA", 1081*1306a73bSElena Agostini pci_dev->device.name); 1082*1306a73bSElena Agostini 1083*1306a73bSElena Agostini res = pfn_cuDeviceGetAttribute(&(private->gdr_write_ordering), 1084*1306a73bSElena Agostini CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING, 1085*1306a73bSElena Agostini cu_dev_id); 1086*1306a73bSElena Agostini if (res != 0) { 1087*1306a73bSElena Agostini pfn_cuGetErrorString(res, &(err_string)); 1088*1306a73bSElena Agostini rte_cuda_log(ERR, 1089*1306a73bSElena Agostini "cuDeviceGetAttribute failed with %s", 1090*1306a73bSElena Agostini err_string); 1091*1306a73bSElena Agostini return -EPERM; 1092*1306a73bSElena Agostini } 1093*1306a73bSElena Agostini 1094*1306a73bSElena Agostini if (private->gdr_write_ordering == CU_GPU_DIRECT_RDMA_WRITES_ORDERING_NONE) { 1095*1306a73bSElena Agostini res = pfn_cuDeviceGetAttribute(&(private->gdr_flush_type), 1096*1306a73bSElena Agostini CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS, 1097*1306a73bSElena Agostini cu_dev_id); 1098*1306a73bSElena Agostini if (res != 0) { 1099*1306a73bSElena Agostini pfn_cuGetErrorString(res, &(err_string)); 1100*1306a73bSElena Agostini rte_cuda_log(ERR, "cuDeviceGetAttribute failed with %s", 1101*1306a73bSElena Agostini err_string); 1102*1306a73bSElena Agostini return -EPERM; 1103*1306a73bSElena Agostini } 1104*1306a73bSElena Agostini 1105*1306a73bSElena Agostini if (private->gdr_flush_type != CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_HOST) 1106*1306a73bSElena Agostini rte_cuda_log(ERR, "GPUDirect RDMA flush writes API is not supported"); 1107*1306a73bSElena Agostini } 1108*1306a73bSElena Agostini 1109*1306a73bSElena Agostini dev->ops.dev_info_get = cuda_dev_info_get; 1110*1306a73bSElena Agostini dev->ops.dev_close = cuda_dev_close; 1111*1306a73bSElena Agostini dev->ops.mem_alloc = cuda_mem_alloc; 1112*1306a73bSElena Agostini dev->ops.mem_free = cuda_mem_free; 1113*1306a73bSElena Agostini dev->ops.mem_register = cuda_mem_register; 1114*1306a73bSElena Agostini dev->ops.mem_unregister = cuda_mem_unregister; 1115*1306a73bSElena Agostini dev->ops.wmb = cuda_wmb; 1116*1306a73bSElena Agostini 1117*1306a73bSElena Agostini rte_gpu_complete_new(dev); 1118*1306a73bSElena Agostini 1119*1306a73bSElena Agostini rte_cuda_debug("dev id = %u name = %s", 1120*1306a73bSElena Agostini dev->mpshared->info.dev_id, private->gpu_name); 1121*1306a73bSElena Agostini 1122*1306a73bSElena Agostini return 0; 1123*1306a73bSElena Agostini } 1124*1306a73bSElena Agostini 1125*1306a73bSElena Agostini static int 1126*1306a73bSElena Agostini cuda_gpu_remove(struct rte_pci_device *pci_dev) 1127*1306a73bSElena Agostini { 1128*1306a73bSElena Agostini struct rte_gpu *dev; 1129*1306a73bSElena Agostini int ret; 1130*1306a73bSElena Agostini uint8_t gpu_id; 1131*1306a73bSElena Agostini 1132*1306a73bSElena Agostini if (pci_dev == NULL) 1133*1306a73bSElena Agostini return -EINVAL; 1134*1306a73bSElena Agostini 1135*1306a73bSElena Agostini dev = rte_gpu_get_by_name(pci_dev->device.name); 1136*1306a73bSElena Agostini if (dev == NULL) { 1137*1306a73bSElena Agostini rte_cuda_log(ERR, "Couldn't find HW dev \"%s\" to uninitialise it", 1138*1306a73bSElena Agostini pci_dev->device.name); 1139*1306a73bSElena Agostini return -ENODEV; 1140*1306a73bSElena Agostini } 1141*1306a73bSElena Agostini gpu_id = dev->mpshared->info.dev_id; 1142*1306a73bSElena Agostini 1143*1306a73bSElena Agostini /* release dev from library */ 1144*1306a73bSElena Agostini ret = rte_gpu_release(dev); 1145*1306a73bSElena Agostini if (ret) 1146*1306a73bSElena Agostini rte_cuda_log(ERR, "Device %i failed to uninit: %i", gpu_id, ret); 1147*1306a73bSElena Agostini 1148*1306a73bSElena Agostini rte_cuda_debug("Destroyed dev = %u", gpu_id); 1149*1306a73bSElena Agostini 1150*1306a73bSElena Agostini return 0; 1151*1306a73bSElena Agostini } 1152*1306a73bSElena Agostini 1153*1306a73bSElena Agostini static struct rte_pci_driver rte_cuda_driver = { 1154*1306a73bSElena Agostini .id_table = pci_id_cuda_map, 1155*1306a73bSElena Agostini .drv_flags = RTE_PCI_DRV_WC_ACTIVATE, 1156*1306a73bSElena Agostini .probe = cuda_gpu_probe, 1157*1306a73bSElena Agostini .remove = cuda_gpu_remove, 1158*1306a73bSElena Agostini }; 1159*1306a73bSElena Agostini 1160*1306a73bSElena Agostini RTE_PMD_REGISTER_PCI(gpu_cuda, rte_cuda_driver); 1161*1306a73bSElena Agostini RTE_PMD_REGISTER_PCI_TABLE(gpu_cuda, pci_id_cuda_map); 1162*1306a73bSElena Agostini RTE_PMD_REGISTER_KMOD_DEP(gpu_cuda, "* nvidia & (nv_peer_mem | nvpeer_mem)"); 1163