1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (c) 2021 NVIDIA Corporation & Affiliates 3 */ 4 5 #include <dlfcn.h> 6 7 #include <rte_malloc.h> 8 #include <rte_pci.h> 9 #include <rte_bus_pci.h> 10 #include <rte_byteorder.h> 11 #include <rte_dev.h> 12 13 #include <gpudev_driver.h> 14 15 #include <cuda.h> 16 #include <cudaTypedefs.h> 17 18 #include "common.h" 19 20 #define CUDA_DRIVER_MIN_VERSION 11040 21 #define CUDA_API_MIN_VERSION 3020 22 23 /* CUDA Driver functions loaded with dlsym() */ 24 static CUresult CUDAAPI (*sym_cuInit)(unsigned int flags); 25 static CUresult CUDAAPI (*sym_cuDriverGetVersion)(int *driverVersion); 26 static CUresult CUDAAPI (*sym_cuGetProcAddress)(const char *symbol, 27 void **pfn, int cudaVersion, uint64_t flags); 28 29 /* CUDA Driver functions loaded with cuGetProcAddress for versioning */ 30 static PFN_cuGetErrorString pfn_cuGetErrorString; 31 static PFN_cuGetErrorName pfn_cuGetErrorName; 32 static PFN_cuPointerSetAttribute pfn_cuPointerSetAttribute; 33 static PFN_cuDeviceGetAttribute pfn_cuDeviceGetAttribute; 34 static PFN_cuDeviceGetByPCIBusId pfn_cuDeviceGetByPCIBusId; 35 static PFN_cuDevicePrimaryCtxRetain pfn_cuDevicePrimaryCtxRetain; 36 static PFN_cuDevicePrimaryCtxRelease pfn_cuDevicePrimaryCtxRelease; 37 static PFN_cuDeviceTotalMem pfn_cuDeviceTotalMem; 38 static PFN_cuDeviceGetName pfn_cuDeviceGetName; 39 static PFN_cuCtxGetApiVersion pfn_cuCtxGetApiVersion; 40 static PFN_cuCtxSetCurrent pfn_cuCtxSetCurrent; 41 static PFN_cuCtxGetCurrent pfn_cuCtxGetCurrent; 42 static PFN_cuCtxGetDevice pfn_cuCtxGetDevice; 43 static PFN_cuCtxGetExecAffinity pfn_cuCtxGetExecAffinity; 44 static PFN_cuMemAlloc pfn_cuMemAlloc; 45 static PFN_cuMemFree pfn_cuMemFree; 46 static PFN_cuMemHostRegister pfn_cuMemHostRegister; 47 static PFN_cuMemHostUnregister pfn_cuMemHostUnregister; 48 static PFN_cuMemHostGetDevicePointer pfn_cuMemHostGetDevicePointer; 49 static PFN_cuFlushGPUDirectRDMAWrites pfn_cuFlushGPUDirectRDMAWrites; 50 51 static void *cudalib; 52 static unsigned int cuda_api_version; 53 static int cuda_driver_version; 54 static gdr_t gdrc_h; 55 56 /* NVIDIA GPU vendor */ 57 #define NVIDIA_GPU_VENDOR_ID (0x10de) 58 59 /* NVIDIA GPU device IDs */ 60 #define NVIDIA_GPU_A100_40GB_DEVICE_ID (0x20f1) 61 #define NVIDIA_GPU_A100_80GB_DEVICE_ID (0x20b5) 62 #define NVIDIA_GPU_A100_80GB_DPU_DEVICE_ID (0x20b8) 63 64 #define NVIDIA_GPU_A30_24GB_DEVICE_ID (0x20b7) 65 #define NVIDIA_GPU_A10_24GB_DEVICE_ID (0x2236) 66 67 #define NVIDIA_GPU_V100_32GB_SXM_DEVICE_ID (0x1db5) 68 #define NVIDIA_GPU_V100_32GB_PCIE_DEVICE_ID (0x1db6) 69 #define NVIDIA_GPU_V100_16GB_DEVICE_ID (0x1db4) 70 71 #define NVIDIA_GPU_T4_16GB_DEVICE_ID (0x1eb8) 72 73 #define CUDA_MAX_ALLOCATION_NUM 512 74 75 #define GPU_PAGE_SHIFT 16 76 #define GPU_PAGE_SIZE (1UL << GPU_PAGE_SHIFT) 77 78 RTE_LOG_REGISTER_DEFAULT(cuda_logtype, NOTICE); 79 80 /* NVIDIA GPU address map */ 81 static const struct rte_pci_id pci_id_cuda_map[] = { 82 { 83 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 84 NVIDIA_GPU_A100_40GB_DEVICE_ID) 85 }, 86 { 87 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 88 NVIDIA_GPU_A100_80GB_DEVICE_ID) 89 }, 90 { 91 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 92 NVIDIA_GPU_A100_80GB_DPU_DEVICE_ID) 93 }, 94 { 95 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 96 NVIDIA_GPU_A30_24GB_DEVICE_ID) 97 }, 98 { 99 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 100 NVIDIA_GPU_A10_24GB_DEVICE_ID) 101 }, 102 { 103 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 104 NVIDIA_GPU_V100_32GB_SXM_DEVICE_ID) 105 }, 106 { 107 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 108 NVIDIA_GPU_V100_32GB_PCIE_DEVICE_ID) 109 }, 110 { 111 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 112 NVIDIA_GPU_V100_16GB_DEVICE_ID) 113 }, 114 { 115 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 116 NVIDIA_GPU_T4_16GB_DEVICE_ID) 117 }, 118 { 119 .device_id = 0 120 } 121 }; 122 123 /* Device private info */ 124 struct cuda_info { 125 char gpu_name[RTE_DEV_NAME_MAX_LEN]; 126 CUdevice cu_dev; 127 int gdr_supported; 128 int gdr_write_ordering; 129 int gdr_flush_type; 130 }; 131 132 /* Type of memory allocated by CUDA driver */ 133 enum mem_type { 134 GPU_MEM = 0, 135 CPU_REGISTERED, 136 GPU_REGISTERED /* Not used yet */ 137 }; 138 139 /* key associated to a memory address */ 140 typedef uintptr_t cuda_ptr_key; 141 142 /* Single entry of the memory list */ 143 struct mem_entry { 144 CUdeviceptr ptr_d; 145 CUdeviceptr ptr_orig_d; 146 void *ptr_h; 147 size_t size; 148 size_t size_orig; 149 struct rte_gpu *dev; 150 CUcontext ctx; 151 cuda_ptr_key pkey; 152 enum mem_type mtype; 153 gdr_mh_t mh; 154 struct mem_entry *prev; 155 struct mem_entry *next; 156 }; 157 158 static struct mem_entry *mem_alloc_list_head; 159 static struct mem_entry *mem_alloc_list_tail; 160 static uint32_t mem_alloc_list_last_elem; 161 162 /* Load the CUDA symbols */ 163 164 static int 165 cuda_loader(void) 166 { 167 char cuda_path[1024]; 168 169 if (getenv("CUDA_PATH_L") == NULL) 170 snprintf(cuda_path, 1024, "%s", "libcuda.so"); 171 else 172 snprintf(cuda_path, 1024, "%s%s", getenv("CUDA_PATH_L"), "libcuda.so"); 173 174 cudalib = dlopen(cuda_path, RTLD_LAZY); 175 if (cudalib == NULL) { 176 rte_cuda_log(ERR, "Failed to find CUDA library in %s (CUDA_PATH_L=%s)", 177 cuda_path, getenv("CUDA_PATH_L")); 178 return -1; 179 } 180 181 return 0; 182 } 183 184 static int 185 cuda_sym_func_loader(void) 186 { 187 if (cudalib == NULL) 188 return -1; 189 190 sym_cuInit = dlsym(cudalib, "cuInit"); 191 if (sym_cuInit == NULL) { 192 rte_cuda_log(ERR, "Failed to load CUDA missing symbol cuInit"); 193 return -1; 194 } 195 196 sym_cuDriverGetVersion = dlsym(cudalib, "cuDriverGetVersion"); 197 if (sym_cuDriverGetVersion == NULL) { 198 rte_cuda_log(ERR, "Failed to load CUDA missing symbol cuDriverGetVersion"); 199 return -1; 200 } 201 202 sym_cuGetProcAddress = dlsym(cudalib, "cuGetProcAddress"); 203 if (sym_cuGetProcAddress == NULL) { 204 rte_cuda_log(ERR, "Failed to load CUDA missing symbol cuGetProcAddress"); 205 return -1; 206 } 207 208 return 0; 209 } 210 211 static int 212 cuda_pfn_func_loader(void) 213 { 214 CUresult res; 215 216 res = sym_cuGetProcAddress("cuGetErrorString", 217 (void **) (&pfn_cuGetErrorString), cuda_driver_version, 0); 218 if (res != 0) { 219 rte_cuda_log(ERR, "Retrieve pfn_cuGetErrorString failed with %d", res); 220 return -1; 221 } 222 223 res = sym_cuGetProcAddress("cuGetErrorName", 224 (void **)(&pfn_cuGetErrorName), cuda_driver_version, 0); 225 if (res != 0) { 226 rte_cuda_log(ERR, "Retrieve pfn_cuGetErrorName failed with %d", res); 227 return -1; 228 } 229 230 res = sym_cuGetProcAddress("cuPointerSetAttribute", 231 (void **)(&pfn_cuPointerSetAttribute), cuda_driver_version, 0); 232 if (res != 0) { 233 rte_cuda_log(ERR, "Retrieve pfn_cuPointerSetAttribute failed with %d", res); 234 return -1; 235 } 236 237 res = sym_cuGetProcAddress("cuDeviceGetAttribute", 238 (void **)(&pfn_cuDeviceGetAttribute), cuda_driver_version, 0); 239 if (res != 0) { 240 rte_cuda_log(ERR, "Retrieve pfn_cuDeviceGetAttribute failed with %d", res); 241 return -1; 242 } 243 244 res = sym_cuGetProcAddress("cuDeviceGetByPCIBusId", 245 (void **)(&pfn_cuDeviceGetByPCIBusId), cuda_driver_version, 0); 246 if (res != 0) { 247 rte_cuda_log(ERR, "Retrieve pfn_cuDeviceGetByPCIBusId failed with %d", res); 248 return -1; 249 } 250 251 res = sym_cuGetProcAddress("cuDeviceGetName", 252 (void **)(&pfn_cuDeviceGetName), cuda_driver_version, 0); 253 if (res != 0) { 254 rte_cuda_log(ERR, "Retrieve pfn_cuDeviceGetName failed with %d", res); 255 return -1; 256 } 257 258 res = sym_cuGetProcAddress("cuDevicePrimaryCtxRetain", 259 (void **)(&pfn_cuDevicePrimaryCtxRetain), cuda_driver_version, 0); 260 if (res != 0) { 261 rte_cuda_log(ERR, "Retrieve pfn_cuDevicePrimaryCtxRetain failed with %d", res); 262 return -1; 263 } 264 265 res = sym_cuGetProcAddress("cuDevicePrimaryCtxRelease", 266 (void **)(&pfn_cuDevicePrimaryCtxRelease), cuda_driver_version, 0); 267 if (res != 0) { 268 rte_cuda_log(ERR, "Retrieve pfn_cuDevicePrimaryCtxRelease failed with %d", res); 269 return -1; 270 } 271 272 res = sym_cuGetProcAddress("cuDeviceTotalMem", 273 (void **)(&pfn_cuDeviceTotalMem), cuda_driver_version, 0); 274 if (res != 0) { 275 rte_cuda_log(ERR, "Retrieve pfn_cuDeviceTotalMem failed with %d", res); 276 return -1; 277 } 278 279 res = sym_cuGetProcAddress("cuCtxGetApiVersion", 280 (void **)(&pfn_cuCtxGetApiVersion), cuda_driver_version, 0); 281 if (res != 0) { 282 rte_cuda_log(ERR, "Retrieve pfn_cuCtxGetApiVersion failed with %d", res); 283 return -1; 284 } 285 286 res = sym_cuGetProcAddress("cuCtxGetDevice", 287 (void **)(&pfn_cuCtxGetDevice), cuda_driver_version, 0); 288 if (res != 0) { 289 rte_cuda_log(ERR, "Retrieve pfn_cuCtxGetDevice failed with %d", res); 290 return -1; 291 } 292 293 res = sym_cuGetProcAddress("cuCtxSetCurrent", 294 (void **)(&pfn_cuCtxSetCurrent), cuda_driver_version, 0); 295 if (res != 0) { 296 rte_cuda_log(ERR, "Retrieve pfn_cuCtxSetCurrent failed with %d", res); 297 return -1; 298 } 299 300 res = sym_cuGetProcAddress("cuCtxGetCurrent", 301 (void **)(&pfn_cuCtxGetCurrent), cuda_driver_version, 0); 302 if (res != 0) { 303 rte_cuda_log(ERR, "Retrieve pfn_cuCtxGetCurrent failed with %d", res); 304 return -1; 305 } 306 307 res = sym_cuGetProcAddress("cuCtxGetExecAffinity", 308 (void **)(&pfn_cuCtxGetExecAffinity), cuda_driver_version, 0); 309 if (res != 0) { 310 rte_cuda_log(ERR, "Retrieve pfn_cuCtxGetExecAffinity failed with %d", res); 311 return -1; 312 } 313 314 res = sym_cuGetProcAddress("cuMemAlloc", 315 (void **)(&pfn_cuMemAlloc), cuda_driver_version, 0); 316 if (res != 0) { 317 rte_cuda_log(ERR, "Retrieve pfn_cuMemAlloc failed with %d", res); 318 return -1; 319 } 320 321 res = sym_cuGetProcAddress("cuMemFree", 322 (void **)(&pfn_cuMemFree), cuda_driver_version, 0); 323 if (res != 0) { 324 rte_cuda_log(ERR, "Retrieve pfn_cuMemFree failed with %d", res); 325 return -1; 326 } 327 328 res = sym_cuGetProcAddress("cuMemHostRegister", 329 (void **)(&pfn_cuMemHostRegister), cuda_driver_version, 0); 330 if (res != 0) { 331 rte_cuda_log(ERR, "Retrieve pfn_cuMemHostRegister failed with %d", res); 332 return -1; 333 } 334 335 res = sym_cuGetProcAddress("cuMemHostUnregister", 336 (void **)(&pfn_cuMemHostUnregister), cuda_driver_version, 0); 337 if (res != 0) { 338 rte_cuda_log(ERR, "Retrieve pfn_cuMemHostUnregister failed with %d", res); 339 return -1; 340 } 341 342 res = sym_cuGetProcAddress("cuMemHostGetDevicePointer", 343 (void **)(&pfn_cuMemHostGetDevicePointer), cuda_driver_version, 0); 344 if (res != 0) { 345 rte_cuda_log(ERR, "Retrieve pfn_cuMemHostGetDevicePointer failed with %d", res); 346 return -1; 347 } 348 349 res = sym_cuGetProcAddress("cuFlushGPUDirectRDMAWrites", 350 (void **)(&pfn_cuFlushGPUDirectRDMAWrites), cuda_driver_version, 0); 351 if (res != 0) { 352 rte_cuda_log(ERR, "Retrieve cuFlushGPUDirectRDMAWrites failed with %d", res); 353 return -1; 354 } 355 356 return 0; 357 } 358 359 /* Generate a key from a memory pointer */ 360 static cuda_ptr_key 361 get_hash_from_ptr(void *ptr) 362 { 363 return (uintptr_t)ptr; 364 } 365 366 static uint32_t 367 mem_list_count_item(void) 368 { 369 return mem_alloc_list_last_elem; 370 } 371 372 /* Initiate list of memory allocations if not done yet */ 373 static struct mem_entry * 374 mem_list_add_item(void) 375 { 376 /* Initiate list of memory allocations if not done yet */ 377 if (mem_alloc_list_head == NULL) { 378 mem_alloc_list_head = rte_zmalloc(NULL, 379 sizeof(struct mem_entry), 380 RTE_CACHE_LINE_SIZE); 381 if (mem_alloc_list_head == NULL) { 382 rte_cuda_log(ERR, "Failed to allocate memory for memory list"); 383 return NULL; 384 } 385 386 mem_alloc_list_head->next = NULL; 387 mem_alloc_list_head->prev = NULL; 388 mem_alloc_list_tail = mem_alloc_list_head; 389 } else { 390 struct mem_entry *mem_alloc_list_cur = rte_zmalloc(NULL, 391 sizeof(struct mem_entry), 392 RTE_CACHE_LINE_SIZE); 393 394 if (mem_alloc_list_cur == NULL) { 395 rte_cuda_log(ERR, "Failed to allocate memory for memory list"); 396 return NULL; 397 } 398 399 mem_alloc_list_tail->next = mem_alloc_list_cur; 400 mem_alloc_list_cur->prev = mem_alloc_list_tail; 401 mem_alloc_list_tail = mem_alloc_list_tail->next; 402 mem_alloc_list_tail->next = NULL; 403 } 404 405 mem_alloc_list_last_elem++; 406 407 return mem_alloc_list_tail; 408 } 409 410 static struct mem_entry * 411 mem_list_find_item(cuda_ptr_key pk) 412 { 413 struct mem_entry *mem_alloc_list_cur = NULL; 414 415 if (mem_alloc_list_head == NULL) { 416 rte_cuda_log(ERR, "Memory list doesn't exist"); 417 return NULL; 418 } 419 420 if (mem_list_count_item() == 0) { 421 rte_cuda_log(ERR, "No items in memory list"); 422 return NULL; 423 } 424 425 mem_alloc_list_cur = mem_alloc_list_head; 426 427 while (mem_alloc_list_cur != NULL) { 428 if (mem_alloc_list_cur->pkey == pk) 429 return mem_alloc_list_cur; 430 mem_alloc_list_cur = mem_alloc_list_cur->next; 431 } 432 433 return mem_alloc_list_cur; 434 } 435 436 static int 437 mem_list_del_item(cuda_ptr_key pk) 438 { 439 struct mem_entry *mem_alloc_list_cur = NULL; 440 441 mem_alloc_list_cur = mem_list_find_item(pk); 442 if (mem_alloc_list_cur == NULL) 443 return -EINVAL; 444 445 /* if key is in head */ 446 if (mem_alloc_list_cur->prev == NULL) { 447 mem_alloc_list_head = mem_alloc_list_cur->next; 448 if (mem_alloc_list_head != NULL) 449 mem_alloc_list_head->prev = NULL; 450 } else { 451 mem_alloc_list_cur->prev->next = mem_alloc_list_cur->next; 452 if (mem_alloc_list_cur->next != NULL) 453 mem_alloc_list_cur->next->prev = mem_alloc_list_cur->prev; 454 } 455 456 rte_free(mem_alloc_list_cur); 457 458 mem_alloc_list_last_elem--; 459 460 return 0; 461 } 462 463 static int 464 cuda_dev_info_get(struct rte_gpu *dev, struct rte_gpu_info *info) 465 { 466 int ret = 0; 467 CUresult res; 468 struct rte_gpu_info parent_info; 469 CUexecAffinityParam affinityPrm; 470 const char *err_string; 471 struct cuda_info *private; 472 CUcontext current_ctx; 473 CUcontext input_ctx; 474 475 if (dev == NULL) { 476 rte_errno = ENODEV; 477 return -rte_errno; 478 } 479 480 /* Child initialization time probably called by rte_gpu_add_child() */ 481 if (dev->mpshared->info.parent != RTE_GPU_ID_NONE && 482 dev->mpshared->dev_private == NULL) { 483 /* Store current ctx */ 484 res = pfn_cuCtxGetCurrent(¤t_ctx); 485 if (res != 0) { 486 pfn_cuGetErrorString(res, &(err_string)); 487 rte_cuda_log(ERR, "cuCtxGetCurrent failed with %s", 488 err_string); 489 rte_errno = EPERM; 490 return -rte_errno; 491 } 492 493 /* Set child ctx as current ctx */ 494 input_ctx = (CUcontext)((uintptr_t)dev->mpshared->info.context); 495 res = pfn_cuCtxSetCurrent(input_ctx); 496 if (res != 0) { 497 pfn_cuGetErrorString(res, &(err_string)); 498 rte_cuda_log(ERR, "cuCtxSetCurrent input failed with %s", 499 err_string); 500 rte_errno = EPERM; 501 return -rte_errno; 502 } 503 504 /* 505 * Ctx capacity info 506 */ 507 508 /* MPS compatible */ 509 res = pfn_cuCtxGetExecAffinity(&affinityPrm, 510 CU_EXEC_AFFINITY_TYPE_SM_COUNT); 511 if (res != 0) { 512 pfn_cuGetErrorString(res, &(err_string)); 513 rte_cuda_log(ERR, "cuCtxGetExecAffinity failed with %s", 514 err_string); 515 } 516 dev->mpshared->info.processor_count = 517 (uint32_t)affinityPrm.param.smCount.val; 518 519 ret = rte_gpu_info_get(dev->mpshared->info.parent, &parent_info); 520 if (ret) { 521 rte_errno = ENODEV; 522 return -rte_errno; 523 } 524 dev->mpshared->info.total_memory = parent_info.total_memory; 525 526 /* 527 * GPU Device private info 528 */ 529 dev->mpshared->dev_private = rte_zmalloc(NULL, 530 sizeof(struct cuda_info), 531 RTE_CACHE_LINE_SIZE); 532 if (dev->mpshared->dev_private == NULL) { 533 rte_cuda_log(ERR, "Failed to allocate memory for GPU process private"); 534 rte_errno = EPERM; 535 return -rte_errno; 536 } 537 538 private = (struct cuda_info *)dev->mpshared->dev_private; 539 540 res = pfn_cuCtxGetDevice(&(private->cu_dev)); 541 if (res != 0) { 542 pfn_cuGetErrorString(res, &(err_string)); 543 rte_cuda_log(ERR, "cuCtxGetDevice failed with %s", 544 err_string); 545 rte_errno = EPERM; 546 return -rte_errno; 547 } 548 549 res = pfn_cuDeviceGetName(private->gpu_name, 550 RTE_DEV_NAME_MAX_LEN, private->cu_dev); 551 if (res != 0) { 552 pfn_cuGetErrorString(res, &(err_string)); 553 rte_cuda_log(ERR, "cuDeviceGetName failed with %s", 554 err_string); 555 rte_errno = EPERM; 556 return -rte_errno; 557 } 558 559 /* Restore original ctx as current ctx */ 560 res = pfn_cuCtxSetCurrent(current_ctx); 561 if (res != 0) { 562 pfn_cuGetErrorString(res, &(err_string)); 563 rte_cuda_log(ERR, "cuCtxSetCurrent current failed with %s", 564 err_string); 565 rte_errno = EPERM; 566 return -rte_errno; 567 } 568 } 569 570 *info = dev->mpshared->info; 571 572 return 0; 573 } 574 575 /* 576 * GPU Memory 577 */ 578 579 static int 580 cuda_mem_alloc(struct rte_gpu *dev, size_t size, unsigned int align, void **ptr) 581 { 582 CUresult res; 583 const char *err_string; 584 CUcontext current_ctx; 585 CUcontext input_ctx; 586 unsigned int flag = 1; 587 588 if (dev == NULL) 589 return -ENODEV; 590 591 /* Store current ctx */ 592 res = pfn_cuCtxGetCurrent(¤t_ctx); 593 if (res != 0) { 594 pfn_cuGetErrorString(res, &(err_string)); 595 rte_cuda_log(ERR, "cuCtxGetCurrent failed with %s", 596 err_string); 597 rte_errno = EPERM; 598 return -rte_errno; 599 } 600 601 /* Set child ctx as current ctx */ 602 input_ctx = (CUcontext)((uintptr_t)dev->mpshared->info.context); 603 res = pfn_cuCtxSetCurrent(input_ctx); 604 if (res != 0) { 605 pfn_cuGetErrorString(res, &(err_string)); 606 rte_cuda_log(ERR, "cuCtxSetCurrent input failed with %s", 607 err_string); 608 rte_errno = EPERM; 609 return -rte_errno; 610 } 611 612 /* Get next memory list item */ 613 mem_alloc_list_tail = mem_list_add_item(); 614 if (mem_alloc_list_tail == NULL) { 615 rte_errno = EPERM; 616 return -rte_errno; 617 } 618 619 /* Allocate memory */ 620 mem_alloc_list_tail->size = size; 621 mem_alloc_list_tail->size_orig = size + align; 622 623 res = pfn_cuMemAlloc(&(mem_alloc_list_tail->ptr_orig_d), 624 mem_alloc_list_tail->size_orig); 625 if (res != 0) { 626 pfn_cuGetErrorString(res, &(err_string)); 627 rte_cuda_log(ERR, "cuCtxSetCurrent current failed with %s", 628 err_string); 629 rte_errno = EPERM; 630 return -rte_errno; 631 } 632 633 /* Align memory address */ 634 mem_alloc_list_tail->ptr_d = mem_alloc_list_tail->ptr_orig_d; 635 if (align && ((uintptr_t)mem_alloc_list_tail->ptr_d) % align) 636 mem_alloc_list_tail->ptr_d += (align - 637 (((uintptr_t)mem_alloc_list_tail->ptr_d) % align)); 638 639 /* GPUDirect RDMA attribute required */ 640 res = pfn_cuPointerSetAttribute(&flag, 641 CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, 642 mem_alloc_list_tail->ptr_d); 643 if (res != 0) { 644 rte_cuda_log(ERR, "Could not set SYNC MEMOP attribute for " 645 "GPU memory at %"PRIu32", err %d", 646 (uint32_t)mem_alloc_list_tail->ptr_d, res); 647 rte_errno = EPERM; 648 return -rte_errno; 649 } 650 651 mem_alloc_list_tail->pkey = get_hash_from_ptr((void *)mem_alloc_list_tail->ptr_d); 652 mem_alloc_list_tail->ptr_h = NULL; 653 mem_alloc_list_tail->dev = dev; 654 mem_alloc_list_tail->ctx = (CUcontext)((uintptr_t)dev->mpshared->info.context); 655 mem_alloc_list_tail->mtype = GPU_MEM; 656 657 /* Restore original ctx as current ctx */ 658 res = pfn_cuCtxSetCurrent(current_ctx); 659 if (res != 0) { 660 pfn_cuGetErrorString(res, &(err_string)); 661 rte_cuda_log(ERR, "cuCtxSetCurrent current failed with %s", 662 err_string); 663 rte_errno = EPERM; 664 return -rte_errno; 665 } 666 667 *ptr = (void *)mem_alloc_list_tail->ptr_d; 668 669 return 0; 670 } 671 672 static int 673 cuda_mem_register(struct rte_gpu *dev, size_t size, void *ptr) 674 { 675 CUresult res; 676 const char *err_string; 677 CUcontext current_ctx; 678 CUcontext input_ctx; 679 unsigned int flag = 1; 680 int use_ptr_h = 0; 681 682 if (dev == NULL) 683 return -ENODEV; 684 685 /* Store current ctx */ 686 res = pfn_cuCtxGetCurrent(¤t_ctx); 687 if (res != 0) { 688 pfn_cuGetErrorString(res, &(err_string)); 689 rte_cuda_log(ERR, "cuCtxGetCurrent failed with %s", 690 err_string); 691 rte_errno = EPERM; 692 return -rte_errno; 693 } 694 695 /* Set child ctx as current ctx */ 696 input_ctx = (CUcontext)((uintptr_t)dev->mpshared->info.context); 697 res = pfn_cuCtxSetCurrent(input_ctx); 698 if (res != 0) { 699 pfn_cuGetErrorString(res, &(err_string)); 700 rte_cuda_log(ERR, "cuCtxSetCurrent input failed with %s", 701 err_string); 702 rte_errno = EPERM; 703 return -rte_errno; 704 } 705 706 /* Get next memory list item */ 707 mem_alloc_list_tail = mem_list_add_item(); 708 if (mem_alloc_list_tail == NULL) { 709 rte_errno = EPERM; 710 return -rte_errno; 711 } 712 713 /* Allocate memory */ 714 mem_alloc_list_tail->size = size; 715 mem_alloc_list_tail->ptr_h = ptr; 716 717 res = pfn_cuMemHostRegister(mem_alloc_list_tail->ptr_h, 718 mem_alloc_list_tail->size, 719 CU_MEMHOSTREGISTER_PORTABLE | 720 CU_MEMHOSTREGISTER_DEVICEMAP); 721 if (res != 0) { 722 pfn_cuGetErrorString(res, &(err_string)); 723 rte_cuda_log(ERR, "cuMemHostRegister failed with %s ptr %p size %zd", 724 err_string, 725 mem_alloc_list_tail->ptr_h, 726 mem_alloc_list_tail->size); 727 rte_errno = EPERM; 728 return -rte_errno; 729 } 730 731 res = pfn_cuDeviceGetAttribute(&(use_ptr_h), 732 CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM, 733 ((struct cuda_info *)(dev->mpshared->dev_private))->cu_dev); 734 if (res != 0) { 735 pfn_cuGetErrorString(res, &(err_string)); 736 rte_cuda_log(ERR, "cuDeviceGetAttribute failed with %s", 737 err_string); 738 rte_errno = EPERM; 739 return -rte_errno; 740 } 741 742 if (use_ptr_h == 0) { 743 res = pfn_cuMemHostGetDevicePointer(&(mem_alloc_list_tail->ptr_d), 744 mem_alloc_list_tail->ptr_h, 0); 745 if (res != 0) { 746 pfn_cuGetErrorString(res, &(err_string)); 747 rte_cuda_log(ERR, "cuMemHostGetDevicePointer failed with %s", 748 err_string); 749 rte_errno = EPERM; 750 return -rte_errno; 751 } 752 753 if ((uintptr_t)mem_alloc_list_tail->ptr_d != 754 (uintptr_t)mem_alloc_list_tail->ptr_h) { 755 rte_cuda_log(ERR, "Host input pointer is different wrt GPU registered pointer"); 756 rte_errno = ENOTSUP; 757 return -rte_errno; 758 } 759 } else { 760 mem_alloc_list_tail->ptr_d = (CUdeviceptr)mem_alloc_list_tail->ptr_h; 761 } 762 763 /* GPUDirect RDMA attribute required */ 764 res = pfn_cuPointerSetAttribute(&flag, 765 CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, 766 mem_alloc_list_tail->ptr_d); 767 if (res != 0) { 768 rte_cuda_log(ERR, "Could not set SYNC MEMOP attribute for GPU memory at %"PRIu32 769 ", err %d", (uint32_t)mem_alloc_list_tail->ptr_d, res); 770 rte_errno = EPERM; 771 return -rte_errno; 772 } 773 774 mem_alloc_list_tail->pkey = get_hash_from_ptr((void *)mem_alloc_list_tail->ptr_h); 775 mem_alloc_list_tail->size = size; 776 mem_alloc_list_tail->dev = dev; 777 mem_alloc_list_tail->ctx = (CUcontext)((uintptr_t)dev->mpshared->info.context); 778 mem_alloc_list_tail->mtype = CPU_REGISTERED; 779 mem_alloc_list_tail->ptr_orig_d = mem_alloc_list_tail->ptr_d; 780 781 /* Restore original ctx as current ctx */ 782 res = pfn_cuCtxSetCurrent(current_ctx); 783 if (res != 0) { 784 pfn_cuGetErrorString(res, &(err_string)); 785 rte_cuda_log(ERR, "cuCtxSetCurrent current failed with %s", 786 err_string); 787 rte_errno = EPERM; 788 return -rte_errno; 789 } 790 791 return 0; 792 } 793 794 static int 795 cuda_mem_cpu_map(struct rte_gpu *dev, __rte_unused size_t size, void *ptr_in, void **ptr_out) 796 { 797 struct mem_entry *mem_item; 798 cuda_ptr_key hk; 799 800 if (dev == NULL) 801 return -ENODEV; 802 803 hk = get_hash_from_ptr((void *)ptr_in); 804 805 mem_item = mem_list_find_item(hk); 806 if (mem_item == NULL) { 807 rte_cuda_log(ERR, "Memory address 0x%p not found in driver memory.", ptr_in); 808 rte_errno = EPERM; 809 return -rte_errno; 810 } 811 812 if (mem_item->mtype != GPU_MEM) { 813 rte_cuda_log(ERR, "Memory address 0x%p is not GPU memory type.", ptr_in); 814 rte_errno = EPERM; 815 return -rte_errno; 816 } 817 818 if (mem_item->size != size) 819 rte_cuda_log(WARNING, 820 "Can't expose memory area with size (%zd) different from original size (%zd).", 821 size, mem_item->size); 822 823 if (gdrcopy_pin(&gdrc_h, &(mem_item->mh), (uint64_t)mem_item->ptr_d, 824 mem_item->size, &(mem_item->ptr_h))) { 825 rte_cuda_log(ERR, "Error exposing GPU memory address 0x%p.", ptr_in); 826 rte_errno = EPERM; 827 return -rte_errno; 828 } 829 830 *ptr_out = mem_item->ptr_h; 831 832 return 0; 833 } 834 835 static int 836 cuda_mem_free(struct rte_gpu *dev, void *ptr) 837 { 838 CUresult res; 839 struct mem_entry *mem_item; 840 const char *err_string; 841 cuda_ptr_key hk; 842 843 if (dev == NULL) 844 return -ENODEV; 845 846 hk = get_hash_from_ptr((void *)ptr); 847 848 mem_item = mem_list_find_item(hk); 849 if (mem_item == NULL) { 850 rte_cuda_log(ERR, "Memory address 0x%p not found in driver memory", ptr); 851 rte_errno = EPERM; 852 return -rte_errno; 853 } 854 855 if (mem_item->mtype == GPU_MEM) { 856 res = pfn_cuMemFree(mem_item->ptr_orig_d); 857 if (res != 0) { 858 pfn_cuGetErrorString(res, &(err_string)); 859 rte_cuda_log(ERR, "cuMemFree current failed with %s", 860 err_string); 861 rte_errno = EPERM; 862 return -rte_errno; 863 } 864 865 return mem_list_del_item(hk); 866 } 867 868 rte_cuda_log(ERR, "Memory type %d not supported", mem_item->mtype); 869 870 return -EPERM; 871 } 872 873 static int 874 cuda_mem_unregister(struct rte_gpu *dev, void *ptr) 875 { 876 CUresult res; 877 struct mem_entry *mem_item; 878 const char *err_string; 879 cuda_ptr_key hk; 880 881 if (dev == NULL) 882 return -ENODEV; 883 884 hk = get_hash_from_ptr((void *)ptr); 885 886 mem_item = mem_list_find_item(hk); 887 if (mem_item == NULL) { 888 rte_cuda_log(ERR, "Memory address 0x%p not found in driver memory", ptr); 889 rte_errno = EPERM; 890 return -rte_errno; 891 } 892 893 if (mem_item->mtype == CPU_REGISTERED) { 894 res = pfn_cuMemHostUnregister(ptr); 895 if (res != 0) { 896 pfn_cuGetErrorString(res, &(err_string)); 897 rte_cuda_log(ERR, "cuMemHostUnregister current failed with %s", 898 err_string); 899 rte_errno = EPERM; 900 return -rte_errno; 901 } 902 903 return mem_list_del_item(hk); 904 } 905 906 rte_cuda_log(ERR, "Memory type %d not supported", mem_item->mtype); 907 908 rte_errno = EPERM; 909 return -rte_errno; 910 } 911 912 static int 913 cuda_mem_cpu_unmap(struct rte_gpu *dev, void *ptr_in) 914 { 915 struct mem_entry *mem_item; 916 cuda_ptr_key hk; 917 918 if (dev == NULL) 919 return -ENODEV; 920 921 hk = get_hash_from_ptr((void *)ptr_in); 922 923 mem_item = mem_list_find_item(hk); 924 if (mem_item == NULL) { 925 rte_cuda_log(ERR, "Memory address 0x%p not found in driver memory.", ptr_in); 926 rte_errno = EPERM; 927 return -rte_errno; 928 } 929 930 if (gdrcopy_unpin(gdrc_h, mem_item->mh, (void *)mem_item->ptr_d, 931 mem_item->size)) { 932 rte_cuda_log(ERR, "Error unexposing GPU memory address 0x%p.", ptr_in); 933 rte_errno = EPERM; 934 return -rte_errno; 935 } 936 937 return 0; 938 } 939 940 static int 941 cuda_dev_close(struct rte_gpu *dev) 942 { 943 if (dev == NULL) 944 return -EINVAL; 945 946 rte_free(dev->mpshared->dev_private); 947 948 return 0; 949 } 950 951 static int 952 cuda_wmb(struct rte_gpu *dev) 953 { 954 CUresult res; 955 const char *err_string; 956 CUcontext current_ctx; 957 CUcontext input_ctx; 958 struct cuda_info *private; 959 960 if (dev == NULL) { 961 rte_errno = ENODEV; 962 return -rte_errno; 963 } 964 965 private = (struct cuda_info *)dev->mpshared->dev_private; 966 967 if (private->gdr_write_ordering != CU_GPU_DIRECT_RDMA_WRITES_ORDERING_NONE) { 968 /* 969 * No need to explicitly force the write ordering because 970 * the device natively supports it 971 */ 972 return 0; 973 } 974 975 if (private->gdr_flush_type != CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_HOST) { 976 /* 977 * Can't flush GDR writes with cuFlushGPUDirectRDMAWrites CUDA function. 978 * Application needs to use alternative methods. 979 */ 980 rte_cuda_log(WARNING, "Can't flush GDR writes with cuFlushGPUDirectRDMAWrites CUDA function." 981 "Application needs to use alternative methods."); 982 983 rte_errno = ENOTSUP; 984 return -rte_errno; 985 } 986 987 /* Store current ctx */ 988 res = pfn_cuCtxGetCurrent(¤t_ctx); 989 if (res != 0) { 990 pfn_cuGetErrorString(res, &(err_string)); 991 rte_cuda_log(ERR, "cuCtxGetCurrent failed with %s", 992 err_string); 993 rte_errno = EPERM; 994 return -rte_errno; 995 } 996 997 /* Set child ctx as current ctx */ 998 input_ctx = (CUcontext)((uintptr_t)dev->mpshared->info.context); 999 res = pfn_cuCtxSetCurrent(input_ctx); 1000 if (res != 0) { 1001 pfn_cuGetErrorString(res, &(err_string)); 1002 rte_cuda_log(ERR, "cuCtxSetCurrent input failed with %s", 1003 err_string); 1004 rte_errno = EPERM; 1005 return -rte_errno; 1006 } 1007 1008 res = pfn_cuFlushGPUDirectRDMAWrites(CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TARGET_CURRENT_CTX, 1009 CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_ALL_DEVICES); 1010 if (res != 0) { 1011 pfn_cuGetErrorString(res, &(err_string)); 1012 rte_cuda_log(ERR, "cuFlushGPUDirectRDMAWrites current failed with %s", 1013 err_string); 1014 rte_errno = EPERM; 1015 return -rte_errno; 1016 } 1017 1018 /* Restore original ctx as current ctx */ 1019 res = pfn_cuCtxSetCurrent(current_ctx); 1020 if (res != 0) { 1021 pfn_cuGetErrorString(res, &(err_string)); 1022 rte_cuda_log(ERR, "cuCtxSetCurrent current failed with %s", 1023 err_string); 1024 rte_errno = EPERM; 1025 return -rte_errno; 1026 } 1027 1028 return 0; 1029 } 1030 1031 static int 1032 cuda_gpu_probe(__rte_unused struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) 1033 { 1034 struct rte_gpu *dev = NULL; 1035 CUresult res; 1036 CUdevice cu_dev_id; 1037 CUcontext pctx; 1038 char dev_name[RTE_DEV_NAME_MAX_LEN]; 1039 const char *err_string; 1040 int processor_count = 0; 1041 struct cuda_info *private; 1042 1043 if (pci_dev == NULL) { 1044 rte_cuda_log(ERR, "NULL PCI device"); 1045 rte_errno = ENODEV; 1046 return -rte_errno; 1047 } 1048 1049 rte_pci_device_name(&pci_dev->addr, dev_name, sizeof(dev_name)); 1050 1051 /* Allocate memory to be used privately by drivers */ 1052 dev = rte_gpu_allocate(pci_dev->device.name); 1053 if (dev == NULL) { 1054 rte_errno = ENODEV; 1055 return -rte_errno; 1056 } 1057 1058 /* Initialize values only for the first CUDA driver call */ 1059 if (dev->mpshared->info.dev_id == 0) { 1060 mem_alloc_list_head = NULL; 1061 mem_alloc_list_tail = NULL; 1062 mem_alloc_list_last_elem = 0; 1063 1064 /* Load libcuda.so library */ 1065 if (cuda_loader()) { 1066 rte_cuda_log(ERR, "CUDA Driver library not found"); 1067 rte_errno = ENOTSUP; 1068 return -rte_errno; 1069 } 1070 1071 /* Load initial CUDA functions */ 1072 if (cuda_sym_func_loader()) { 1073 rte_cuda_log(ERR, "CUDA functions not found in library"); 1074 rte_errno = ENOTSUP; 1075 return -rte_errno; 1076 } 1077 1078 /* 1079 * Required to initialize the CUDA Driver. 1080 * Multiple calls of cuInit() will return immediately 1081 * without making any relevant change 1082 */ 1083 sym_cuInit(0); 1084 1085 res = sym_cuDriverGetVersion(&cuda_driver_version); 1086 if (res != 0) { 1087 rte_cuda_log(ERR, "cuDriverGetVersion failed with %d", res); 1088 rte_errno = ENOTSUP; 1089 return -rte_errno; 1090 } 1091 1092 if (cuda_driver_version < CUDA_DRIVER_MIN_VERSION) { 1093 rte_cuda_log(ERR, "CUDA Driver version found is %d. " 1094 "Minimum requirement is %d", 1095 cuda_driver_version, 1096 CUDA_DRIVER_MIN_VERSION); 1097 rte_errno = ENOTSUP; 1098 return -rte_errno; 1099 } 1100 1101 if (cuda_pfn_func_loader()) { 1102 rte_cuda_log(ERR, "CUDA PFN functions not found in library"); 1103 rte_errno = ENOTSUP; 1104 return -rte_errno; 1105 } 1106 1107 gdrc_h = NULL; 1108 } 1109 1110 /* Fill HW specific part of device structure */ 1111 dev->device = &pci_dev->device; 1112 dev->mpshared->info.numa_node = pci_dev->device.numa_node; 1113 1114 /* Get NVIDIA GPU Device descriptor */ 1115 res = pfn_cuDeviceGetByPCIBusId(&cu_dev_id, dev->device->name); 1116 if (res != 0) { 1117 pfn_cuGetErrorString(res, &(err_string)); 1118 rte_cuda_log(ERR, "cuDeviceGetByPCIBusId name %s failed with %d: %s", 1119 dev->device->name, res, err_string); 1120 rte_errno = EPERM; 1121 return -rte_errno; 1122 } 1123 1124 res = pfn_cuDevicePrimaryCtxRetain(&pctx, cu_dev_id); 1125 if (res != 0) { 1126 pfn_cuGetErrorString(res, &(err_string)); 1127 rte_cuda_log(ERR, "cuDevicePrimaryCtxRetain name %s failed with %d: %s", 1128 dev->device->name, res, err_string); 1129 rte_errno = EPERM; 1130 return -rte_errno; 1131 } 1132 1133 res = pfn_cuCtxGetApiVersion(pctx, &cuda_api_version); 1134 if (res != 0) { 1135 rte_cuda_log(ERR, "cuCtxGetApiVersion failed with %d", res); 1136 rte_errno = ENOTSUP; 1137 return -rte_errno; 1138 } 1139 1140 if (cuda_api_version < CUDA_API_MIN_VERSION) { 1141 rte_cuda_log(ERR, "CUDA API version found is %d Minimum requirement is %d", 1142 cuda_api_version, CUDA_API_MIN_VERSION); 1143 rte_errno = ENOTSUP; 1144 return -rte_errno; 1145 } 1146 1147 dev->mpshared->info.context = (uint64_t)pctx; 1148 1149 /* 1150 * GPU Device generic info 1151 */ 1152 1153 /* Processor count */ 1154 res = pfn_cuDeviceGetAttribute(&(processor_count), 1155 CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, 1156 cu_dev_id); 1157 if (res != 0) { 1158 pfn_cuGetErrorString(res, &(err_string)); 1159 rte_cuda_log(ERR, "cuDeviceGetAttribute failed with %s", 1160 err_string); 1161 rte_errno = EPERM; 1162 return -rte_errno; 1163 } 1164 dev->mpshared->info.processor_count = (uint32_t)processor_count; 1165 1166 /* Total memory */ 1167 res = pfn_cuDeviceTotalMem(&dev->mpshared->info.total_memory, cu_dev_id); 1168 if (res != 0) { 1169 pfn_cuGetErrorString(res, &(err_string)); 1170 rte_cuda_log(ERR, "cuDeviceTotalMem failed with %s", 1171 err_string); 1172 rte_errno = EPERM; 1173 return -rte_errno; 1174 } 1175 1176 /* 1177 * GPU Device private info 1178 */ 1179 dev->mpshared->dev_private = rte_zmalloc(NULL, 1180 sizeof(struct cuda_info), 1181 RTE_CACHE_LINE_SIZE); 1182 if (dev->mpshared->dev_private == NULL) { 1183 rte_cuda_log(ERR, "Failed to allocate memory for GPU process private"); 1184 rte_errno = EPERM; 1185 return -rte_errno; 1186 } 1187 1188 private = (struct cuda_info *)dev->mpshared->dev_private; 1189 private->cu_dev = cu_dev_id; 1190 res = pfn_cuDeviceGetName(private->gpu_name, 1191 RTE_DEV_NAME_MAX_LEN, 1192 cu_dev_id); 1193 if (res != 0) { 1194 pfn_cuGetErrorString(res, &(err_string)); 1195 rte_cuda_log(ERR, "cuDeviceGetName failed with %s", 1196 err_string); 1197 rte_errno = EPERM; 1198 return -rte_errno; 1199 } 1200 1201 res = pfn_cuDeviceGetAttribute(&(private->gdr_supported), 1202 CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED, 1203 cu_dev_id); 1204 if (res != 0) { 1205 pfn_cuGetErrorString(res, &(err_string)); 1206 rte_cuda_log(ERR, "cuDeviceGetAttribute failed with %s", 1207 err_string); 1208 rte_errno = EPERM; 1209 return -rte_errno; 1210 } 1211 1212 if (private->gdr_supported == 0) 1213 rte_cuda_log(WARNING, "GPU %s doesn't support GPUDirect RDMA", 1214 pci_dev->device.name); 1215 1216 res = pfn_cuDeviceGetAttribute(&(private->gdr_write_ordering), 1217 CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING, 1218 cu_dev_id); 1219 if (res != 0) { 1220 pfn_cuGetErrorString(res, &(err_string)); 1221 rte_cuda_log(ERR, 1222 "cuDeviceGetAttribute failed with %s", 1223 err_string); 1224 rte_errno = EPERM; 1225 return -rte_errno; 1226 } 1227 1228 if (private->gdr_write_ordering == CU_GPU_DIRECT_RDMA_WRITES_ORDERING_NONE) { 1229 res = pfn_cuDeviceGetAttribute(&(private->gdr_flush_type), 1230 CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS, 1231 cu_dev_id); 1232 if (res != 0) { 1233 pfn_cuGetErrorString(res, &(err_string)); 1234 rte_cuda_log(ERR, "cuDeviceGetAttribute failed with %s", 1235 err_string); 1236 rte_errno = EPERM; 1237 return -rte_errno; 1238 } 1239 1240 if (private->gdr_flush_type != CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_HOST) 1241 rte_cuda_log(ERR, "GPUDirect RDMA flush writes API is not supported"); 1242 } 1243 1244 dev->ops.dev_info_get = cuda_dev_info_get; 1245 dev->ops.dev_close = cuda_dev_close; 1246 dev->ops.mem_alloc = cuda_mem_alloc; 1247 dev->ops.mem_free = cuda_mem_free; 1248 dev->ops.mem_register = cuda_mem_register; 1249 dev->ops.mem_unregister = cuda_mem_unregister; 1250 dev->ops.mem_cpu_map = cuda_mem_cpu_map; 1251 dev->ops.mem_cpu_unmap = cuda_mem_cpu_unmap; 1252 dev->ops.wmb = cuda_wmb; 1253 1254 rte_gpu_complete_new(dev); 1255 1256 rte_cuda_debug("dev id = %u name = %s", 1257 dev->mpshared->info.dev_id, private->gpu_name); 1258 1259 return 0; 1260 } 1261 1262 static int 1263 cuda_gpu_remove(struct rte_pci_device *pci_dev) 1264 { 1265 struct rte_gpu *dev; 1266 int ret; 1267 uint8_t gpu_id; 1268 1269 if (pci_dev == NULL) { 1270 rte_errno = ENODEV; 1271 return -rte_errno; 1272 } 1273 1274 dev = rte_gpu_get_by_name(pci_dev->device.name); 1275 if (dev == NULL) { 1276 rte_cuda_log(ERR, "Couldn't find HW dev \"%s\" to uninitialise it", 1277 pci_dev->device.name); 1278 rte_errno = ENODEV; 1279 return -rte_errno; 1280 } 1281 gpu_id = dev->mpshared->info.dev_id; 1282 1283 /* release dev from library */ 1284 ret = rte_gpu_release(dev); 1285 if (ret) 1286 rte_cuda_log(ERR, "Device %i failed to uninit: %i", gpu_id, ret); 1287 1288 rte_cuda_debug("Destroyed dev = %u", gpu_id); 1289 1290 return 0; 1291 } 1292 1293 static struct rte_pci_driver rte_cuda_driver = { 1294 .id_table = pci_id_cuda_map, 1295 .drv_flags = RTE_PCI_DRV_WC_ACTIVATE, 1296 .probe = cuda_gpu_probe, 1297 .remove = cuda_gpu_remove, 1298 }; 1299 1300 RTE_PMD_REGISTER_PCI(gpu_cuda, rte_cuda_driver); 1301 RTE_PMD_REGISTER_PCI_TABLE(gpu_cuda, pci_id_cuda_map); 1302 RTE_PMD_REGISTER_KMOD_DEP(gpu_cuda, "* nvidia & (nv_peer_mem | nvpeer_mem)"); 1303