1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (c) 2021 NVIDIA Corporation & Affiliates 3 */ 4 5 #include <dlfcn.h> 6 7 #include <rte_malloc.h> 8 #include <rte_pci.h> 9 #include <rte_bus_pci.h> 10 #include <rte_byteorder.h> 11 #include <rte_dev.h> 12 13 #include <gpudev_driver.h> 14 15 #include <cuda.h> 16 #include <cudaTypedefs.h> 17 18 #include "common.h" 19 20 #define CUDA_DRIVER_MIN_VERSION 11040 21 #define CUDA_API_MIN_VERSION 3020 22 23 /* CUDA Driver functions loaded with dlsym() */ 24 static CUresult CUDAAPI (*sym_cuInit)(unsigned int flags); 25 static CUresult CUDAAPI (*sym_cuDriverGetVersion)(int *driverVersion); 26 static CUresult CUDAAPI (*sym_cuGetProcAddress)(const char *symbol, 27 void **pfn, int cudaVersion, uint64_t flags); 28 29 /* CUDA Driver functions loaded with cuGetProcAddress for versioning */ 30 static PFN_cuGetErrorString pfn_cuGetErrorString; 31 static PFN_cuGetErrorName pfn_cuGetErrorName; 32 static PFN_cuPointerSetAttribute pfn_cuPointerSetAttribute; 33 static PFN_cuDeviceGetAttribute pfn_cuDeviceGetAttribute; 34 static PFN_cuDeviceGetByPCIBusId pfn_cuDeviceGetByPCIBusId; 35 static PFN_cuDevicePrimaryCtxRetain pfn_cuDevicePrimaryCtxRetain; 36 static PFN_cuDevicePrimaryCtxRelease pfn_cuDevicePrimaryCtxRelease; 37 static PFN_cuDeviceTotalMem pfn_cuDeviceTotalMem; 38 static PFN_cuDeviceGetName pfn_cuDeviceGetName; 39 static PFN_cuCtxGetApiVersion pfn_cuCtxGetApiVersion; 40 static PFN_cuCtxSetCurrent pfn_cuCtxSetCurrent; 41 static PFN_cuCtxGetCurrent pfn_cuCtxGetCurrent; 42 static PFN_cuCtxGetDevice pfn_cuCtxGetDevice; 43 static PFN_cuCtxGetExecAffinity pfn_cuCtxGetExecAffinity; 44 static PFN_cuMemAlloc pfn_cuMemAlloc; 45 static PFN_cuMemFree pfn_cuMemFree; 46 static PFN_cuMemHostRegister pfn_cuMemHostRegister; 47 static PFN_cuMemHostUnregister pfn_cuMemHostUnregister; 48 static PFN_cuMemHostGetDevicePointer pfn_cuMemHostGetDevicePointer; 49 static PFN_cuFlushGPUDirectRDMAWrites pfn_cuFlushGPUDirectRDMAWrites; 50 51 static void *cudalib; 52 static unsigned int cuda_api_version; 53 static int cuda_driver_version; 54 static gdr_t gdrc_h; 55 56 /* NVIDIA GPU vendor */ 57 #define NVIDIA_GPU_VENDOR_ID (0x10de) 58 59 /* NVIDIA GPU device IDs */ 60 #define NVIDIA_GPU_A100_40GB_DEVICE_ID (0x20f1) 61 #define NVIDIA_GPU_A100_80GB_DEVICE_ID (0x20b5) 62 #define NVIDIA_GPU_A100_80GB_DPU_DEVICE_ID (0x20b8) 63 64 #define NVIDIA_GPU_A30_24GB_DEVICE_ID (0x20b7) 65 #define NVIDIA_GPU_A10_24GB_DEVICE_ID (0x2236) 66 67 #define NVIDIA_GPU_V100_32GB_SXM_DEVICE_ID (0x1db5) 68 #define NVIDIA_GPU_V100_32GB_PCIE_DEVICE_ID (0x1db6) 69 #define NVIDIA_GPU_V100_16GB_DEVICE_ID (0x1db4) 70 71 #define NVIDIA_GPU_T4_16GB_DEVICE_ID (0x1eb8) 72 73 #define CUDA_MAX_ALLOCATION_NUM 512 74 75 #define GPU_PAGE_SHIFT 16 76 #define GPU_PAGE_SIZE (1UL << GPU_PAGE_SHIFT) 77 78 RTE_LOG_REGISTER_DEFAULT(cuda_logtype, NOTICE); 79 80 /* NVIDIA GPU address map */ 81 static const struct rte_pci_id pci_id_cuda_map[] = { 82 { 83 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 84 NVIDIA_GPU_A100_40GB_DEVICE_ID) 85 }, 86 { 87 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 88 NVIDIA_GPU_A100_80GB_DEVICE_ID) 89 }, 90 { 91 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 92 NVIDIA_GPU_A100_80GB_DPU_DEVICE_ID) 93 }, 94 { 95 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 96 NVIDIA_GPU_A30_24GB_DEVICE_ID) 97 }, 98 { 99 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 100 NVIDIA_GPU_A10_24GB_DEVICE_ID) 101 }, 102 { 103 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 104 NVIDIA_GPU_V100_32GB_SXM_DEVICE_ID) 105 }, 106 { 107 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 108 NVIDIA_GPU_V100_32GB_PCIE_DEVICE_ID) 109 }, 110 { 111 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 112 NVIDIA_GPU_V100_16GB_DEVICE_ID) 113 }, 114 { 115 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 116 NVIDIA_GPU_T4_16GB_DEVICE_ID) 117 }, 118 { 119 .device_id = 0 120 } 121 }; 122 123 /* Device private info */ 124 struct cuda_info { 125 char gpu_name[RTE_DEV_NAME_MAX_LEN]; 126 CUdevice cu_dev; 127 int gdr_supported; 128 int gdr_write_ordering; 129 int gdr_flush_type; 130 }; 131 132 /* Type of memory allocated by CUDA driver */ 133 enum mem_type { 134 GPU_MEM = 0, 135 CPU_REGISTERED, 136 GPU_REGISTERED /* Not used yet */ 137 }; 138 139 /* key associated to a memory address */ 140 typedef uintptr_t cuda_ptr_key; 141 142 /* Single entry of the memory list */ 143 struct mem_entry { 144 CUdeviceptr ptr_d; 145 CUdeviceptr ptr_orig_d; 146 void *ptr_h; 147 size_t size; 148 size_t size_orig; 149 struct rte_gpu *dev; 150 CUcontext ctx; 151 cuda_ptr_key pkey; 152 enum mem_type mtype; 153 gdr_mh_t mh; 154 struct mem_entry *prev; 155 struct mem_entry *next; 156 }; 157 158 static struct mem_entry *mem_alloc_list_head; 159 static struct mem_entry *mem_alloc_list_tail; 160 static uint32_t mem_alloc_list_last_elem; 161 162 /* Load the CUDA symbols */ 163 164 static int 165 cuda_loader(void) 166 { 167 char cuda_path[1024]; 168 169 if (getenv("CUDA_PATH_L") == NULL) 170 snprintf(cuda_path, 1024, "%s", "libcuda.so"); 171 else 172 snprintf(cuda_path, 1024, "%s/%s", getenv("CUDA_PATH_L"), "libcuda.so"); 173 174 cudalib = dlopen(cuda_path, RTLD_LAZY); 175 if (cudalib == NULL) { 176 rte_cuda_log(ERR, "Failed to find CUDA library in %s (CUDA_PATH_L=%s)", 177 cuda_path, getenv("CUDA_PATH_L")); 178 return -1; 179 } 180 181 return 0; 182 } 183 184 static int 185 cuda_sym_func_loader(void) 186 { 187 if (cudalib == NULL) 188 return -1; 189 190 sym_cuInit = dlsym(cudalib, "cuInit"); 191 if (sym_cuInit == NULL) { 192 rte_cuda_log(ERR, "Failed to load CUDA missing symbol cuInit"); 193 return -1; 194 } 195 196 sym_cuDriverGetVersion = dlsym(cudalib, "cuDriverGetVersion"); 197 if (sym_cuDriverGetVersion == NULL) { 198 rte_cuda_log(ERR, "Failed to load CUDA missing symbol cuDriverGetVersion"); 199 return -1; 200 } 201 202 sym_cuGetProcAddress = dlsym(cudalib, "cuGetProcAddress"); 203 if (sym_cuGetProcAddress == NULL) { 204 rte_cuda_log(ERR, "Failed to load CUDA missing symbol cuGetProcAddress"); 205 return -1; 206 } 207 208 return 0; 209 } 210 211 static int 212 cuda_pfn_func_loader(void) 213 { 214 CUresult res; 215 216 res = sym_cuGetProcAddress("cuGetErrorString", 217 (void **) (&pfn_cuGetErrorString), cuda_driver_version, 0); 218 if (res != 0) { 219 rte_cuda_log(ERR, "Retrieve pfn_cuGetErrorString failed with %d", res); 220 return -1; 221 } 222 223 res = sym_cuGetProcAddress("cuGetErrorName", 224 (void **)(&pfn_cuGetErrorName), cuda_driver_version, 0); 225 if (res != 0) { 226 rte_cuda_log(ERR, "Retrieve pfn_cuGetErrorName failed with %d", res); 227 return -1; 228 } 229 230 res = sym_cuGetProcAddress("cuPointerSetAttribute", 231 (void **)(&pfn_cuPointerSetAttribute), cuda_driver_version, 0); 232 if (res != 0) { 233 rte_cuda_log(ERR, "Retrieve pfn_cuPointerSetAttribute failed with %d", res); 234 return -1; 235 } 236 237 res = sym_cuGetProcAddress("cuDeviceGetAttribute", 238 (void **)(&pfn_cuDeviceGetAttribute), cuda_driver_version, 0); 239 if (res != 0) { 240 rte_cuda_log(ERR, "Retrieve pfn_cuDeviceGetAttribute failed with %d", res); 241 return -1; 242 } 243 244 res = sym_cuGetProcAddress("cuDeviceGetByPCIBusId", 245 (void **)(&pfn_cuDeviceGetByPCIBusId), cuda_driver_version, 0); 246 if (res != 0) { 247 rte_cuda_log(ERR, "Retrieve pfn_cuDeviceGetByPCIBusId failed with %d", res); 248 return -1; 249 } 250 251 res = sym_cuGetProcAddress("cuDeviceGetName", 252 (void **)(&pfn_cuDeviceGetName), cuda_driver_version, 0); 253 if (res != 0) { 254 rte_cuda_log(ERR, "Retrieve pfn_cuDeviceGetName failed with %d", res); 255 return -1; 256 } 257 258 res = sym_cuGetProcAddress("cuDevicePrimaryCtxRetain", 259 (void **)(&pfn_cuDevicePrimaryCtxRetain), cuda_driver_version, 0); 260 if (res != 0) { 261 rte_cuda_log(ERR, "Retrieve pfn_cuDevicePrimaryCtxRetain failed with %d", res); 262 return -1; 263 } 264 265 res = sym_cuGetProcAddress("cuDevicePrimaryCtxRelease", 266 (void **)(&pfn_cuDevicePrimaryCtxRelease), cuda_driver_version, 0); 267 if (res != 0) { 268 rte_cuda_log(ERR, "Retrieve pfn_cuDevicePrimaryCtxRelease failed with %d", res); 269 return -1; 270 } 271 272 res = sym_cuGetProcAddress("cuDeviceTotalMem", 273 (void **)(&pfn_cuDeviceTotalMem), cuda_driver_version, 0); 274 if (res != 0) { 275 rte_cuda_log(ERR, "Retrieve pfn_cuDeviceTotalMem failed with %d", res); 276 return -1; 277 } 278 279 res = sym_cuGetProcAddress("cuCtxGetApiVersion", 280 (void **)(&pfn_cuCtxGetApiVersion), cuda_driver_version, 0); 281 if (res != 0) { 282 rte_cuda_log(ERR, "Retrieve pfn_cuCtxGetApiVersion failed with %d", res); 283 return -1; 284 } 285 286 res = sym_cuGetProcAddress("cuCtxGetDevice", 287 (void **)(&pfn_cuCtxGetDevice), cuda_driver_version, 0); 288 if (res != 0) { 289 rte_cuda_log(ERR, "Retrieve pfn_cuCtxGetDevice failed with %d", res); 290 return -1; 291 } 292 293 res = sym_cuGetProcAddress("cuCtxSetCurrent", 294 (void **)(&pfn_cuCtxSetCurrent), cuda_driver_version, 0); 295 if (res != 0) { 296 rte_cuda_log(ERR, "Retrieve pfn_cuCtxSetCurrent failed with %d", res); 297 return -1; 298 } 299 300 res = sym_cuGetProcAddress("cuCtxGetCurrent", 301 (void **)(&pfn_cuCtxGetCurrent), cuda_driver_version, 0); 302 if (res != 0) { 303 rte_cuda_log(ERR, "Retrieve pfn_cuCtxGetCurrent failed with %d", res); 304 return -1; 305 } 306 307 res = sym_cuGetProcAddress("cuCtxGetExecAffinity", 308 (void **)(&pfn_cuCtxGetExecAffinity), cuda_driver_version, 0); 309 if (res != 0) { 310 rte_cuda_log(ERR, "Retrieve pfn_cuCtxGetExecAffinity failed with %d", res); 311 return -1; 312 } 313 314 res = sym_cuGetProcAddress("cuMemAlloc", 315 (void **)(&pfn_cuMemAlloc), cuda_driver_version, 0); 316 if (res != 0) { 317 rte_cuda_log(ERR, "Retrieve pfn_cuMemAlloc failed with %d", res); 318 return -1; 319 } 320 321 res = sym_cuGetProcAddress("cuMemFree", 322 (void **)(&pfn_cuMemFree), cuda_driver_version, 0); 323 if (res != 0) { 324 rte_cuda_log(ERR, "Retrieve pfn_cuMemFree failed with %d", res); 325 return -1; 326 } 327 328 res = sym_cuGetProcAddress("cuMemHostRegister", 329 (void **)(&pfn_cuMemHostRegister), cuda_driver_version, 0); 330 if (res != 0) { 331 rte_cuda_log(ERR, "Retrieve pfn_cuMemHostRegister failed with %d", res); 332 return -1; 333 } 334 335 res = sym_cuGetProcAddress("cuMemHostUnregister", 336 (void **)(&pfn_cuMemHostUnregister), cuda_driver_version, 0); 337 if (res != 0) { 338 rte_cuda_log(ERR, "Retrieve pfn_cuMemHostUnregister failed with %d", res); 339 return -1; 340 } 341 342 res = sym_cuGetProcAddress("cuMemHostGetDevicePointer", 343 (void **)(&pfn_cuMemHostGetDevicePointer), cuda_driver_version, 0); 344 if (res != 0) { 345 rte_cuda_log(ERR, "Retrieve pfn_cuMemHostGetDevicePointer failed with %d", res); 346 return -1; 347 } 348 349 res = sym_cuGetProcAddress("cuFlushGPUDirectRDMAWrites", 350 (void **)(&pfn_cuFlushGPUDirectRDMAWrites), cuda_driver_version, 0); 351 if (res != 0) { 352 rte_cuda_log(ERR, "Retrieve cuFlushGPUDirectRDMAWrites failed with %d", res); 353 return -1; 354 } 355 356 return 0; 357 } 358 359 /* Generate a key from a memory pointer */ 360 static cuda_ptr_key 361 get_hash_from_ptr(void *ptr) 362 { 363 return (uintptr_t)ptr; 364 } 365 366 static uint32_t 367 mem_list_count_item(void) 368 { 369 return mem_alloc_list_last_elem; 370 } 371 372 /* Initiate list of memory allocations if not done yet */ 373 static struct mem_entry * 374 mem_list_add_item(void) 375 { 376 /* Initiate list of memory allocations if not done yet */ 377 if (mem_alloc_list_head == NULL) { 378 mem_alloc_list_head = rte_zmalloc(NULL, 379 sizeof(struct mem_entry), 380 RTE_CACHE_LINE_SIZE); 381 if (mem_alloc_list_head == NULL) { 382 rte_cuda_log(ERR, "Failed to allocate memory for memory list"); 383 return NULL; 384 } 385 386 mem_alloc_list_head->next = NULL; 387 mem_alloc_list_head->prev = NULL; 388 mem_alloc_list_tail = mem_alloc_list_head; 389 } else { 390 struct mem_entry *mem_alloc_list_cur = rte_zmalloc(NULL, 391 sizeof(struct mem_entry), 392 RTE_CACHE_LINE_SIZE); 393 394 if (mem_alloc_list_cur == NULL) { 395 rte_cuda_log(ERR, "Failed to allocate memory for memory list"); 396 return NULL; 397 } 398 399 mem_alloc_list_tail->next = mem_alloc_list_cur; 400 mem_alloc_list_cur->prev = mem_alloc_list_tail; 401 mem_alloc_list_tail = mem_alloc_list_tail->next; 402 mem_alloc_list_tail->next = NULL; 403 } 404 405 mem_alloc_list_last_elem++; 406 407 return mem_alloc_list_tail; 408 } 409 410 static struct mem_entry * 411 mem_list_find_item(cuda_ptr_key pk) 412 { 413 struct mem_entry *mem_alloc_list_cur = NULL; 414 415 if (mem_alloc_list_head == NULL) { 416 rte_cuda_log(ERR, "Memory list doesn't exist"); 417 return NULL; 418 } 419 420 if (mem_list_count_item() == 0) { 421 rte_cuda_log(ERR, "No items in memory list"); 422 return NULL; 423 } 424 425 mem_alloc_list_cur = mem_alloc_list_head; 426 427 while (mem_alloc_list_cur != NULL) { 428 if (mem_alloc_list_cur->pkey == pk) 429 return mem_alloc_list_cur; 430 mem_alloc_list_cur = mem_alloc_list_cur->next; 431 } 432 433 return mem_alloc_list_cur; 434 } 435 436 static int 437 mem_list_del_item(cuda_ptr_key pk) 438 { 439 struct mem_entry *mem_alloc_list_cur = NULL; 440 441 mem_alloc_list_cur = mem_list_find_item(pk); 442 if (mem_alloc_list_cur == NULL) 443 return -EINVAL; 444 445 /* if key is in head */ 446 if (mem_alloc_list_cur->prev == NULL) { 447 mem_alloc_list_head = mem_alloc_list_cur->next; 448 if (mem_alloc_list_head != NULL) 449 mem_alloc_list_head->prev = NULL; 450 } else { 451 mem_alloc_list_cur->prev->next = mem_alloc_list_cur->next; 452 if (mem_alloc_list_cur->next != NULL) 453 mem_alloc_list_cur->next->prev = mem_alloc_list_cur->prev; 454 } 455 456 rte_free(mem_alloc_list_cur); 457 458 mem_alloc_list_last_elem--; 459 460 return 0; 461 } 462 463 static int 464 cuda_dev_info_get(struct rte_gpu *dev, struct rte_gpu_info *info) 465 { 466 int ret = 0; 467 CUresult res; 468 struct rte_gpu_info parent_info; 469 CUexecAffinityParam affinityPrm; 470 const char *err_string; 471 struct cuda_info *private; 472 CUcontext current_ctx; 473 CUcontext input_ctx; 474 475 if (dev == NULL) { 476 rte_errno = ENODEV; 477 return -rte_errno; 478 } 479 480 /* Child initialization time probably called by rte_gpu_add_child() */ 481 if (dev->mpshared->info.parent != RTE_GPU_ID_NONE && 482 dev->mpshared->dev_private == NULL) { 483 /* Store current ctx */ 484 res = pfn_cuCtxGetCurrent(¤t_ctx); 485 if (res != 0) { 486 pfn_cuGetErrorString(res, &(err_string)); 487 rte_cuda_log(ERR, "cuCtxGetCurrent failed with %s", 488 err_string); 489 rte_errno = EPERM; 490 return -rte_errno; 491 } 492 493 /* Set child ctx as current ctx */ 494 input_ctx = (CUcontext)((uintptr_t)dev->mpshared->info.context); 495 res = pfn_cuCtxSetCurrent(input_ctx); 496 if (res != 0) { 497 pfn_cuGetErrorString(res, &(err_string)); 498 rte_cuda_log(ERR, "cuCtxSetCurrent input failed with %s", 499 err_string); 500 rte_errno = EPERM; 501 return -rte_errno; 502 } 503 504 /* 505 * Ctx capacity info 506 */ 507 508 /* MPS compatible */ 509 res = pfn_cuCtxGetExecAffinity(&affinityPrm, 510 CU_EXEC_AFFINITY_TYPE_SM_COUNT); 511 if (res != 0) { 512 pfn_cuGetErrorString(res, &(err_string)); 513 rte_cuda_log(ERR, "cuCtxGetExecAffinity failed with %s", 514 err_string); 515 } 516 dev->mpshared->info.processor_count = 517 (uint32_t)affinityPrm.param.smCount.val; 518 519 ret = rte_gpu_info_get(dev->mpshared->info.parent, &parent_info); 520 if (ret) { 521 rte_errno = ENODEV; 522 return -rte_errno; 523 } 524 dev->mpshared->info.total_memory = parent_info.total_memory; 525 526 dev->mpshared->info.page_size = parent_info.page_size; 527 528 /* 529 * GPU Device private info 530 */ 531 dev->mpshared->dev_private = rte_zmalloc(NULL, 532 sizeof(struct cuda_info), 533 RTE_CACHE_LINE_SIZE); 534 if (dev->mpshared->dev_private == NULL) { 535 rte_cuda_log(ERR, "Failed to allocate memory for GPU process private"); 536 rte_errno = EPERM; 537 return -rte_errno; 538 } 539 540 private = (struct cuda_info *)dev->mpshared->dev_private; 541 542 res = pfn_cuCtxGetDevice(&(private->cu_dev)); 543 if (res != 0) { 544 pfn_cuGetErrorString(res, &(err_string)); 545 rte_cuda_log(ERR, "cuCtxGetDevice failed with %s", 546 err_string); 547 rte_errno = EPERM; 548 return -rte_errno; 549 } 550 551 res = pfn_cuDeviceGetName(private->gpu_name, 552 RTE_DEV_NAME_MAX_LEN, private->cu_dev); 553 if (res != 0) { 554 pfn_cuGetErrorString(res, &(err_string)); 555 rte_cuda_log(ERR, "cuDeviceGetName failed with %s", 556 err_string); 557 rte_errno = EPERM; 558 return -rte_errno; 559 } 560 561 /* Restore original ctx as current ctx */ 562 res = pfn_cuCtxSetCurrent(current_ctx); 563 if (res != 0) { 564 pfn_cuGetErrorString(res, &(err_string)); 565 rte_cuda_log(ERR, "cuCtxSetCurrent current failed with %s", 566 err_string); 567 rte_errno = EPERM; 568 return -rte_errno; 569 } 570 } 571 572 *info = dev->mpshared->info; 573 574 return 0; 575 } 576 577 /* 578 * GPU Memory 579 */ 580 581 static int 582 cuda_mem_alloc(struct rte_gpu *dev, size_t size, unsigned int align, void **ptr) 583 { 584 CUresult res; 585 const char *err_string; 586 CUcontext current_ctx; 587 CUcontext input_ctx; 588 unsigned int flag = 1; 589 590 if (dev == NULL) 591 return -ENODEV; 592 593 /* Store current ctx */ 594 res = pfn_cuCtxGetCurrent(¤t_ctx); 595 if (res != 0) { 596 pfn_cuGetErrorString(res, &(err_string)); 597 rte_cuda_log(ERR, "cuCtxGetCurrent failed with %s", 598 err_string); 599 rte_errno = EPERM; 600 return -rte_errno; 601 } 602 603 /* Set child ctx as current ctx */ 604 input_ctx = (CUcontext)((uintptr_t)dev->mpshared->info.context); 605 res = pfn_cuCtxSetCurrent(input_ctx); 606 if (res != 0) { 607 pfn_cuGetErrorString(res, &(err_string)); 608 rte_cuda_log(ERR, "cuCtxSetCurrent input failed with %s", 609 err_string); 610 rte_errno = EPERM; 611 return -rte_errno; 612 } 613 614 /* Get next memory list item */ 615 mem_alloc_list_tail = mem_list_add_item(); 616 if (mem_alloc_list_tail == NULL) { 617 rte_errno = EPERM; 618 return -rte_errno; 619 } 620 621 /* Allocate memory */ 622 mem_alloc_list_tail->size = size; 623 mem_alloc_list_tail->size_orig = size + align; 624 625 res = pfn_cuMemAlloc(&(mem_alloc_list_tail->ptr_orig_d), 626 mem_alloc_list_tail->size_orig); 627 if (res != 0) { 628 pfn_cuGetErrorString(res, &(err_string)); 629 rte_cuda_log(ERR, "cuCtxSetCurrent current failed with %s", 630 err_string); 631 rte_errno = EPERM; 632 return -rte_errno; 633 } 634 635 /* Align memory address */ 636 mem_alloc_list_tail->ptr_d = mem_alloc_list_tail->ptr_orig_d; 637 if (align && ((uintptr_t)mem_alloc_list_tail->ptr_d) % align) 638 mem_alloc_list_tail->ptr_d += (align - 639 (((uintptr_t)mem_alloc_list_tail->ptr_d) % align)); 640 641 /* GPUDirect RDMA attribute required */ 642 res = pfn_cuPointerSetAttribute(&flag, 643 CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, 644 mem_alloc_list_tail->ptr_d); 645 if (res != 0) { 646 rte_cuda_log(ERR, "Could not set SYNC MEMOP attribute for " 647 "GPU memory at %"PRIu32", err %d", 648 (uint32_t)mem_alloc_list_tail->ptr_d, res); 649 rte_errno = EPERM; 650 return -rte_errno; 651 } 652 653 mem_alloc_list_tail->pkey = get_hash_from_ptr((void *)mem_alloc_list_tail->ptr_d); 654 mem_alloc_list_tail->ptr_h = NULL; 655 mem_alloc_list_tail->dev = dev; 656 mem_alloc_list_tail->ctx = (CUcontext)((uintptr_t)dev->mpshared->info.context); 657 mem_alloc_list_tail->mtype = GPU_MEM; 658 659 /* Restore original ctx as current ctx */ 660 res = pfn_cuCtxSetCurrent(current_ctx); 661 if (res != 0) { 662 pfn_cuGetErrorString(res, &(err_string)); 663 rte_cuda_log(ERR, "cuCtxSetCurrent current failed with %s", 664 err_string); 665 rte_errno = EPERM; 666 return -rte_errno; 667 } 668 669 *ptr = (void *)mem_alloc_list_tail->ptr_d; 670 671 return 0; 672 } 673 674 static int 675 cuda_mem_register(struct rte_gpu *dev, size_t size, void *ptr) 676 { 677 CUresult res; 678 const char *err_string; 679 CUcontext current_ctx; 680 CUcontext input_ctx; 681 unsigned int flag = 1; 682 int use_ptr_h = 0; 683 684 if (dev == NULL) 685 return -ENODEV; 686 687 /* Store current ctx */ 688 res = pfn_cuCtxGetCurrent(¤t_ctx); 689 if (res != 0) { 690 pfn_cuGetErrorString(res, &(err_string)); 691 rte_cuda_log(ERR, "cuCtxGetCurrent failed with %s", 692 err_string); 693 rte_errno = EPERM; 694 return -rte_errno; 695 } 696 697 /* Set child ctx as current ctx */ 698 input_ctx = (CUcontext)((uintptr_t)dev->mpshared->info.context); 699 res = pfn_cuCtxSetCurrent(input_ctx); 700 if (res != 0) { 701 pfn_cuGetErrorString(res, &(err_string)); 702 rte_cuda_log(ERR, "cuCtxSetCurrent input failed with %s", 703 err_string); 704 rte_errno = EPERM; 705 return -rte_errno; 706 } 707 708 /* Get next memory list item */ 709 mem_alloc_list_tail = mem_list_add_item(); 710 if (mem_alloc_list_tail == NULL) { 711 rte_errno = EPERM; 712 return -rte_errno; 713 } 714 715 /* Allocate memory */ 716 mem_alloc_list_tail->size = size; 717 mem_alloc_list_tail->ptr_h = ptr; 718 719 res = pfn_cuMemHostRegister(mem_alloc_list_tail->ptr_h, 720 mem_alloc_list_tail->size, 721 CU_MEMHOSTREGISTER_PORTABLE | 722 CU_MEMHOSTREGISTER_DEVICEMAP); 723 if (res != 0) { 724 pfn_cuGetErrorString(res, &(err_string)); 725 rte_cuda_log(ERR, "cuMemHostRegister failed with %s ptr %p size %zd", 726 err_string, 727 mem_alloc_list_tail->ptr_h, 728 mem_alloc_list_tail->size); 729 rte_errno = EPERM; 730 return -rte_errno; 731 } 732 733 res = pfn_cuDeviceGetAttribute(&(use_ptr_h), 734 CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM, 735 ((struct cuda_info *)(dev->mpshared->dev_private))->cu_dev); 736 if (res != 0) { 737 pfn_cuGetErrorString(res, &(err_string)); 738 rte_cuda_log(ERR, "cuDeviceGetAttribute failed with %s", 739 err_string); 740 rte_errno = EPERM; 741 return -rte_errno; 742 } 743 744 if (use_ptr_h == 0) { 745 res = pfn_cuMemHostGetDevicePointer(&(mem_alloc_list_tail->ptr_d), 746 mem_alloc_list_tail->ptr_h, 0); 747 if (res != 0) { 748 pfn_cuGetErrorString(res, &(err_string)); 749 rte_cuda_log(ERR, "cuMemHostGetDevicePointer failed with %s", 750 err_string); 751 rte_errno = EPERM; 752 return -rte_errno; 753 } 754 755 if ((uintptr_t)mem_alloc_list_tail->ptr_d != 756 (uintptr_t)mem_alloc_list_tail->ptr_h) { 757 rte_cuda_log(ERR, "Host input pointer is different wrt GPU registered pointer"); 758 rte_errno = ENOTSUP; 759 return -rte_errno; 760 } 761 } else { 762 mem_alloc_list_tail->ptr_d = (CUdeviceptr)mem_alloc_list_tail->ptr_h; 763 } 764 765 /* GPUDirect RDMA attribute required */ 766 res = pfn_cuPointerSetAttribute(&flag, 767 CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, 768 mem_alloc_list_tail->ptr_d); 769 if (res != 0) { 770 rte_cuda_log(ERR, "Could not set SYNC MEMOP attribute for GPU memory at %"PRIu32 771 ", err %d", (uint32_t)mem_alloc_list_tail->ptr_d, res); 772 rte_errno = EPERM; 773 return -rte_errno; 774 } 775 776 mem_alloc_list_tail->pkey = get_hash_from_ptr((void *)mem_alloc_list_tail->ptr_h); 777 mem_alloc_list_tail->size = size; 778 mem_alloc_list_tail->dev = dev; 779 mem_alloc_list_tail->ctx = (CUcontext)((uintptr_t)dev->mpshared->info.context); 780 mem_alloc_list_tail->mtype = CPU_REGISTERED; 781 mem_alloc_list_tail->ptr_orig_d = mem_alloc_list_tail->ptr_d; 782 783 /* Restore original ctx as current ctx */ 784 res = pfn_cuCtxSetCurrent(current_ctx); 785 if (res != 0) { 786 pfn_cuGetErrorString(res, &(err_string)); 787 rte_cuda_log(ERR, "cuCtxSetCurrent current failed with %s", 788 err_string); 789 rte_errno = EPERM; 790 return -rte_errno; 791 } 792 793 return 0; 794 } 795 796 static int 797 cuda_mem_cpu_map(struct rte_gpu *dev, __rte_unused size_t size, void *ptr_in, void **ptr_out) 798 { 799 struct mem_entry *mem_item; 800 cuda_ptr_key hk; 801 802 if (dev == NULL) 803 return -ENODEV; 804 805 hk = get_hash_from_ptr((void *)ptr_in); 806 807 mem_item = mem_list_find_item(hk); 808 if (mem_item == NULL) { 809 rte_cuda_log(ERR, "Memory address 0x%p not found in driver memory.", ptr_in); 810 rte_errno = EPERM; 811 return -rte_errno; 812 } 813 814 if (mem_item->mtype != GPU_MEM) { 815 rte_cuda_log(ERR, "Memory address 0x%p is not GPU memory type.", ptr_in); 816 rte_errno = EPERM; 817 return -rte_errno; 818 } 819 820 if (mem_item->size != size) 821 rte_cuda_log(WARNING, 822 "Can't expose memory area with size (%zd) different from original size (%zd).", 823 size, mem_item->size); 824 825 if (gdrcopy_pin(&gdrc_h, &(mem_item->mh), (uint64_t)mem_item->ptr_d, 826 mem_item->size, &(mem_item->ptr_h))) { 827 rte_cuda_log(ERR, "Error exposing GPU memory address 0x%p.", ptr_in); 828 rte_errno = EPERM; 829 return -rte_errno; 830 } 831 832 *ptr_out = mem_item->ptr_h; 833 834 return 0; 835 } 836 837 static int 838 cuda_mem_free(struct rte_gpu *dev, void *ptr) 839 { 840 CUresult res; 841 struct mem_entry *mem_item; 842 const char *err_string; 843 cuda_ptr_key hk; 844 845 if (dev == NULL) 846 return -ENODEV; 847 848 hk = get_hash_from_ptr((void *)ptr); 849 850 mem_item = mem_list_find_item(hk); 851 if (mem_item == NULL) { 852 rte_cuda_log(ERR, "Memory address 0x%p not found in driver memory", ptr); 853 rte_errno = EPERM; 854 return -rte_errno; 855 } 856 857 if (mem_item->mtype == GPU_MEM) { 858 res = pfn_cuMemFree(mem_item->ptr_orig_d); 859 if (res != 0) { 860 pfn_cuGetErrorString(res, &(err_string)); 861 rte_cuda_log(ERR, "cuMemFree current failed with %s", 862 err_string); 863 rte_errno = EPERM; 864 return -rte_errno; 865 } 866 867 return mem_list_del_item(hk); 868 } 869 870 rte_cuda_log(ERR, "Memory type %d not supported", mem_item->mtype); 871 872 return -EPERM; 873 } 874 875 static int 876 cuda_mem_unregister(struct rte_gpu *dev, void *ptr) 877 { 878 CUresult res; 879 struct mem_entry *mem_item; 880 const char *err_string; 881 cuda_ptr_key hk; 882 883 if (dev == NULL) 884 return -ENODEV; 885 886 hk = get_hash_from_ptr((void *)ptr); 887 888 mem_item = mem_list_find_item(hk); 889 if (mem_item == NULL) { 890 rte_cuda_log(ERR, "Memory address 0x%p not found in driver memory", ptr); 891 rte_errno = EPERM; 892 return -rte_errno; 893 } 894 895 if (mem_item->mtype == CPU_REGISTERED) { 896 res = pfn_cuMemHostUnregister(ptr); 897 if (res != 0) { 898 pfn_cuGetErrorString(res, &(err_string)); 899 rte_cuda_log(ERR, "cuMemHostUnregister current failed with %s", 900 err_string); 901 rte_errno = EPERM; 902 return -rte_errno; 903 } 904 905 return mem_list_del_item(hk); 906 } 907 908 rte_cuda_log(ERR, "Memory type %d not supported", mem_item->mtype); 909 910 rte_errno = EPERM; 911 return -rte_errno; 912 } 913 914 static int 915 cuda_mem_cpu_unmap(struct rte_gpu *dev, void *ptr_in) 916 { 917 struct mem_entry *mem_item; 918 cuda_ptr_key hk; 919 920 if (dev == NULL) 921 return -ENODEV; 922 923 hk = get_hash_from_ptr((void *)ptr_in); 924 925 mem_item = mem_list_find_item(hk); 926 if (mem_item == NULL) { 927 rte_cuda_log(ERR, "Memory address 0x%p not found in driver memory.", ptr_in); 928 rte_errno = EPERM; 929 return -rte_errno; 930 } 931 932 if (gdrcopy_unpin(gdrc_h, mem_item->mh, (void *)mem_item->ptr_d, 933 mem_item->size)) { 934 rte_cuda_log(ERR, "Error unexposing GPU memory address 0x%p.", ptr_in); 935 rte_errno = EPERM; 936 return -rte_errno; 937 } 938 939 return 0; 940 } 941 942 static int 943 cuda_dev_close(struct rte_gpu *dev) 944 { 945 if (dev == NULL) 946 return -EINVAL; 947 948 rte_free(dev->mpshared->dev_private); 949 950 return 0; 951 } 952 953 static int 954 cuda_wmb(struct rte_gpu *dev) 955 { 956 CUresult res; 957 const char *err_string; 958 CUcontext current_ctx; 959 CUcontext input_ctx; 960 struct cuda_info *private; 961 962 if (dev == NULL) { 963 rte_errno = ENODEV; 964 return -rte_errno; 965 } 966 967 private = (struct cuda_info *)dev->mpshared->dev_private; 968 969 if (private->gdr_write_ordering != CU_GPU_DIRECT_RDMA_WRITES_ORDERING_NONE) { 970 /* 971 * No need to explicitly force the write ordering because 972 * the device natively supports it 973 */ 974 return 0; 975 } 976 977 if (private->gdr_flush_type != CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_HOST) { 978 /* 979 * Can't flush GDR writes with cuFlushGPUDirectRDMAWrites CUDA function. 980 * Application needs to use alternative methods. 981 */ 982 rte_cuda_log(WARNING, "Can't flush GDR writes with cuFlushGPUDirectRDMAWrites CUDA function." 983 "Application needs to use alternative methods."); 984 985 rte_errno = ENOTSUP; 986 return -rte_errno; 987 } 988 989 /* Store current ctx */ 990 res = pfn_cuCtxGetCurrent(¤t_ctx); 991 if (res != 0) { 992 pfn_cuGetErrorString(res, &(err_string)); 993 rte_cuda_log(ERR, "cuCtxGetCurrent failed with %s", 994 err_string); 995 rte_errno = EPERM; 996 return -rte_errno; 997 } 998 999 /* Set child ctx as current ctx */ 1000 input_ctx = (CUcontext)((uintptr_t)dev->mpshared->info.context); 1001 res = pfn_cuCtxSetCurrent(input_ctx); 1002 if (res != 0) { 1003 pfn_cuGetErrorString(res, &(err_string)); 1004 rte_cuda_log(ERR, "cuCtxSetCurrent input failed with %s", 1005 err_string); 1006 rte_errno = EPERM; 1007 return -rte_errno; 1008 } 1009 1010 res = pfn_cuFlushGPUDirectRDMAWrites(CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TARGET_CURRENT_CTX, 1011 CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_ALL_DEVICES); 1012 if (res != 0) { 1013 pfn_cuGetErrorString(res, &(err_string)); 1014 rte_cuda_log(ERR, "cuFlushGPUDirectRDMAWrites current failed with %s", 1015 err_string); 1016 rte_errno = EPERM; 1017 return -rte_errno; 1018 } 1019 1020 /* Restore original ctx as current ctx */ 1021 res = pfn_cuCtxSetCurrent(current_ctx); 1022 if (res != 0) { 1023 pfn_cuGetErrorString(res, &(err_string)); 1024 rte_cuda_log(ERR, "cuCtxSetCurrent current failed with %s", 1025 err_string); 1026 rte_errno = EPERM; 1027 return -rte_errno; 1028 } 1029 1030 return 0; 1031 } 1032 1033 static int 1034 cuda_gpu_probe(__rte_unused struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) 1035 { 1036 struct rte_gpu *dev = NULL; 1037 CUresult res; 1038 CUdevice cu_dev_id; 1039 CUcontext pctx; 1040 char dev_name[RTE_DEV_NAME_MAX_LEN]; 1041 const char *err_string; 1042 int processor_count = 0; 1043 struct cuda_info *private; 1044 1045 if (pci_dev == NULL) { 1046 rte_cuda_log(ERR, "NULL PCI device"); 1047 rte_errno = ENODEV; 1048 return -rte_errno; 1049 } 1050 1051 rte_pci_device_name(&pci_dev->addr, dev_name, sizeof(dev_name)); 1052 1053 /* Allocate memory to be used privately by drivers */ 1054 dev = rte_gpu_allocate(pci_dev->device.name); 1055 if (dev == NULL) { 1056 rte_errno = ENODEV; 1057 return -rte_errno; 1058 } 1059 1060 /* Initialize values only for the first CUDA driver call */ 1061 if (dev->mpshared->info.dev_id == 0) { 1062 mem_alloc_list_head = NULL; 1063 mem_alloc_list_tail = NULL; 1064 mem_alloc_list_last_elem = 0; 1065 1066 /* Load libcuda.so library */ 1067 if (cuda_loader()) { 1068 rte_cuda_log(ERR, "CUDA Driver library not found"); 1069 rte_errno = ENOTSUP; 1070 return -rte_errno; 1071 } 1072 1073 /* Load initial CUDA functions */ 1074 if (cuda_sym_func_loader()) { 1075 rte_cuda_log(ERR, "CUDA functions not found in library"); 1076 rte_errno = ENOTSUP; 1077 return -rte_errno; 1078 } 1079 1080 /* 1081 * Required to initialize the CUDA Driver. 1082 * Multiple calls of cuInit() will return immediately 1083 * without making any relevant change 1084 */ 1085 sym_cuInit(0); 1086 1087 res = sym_cuDriverGetVersion(&cuda_driver_version); 1088 if (res != 0) { 1089 rte_cuda_log(ERR, "cuDriverGetVersion failed with %d", res); 1090 rte_errno = ENOTSUP; 1091 return -rte_errno; 1092 } 1093 1094 if (cuda_driver_version < CUDA_DRIVER_MIN_VERSION) { 1095 rte_cuda_log(ERR, "CUDA Driver version found is %d. " 1096 "Minimum requirement is %d", 1097 cuda_driver_version, 1098 CUDA_DRIVER_MIN_VERSION); 1099 rte_errno = ENOTSUP; 1100 return -rte_errno; 1101 } 1102 1103 if (cuda_pfn_func_loader()) { 1104 rte_cuda_log(ERR, "CUDA PFN functions not found in library"); 1105 rte_errno = ENOTSUP; 1106 return -rte_errno; 1107 } 1108 1109 gdrc_h = NULL; 1110 } 1111 1112 /* Fill HW specific part of device structure */ 1113 dev->device = &pci_dev->device; 1114 dev->mpshared->info.numa_node = pci_dev->device.numa_node; 1115 1116 /* Get NVIDIA GPU Device descriptor */ 1117 res = pfn_cuDeviceGetByPCIBusId(&cu_dev_id, dev->device->name); 1118 if (res != 0) { 1119 pfn_cuGetErrorString(res, &(err_string)); 1120 rte_cuda_log(ERR, "cuDeviceGetByPCIBusId name %s failed with %d: %s", 1121 dev->device->name, res, err_string); 1122 rte_errno = EPERM; 1123 return -rte_errno; 1124 } 1125 1126 res = pfn_cuDevicePrimaryCtxRetain(&pctx, cu_dev_id); 1127 if (res != 0) { 1128 pfn_cuGetErrorString(res, &(err_string)); 1129 rte_cuda_log(ERR, "cuDevicePrimaryCtxRetain name %s failed with %d: %s", 1130 dev->device->name, res, err_string); 1131 rte_errno = EPERM; 1132 return -rte_errno; 1133 } 1134 1135 res = pfn_cuCtxGetApiVersion(pctx, &cuda_api_version); 1136 if (res != 0) { 1137 rte_cuda_log(ERR, "cuCtxGetApiVersion failed with %d", res); 1138 rte_errno = ENOTSUP; 1139 return -rte_errno; 1140 } 1141 1142 if (cuda_api_version < CUDA_API_MIN_VERSION) { 1143 rte_cuda_log(ERR, "CUDA API version found is %d Minimum requirement is %d", 1144 cuda_api_version, CUDA_API_MIN_VERSION); 1145 rte_errno = ENOTSUP; 1146 return -rte_errno; 1147 } 1148 1149 dev->mpshared->info.context = (uint64_t)pctx; 1150 1151 /* 1152 * GPU Device generic info 1153 */ 1154 1155 /* Processor count */ 1156 res = pfn_cuDeviceGetAttribute(&(processor_count), 1157 CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, 1158 cu_dev_id); 1159 if (res != 0) { 1160 pfn_cuGetErrorString(res, &(err_string)); 1161 rte_cuda_log(ERR, "cuDeviceGetAttribute failed with %s", 1162 err_string); 1163 rte_errno = EPERM; 1164 return -rte_errno; 1165 } 1166 dev->mpshared->info.processor_count = (uint32_t)processor_count; 1167 1168 /* Total memory */ 1169 res = pfn_cuDeviceTotalMem(&dev->mpshared->info.total_memory, cu_dev_id); 1170 if (res != 0) { 1171 pfn_cuGetErrorString(res, &(err_string)); 1172 rte_cuda_log(ERR, "cuDeviceTotalMem failed with %s", 1173 err_string); 1174 rte_errno = EPERM; 1175 return -rte_errno; 1176 } 1177 1178 dev->mpshared->info.page_size = (size_t)GPU_PAGE_SIZE; 1179 1180 /* 1181 * GPU Device private info 1182 */ 1183 dev->mpshared->dev_private = rte_zmalloc(NULL, 1184 sizeof(struct cuda_info), 1185 RTE_CACHE_LINE_SIZE); 1186 if (dev->mpshared->dev_private == NULL) { 1187 rte_cuda_log(ERR, "Failed to allocate memory for GPU process private"); 1188 rte_errno = EPERM; 1189 return -rte_errno; 1190 } 1191 1192 private = (struct cuda_info *)dev->mpshared->dev_private; 1193 private->cu_dev = cu_dev_id; 1194 res = pfn_cuDeviceGetName(private->gpu_name, 1195 RTE_DEV_NAME_MAX_LEN, 1196 cu_dev_id); 1197 if (res != 0) { 1198 pfn_cuGetErrorString(res, &(err_string)); 1199 rte_cuda_log(ERR, "cuDeviceGetName failed with %s", 1200 err_string); 1201 rte_errno = EPERM; 1202 return -rte_errno; 1203 } 1204 1205 res = pfn_cuDeviceGetAttribute(&(private->gdr_supported), 1206 CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED, 1207 cu_dev_id); 1208 if (res != 0) { 1209 pfn_cuGetErrorString(res, &(err_string)); 1210 rte_cuda_log(ERR, "cuDeviceGetAttribute failed with %s", 1211 err_string); 1212 rte_errno = EPERM; 1213 return -rte_errno; 1214 } 1215 1216 if (private->gdr_supported == 0) 1217 rte_cuda_log(WARNING, "GPU %s doesn't support GPUDirect RDMA", 1218 pci_dev->device.name); 1219 1220 res = pfn_cuDeviceGetAttribute(&(private->gdr_write_ordering), 1221 CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING, 1222 cu_dev_id); 1223 if (res != 0) { 1224 pfn_cuGetErrorString(res, &(err_string)); 1225 rte_cuda_log(ERR, 1226 "cuDeviceGetAttribute failed with %s", 1227 err_string); 1228 rte_errno = EPERM; 1229 return -rte_errno; 1230 } 1231 1232 if (private->gdr_write_ordering == CU_GPU_DIRECT_RDMA_WRITES_ORDERING_NONE) { 1233 res = pfn_cuDeviceGetAttribute(&(private->gdr_flush_type), 1234 CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS, 1235 cu_dev_id); 1236 if (res != 0) { 1237 pfn_cuGetErrorString(res, &(err_string)); 1238 rte_cuda_log(ERR, "cuDeviceGetAttribute failed with %s", 1239 err_string); 1240 rte_errno = EPERM; 1241 return -rte_errno; 1242 } 1243 1244 if (private->gdr_flush_type != CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_HOST) 1245 rte_cuda_log(ERR, "GPUDirect RDMA flush writes API is not supported"); 1246 } 1247 1248 dev->ops.dev_info_get = cuda_dev_info_get; 1249 dev->ops.dev_close = cuda_dev_close; 1250 dev->ops.mem_alloc = cuda_mem_alloc; 1251 dev->ops.mem_free = cuda_mem_free; 1252 dev->ops.mem_register = cuda_mem_register; 1253 dev->ops.mem_unregister = cuda_mem_unregister; 1254 dev->ops.mem_cpu_map = cuda_mem_cpu_map; 1255 dev->ops.mem_cpu_unmap = cuda_mem_cpu_unmap; 1256 dev->ops.wmb = cuda_wmb; 1257 1258 rte_gpu_complete_new(dev); 1259 1260 rte_cuda_debug("dev id = %u name = %s", 1261 dev->mpshared->info.dev_id, private->gpu_name); 1262 1263 return 0; 1264 } 1265 1266 static int 1267 cuda_gpu_remove(struct rte_pci_device *pci_dev) 1268 { 1269 struct rte_gpu *dev; 1270 int ret; 1271 uint8_t gpu_id; 1272 1273 if (pci_dev == NULL) { 1274 rte_errno = ENODEV; 1275 return -rte_errno; 1276 } 1277 1278 dev = rte_gpu_get_by_name(pci_dev->device.name); 1279 if (dev == NULL) { 1280 rte_cuda_log(ERR, "Couldn't find HW dev \"%s\" to uninitialise it", 1281 pci_dev->device.name); 1282 rte_errno = ENODEV; 1283 return -rte_errno; 1284 } 1285 gpu_id = dev->mpshared->info.dev_id; 1286 1287 /* release dev from library */ 1288 ret = rte_gpu_release(dev); 1289 if (ret) 1290 rte_cuda_log(ERR, "Device %i failed to uninit: %i", gpu_id, ret); 1291 1292 rte_cuda_debug("Destroyed dev = %u", gpu_id); 1293 1294 return 0; 1295 } 1296 1297 static struct rte_pci_driver rte_cuda_driver = { 1298 .id_table = pci_id_cuda_map, 1299 .drv_flags = RTE_PCI_DRV_WC_ACTIVATE, 1300 .probe = cuda_gpu_probe, 1301 .remove = cuda_gpu_remove, 1302 }; 1303 1304 RTE_PMD_REGISTER_PCI(gpu_cuda, rte_cuda_driver); 1305 RTE_PMD_REGISTER_PCI_TABLE(gpu_cuda, pci_id_cuda_map); 1306 RTE_PMD_REGISTER_KMOD_DEP(gpu_cuda, "* nvidia & (nv_peer_mem | nvpeer_mem)"); 1307