1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (c) 2021 NVIDIA Corporation & Affiliates 3 */ 4 5 #include <dlfcn.h> 6 7 #include <rte_malloc.h> 8 #include <rte_pci.h> 9 #include <rte_bus_pci.h> 10 #include <rte_byteorder.h> 11 #include <rte_dev.h> 12 13 #include <gpudev_driver.h> 14 15 #include <cuda.h> 16 #include <cudaTypedefs.h> 17 18 #include "common.h" 19 #include "devices.h" 20 21 #define CUDA_DRIVER_MIN_VERSION 11040 22 #define CUDA_API_MIN_VERSION 3020 23 24 /* CUDA Driver functions loaded with dlsym() */ 25 static CUresult CUDAAPI (*sym_cuInit)(unsigned int flags); 26 static CUresult CUDAAPI (*sym_cuDriverGetVersion)(int *driverVersion); 27 static CUresult CUDAAPI (*sym_cuGetProcAddress)(const char *symbol, 28 void **pfn, int cudaVersion, uint64_t flags); 29 30 /* CUDA Driver functions loaded with cuGetProcAddress for versioning */ 31 static PFN_cuGetErrorString pfn_cuGetErrorString; 32 static PFN_cuGetErrorName pfn_cuGetErrorName; 33 static PFN_cuPointerSetAttribute pfn_cuPointerSetAttribute; 34 static PFN_cuDeviceGetAttribute pfn_cuDeviceGetAttribute; 35 static PFN_cuDeviceGetByPCIBusId pfn_cuDeviceGetByPCIBusId; 36 static PFN_cuDevicePrimaryCtxRetain pfn_cuDevicePrimaryCtxRetain; 37 static PFN_cuDevicePrimaryCtxRelease pfn_cuDevicePrimaryCtxRelease; 38 static PFN_cuDeviceTotalMem pfn_cuDeviceTotalMem; 39 static PFN_cuDeviceGetName pfn_cuDeviceGetName; 40 static PFN_cuCtxGetApiVersion pfn_cuCtxGetApiVersion; 41 static PFN_cuCtxSetCurrent pfn_cuCtxSetCurrent; 42 static PFN_cuCtxGetCurrent pfn_cuCtxGetCurrent; 43 static PFN_cuCtxGetDevice pfn_cuCtxGetDevice; 44 static PFN_cuCtxGetExecAffinity pfn_cuCtxGetExecAffinity; 45 static PFN_cuMemAlloc pfn_cuMemAlloc; 46 static PFN_cuMemFree pfn_cuMemFree; 47 static PFN_cuMemHostRegister pfn_cuMemHostRegister; 48 static PFN_cuMemHostUnregister pfn_cuMemHostUnregister; 49 static PFN_cuMemHostGetDevicePointer pfn_cuMemHostGetDevicePointer; 50 static PFN_cuFlushGPUDirectRDMAWrites 
pfn_cuFlushGPUDirectRDMAWrites; 51 52 static void *cudalib; 53 static unsigned int cuda_api_version; 54 static int cuda_driver_version; 55 static gdr_t gdrc_h; 56 57 #define CUDA_MAX_ALLOCATION_NUM 512 58 59 #define GPU_PAGE_SHIFT 16 60 #define GPU_PAGE_SIZE (1UL << GPU_PAGE_SHIFT) 61 62 RTE_LOG_REGISTER_DEFAULT(cuda_logtype, NOTICE); 63 64 /* NVIDIA GPU address map */ 65 static const struct rte_pci_id pci_id_cuda_map[] = { 66 { 67 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 68 NVIDIA_GPU_A40_DEVICE_ID) 69 }, 70 { 71 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 72 NVIDIA_GPU_A30_24GB_DEVICE_ID) 73 }, 74 { 75 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 76 NVIDIA_GPU_A30X_24GB_DPU_DEVICE_ID) 77 }, 78 { 79 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 80 NVIDIA_GPU_A10_24GB_DEVICE_ID) 81 }, 82 { 83 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 84 NVIDIA_GPU_A10G_DEVICE_ID) 85 }, 86 { 87 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 88 NVIDIA_GPU_A10M_DEVICE_ID) 89 }, 90 { 91 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 92 NVIDIA_GPU_A100_40GB_SXM4_DEVICE_ID) 93 }, 94 { 95 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 96 NVIDIA_GPU_A100_40GB_PCIE_DEVICE_ID) 97 }, 98 { 99 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 100 NVIDIA_GPU_A100_80GB_SXM4_DEVICE_ID) 101 }, 102 { 103 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 104 NVIDIA_GPU_A100_80GB_PCIE_DEVICE_ID) 105 }, 106 { 107 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 108 NVIDIA_GPU_A100X_80GB_DPU_DEVICE_ID) 109 }, 110 { 111 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 112 NVIDIA_GPU_GA100_PG506_207) 113 }, 114 { 115 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 116 NVIDIA_GPU_GA100_PCIE) 117 }, 118 { 119 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 120 NVIDIA_GPU_GA100_PG506_217) 121 }, 122 { 123 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 124 NVIDIA_GPU_V100_16GB_SXM2_DEVICE_ID) 125 }, 126 { 127 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 128 NVIDIA_GPU_V100_16GB_DGXS_DEVICE_ID) 129 }, 130 { 131 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 132 NVIDIA_GPU_V100_16GB_FHHL_DEVICE_ID) 133 }, 134 { 135 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 136 
NVIDIA_GPU_V100_16GB_PCIE_DEVICE_ID) 137 }, 138 { 139 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 140 NVIDIA_GPU_V100_32GB_SXM2_DEVICE_ID) 141 }, 142 { 143 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 144 NVIDIA_GPU_V100_32GB_PCIE_DEVICE_ID) 145 }, 146 { 147 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 148 NVIDIA_GPU_V100_32GB_DGXS_DEVICE_ID) 149 }, 150 { 151 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 152 NVIDIA_GPU_V100_32GB_SXM3_DEVICE_ID) 153 }, 154 { 155 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 156 NVIDIA_GPU_V100_32GB_SXM3_H_DEVICE_ID) 157 }, 158 { 159 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 160 NVIDIA_GPU_V100_SXM2) 161 }, 162 { 163 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 164 NVIDIA_GPU_V100S_PCIE) 165 }, 166 { 167 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 168 NVIDIA_GPU_TITAN_V_CEO_ED) 169 }, 170 { 171 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 172 NVIDIA_GPU_GV100GL_PG500_216) 173 }, 174 { 175 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 176 NVIDIA_GPU_GV100GL_PG503_216) 177 }, 178 { 179 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 180 NVIDIA_GPU_TU102_TITAN_RTX) 181 }, 182 { 183 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 184 NVIDIA_GPU_TU102GL_QUADRO_RTX) 185 }, 186 { 187 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 188 NVIDIA_GPU_GV100_QUADRO_DEVICE_ID) 189 }, 190 { 191 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 192 NVIDIA_GPU_QUADRO_RTX_4000) 193 }, 194 { 195 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 196 NVIDIA_GPU_QUADRO_RTX_5000) 197 }, 198 { 199 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 200 NVIDIA_GPU_QUADRO_RTX_6000) 201 }, 202 { 203 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 204 NVIDIA_GPU_QUADRO_RTX_8000) 205 }, 206 { 207 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 208 NVIDIA_GPU_QUADRO_RTX_A4000) 209 }, 210 { 211 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 212 NVIDIA_GPU_QUADRO_RTX_A6000) 213 }, 214 { 215 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 216 NVIDIA_GPU_QUADRO_RTX_A5000) 217 }, 218 { 219 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 220 NVIDIA_GPU_QUADRO_RTX_A4500) 221 }, 222 { 223 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 224 NVIDIA_GPU_QUADRO_RTX_A5500) 225 }, 226 { 227 
RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 228 NVIDIA_GPU_QUADRO_RTX_A2000) 229 }, 230 { 231 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 232 NVIDIA_GPU_QUADRO_RTX_A2000_12GB) 233 }, 234 { 235 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 236 NVIDIA_GPU_T4G) 237 }, 238 { 239 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 240 NVIDIA_GPU_T4) 241 }, 242 { 243 .device_id = 0 244 } 245 }; 246 247 /* Device private info */ 248 struct cuda_info { 249 char gpu_name[RTE_DEV_NAME_MAX_LEN]; 250 CUdevice cu_dev; 251 int gdr_supported; 252 int gdr_write_ordering; 253 int gdr_flush_type; 254 }; 255 256 /* Type of memory allocated by CUDA driver */ 257 enum mem_type { 258 GPU_MEM = 0, 259 CPU_REGISTERED, 260 GPU_REGISTERED /* Not used yet */ 261 }; 262 263 /* key associated to a memory address */ 264 typedef uintptr_t cuda_ptr_key; 265 266 /* Single entry of the memory list */ 267 struct mem_entry { 268 CUdeviceptr ptr_d; 269 CUdeviceptr ptr_orig_d; 270 void *ptr_h; 271 size_t size; 272 size_t size_orig; 273 struct rte_gpu *dev; 274 CUcontext ctx; 275 cuda_ptr_key pkey; 276 enum mem_type mtype; 277 gdr_mh_t mh; 278 struct mem_entry *prev; 279 struct mem_entry *next; 280 }; 281 282 static struct mem_entry *mem_alloc_list_head; 283 static struct mem_entry *mem_alloc_list_tail; 284 static uint32_t mem_alloc_list_last_elem; 285 286 /* Load the CUDA symbols */ 287 288 static int 289 cuda_loader(void) 290 { 291 char cuda_path[1024]; 292 293 if (getenv("CUDA_PATH_L") == NULL) 294 snprintf(cuda_path, 1024, "%s", "libcuda.so"); 295 else 296 snprintf(cuda_path, 1024, "%s/%s", getenv("CUDA_PATH_L"), "libcuda.so"); 297 298 cudalib = dlopen(cuda_path, RTLD_LAZY); 299 if (cudalib == NULL) { 300 rte_cuda_log(ERR, "Failed to find CUDA library in %s (CUDA_PATH_L=%s)", 301 cuda_path, getenv("CUDA_PATH_L")); 302 return -1; 303 } 304 305 return 0; 306 } 307 308 static int 309 cuda_sym_func_loader(void) 310 { 311 if (cudalib == NULL) 312 return -1; 313 314 sym_cuInit = dlsym(cudalib, "cuInit"); 315 if (sym_cuInit == NULL) { 316 
rte_cuda_log(ERR, "Failed to load CUDA missing symbol cuInit"); 317 return -1; 318 } 319 320 sym_cuDriverGetVersion = dlsym(cudalib, "cuDriverGetVersion"); 321 if (sym_cuDriverGetVersion == NULL) { 322 rte_cuda_log(ERR, "Failed to load CUDA missing symbol cuDriverGetVersion"); 323 return -1; 324 } 325 326 sym_cuGetProcAddress = dlsym(cudalib, "cuGetProcAddress"); 327 if (sym_cuGetProcAddress == NULL) { 328 rte_cuda_log(ERR, "Failed to load CUDA missing symbol cuGetProcAddress"); 329 return -1; 330 } 331 332 return 0; 333 } 334 335 static int 336 cuda_pfn_func_loader(void) 337 { 338 CUresult res; 339 340 res = sym_cuGetProcAddress("cuGetErrorString", 341 (void **) (&pfn_cuGetErrorString), cuda_driver_version, 0); 342 if (res != 0) { 343 rte_cuda_log(ERR, "Retrieve pfn_cuGetErrorString failed with %d", res); 344 return -1; 345 } 346 347 res = sym_cuGetProcAddress("cuGetErrorName", 348 (void **)(&pfn_cuGetErrorName), cuda_driver_version, 0); 349 if (res != 0) { 350 rte_cuda_log(ERR, "Retrieve pfn_cuGetErrorName failed with %d", res); 351 return -1; 352 } 353 354 res = sym_cuGetProcAddress("cuPointerSetAttribute", 355 (void **)(&pfn_cuPointerSetAttribute), cuda_driver_version, 0); 356 if (res != 0) { 357 rte_cuda_log(ERR, "Retrieve pfn_cuPointerSetAttribute failed with %d", res); 358 return -1; 359 } 360 361 res = sym_cuGetProcAddress("cuDeviceGetAttribute", 362 (void **)(&pfn_cuDeviceGetAttribute), cuda_driver_version, 0); 363 if (res != 0) { 364 rte_cuda_log(ERR, "Retrieve pfn_cuDeviceGetAttribute failed with %d", res); 365 return -1; 366 } 367 368 res = sym_cuGetProcAddress("cuDeviceGetByPCIBusId", 369 (void **)(&pfn_cuDeviceGetByPCIBusId), cuda_driver_version, 0); 370 if (res != 0) { 371 rte_cuda_log(ERR, "Retrieve pfn_cuDeviceGetByPCIBusId failed with %d", res); 372 return -1; 373 } 374 375 res = sym_cuGetProcAddress("cuDeviceGetName", 376 (void **)(&pfn_cuDeviceGetName), cuda_driver_version, 0); 377 if (res != 0) { 378 rte_cuda_log(ERR, "Retrieve 
pfn_cuDeviceGetName failed with %d", res); 379 return -1; 380 } 381 382 res = sym_cuGetProcAddress("cuDevicePrimaryCtxRetain", 383 (void **)(&pfn_cuDevicePrimaryCtxRetain), cuda_driver_version, 0); 384 if (res != 0) { 385 rte_cuda_log(ERR, "Retrieve pfn_cuDevicePrimaryCtxRetain failed with %d", res); 386 return -1; 387 } 388 389 res = sym_cuGetProcAddress("cuDevicePrimaryCtxRelease", 390 (void **)(&pfn_cuDevicePrimaryCtxRelease), cuda_driver_version, 0); 391 if (res != 0) { 392 rte_cuda_log(ERR, "Retrieve pfn_cuDevicePrimaryCtxRelease failed with %d", res); 393 return -1; 394 } 395 396 res = sym_cuGetProcAddress("cuDeviceTotalMem", 397 (void **)(&pfn_cuDeviceTotalMem), cuda_driver_version, 0); 398 if (res != 0) { 399 rte_cuda_log(ERR, "Retrieve pfn_cuDeviceTotalMem failed with %d", res); 400 return -1; 401 } 402 403 res = sym_cuGetProcAddress("cuCtxGetApiVersion", 404 (void **)(&pfn_cuCtxGetApiVersion), cuda_driver_version, 0); 405 if (res != 0) { 406 rte_cuda_log(ERR, "Retrieve pfn_cuCtxGetApiVersion failed with %d", res); 407 return -1; 408 } 409 410 res = sym_cuGetProcAddress("cuCtxGetDevice", 411 (void **)(&pfn_cuCtxGetDevice), cuda_driver_version, 0); 412 if (res != 0) { 413 rte_cuda_log(ERR, "Retrieve pfn_cuCtxGetDevice failed with %d", res); 414 return -1; 415 } 416 417 res = sym_cuGetProcAddress("cuCtxSetCurrent", 418 (void **)(&pfn_cuCtxSetCurrent), cuda_driver_version, 0); 419 if (res != 0) { 420 rte_cuda_log(ERR, "Retrieve pfn_cuCtxSetCurrent failed with %d", res); 421 return -1; 422 } 423 424 res = sym_cuGetProcAddress("cuCtxGetCurrent", 425 (void **)(&pfn_cuCtxGetCurrent), cuda_driver_version, 0); 426 if (res != 0) { 427 rte_cuda_log(ERR, "Retrieve pfn_cuCtxGetCurrent failed with %d", res); 428 return -1; 429 } 430 431 res = sym_cuGetProcAddress("cuCtxGetExecAffinity", 432 (void **)(&pfn_cuCtxGetExecAffinity), cuda_driver_version, 0); 433 if (res != 0) { 434 rte_cuda_log(ERR, "Retrieve pfn_cuCtxGetExecAffinity failed with %d", res); 435 return -1; 436 
} 437 438 res = sym_cuGetProcAddress("cuMemAlloc", 439 (void **)(&pfn_cuMemAlloc), cuda_driver_version, 0); 440 if (res != 0) { 441 rte_cuda_log(ERR, "Retrieve pfn_cuMemAlloc failed with %d", res); 442 return -1; 443 } 444 445 res = sym_cuGetProcAddress("cuMemFree", 446 (void **)(&pfn_cuMemFree), cuda_driver_version, 0); 447 if (res != 0) { 448 rte_cuda_log(ERR, "Retrieve pfn_cuMemFree failed with %d", res); 449 return -1; 450 } 451 452 res = sym_cuGetProcAddress("cuMemHostRegister", 453 (void **)(&pfn_cuMemHostRegister), cuda_driver_version, 0); 454 if (res != 0) { 455 rte_cuda_log(ERR, "Retrieve pfn_cuMemHostRegister failed with %d", res); 456 return -1; 457 } 458 459 res = sym_cuGetProcAddress("cuMemHostUnregister", 460 (void **)(&pfn_cuMemHostUnregister), cuda_driver_version, 0); 461 if (res != 0) { 462 rte_cuda_log(ERR, "Retrieve pfn_cuMemHostUnregister failed with %d", res); 463 return -1; 464 } 465 466 res = sym_cuGetProcAddress("cuMemHostGetDevicePointer", 467 (void **)(&pfn_cuMemHostGetDevicePointer), cuda_driver_version, 0); 468 if (res != 0) { 469 rte_cuda_log(ERR, "Retrieve pfn_cuMemHostGetDevicePointer failed with %d", res); 470 return -1; 471 } 472 473 res = sym_cuGetProcAddress("cuFlushGPUDirectRDMAWrites", 474 (void **)(&pfn_cuFlushGPUDirectRDMAWrites), cuda_driver_version, 0); 475 if (res != 0) { 476 rte_cuda_log(ERR, "Retrieve cuFlushGPUDirectRDMAWrites failed with %d", res); 477 return -1; 478 } 479 480 return 0; 481 } 482 483 /* Generate a key from a memory pointer */ 484 static cuda_ptr_key 485 get_hash_from_ptr(void *ptr) 486 { 487 return (uintptr_t)ptr; 488 } 489 490 static uint32_t 491 mem_list_count_item(void) 492 { 493 return mem_alloc_list_last_elem; 494 } 495 496 /* Initiate list of memory allocations if not done yet */ 497 static struct mem_entry * 498 mem_list_add_item(void) 499 { 500 /* Initiate list of memory allocations if not done yet */ 501 if (mem_alloc_list_head == NULL) { 502 mem_alloc_list_head = rte_zmalloc(NULL, 503 
sizeof(struct mem_entry), 504 RTE_CACHE_LINE_SIZE); 505 if (mem_alloc_list_head == NULL) { 506 rte_cuda_log(ERR, "Failed to allocate memory for memory list"); 507 return NULL; 508 } 509 510 mem_alloc_list_head->next = NULL; 511 mem_alloc_list_head->prev = NULL; 512 mem_alloc_list_tail = mem_alloc_list_head; 513 } else { 514 struct mem_entry *mem_alloc_list_cur = rte_zmalloc(NULL, 515 sizeof(struct mem_entry), 516 RTE_CACHE_LINE_SIZE); 517 518 if (mem_alloc_list_cur == NULL) { 519 rte_cuda_log(ERR, "Failed to allocate memory for memory list"); 520 return NULL; 521 } 522 523 mem_alloc_list_tail->next = mem_alloc_list_cur; 524 mem_alloc_list_cur->prev = mem_alloc_list_tail; 525 mem_alloc_list_tail = mem_alloc_list_tail->next; 526 mem_alloc_list_tail->next = NULL; 527 } 528 529 mem_alloc_list_last_elem++; 530 531 return mem_alloc_list_tail; 532 } 533 534 static struct mem_entry * 535 mem_list_find_item(cuda_ptr_key pk) 536 { 537 struct mem_entry *mem_alloc_list_cur = NULL; 538 539 if (mem_alloc_list_head == NULL) { 540 rte_cuda_log(ERR, "Memory list doesn't exist"); 541 return NULL; 542 } 543 544 if (mem_list_count_item() == 0) { 545 rte_cuda_log(ERR, "No items in memory list"); 546 return NULL; 547 } 548 549 mem_alloc_list_cur = mem_alloc_list_head; 550 551 while (mem_alloc_list_cur != NULL) { 552 if (mem_alloc_list_cur->pkey == pk) 553 return mem_alloc_list_cur; 554 mem_alloc_list_cur = mem_alloc_list_cur->next; 555 } 556 557 return mem_alloc_list_cur; 558 } 559 560 static int 561 mem_list_del_item(cuda_ptr_key pk) 562 { 563 struct mem_entry *mem_alloc_list_cur = NULL; 564 565 mem_alloc_list_cur = mem_list_find_item(pk); 566 if (mem_alloc_list_cur == NULL) 567 return -EINVAL; 568 569 /* if key is in head */ 570 if (mem_alloc_list_cur->prev == NULL) { 571 mem_alloc_list_head = mem_alloc_list_cur->next; 572 if (mem_alloc_list_head != NULL) 573 mem_alloc_list_head->prev = NULL; 574 } else { 575 mem_alloc_list_cur->prev->next = mem_alloc_list_cur->next; 576 if 
(mem_alloc_list_cur->next != NULL) 577 mem_alloc_list_cur->next->prev = mem_alloc_list_cur->prev; 578 } 579 580 rte_free(mem_alloc_list_cur); 581 582 mem_alloc_list_last_elem--; 583 584 return 0; 585 } 586 587 static int 588 cuda_dev_info_get(struct rte_gpu *dev, struct rte_gpu_info *info) 589 { 590 int ret = 0; 591 CUresult res; 592 struct rte_gpu_info parent_info; 593 CUexecAffinityParam affinityPrm; 594 const char *err_string; 595 struct cuda_info *private; 596 CUcontext current_ctx; 597 CUcontext input_ctx; 598 599 if (dev == NULL) { 600 rte_errno = ENODEV; 601 return -rte_errno; 602 } 603 604 /* Child initialization time probably called by rte_gpu_add_child() */ 605 if (dev->mpshared->info.parent != RTE_GPU_ID_NONE && 606 dev->mpshared->dev_private == NULL) { 607 /* Store current ctx */ 608 res = pfn_cuCtxGetCurrent(¤t_ctx); 609 if (res != 0) { 610 pfn_cuGetErrorString(res, &(err_string)); 611 rte_cuda_log(ERR, "cuCtxGetCurrent failed with %s", 612 err_string); 613 rte_errno = EPERM; 614 return -rte_errno; 615 } 616 617 /* Set child ctx as current ctx */ 618 input_ctx = (CUcontext)((uintptr_t)dev->mpshared->info.context); 619 res = pfn_cuCtxSetCurrent(input_ctx); 620 if (res != 0) { 621 pfn_cuGetErrorString(res, &(err_string)); 622 rte_cuda_log(ERR, "cuCtxSetCurrent input failed with %s", 623 err_string); 624 rte_errno = EPERM; 625 return -rte_errno; 626 } 627 628 /* 629 * Ctx capacity info 630 */ 631 632 /* MPS compatible */ 633 res = pfn_cuCtxGetExecAffinity(&affinityPrm, 634 CU_EXEC_AFFINITY_TYPE_SM_COUNT); 635 if (res != 0) { 636 pfn_cuGetErrorString(res, &(err_string)); 637 rte_cuda_log(ERR, "cuCtxGetExecAffinity failed with %s", 638 err_string); 639 } 640 dev->mpshared->info.processor_count = 641 (uint32_t)affinityPrm.param.smCount.val; 642 643 ret = rte_gpu_info_get(dev->mpshared->info.parent, &parent_info); 644 if (ret) { 645 rte_errno = ENODEV; 646 return -rte_errno; 647 } 648 dev->mpshared->info.total_memory = parent_info.total_memory; 649 650 
dev->mpshared->info.page_size = parent_info.page_size; 651 652 /* 653 * GPU Device private info 654 */ 655 dev->mpshared->dev_private = rte_zmalloc(NULL, 656 sizeof(struct cuda_info), 657 RTE_CACHE_LINE_SIZE); 658 if (dev->mpshared->dev_private == NULL) { 659 rte_cuda_log(ERR, "Failed to allocate memory for GPU process private"); 660 rte_errno = EPERM; 661 return -rte_errno; 662 } 663 664 private = (struct cuda_info *)dev->mpshared->dev_private; 665 666 res = pfn_cuCtxGetDevice(&(private->cu_dev)); 667 if (res != 0) { 668 pfn_cuGetErrorString(res, &(err_string)); 669 rte_cuda_log(ERR, "cuCtxGetDevice failed with %s", 670 err_string); 671 rte_errno = EPERM; 672 return -rte_errno; 673 } 674 675 res = pfn_cuDeviceGetName(private->gpu_name, 676 RTE_DEV_NAME_MAX_LEN, private->cu_dev); 677 if (res != 0) { 678 pfn_cuGetErrorString(res, &(err_string)); 679 rte_cuda_log(ERR, "cuDeviceGetName failed with %s", 680 err_string); 681 rte_errno = EPERM; 682 return -rte_errno; 683 } 684 685 /* Restore original ctx as current ctx */ 686 res = pfn_cuCtxSetCurrent(current_ctx); 687 if (res != 0) { 688 pfn_cuGetErrorString(res, &(err_string)); 689 rte_cuda_log(ERR, "cuCtxSetCurrent current failed with %s", 690 err_string); 691 rte_errno = EPERM; 692 return -rte_errno; 693 } 694 } 695 696 *info = dev->mpshared->info; 697 698 return 0; 699 } 700 701 /* 702 * GPU Memory 703 */ 704 705 static int 706 cuda_mem_alloc(struct rte_gpu *dev, size_t size, unsigned int align, void **ptr) 707 { 708 CUresult res; 709 const char *err_string; 710 CUcontext current_ctx; 711 CUcontext input_ctx; 712 unsigned int flag = 1; 713 714 if (dev == NULL) 715 return -ENODEV; 716 717 /* Store current ctx */ 718 res = pfn_cuCtxGetCurrent(¤t_ctx); 719 if (res != 0) { 720 pfn_cuGetErrorString(res, &(err_string)); 721 rte_cuda_log(ERR, "cuCtxGetCurrent failed with %s", 722 err_string); 723 rte_errno = EPERM; 724 return -rte_errno; 725 } 726 727 /* Set child ctx as current ctx */ 728 input_ctx = 
(CUcontext)((uintptr_t)dev->mpshared->info.context); 729 res = pfn_cuCtxSetCurrent(input_ctx); 730 if (res != 0) { 731 pfn_cuGetErrorString(res, &(err_string)); 732 rte_cuda_log(ERR, "cuCtxSetCurrent input failed with %s", 733 err_string); 734 rte_errno = EPERM; 735 return -rte_errno; 736 } 737 738 /* Get next memory list item */ 739 mem_alloc_list_tail = mem_list_add_item(); 740 if (mem_alloc_list_tail == NULL) { 741 rte_errno = EPERM; 742 return -rte_errno; 743 } 744 745 /* Allocate memory */ 746 mem_alloc_list_tail->size = size; 747 mem_alloc_list_tail->size_orig = size + align; 748 749 res = pfn_cuMemAlloc(&(mem_alloc_list_tail->ptr_orig_d), 750 mem_alloc_list_tail->size_orig); 751 if (res != 0) { 752 pfn_cuGetErrorString(res, &(err_string)); 753 rte_cuda_log(ERR, "cuCtxSetCurrent current failed with %s", 754 err_string); 755 rte_errno = EPERM; 756 return -rte_errno; 757 } 758 759 /* Align memory address */ 760 mem_alloc_list_tail->ptr_d = mem_alloc_list_tail->ptr_orig_d; 761 if (align && ((uintptr_t)mem_alloc_list_tail->ptr_d) % align) 762 mem_alloc_list_tail->ptr_d += (align - 763 (((uintptr_t)mem_alloc_list_tail->ptr_d) % align)); 764 765 /* GPUDirect RDMA attribute required */ 766 res = pfn_cuPointerSetAttribute(&flag, 767 CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, 768 mem_alloc_list_tail->ptr_d); 769 if (res != 0) { 770 rte_cuda_log(ERR, "Could not set SYNC MEMOP attribute for " 771 "GPU memory at %"PRIu32", err %d", 772 (uint32_t)mem_alloc_list_tail->ptr_d, res); 773 rte_errno = EPERM; 774 return -rte_errno; 775 } 776 777 mem_alloc_list_tail->pkey = get_hash_from_ptr((void *)mem_alloc_list_tail->ptr_d); 778 mem_alloc_list_tail->ptr_h = NULL; 779 mem_alloc_list_tail->dev = dev; 780 mem_alloc_list_tail->ctx = (CUcontext)((uintptr_t)dev->mpshared->info.context); 781 mem_alloc_list_tail->mtype = GPU_MEM; 782 783 /* Restore original ctx as current ctx */ 784 res = pfn_cuCtxSetCurrent(current_ctx); 785 if (res != 0) { 786 pfn_cuGetErrorString(res, &(err_string)); 787 
rte_cuda_log(ERR, "cuCtxSetCurrent current failed with %s", 788 err_string); 789 rte_errno = EPERM; 790 return -rte_errno; 791 } 792 793 *ptr = (void *)mem_alloc_list_tail->ptr_d; 794 795 return 0; 796 } 797 798 static int 799 cuda_mem_register(struct rte_gpu *dev, size_t size, void *ptr) 800 { 801 CUresult res; 802 const char *err_string; 803 CUcontext current_ctx; 804 CUcontext input_ctx; 805 unsigned int flag = 1; 806 int use_ptr_h = 0; 807 808 if (dev == NULL) 809 return -ENODEV; 810 811 /* Store current ctx */ 812 res = pfn_cuCtxGetCurrent(¤t_ctx); 813 if (res != 0) { 814 pfn_cuGetErrorString(res, &(err_string)); 815 rte_cuda_log(ERR, "cuCtxGetCurrent failed with %s", 816 err_string); 817 rte_errno = EPERM; 818 return -rte_errno; 819 } 820 821 /* Set child ctx as current ctx */ 822 input_ctx = (CUcontext)((uintptr_t)dev->mpshared->info.context); 823 res = pfn_cuCtxSetCurrent(input_ctx); 824 if (res != 0) { 825 pfn_cuGetErrorString(res, &(err_string)); 826 rte_cuda_log(ERR, "cuCtxSetCurrent input failed with %s", 827 err_string); 828 rte_errno = EPERM; 829 return -rte_errno; 830 } 831 832 /* Get next memory list item */ 833 mem_alloc_list_tail = mem_list_add_item(); 834 if (mem_alloc_list_tail == NULL) { 835 rte_errno = EPERM; 836 return -rte_errno; 837 } 838 839 /* Allocate memory */ 840 mem_alloc_list_tail->size = size; 841 mem_alloc_list_tail->ptr_h = ptr; 842 843 res = pfn_cuMemHostRegister(mem_alloc_list_tail->ptr_h, 844 mem_alloc_list_tail->size, 845 CU_MEMHOSTREGISTER_PORTABLE | 846 CU_MEMHOSTREGISTER_DEVICEMAP); 847 if (res != 0) { 848 pfn_cuGetErrorString(res, &(err_string)); 849 rte_cuda_log(ERR, "cuMemHostRegister failed with %s ptr %p size %zd", 850 err_string, 851 mem_alloc_list_tail->ptr_h, 852 mem_alloc_list_tail->size); 853 rte_errno = EPERM; 854 return -rte_errno; 855 } 856 857 res = pfn_cuDeviceGetAttribute(&(use_ptr_h), 858 CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM, 859 ((struct cuda_info 
*)(dev->mpshared->dev_private))->cu_dev); 860 if (res != 0) { 861 pfn_cuGetErrorString(res, &(err_string)); 862 rte_cuda_log(ERR, "cuDeviceGetAttribute failed with %s", 863 err_string); 864 rte_errno = EPERM; 865 return -rte_errno; 866 } 867 868 if (use_ptr_h == 0) { 869 res = pfn_cuMemHostGetDevicePointer(&(mem_alloc_list_tail->ptr_d), 870 mem_alloc_list_tail->ptr_h, 0); 871 if (res != 0) { 872 pfn_cuGetErrorString(res, &(err_string)); 873 rte_cuda_log(ERR, "cuMemHostGetDevicePointer failed with %s", 874 err_string); 875 rte_errno = EPERM; 876 return -rte_errno; 877 } 878 879 if ((uintptr_t)mem_alloc_list_tail->ptr_d != 880 (uintptr_t)mem_alloc_list_tail->ptr_h) { 881 rte_cuda_log(ERR, "Host input pointer is different wrt GPU registered pointer"); 882 rte_errno = ENOTSUP; 883 return -rte_errno; 884 } 885 } else { 886 mem_alloc_list_tail->ptr_d = (CUdeviceptr)mem_alloc_list_tail->ptr_h; 887 } 888 889 /* GPUDirect RDMA attribute required */ 890 res = pfn_cuPointerSetAttribute(&flag, 891 CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, 892 mem_alloc_list_tail->ptr_d); 893 if (res != 0) { 894 rte_cuda_log(ERR, "Could not set SYNC MEMOP attribute for GPU memory at %"PRIu32 895 ", err %d", (uint32_t)mem_alloc_list_tail->ptr_d, res); 896 rte_errno = EPERM; 897 return -rte_errno; 898 } 899 900 mem_alloc_list_tail->pkey = get_hash_from_ptr((void *)mem_alloc_list_tail->ptr_h); 901 mem_alloc_list_tail->size = size; 902 mem_alloc_list_tail->dev = dev; 903 mem_alloc_list_tail->ctx = (CUcontext)((uintptr_t)dev->mpshared->info.context); 904 mem_alloc_list_tail->mtype = CPU_REGISTERED; 905 mem_alloc_list_tail->ptr_orig_d = mem_alloc_list_tail->ptr_d; 906 907 /* Restore original ctx as current ctx */ 908 res = pfn_cuCtxSetCurrent(current_ctx); 909 if (res != 0) { 910 pfn_cuGetErrorString(res, &(err_string)); 911 rte_cuda_log(ERR, "cuCtxSetCurrent current failed with %s", 912 err_string); 913 rte_errno = EPERM; 914 return -rte_errno; 915 } 916 917 return 0; 918 } 919 920 static int 921 
cuda_mem_cpu_map(struct rte_gpu *dev, __rte_unused size_t size, void *ptr_in, void **ptr_out) 922 { 923 struct mem_entry *mem_item; 924 cuda_ptr_key hk; 925 926 if (dev == NULL) 927 return -ENODEV; 928 929 hk = get_hash_from_ptr((void *)ptr_in); 930 931 mem_item = mem_list_find_item(hk); 932 if (mem_item == NULL) { 933 rte_cuda_log(ERR, "Memory address 0x%p not found in driver memory.", ptr_in); 934 rte_errno = EPERM; 935 return -rte_errno; 936 } 937 938 if (mem_item->mtype != GPU_MEM) { 939 rte_cuda_log(ERR, "Memory address 0x%p is not GPU memory type.", ptr_in); 940 rte_errno = EPERM; 941 return -rte_errno; 942 } 943 944 if (mem_item->size != size) 945 rte_cuda_log(WARNING, 946 "Can't expose memory area with size (%zd) different from original size (%zd).", 947 size, mem_item->size); 948 949 if (gdrcopy_pin(&gdrc_h, &(mem_item->mh), (uint64_t)mem_item->ptr_d, 950 mem_item->size, &(mem_item->ptr_h))) { 951 rte_cuda_log(ERR, "Error exposing GPU memory address 0x%p.", ptr_in); 952 rte_errno = EPERM; 953 return -rte_errno; 954 } 955 956 *ptr_out = mem_item->ptr_h; 957 958 return 0; 959 } 960 961 static int 962 cuda_mem_free(struct rte_gpu *dev, void *ptr) 963 { 964 CUresult res; 965 struct mem_entry *mem_item; 966 const char *err_string; 967 cuda_ptr_key hk; 968 969 if (dev == NULL) 970 return -ENODEV; 971 972 hk = get_hash_from_ptr((void *)ptr); 973 974 mem_item = mem_list_find_item(hk); 975 if (mem_item == NULL) { 976 rte_cuda_log(ERR, "Memory address 0x%p not found in driver memory", ptr); 977 rte_errno = EPERM; 978 return -rte_errno; 979 } 980 981 if (mem_item->mtype == GPU_MEM) { 982 res = pfn_cuMemFree(mem_item->ptr_orig_d); 983 if (res != 0) { 984 pfn_cuGetErrorString(res, &(err_string)); 985 rte_cuda_log(ERR, "cuMemFree current failed with %s", 986 err_string); 987 rte_errno = EPERM; 988 return -rte_errno; 989 } 990 991 return mem_list_del_item(hk); 992 } 993 994 rte_cuda_log(ERR, "Memory type %d not supported", mem_item->mtype); 995 996 return -EPERM; 997 } 
998 999 static int 1000 cuda_mem_unregister(struct rte_gpu *dev, void *ptr) 1001 { 1002 CUresult res; 1003 struct mem_entry *mem_item; 1004 const char *err_string; 1005 cuda_ptr_key hk; 1006 1007 if (dev == NULL) 1008 return -ENODEV; 1009 1010 hk = get_hash_from_ptr((void *)ptr); 1011 1012 mem_item = mem_list_find_item(hk); 1013 if (mem_item == NULL) { 1014 rte_cuda_log(ERR, "Memory address 0x%p not found in driver memory", ptr); 1015 rte_errno = EPERM; 1016 return -rte_errno; 1017 } 1018 1019 if (mem_item->mtype == CPU_REGISTERED) { 1020 res = pfn_cuMemHostUnregister(ptr); 1021 if (res != 0) { 1022 pfn_cuGetErrorString(res, &(err_string)); 1023 rte_cuda_log(ERR, "cuMemHostUnregister current failed with %s", 1024 err_string); 1025 rte_errno = EPERM; 1026 return -rte_errno; 1027 } 1028 1029 return mem_list_del_item(hk); 1030 } 1031 1032 rte_cuda_log(ERR, "Memory type %d not supported", mem_item->mtype); 1033 1034 rte_errno = EPERM; 1035 return -rte_errno; 1036 } 1037 1038 static int 1039 cuda_mem_cpu_unmap(struct rte_gpu *dev, void *ptr_in) 1040 { 1041 struct mem_entry *mem_item; 1042 cuda_ptr_key hk; 1043 1044 if (dev == NULL) 1045 return -ENODEV; 1046 1047 hk = get_hash_from_ptr((void *)ptr_in); 1048 1049 mem_item = mem_list_find_item(hk); 1050 if (mem_item == NULL) { 1051 rte_cuda_log(ERR, "Memory address 0x%p not found in driver memory.", ptr_in); 1052 rte_errno = EPERM; 1053 return -rte_errno; 1054 } 1055 1056 if (gdrcopy_unpin(gdrc_h, mem_item->mh, (void *)mem_item->ptr_d, 1057 mem_item->size)) { 1058 rte_cuda_log(ERR, "Error unexposing GPU memory address 0x%p.", ptr_in); 1059 rte_errno = EPERM; 1060 return -rte_errno; 1061 } 1062 1063 return 0; 1064 } 1065 1066 static int 1067 cuda_dev_close(struct rte_gpu *dev) 1068 { 1069 if (dev == NULL) 1070 return -EINVAL; 1071 1072 rte_free(dev->mpshared->dev_private); 1073 1074 return 0; 1075 } 1076 1077 static int 1078 cuda_wmb(struct rte_gpu *dev) 1079 { 1080 CUresult res; 1081 const char *err_string; 1082 CUcontext 
current_ctx;	/* caller's context, saved and restored around the flush */
	CUcontext input_ctx;	/* context published in the shared device info */
	struct cuda_info *private;	/* driver-private per-device data */

	/*
	 * NOTE(review): this is the tail of the write-barrier callback
	 * (wired up below as dev->ops.wmb); its header and the declarations
	 * of res/err_string/current_ctx are above this chunk.
	 */
	if (dev == NULL) {
		rte_errno = ENODEV;
		return -rte_errno;
	}

	private = (struct cuda_info *)dev->mpshared->dev_private;

	if (private->gdr_write_ordering != CU_GPU_DIRECT_RDMA_WRITES_ORDERING_NONE) {
		/*
		 * No need to explicitly force the write ordering because
		 * the device natively supports it
		 */
		return 0;
	}

	if (private->gdr_flush_type != CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_HOST) {
		/*
		 * Can't flush GDR writes with cuFlushGPUDirectRDMAWrites CUDA function.
		 * Application needs to use alternative methods.
		 */
		/*
		 * NOTE(review): the two adjacent literals below concatenate
		 * without a separating space in the emitted log message;
		 * kept byte-identical here (runtime string).
		 */
		rte_cuda_log(WARNING, "Can't flush GDR writes with cuFlushGPUDirectRDMAWrites CUDA function."
				"Application needs to use alternative methods.");

		rte_errno = ENOTSUP;
		return -rte_errno;
	}

	/* Store current ctx */
	res = pfn_cuCtxGetCurrent(&current_ctx);
	if (res != 0) {
		pfn_cuGetErrorString(res, &(err_string));
		rte_cuda_log(ERR, "cuCtxGetCurrent failed with %s",
				err_string);
		rte_errno = EPERM;
		return -rte_errno;
	}

	/* Set child ctx as current ctx */
	input_ctx = (CUcontext)((uintptr_t)dev->mpshared->info.context);
	res = pfn_cuCtxSetCurrent(input_ctx);
	if (res != 0) {
		pfn_cuGetErrorString(res, &(err_string));
		rte_cuda_log(ERR, "cuCtxSetCurrent input failed with %s",
				err_string);
		rte_errno = EPERM;
		return -rte_errno;
	}

	/* Flush all outstanding GPUDirect RDMA writes targeting this ctx */
	res = pfn_cuFlushGPUDirectRDMAWrites(CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TARGET_CURRENT_CTX,
			CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_ALL_DEVICES);
	if (res != 0) {
		pfn_cuGetErrorString(res, &(err_string));
		rte_cuda_log(ERR, "cuFlushGPUDirectRDMAWrites current failed with %s",
				err_string);
		rte_errno = EPERM;
		return -rte_errno;
	}

	/* Restore original ctx as current ctx */
	res = pfn_cuCtxSetCurrent(current_ctx);
	if (res != 0) {
		pfn_cuGetErrorString(res, &(err_string));
		rte_cuda_log(ERR, "cuCtxSetCurrent current failed with %s",
				err_string);
		rte_errno = EPERM;
		return -rte_errno;
	}

	return 0;
}

/*
 * Probe callback for the gpu_cuda PCI driver.
 *
 * Registers one rte_gpu device for the matched NVIDIA GPU PCI device.
 * For the first probed device (dev_id == 0) it also resets the memory
 * allocation list, dlopen()s the CUDA Driver library, resolves the
 * bootstrap symbols, runs cuInit(), enforces CUDA_DRIVER_MIN_VERSION
 * and resolves the versioned pfn_* entry points. It then retains the
 * device's primary context, validates CUDA_API_MIN_VERSION, fills the
 * shared device info (NUMA node, multiprocessor count, total memory,
 * page size), allocates the driver-private struct cuda_info, queries
 * GPUDirect RDMA capabilities and wires up the gpudev ops table.
 *
 * Returns 0 on success; on failure returns -rte_errno with rte_errno
 * set to ENODEV, ENOTSUP or EPERM depending on the failing step.
 *
 * NOTE(review): the error paths after rte_gpu_allocate() return without
 * releasing the allocated rte_gpu, and the paths after
 * cuDevicePrimaryCtxRetain() return without releasing the retained
 * primary context — both look like leaks on repeated failed probes;
 * verify against gpudev ownership rules.
 */
static int
cuda_gpu_probe(__rte_unused struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
{
	struct rte_gpu *dev = NULL;
	CUresult res;
	CUdevice cu_dev_id;
	CUcontext pctx;
	char dev_name[RTE_DEV_NAME_MAX_LEN];
	const char *err_string;
	int processor_count = 0;
	struct cuda_info *private;

	if (pci_dev == NULL) {
		rte_cuda_log(ERR, "NULL PCI device");
		rte_errno = ENODEV;
		return -rte_errno;
	}

	rte_pci_device_name(&pci_dev->addr, dev_name, sizeof(dev_name));

	/* Allocate memory to be used privately by drivers */
	dev = rte_gpu_allocate(pci_dev->device.name);
	if (dev == NULL) {
		rte_errno = ENODEV;
		return -rte_errno;
	}

	/* Initialize values only for the first CUDA driver call */
	if (dev->mpshared->info.dev_id == 0) {
		mem_alloc_list_head = NULL;
		mem_alloc_list_tail = NULL;
		mem_alloc_list_last_elem = 0;

		/* Load libcuda.so library */
		if (cuda_loader()) {
			rte_cuda_log(ERR, "CUDA Driver library not found");
			rte_errno = ENOTSUP;
			return -rte_errno;
		}

		/* Load initial CUDA functions */
		if (cuda_sym_func_loader()) {
			rte_cuda_log(ERR, "CUDA functions not found in library");
			rte_errno = ENOTSUP;
			return -rte_errno;
		}

		/*
		 * Required to initialize the CUDA Driver.
		 * Multiple calls of cuInit() will return immediately
		 * without making any relevant change
		 */
		sym_cuInit(0);

		res = sym_cuDriverGetVersion(&cuda_driver_version);
		if (res != 0) {
			rte_cuda_log(ERR, "cuDriverGetVersion failed with %d", res);
			rte_errno = ENOTSUP;
			return -rte_errno;
		}

		if (cuda_driver_version < CUDA_DRIVER_MIN_VERSION) {
			rte_cuda_log(ERR, "CUDA Driver version found is %d. "
					"Minimum requirement is %d",
					cuda_driver_version,
					CUDA_DRIVER_MIN_VERSION);
			rte_errno = ENOTSUP;
			return -rte_errno;
		}

		/* Resolve versioned pfn_* entry points via cuGetProcAddress */
		if (cuda_pfn_func_loader()) {
			rte_cuda_log(ERR, "CUDA PFN functions not found in library");
			rte_errno = ENOTSUP;
			return -rte_errno;
		}

		gdrc_h = NULL;
	}

	/* Fill HW specific part of device structure */
	dev->device = &pci_dev->device;
	dev->mpshared->info.numa_node = pci_dev->device.numa_node;

	/* Get NVIDIA GPU Device descriptor (lookup by PCI bus id string) */
	res = pfn_cuDeviceGetByPCIBusId(&cu_dev_id, dev->device->name);
	if (res != 0) {
		pfn_cuGetErrorString(res, &(err_string));
		rte_cuda_log(ERR, "cuDeviceGetByPCIBusId name %s failed with %d: %s",
				dev->device->name, res, err_string);
		rte_errno = EPERM;
		return -rte_errno;
	}

	/* Retain the primary ctx so it stays alive for the device lifetime */
	res = pfn_cuDevicePrimaryCtxRetain(&pctx, cu_dev_id);
	if (res != 0) {
		pfn_cuGetErrorString(res, &(err_string));
		rte_cuda_log(ERR, "cuDevicePrimaryCtxRetain name %s failed with %d: %s",
				dev->device->name, res, err_string);
		rte_errno = EPERM;
		return -rte_errno;
	}

	/*
	 * NOTE(review): cuda_api_version is declared unsigned int above;
	 * the %d in the two logs below is a formal format mismatch.
	 */
	res = pfn_cuCtxGetApiVersion(pctx, &cuda_api_version);
	if (res != 0) {
		rte_cuda_log(ERR, "cuCtxGetApiVersion failed with %d", res);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}

	if (cuda_api_version < CUDA_API_MIN_VERSION) {
		rte_cuda_log(ERR, "CUDA API version found is %d Minimum requirement is %d",
				cuda_api_version,
				CUDA_API_MIN_VERSION);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}

	/*
	 * NOTE(review): direct pointer-to-uint64_t cast; elsewhere in this
	 * chunk the reverse goes through (uintptr_t) — consider
	 * (uint64_t)(uintptr_t)pctx for symmetry and 32-bit cleanliness.
	 */
	dev->mpshared->info.context = (uint64_t)pctx;

	/*
	 * GPU Device generic info
	 */

	/* Processor count */
	res = pfn_cuDeviceGetAttribute(&(processor_count),
			CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
			cu_dev_id);
	if (res != 0) {
		pfn_cuGetErrorString(res, &(err_string));
		rte_cuda_log(ERR, "cuDeviceGetAttribute failed with %s",
				err_string);
		rte_errno = EPERM;
		return -rte_errno;
	}
	dev->mpshared->info.processor_count = (uint32_t)processor_count;

	/* Total memory */
	res = pfn_cuDeviceTotalMem(&dev->mpshared->info.total_memory, cu_dev_id);
	if (res != 0) {
		pfn_cuGetErrorString(res, &(err_string));
		rte_cuda_log(ERR, "cuDeviceTotalMem failed with %s",
				err_string);
		rte_errno = EPERM;
		return -rte_errno;
	}

	/* GPU mappings are aligned to the 64 KB GPU page (see GPU_PAGE_SHIFT) */
	dev->mpshared->info.page_size = (size_t)GPU_PAGE_SIZE;

	/*
	 * GPU Device private info
	 */
	dev->mpshared->dev_private = rte_zmalloc(NULL,
			sizeof(struct cuda_info),
			RTE_CACHE_LINE_SIZE);
	if (dev->mpshared->dev_private == NULL) {
		rte_cuda_log(ERR, "Failed to allocate memory for GPU process private");
		rte_errno = EPERM;
		return -rte_errno;
	}

	private = (struct cuda_info *)dev->mpshared->dev_private;
	private->cu_dev = cu_dev_id;
	res = pfn_cuDeviceGetName(private->gpu_name,
			RTE_DEV_NAME_MAX_LEN,
			cu_dev_id);
	if (res != 0) {
		pfn_cuGetErrorString(res, &(err_string));
		rte_cuda_log(ERR, "cuDeviceGetName failed with %s",
				err_string);
		rte_errno = EPERM;
		return -rte_errno;
	}

	/* Query GPUDirect RDMA support; missing support is only a warning */
	res = pfn_cuDeviceGetAttribute(&(private->gdr_supported),
			CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED,
			cu_dev_id);
	if (res != 0) {
		pfn_cuGetErrorString(res, &(err_string));
		rte_cuda_log(ERR, "cuDeviceGetAttribute failed with %s",
				err_string);
		rte_errno = EPERM;
		return -rte_errno;
	}

	if (private->gdr_supported == 0)
		rte_cuda_log(WARNING, "GPU %s doesn't support GPUDirect RDMA",
				pci_dev->device.name);

	res = pfn_cuDeviceGetAttribute(&(private->gdr_write_ordering),
			CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING,
			cu_dev_id);
	if (res != 0) {
		pfn_cuGetErrorString(res, &(err_string));
		rte_cuda_log(ERR,
				"cuDeviceGetAttribute failed with %s",
				err_string);
		rte_errno = EPERM;
		return -rte_errno;
	}

	/*
	 * Devices without native write ordering need an explicit flush;
	 * record which flush mechanism is available (used by the wmb op).
	 */
	if (private->gdr_write_ordering == CU_GPU_DIRECT_RDMA_WRITES_ORDERING_NONE) {
		res = pfn_cuDeviceGetAttribute(&(private->gdr_flush_type),
				CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS,
				cu_dev_id);
		if (res != 0) {
			pfn_cuGetErrorString(res, &(err_string));
			rte_cuda_log(ERR, "cuDeviceGetAttribute failed with %s",
					err_string);
			rte_errno = EPERM;
			return -rte_errno;
		}

		if (private->gdr_flush_type != CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_HOST)
			rte_cuda_log(ERR, "GPUDirect RDMA flush writes API is not supported");
	}

	/* Wire up the gpudev ops implemented by this driver */
	dev->ops.dev_info_get = cuda_dev_info_get;
	dev->ops.dev_close = cuda_dev_close;
	dev->ops.mem_alloc = cuda_mem_alloc;
	dev->ops.mem_free = cuda_mem_free;
	dev->ops.mem_register = cuda_mem_register;
	dev->ops.mem_unregister = cuda_mem_unregister;
	dev->ops.mem_cpu_map = cuda_mem_cpu_map;
	dev->ops.mem_cpu_unmap = cuda_mem_cpu_unmap;
	dev->ops.wmb = cuda_wmb;

	/* Publish the device to the gpudev library */
	rte_gpu_complete_new(dev);

	rte_cuda_debug("dev id = %u name = %s",
			dev->mpshared->info.dev_id, private->gpu_name);

	return 0;
}

/*
 * Remove callback for the gpu_cuda PCI driver.
 *
 * Looks up the rte_gpu registered in probe by PCI device name and
 * releases it back to the gpudev library. A failed lookup returns
 * -rte_errno with rte_errno = ENODEV; a failed rte_gpu_release() is
 * only logged and the function still returns 0.
 */
static int
cuda_gpu_remove(struct rte_pci_device *pci_dev)
{
	struct rte_gpu *dev;
	int ret;
	uint8_t gpu_id;

	if (pci_dev == NULL) {
		rte_errno = ENODEV;
		return -rte_errno;
	}

	dev = rte_gpu_get_by_name(pci_dev->device.name);
	if (dev == NULL) {
		rte_cuda_log(ERR, "Couldn't find HW dev \"%s\" to uninitialise it",
				pci_dev->device.name);
		rte_errno = ENODEV;
		return -rte_errno;
	}
	gpu_id = dev->mpshared->info.dev_id;

	/* release dev from library */
	ret = rte_gpu_release(dev);
	if (ret)
		rte_cuda_log(ERR, "Device %i failed to uninit: %i", gpu_id, ret);

	rte_cuda_debug("Destroyed dev = %u", gpu_id);

	return 0;
}

/* PCI driver descriptor: matches the NVIDIA GPU id table defined above */
static struct rte_pci_driver rte_cuda_driver = {
	.id_table = pci_id_cuda_map,
	.drv_flags = RTE_PCI_DRV_WC_ACTIVATE,	/* allow write-combined BAR mappings */
	.probe = cuda_gpu_probe,
	.remove = cuda_gpu_remove,
};

/* Register the PMD, its PCI id table and its kernel-module dependencies */
RTE_PMD_REGISTER_PCI(gpu_cuda, rte_cuda_driver);
RTE_PMD_REGISTER_PCI_TABLE(gpu_cuda, pci_id_cuda_map);
RTE_PMD_REGISTER_KMOD_DEP(gpu_cuda, "* nvidia & (nv_peer_mem | nvpeer_mem)");