1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (c) 2021 NVIDIA Corporation & Affiliates 3 */ 4 5 #include <dlfcn.h> 6 7 #include <rte_malloc.h> 8 #include <rte_pci.h> 9 #include <rte_bus_pci.h> 10 #include <rte_byteorder.h> 11 #include <rte_dev.h> 12 13 #include <gpudev_driver.h> 14 15 #include <cuda.h> 16 #include <cudaTypedefs.h> 17 18 #include "common.h" 19 #include "devices.h" 20 21 #define CUDA_DRIVER_MIN_VERSION 11040 22 #define CUDA_API_MIN_VERSION 3020 23 24 /* CUDA Driver functions loaded with dlsym() */ 25 static CUresult CUDAAPI (*sym_cuInit)(unsigned int flags); 26 static CUresult CUDAAPI (*sym_cuDriverGetVersion)(int *driverVersion); 27 static CUresult CUDAAPI (*sym_cuGetProcAddress)(const char *symbol, 28 void **pfn, int cudaVersion, uint64_t flags); 29 30 /* CUDA Driver functions loaded with cuGetProcAddress for versioning */ 31 static PFN_cuGetErrorString pfn_cuGetErrorString; 32 static PFN_cuGetErrorName pfn_cuGetErrorName; 33 static PFN_cuPointerSetAttribute pfn_cuPointerSetAttribute; 34 static PFN_cuDeviceGetAttribute pfn_cuDeviceGetAttribute; 35 static PFN_cuDeviceGetByPCIBusId pfn_cuDeviceGetByPCIBusId; 36 static PFN_cuDevicePrimaryCtxRetain pfn_cuDevicePrimaryCtxRetain; 37 static PFN_cuDevicePrimaryCtxRelease pfn_cuDevicePrimaryCtxRelease; 38 static PFN_cuDeviceTotalMem pfn_cuDeviceTotalMem; 39 static PFN_cuDeviceGetName pfn_cuDeviceGetName; 40 static PFN_cuCtxGetApiVersion pfn_cuCtxGetApiVersion; 41 static PFN_cuCtxSetCurrent pfn_cuCtxSetCurrent; 42 static PFN_cuCtxGetCurrent pfn_cuCtxGetCurrent; 43 static PFN_cuCtxGetDevice pfn_cuCtxGetDevice; 44 static PFN_cuCtxGetExecAffinity pfn_cuCtxGetExecAffinity; 45 static PFN_cuMemAlloc pfn_cuMemAlloc; 46 static PFN_cuMemFree pfn_cuMemFree; 47 static PFN_cuMemHostRegister pfn_cuMemHostRegister; 48 static PFN_cuMemHostUnregister pfn_cuMemHostUnregister; 49 static PFN_cuMemHostGetDevicePointer pfn_cuMemHostGetDevicePointer; 50 static PFN_cuFlushGPUDirectRDMAWrites pfn_cuFlushGPUDirectRDMAWrites; 51 52 static void *cudalib; 53 static unsigned int cuda_api_version; 54 static int cuda_driver_version; 55 static gdr_t gdrc_h; 56 57 #define CUDA_MAX_ALLOCATION_NUM 512 58 59 #define GPU_PAGE_SHIFT 16 60 #define GPU_PAGE_SIZE (1UL << GPU_PAGE_SHIFT) 61 62 RTE_LOG_REGISTER_DEFAULT(cuda_logtype, NOTICE); 63 64 /* NVIDIA GPU address map */ 65 static const struct rte_pci_id pci_id_cuda_map[] = { 66 { 67 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 68 NVIDIA_GPU_A40_DEVICE_ID) 69 }, 70 { 71 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 72 NVIDIA_GPU_A30_24GB_DEVICE_ID) 73 }, 74 { 75 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 76 NVIDIA_GPU_A30X_24GB_DPU_DEVICE_ID) 77 }, 78 { 79 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 80 NVIDIA_GPU_A10_24GB_DEVICE_ID) 81 }, 82 { 83 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 84 NVIDIA_GPU_A10G_DEVICE_ID) 85 }, 86 { 87 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 88 NVIDIA_GPU_A10M_DEVICE_ID) 89 }, 90 { 91 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 92 NVIDIA_GPU_A100_40GB_SXM4_DEVICE_ID) 93 }, 94 { 95 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 96 NVIDIA_GPU_A100_40GB_PCIE_DEVICE_ID) 97 }, 98 { 99 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 100 NVIDIA_GPU_A100_80GB_SXM4_DEVICE_ID) 101 }, 102 { 103 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 104 NVIDIA_GPU_A100_80GB_PCIE_DEVICE_ID) 105 }, 106 { 107 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 108 NVIDIA_GPU_A100X_80GB_DPU_DEVICE_ID) 109 }, 110 { 111 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 112 NVIDIA_GPU_GA100_PG506_207) 113 }, 114 { 115 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 116 NVIDIA_GPU_GA100_PCIE) 117 }, 118 { 119 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 120 NVIDIA_GPU_GA100_PG506_217) 121 }, 122 { 123 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 124 NVIDIA_GPU_V100_16GB_SXM2_DEVICE_ID) 125 }, 126 { 127 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 128 NVIDIA_GPU_V100_16GB_DGXS_DEVICE_ID) 129 }, 130 { 131 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 132 NVIDIA_GPU_V100_16GB_FHHL_DEVICE_ID) 133 }, 134 { 135 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 136 NVIDIA_GPU_V100_16GB_PCIE_DEVICE_ID) 137 }, 138 { 139 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 140 NVIDIA_GPU_V100_32GB_SXM2_DEVICE_ID) 141 }, 142 { 143 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 144 NVIDIA_GPU_V100_32GB_PCIE_DEVICE_ID) 145 }, 146 { 147 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 148 NVIDIA_GPU_V100_32GB_DGXS_DEVICE_ID) 149 }, 150 { 151 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 152 NVIDIA_GPU_V100_32GB_SXM3_DEVICE_ID) 153 }, 154 { 155 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 156 NVIDIA_GPU_V100_32GB_SXM3_H_DEVICE_ID) 157 }, 158 { 159 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 160 NVIDIA_GPU_V100_SXM2) 161 }, 162 { 163 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 164 NVIDIA_GPU_V100S_PCIE) 165 }, 166 { 167 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 168 NVIDIA_GPU_TITAN_V_CEO_ED) 169 }, 170 { 171 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 172 NVIDIA_GPU_GV100GL_PG500_216) 173 }, 174 { 175 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 176 NVIDIA_GPU_GV100GL_PG503_216) 177 }, 178 { 179 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 180 NVIDIA_GPU_TU102_TITAN_RTX) 181 }, 182 { 183 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 184 NVIDIA_GPU_TU102GL_QUADRO_RTX) 185 }, 186 { 187 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 188 NVIDIA_GPU_GV100_QUADRO_DEVICE_ID) 189 }, 190 { 191 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 192 NVIDIA_GPU_QUADRO_RTX_4000) 193 }, 194 { 195 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 196 NVIDIA_GPU_QUADRO_RTX_5000) 197 }, 198 { 199 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 200 NVIDIA_GPU_QUADRO_RTX_6000) 201 }, 202 { 203 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 204 NVIDIA_GPU_QUADRO_RTX_8000) 205 }, 206 { 207 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 208 NVIDIA_GPU_QUADRO_RTX_A4000) 209 }, 210 { 211 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 212 NVIDIA_GPU_QUADRO_RTX_A6000) 213 }, 214 { 215 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 216 NVIDIA_GPU_QUADRO_RTX_A5000) 217 }, 218 { 219 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 220 NVIDIA_GPU_QUADRO_RTX_A4500) 221 }, 222 { 223 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 224 NVIDIA_GPU_QUADRO_RTX_A5500) 225 }, 226 { 227 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 228 NVIDIA_GPU_QUADRO_RTX_A2000) 229 }, 230 { 231 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 232 NVIDIA_GPU_QUADRO_RTX_A2000_12GB) 233 }, 234 { 235 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 236 NVIDIA_GPU_T4G) 237 }, 238 { 239 RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID, 240 NVIDIA_GPU_T4) 241 }, 242 { 243 .device_id = 0 244 } 245 }; 246 247 /* Device private info */ 248 struct cuda_info { 249 char gpu_name[RTE_DEV_NAME_MAX_LEN]; 250 CUdevice cu_dev; 251 int gdr_supported; 252 int gdr_write_ordering; 253 int gdr_flush_type; 254 }; 255 256 /* Type of memory allocated by CUDA driver */ 257 enum mem_type { 258 GPU_MEM = 0, 259 CPU_REGISTERED, 260 GPU_REGISTERED 261 }; 262 263 /* key associated to a memory address */ 264 typedef uintptr_t cuda_ptr_key; 265 266 /* Single entry of the memory list */ 267 struct mem_entry { 268 CUdeviceptr ptr_d; 269 CUdeviceptr ptr_orig_d; 270 void *ptr_h; 271 size_t size; 272 size_t size_orig; 273 struct rte_gpu *dev; 274 CUcontext ctx; 275 cuda_ptr_key pkey; 276 enum mem_type mtype; 277 gdr_mh_t mh; 278 struct mem_entry *prev; 279 struct mem_entry *next; 280 }; 281 282 static struct mem_entry *mem_alloc_list_head; 283 static struct mem_entry *mem_alloc_list_tail; 284 static uint32_t mem_alloc_list_last_elem; 285 286 /* Load the CUDA symbols */ 287 288 static int 289 cuda_loader(void) 290 { 291 char cuda_path[1024]; 292 293 if (getenv("CUDA_PATH_L") == NULL) 294 snprintf(cuda_path, 1024, "%s", "libcuda.so"); 295 else 296 snprintf(cuda_path, 1024, "%s/%s", getenv("CUDA_PATH_L"), "libcuda.so"); 297 298 cudalib = dlopen(cuda_path, RTLD_LAZY); 299 if (cudalib == NULL) { 300 rte_cuda_log(ERR, "Failed to find CUDA library in %s (CUDA_PATH_L=%s)", 301 cuda_path, getenv("CUDA_PATH_L")); 302 return -1; 303 } 304 305 return 0; 306 } 307 308 static int 309 cuda_sym_func_loader(void) 310 { 311 if (cudalib == NULL) 312 return -1; 313 314 sym_cuInit = dlsym(cudalib, "cuInit"); 315 if (sym_cuInit == NULL) { 316 rte_cuda_log(ERR, "Failed to load CUDA missing symbol cuInit"); 317 return -1; 318 } 319 320 sym_cuDriverGetVersion = dlsym(cudalib, "cuDriverGetVersion"); 321 if (sym_cuDriverGetVersion == NULL) { 322 rte_cuda_log(ERR, "Failed to load CUDA missing symbol cuDriverGetVersion"); 323 return -1; 324 } 325 326 sym_cuGetProcAddress = dlsym(cudalib, "cuGetProcAddress"); 327 if (sym_cuGetProcAddress == NULL) { 328 rte_cuda_log(ERR, "Failed to load CUDA missing symbol cuGetProcAddress"); 329 return -1; 330 } 331 332 return 0; 333 } 334 335 static int 336 cuda_pfn_func_loader(void) 337 { 338 CUresult res; 339 340 res = sym_cuGetProcAddress("cuGetErrorString", 341 (void **) (&pfn_cuGetErrorString), cuda_driver_version, 0); 342 if (res != 0) { 343 rte_cuda_log(ERR, "Retrieve pfn_cuGetErrorString failed with %d", res); 344 return -1; 345 } 346 347 res = sym_cuGetProcAddress("cuGetErrorName", 348 (void **)(&pfn_cuGetErrorName), cuda_driver_version, 0); 349 if (res != 0) { 350 rte_cuda_log(ERR, "Retrieve pfn_cuGetErrorName failed with %d", res); 351 return -1; 352 } 353 354 res = sym_cuGetProcAddress("cuPointerSetAttribute", 355 (void **)(&pfn_cuPointerSetAttribute), cuda_driver_version, 0); 356 if (res != 0) { 357 rte_cuda_log(ERR, "Retrieve pfn_cuPointerSetAttribute failed with %d", res); 358 return -1; 359 } 360 361 res = sym_cuGetProcAddress("cuDeviceGetAttribute", 362 (void **)(&pfn_cuDeviceGetAttribute), cuda_driver_version, 0); 363 if (res != 0) { 364 rte_cuda_log(ERR, "Retrieve pfn_cuDeviceGetAttribute failed with %d", res); 365 return -1; 366 } 367 368 res = sym_cuGetProcAddress("cuDeviceGetByPCIBusId", 369 (void **)(&pfn_cuDeviceGetByPCIBusId), cuda_driver_version, 0); 370 if (res != 0) { 371 rte_cuda_log(ERR, "Retrieve pfn_cuDeviceGetByPCIBusId failed with %d", res); 372 return -1; 373 } 374 375 res = sym_cuGetProcAddress("cuDeviceGetName", 376 (void **)(&pfn_cuDeviceGetName), cuda_driver_version, 0); 377 if (res != 0) { 378 rte_cuda_log(ERR, "Retrieve pfn_cuDeviceGetName failed with %d", res); 379 return -1; 380 } 381 382 res = sym_cuGetProcAddress("cuDevicePrimaryCtxRetain", 383 (void **)(&pfn_cuDevicePrimaryCtxRetain), cuda_driver_version, 0); 384 if (res != 0) { 385 rte_cuda_log(ERR, "Retrieve pfn_cuDevicePrimaryCtxRetain failed with %d", res); 386 return -1; 387 } 388 389 res = sym_cuGetProcAddress("cuDevicePrimaryCtxRelease", 390 (void **)(&pfn_cuDevicePrimaryCtxRelease), cuda_driver_version, 0); 391 if (res != 0) { 392 rte_cuda_log(ERR, "Retrieve pfn_cuDevicePrimaryCtxRelease failed with %d", res); 393 return -1; 394 } 395 396 res = sym_cuGetProcAddress("cuDeviceTotalMem", 397 (void **)(&pfn_cuDeviceTotalMem), cuda_driver_version, 0); 398 if (res != 0) { 399 rte_cuda_log(ERR, "Retrieve pfn_cuDeviceTotalMem failed with %d", res); 400 return -1; 401 } 402 403 res = sym_cuGetProcAddress("cuCtxGetApiVersion", 404 (void **)(&pfn_cuCtxGetApiVersion), cuda_driver_version, 0); 405 if (res != 0) { 406 rte_cuda_log(ERR, "Retrieve pfn_cuCtxGetApiVersion failed with %d", res); 407 return -1; 408 } 409 410 res = sym_cuGetProcAddress("cuCtxGetDevice", 411 (void **)(&pfn_cuCtxGetDevice), cuda_driver_version, 0); 412 if (res != 0) { 413 rte_cuda_log(ERR, "Retrieve pfn_cuCtxGetDevice failed with %d", res); 414 return -1; 415 } 416 417 res = sym_cuGetProcAddress("cuCtxSetCurrent", 418 (void **)(&pfn_cuCtxSetCurrent), cuda_driver_version, 0); 419 if (res != 0) { 420 rte_cuda_log(ERR, "Retrieve pfn_cuCtxSetCurrent failed with %d", res); 421 return -1; 422 } 423 424 res = sym_cuGetProcAddress("cuCtxGetCurrent", 425 (void **)(&pfn_cuCtxGetCurrent), cuda_driver_version, 0); 426 if (res != 0) { 427 rte_cuda_log(ERR, "Retrieve pfn_cuCtxGetCurrent failed with %d", res); 428 return -1; 429 } 430 431 res = sym_cuGetProcAddress("cuCtxGetExecAffinity", 432 (void **)(&pfn_cuCtxGetExecAffinity), cuda_driver_version, 0); 433 if (res != 0) { 434 rte_cuda_log(ERR, "Retrieve pfn_cuCtxGetExecAffinity failed with %d", res); 435 return -1; 436 } 437 438 res = sym_cuGetProcAddress("cuMemAlloc", 439 (void **)(&pfn_cuMemAlloc), cuda_driver_version, 0); 440 if (res != 0) { 441 rte_cuda_log(ERR, "Retrieve pfn_cuMemAlloc failed with %d", res); 442 return -1; 443 } 444 445 res = sym_cuGetProcAddress("cuMemFree", 446 (void **)(&pfn_cuMemFree), cuda_driver_version, 0); 447 if (res != 0) { 448 rte_cuda_log(ERR, "Retrieve pfn_cuMemFree failed with %d", res); 449 return -1; 450 } 451 452 res = sym_cuGetProcAddress("cuMemHostRegister", 453 (void **)(&pfn_cuMemHostRegister), cuda_driver_version, 0); 454 if (res != 0) { 455 rte_cuda_log(ERR, "Retrieve pfn_cuMemHostRegister failed with %d", res); 456 return -1; 457 } 458 459 res = sym_cuGetProcAddress("cuMemHostUnregister", 460 (void **)(&pfn_cuMemHostUnregister), cuda_driver_version, 0); 461 if (res != 0) { 462 rte_cuda_log(ERR, "Retrieve pfn_cuMemHostUnregister failed with %d", res); 463 return -1; 464 } 465 466 res = sym_cuGetProcAddress("cuMemHostGetDevicePointer", 467 (void **)(&pfn_cuMemHostGetDevicePointer), cuda_driver_version, 0); 468 if (res != 0) { 469 rte_cuda_log(ERR, "Retrieve pfn_cuMemHostGetDevicePointer failed with %d", res); 470 return -1; 471 } 472 473 res = sym_cuGetProcAddress("cuFlushGPUDirectRDMAWrites", 474 (void **)(&pfn_cuFlushGPUDirectRDMAWrites), cuda_driver_version, 0); 475 if (res != 0) { 476 rte_cuda_log(ERR, "Retrieve cuFlushGPUDirectRDMAWrites failed with %d", res); 477 return -1; 478 } 479 480 return 0; 481 } 482 483 /* Generate a key from a memory pointer */ 484 static cuda_ptr_key 485 get_hash_from_ptr(void *ptr) 486 { 487 return (uintptr_t)ptr; 488 } 489 490 static uint32_t 491 mem_list_count_item(void) 492 { 493 return mem_alloc_list_last_elem; 494 } 495 496 /* Initiate list of memory allocations if not done yet */ 497 static struct mem_entry * 498 mem_list_add_item(void) 499 { 500 /* Initiate list of memory allocations if not done yet */ 501 if (mem_alloc_list_head == NULL) { 502 mem_alloc_list_head = rte_zmalloc(NULL, 503 sizeof(struct mem_entry), 504 RTE_CACHE_LINE_SIZE); 505 if (mem_alloc_list_head == NULL) { 506 rte_cuda_log(ERR, "Failed to allocate memory for memory list"); 507 return NULL; 508 } 509 510 mem_alloc_list_head->next = NULL; 511 mem_alloc_list_head->prev = NULL; 512 mem_alloc_list_tail = mem_alloc_list_head; 513 } else { 514 struct mem_entry *mem_alloc_list_cur = rte_zmalloc(NULL, 515 sizeof(struct mem_entry), 516 RTE_CACHE_LINE_SIZE); 517 518 if (mem_alloc_list_cur == NULL) { 519 rte_cuda_log(ERR, "Failed to allocate memory for memory list"); 520 return NULL; 521 } 522 523 mem_alloc_list_tail->next = mem_alloc_list_cur; 524 mem_alloc_list_cur->prev = mem_alloc_list_tail; 525 mem_alloc_list_tail = mem_alloc_list_tail->next; 526 mem_alloc_list_tail->next = NULL; 527 } 528 529 mem_alloc_list_last_elem++; 530 531 return mem_alloc_list_tail; 532 } 533 534 static struct mem_entry * 535 mem_list_find_item(cuda_ptr_key pk) 536 { 537 struct mem_entry *mem_alloc_list_cur = NULL; 538 539 if (mem_alloc_list_head == NULL) { 540 rte_cuda_log(ERR, "Memory list doesn't exist"); 541 return NULL; 542 } 543 544 if (mem_list_count_item() == 0) { 545 rte_cuda_log(ERR, "No items in memory list"); 546 return NULL; 547 } 548 549 mem_alloc_list_cur = mem_alloc_list_head; 550 551 while (mem_alloc_list_cur != NULL) { 552 if (mem_alloc_list_cur->pkey == pk) 553 return mem_alloc_list_cur; 554 mem_alloc_list_cur = mem_alloc_list_cur->next; 555 } 556 557 return mem_alloc_list_cur; 558 } 559 560 static int 561 mem_list_del_item(cuda_ptr_key pk) 562 { 563 struct mem_entry *mem_alloc_list_cur = NULL; 564 565 mem_alloc_list_cur = mem_list_find_item(pk); 566 if (mem_alloc_list_cur == NULL) 567 return -EINVAL; 568 569 /* if key is in head */ 570 if (mem_alloc_list_cur->prev == NULL) { 571 mem_alloc_list_head = mem_alloc_list_cur->next; 572 if (mem_alloc_list_head != NULL) 573 mem_alloc_list_head->prev = NULL; 574 } else { 575 mem_alloc_list_cur->prev->next = mem_alloc_list_cur->next; 576 if (mem_alloc_list_cur->next != NULL) 577 mem_alloc_list_cur->next->prev = mem_alloc_list_cur->prev; 578 } 579 580 rte_free(mem_alloc_list_cur); 581 582 mem_alloc_list_last_elem--; 583 584 return 0; 585 } 586 587 static int 588 cuda_dev_info_get(struct rte_gpu *dev, struct rte_gpu_info *info) 589 { 590 int ret = 0; 591 CUresult res; 592 struct rte_gpu_info parent_info; 593 CUexecAffinityParam affinityPrm; 594 const char *err_string; 595 struct cuda_info *private; 596 CUcontext current_ctx; 597 CUcontext input_ctx; 598 599 if (dev == NULL) { 600 rte_errno = ENODEV; 601 return -rte_errno; 602 } 603 604 /* Child initialization time probably called by rte_gpu_add_child() */ 605 if (dev->mpshared->info.parent != RTE_GPU_ID_NONE && 606 dev->mpshared->dev_private == NULL) { 607 /* Store current ctx */ 608 res = pfn_cuCtxGetCurrent(¤t_ctx); 609 if (res != 0) { 610 pfn_cuGetErrorString(res, &(err_string)); 611 rte_cuda_log(ERR, "cuCtxGetCurrent failed with %s", 612 err_string); 613 rte_errno = EPERM; 614 return -rte_errno; 615 } 616 617 /* Set child ctx as current ctx */ 618 input_ctx = (CUcontext)((uintptr_t)dev->mpshared->info.context); 619 res = pfn_cuCtxSetCurrent(input_ctx); 620 if (res != 0) { 621 pfn_cuGetErrorString(res, &(err_string)); 622 rte_cuda_log(ERR, "cuCtxSetCurrent input failed with %s", 623 err_string); 624 rte_errno = EPERM; 625 return -rte_errno; 626 } 627 628 /* 629 * Ctx capacity info 630 */ 631 632 /* MPS compatible */ 633 res = pfn_cuCtxGetExecAffinity(&affinityPrm, 634 CU_EXEC_AFFINITY_TYPE_SM_COUNT); 635 if (res != 0) { 636 pfn_cuGetErrorString(res, &(err_string)); 637 rte_cuda_log(ERR, "cuCtxGetExecAffinity failed with %s", 638 err_string); 639 } 640 dev->mpshared->info.processor_count = 641 (uint32_t)affinityPrm.param.smCount.val; 642 643 ret = rte_gpu_info_get(dev->mpshared->info.parent, &parent_info); 644 if (ret) { 645 rte_errno = ENODEV; 646 return -rte_errno; 647 } 648 dev->mpshared->info.total_memory = parent_info.total_memory; 649 650 dev->mpshared->info.page_size = parent_info.page_size; 651 652 /* 653 * GPU Device private info 654 */ 655 dev->mpshared->dev_private = rte_zmalloc(NULL, 656 sizeof(struct cuda_info), 657 RTE_CACHE_LINE_SIZE); 658 if (dev->mpshared->dev_private == NULL) { 659 rte_cuda_log(ERR, "Failed to allocate memory for GPU process private"); 660 rte_errno = EPERM; 661 return -rte_errno; 662 } 663 664 private = (struct cuda_info *)dev->mpshared->dev_private; 665 666 res = pfn_cuCtxGetDevice(&(private->cu_dev)); 667 if (res != 0) { 668 pfn_cuGetErrorString(res, &(err_string)); 669 rte_cuda_log(ERR, "cuCtxGetDevice failed with %s", 670 err_string); 671 rte_errno = EPERM; 672 return -rte_errno; 673 } 674 675 res = pfn_cuDeviceGetName(private->gpu_name, 676 RTE_DEV_NAME_MAX_LEN, private->cu_dev); 677 if (res != 0) { 678 pfn_cuGetErrorString(res, &(err_string)); 679 rte_cuda_log(ERR, "cuDeviceGetName failed with %s", 680 err_string); 681 rte_errno = EPERM; 682 return -rte_errno; 683 } 684 685 /* Restore original ctx as current ctx */ 686 res = pfn_cuCtxSetCurrent(current_ctx); 687 if (res != 0) { 688 pfn_cuGetErrorString(res, &(err_string)); 689 rte_cuda_log(ERR, "cuCtxSetCurrent current failed with %s", 690 err_string); 691 rte_errno = EPERM; 692 return -rte_errno; 693 } 694 } 695 696 *info = dev->mpshared->info; 697 698 return 0; 699 } 700 701 /* 702 * GPU Memory 703 */ 704 705 static int 706 cuda_mem_alloc(struct rte_gpu *dev, size_t size, unsigned int align, void **ptr) 707 { 708 CUresult res; 709 const char *err_string; 710 CUcontext current_ctx; 711 CUcontext input_ctx; 712 unsigned int flag = 1; 713 714 if (dev == NULL) 715 return -ENODEV; 716 717 /* Store current ctx */ 718 res = pfn_cuCtxGetCurrent(¤t_ctx); 719 if (res != 0) { 720 pfn_cuGetErrorString(res, &(err_string)); 721 rte_cuda_log(ERR, "cuCtxGetCurrent failed with %s", 722 err_string); 723 rte_errno = EPERM; 724 return -rte_errno; 725 } 726 727 /* Set child ctx as current ctx */ 728 input_ctx = (CUcontext)((uintptr_t)dev->mpshared->info.context); 729 res = pfn_cuCtxSetCurrent(input_ctx); 730 if (res != 0) { 731 pfn_cuGetErrorString(res, &(err_string)); 732 rte_cuda_log(ERR, "cuCtxSetCurrent input failed with %s", 733 err_string); 734 rte_errno = EPERM; 735 return -rte_errno; 736 } 737 738 /* Get next memory list item */ 739 mem_alloc_list_tail = mem_list_add_item(); 740 if (mem_alloc_list_tail == NULL) { 741 rte_errno = EPERM; 742 return -rte_errno; 743 } 744 745 /* Allocate memory */ 746 mem_alloc_list_tail->size = size; 747 mem_alloc_list_tail->size_orig = size + align; 748 749 res = pfn_cuMemAlloc(&(mem_alloc_list_tail->ptr_orig_d), 750 mem_alloc_list_tail->size_orig); 751 if (res != 0) { 752 pfn_cuGetErrorString(res, &(err_string)); 753 rte_cuda_log(ERR, "cuCtxSetCurrent current failed with %s", 754 err_string); 755 rte_errno = EPERM; 756 return -rte_errno; 757 } 758 759 /* Align memory address */ 760 mem_alloc_list_tail->ptr_d = mem_alloc_list_tail->ptr_orig_d; 761 if (align && ((uintptr_t)mem_alloc_list_tail->ptr_d) % align) 762 mem_alloc_list_tail->ptr_d += (align - 763 (((uintptr_t)mem_alloc_list_tail->ptr_d) % align)); 764 765 /* GPUDirect RDMA attribute required */ 766 res = pfn_cuPointerSetAttribute(&flag, 767 CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, 768 mem_alloc_list_tail->ptr_d); 769 if (res != 0) { 770 rte_cuda_log(ERR, "Could not set SYNC MEMOP attribute for " 771 "GPU memory at %"PRIu32", err %d", 772 (uint32_t)mem_alloc_list_tail->ptr_d, res); 773 rte_errno = EPERM; 774 return -rte_errno; 775 } 776 777 mem_alloc_list_tail->pkey = get_hash_from_ptr((void *)mem_alloc_list_tail->ptr_d); 778 mem_alloc_list_tail->ptr_h = NULL; 779 mem_alloc_list_tail->dev = dev; 780 mem_alloc_list_tail->ctx = (CUcontext)((uintptr_t)dev->mpshared->info.context); 781 mem_alloc_list_tail->mtype = GPU_MEM; 782 783 /* Restore original ctx as current ctx */ 784 res = pfn_cuCtxSetCurrent(current_ctx); 785 if (res != 0) { 786 pfn_cuGetErrorString(res, &(err_string)); 787 rte_cuda_log(ERR, "cuCtxSetCurrent current failed with %s", 788 err_string); 789 rte_errno = EPERM; 790 return -rte_errno; 791 } 792 793 *ptr = (void *)mem_alloc_list_tail->ptr_d; 794 795 return 0; 796 } 797 798 static int 799 cuda_mem_register(struct rte_gpu *dev, size_t size, void *ptr) 800 { 801 CUresult res; 802 const char *err_string; 803 CUcontext current_ctx; 804 CUcontext input_ctx; 805 unsigned int flag = 1; 806 int use_ptr_h = 0; 807 808 if (dev == NULL) 809 return -ENODEV; 810 811 /* Store current ctx */ 812 res = pfn_cuCtxGetCurrent(¤t_ctx); 813 if (res != 0) { 814 pfn_cuGetErrorString(res, &(err_string)); 815 rte_cuda_log(ERR, "cuCtxGetCurrent failed with %s", 816 err_string); 817 rte_errno = EPERM; 818 return -rte_errno; 819 } 820 821 /* Set child ctx as current ctx */ 822 input_ctx = (CUcontext)((uintptr_t)dev->mpshared->info.context); 823 res = pfn_cuCtxSetCurrent(input_ctx); 824 if (res != 0) { 825 pfn_cuGetErrorString(res, &(err_string)); 826 rte_cuda_log(ERR, "cuCtxSetCurrent input failed with %s", 827 err_string); 828 rte_errno = EPERM; 829 return -rte_errno; 830 } 831 832 /* Get next memory list item */ 833 mem_alloc_list_tail = mem_list_add_item(); 834 if (mem_alloc_list_tail == NULL) { 835 rte_errno = EPERM; 836 return -rte_errno; 837 } 838 839 /* Allocate memory */ 840 mem_alloc_list_tail->size = size; 841 mem_alloc_list_tail->ptr_h = ptr; 842 843 res = pfn_cuMemHostRegister(mem_alloc_list_tail->ptr_h, 844 mem_alloc_list_tail->size, 845 CU_MEMHOSTREGISTER_PORTABLE | 846 CU_MEMHOSTREGISTER_DEVICEMAP); 847 if (res != 0) { 848 pfn_cuGetErrorString(res, &(err_string)); 849 rte_cuda_log(ERR, "cuMemHostRegister failed with %s ptr %p size %zd", 850 err_string, 851 mem_alloc_list_tail->ptr_h, 852 mem_alloc_list_tail->size); 853 rte_errno = EPERM; 854 return -rte_errno; 855 } 856 857 res = pfn_cuDeviceGetAttribute(&(use_ptr_h), 858 CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM, 859 ((struct cuda_info *)(dev->mpshared->dev_private))->cu_dev); 860 if (res != 0) { 861 pfn_cuGetErrorString(res, &(err_string)); 862 rte_cuda_log(ERR, "cuDeviceGetAttribute failed with %s", 863 err_string); 864 rte_errno = EPERM; 865 return -rte_errno; 866 } 867 868 if (use_ptr_h == 0) { 869 res = pfn_cuMemHostGetDevicePointer(&(mem_alloc_list_tail->ptr_d), 870 mem_alloc_list_tail->ptr_h, 0); 871 if (res != 0) { 872 pfn_cuGetErrorString(res, &(err_string)); 873 rte_cuda_log(ERR, "cuMemHostGetDevicePointer failed with %s", 874 err_string); 875 rte_errno = EPERM; 876 return -rte_errno; 877 } 878 879 if ((uintptr_t)mem_alloc_list_tail->ptr_d != 880 (uintptr_t)mem_alloc_list_tail->ptr_h) { 881 rte_cuda_log(ERR, "Host input pointer is different wrt GPU registered pointer"); 882 rte_errno = ENOTSUP; 883 return -rte_errno; 884 } 885 } else { 886 mem_alloc_list_tail->ptr_d = (CUdeviceptr)mem_alloc_list_tail->ptr_h; 887 } 888 889 /* GPUDirect RDMA attribute required */ 890 res = pfn_cuPointerSetAttribute(&flag, 891 CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, 892 mem_alloc_list_tail->ptr_d); 893 if (res != 0) { 894 rte_cuda_log(ERR, "Could not set SYNC MEMOP attribute for GPU memory at %"PRIu32 895 ", err %d", (uint32_t)mem_alloc_list_tail->ptr_d, res); 896 rte_errno = EPERM; 897 return -rte_errno; 898 } 899 900 mem_alloc_list_tail->pkey = get_hash_from_ptr((void *)mem_alloc_list_tail->ptr_h); 901 mem_alloc_list_tail->size = size; 902 mem_alloc_list_tail->dev = dev; 903 mem_alloc_list_tail->ctx = (CUcontext)((uintptr_t)dev->mpshared->info.context); 904 mem_alloc_list_tail->mtype = CPU_REGISTERED; 905 mem_alloc_list_tail->ptr_orig_d = mem_alloc_list_tail->ptr_d; 906 907 /* Restore original ctx as current ctx */ 908 res = pfn_cuCtxSetCurrent(current_ctx); 909 if (res != 0) { 910 pfn_cuGetErrorString(res, &(err_string)); 911 rte_cuda_log(ERR, "cuCtxSetCurrent current failed with %s", 912 err_string); 913 rte_errno = EPERM; 914 return -rte_errno; 915 } 916 917 return 0; 918 } 919 920 static int 921 cuda_mem_cpu_map(struct rte_gpu *dev, __rte_unused size_t size, void *ptr_in, void **ptr_out) 922 { 923 struct mem_entry *mem_item; 924 cuda_ptr_key hk; 925 926 if (dev == NULL) 927 return -ENODEV; 928 929 hk = get_hash_from_ptr((void *)ptr_in); 930 931 mem_item = mem_list_find_item(hk); 932 if (mem_item == NULL) { 933 rte_cuda_log(ERR, "Memory address 0x%p not found in driver memory.", ptr_in); 934 rte_errno = EPERM; 935 return -rte_errno; 936 } 937 938 if (mem_item->mtype != GPU_MEM) { 939 rte_cuda_log(ERR, "Memory address 0x%p is not GPU memory type.", ptr_in); 940 rte_errno = EPERM; 941 return -rte_errno; 942 } 943 944 if (mem_item->size != size) 945 rte_cuda_log(WARNING, 946 "Can't expose memory area with size (%zd) different from original size (%zd).", 947 size, mem_item->size); 948 949 if (gdrcopy_pin(&gdrc_h, &(mem_item->mh), (uint64_t)mem_item->ptr_d, 950 mem_item->size, &(mem_item->ptr_h))) { 951 rte_cuda_log(ERR, "Error exposing GPU memory address 0x%p.", ptr_in); 952 rte_errno = EPERM; 953 return -rte_errno; 954 } 955 956 mem_item->mtype = GPU_REGISTERED; 957 *ptr_out = mem_item->ptr_h; 958 959 return 0; 960 } 961 962 static int 963 cuda_mem_unregister(struct rte_gpu *dev, void *ptr) 964 { 965 CUresult res; 966 struct mem_entry *mem_item; 967 const char *err_string; 968 cuda_ptr_key hk; 969 970 if (dev == NULL) 971 return -ENODEV; 972 973 hk = get_hash_from_ptr((void *)ptr); 974 975 mem_item = mem_list_find_item(hk); 976 if (mem_item == NULL) { 977 rte_cuda_log(ERR, "Memory address 0x%p not found in driver memory", ptr); 978 rte_errno = EPERM; 979 return -rte_errno; 980 } 981 982 if (mem_item->mtype == CPU_REGISTERED) { 983 res = pfn_cuMemHostUnregister(ptr); 984 if (res != 0) { 985 pfn_cuGetErrorString(res, &(err_string)); 986 rte_cuda_log(ERR, "cuMemHostUnregister current failed with %s", 987 err_string); 988 rte_errno = EPERM; 989 return -rte_errno; 990 } 991 992 return mem_list_del_item(hk); 993 } 994 995 rte_cuda_log(ERR, "Memory type %d not supported", mem_item->mtype); 996 997 rte_errno = EPERM; 998 return -rte_errno; 999 } 1000 1001 static int 1002 cuda_mem_cpu_unmap(struct rte_gpu *dev, void *ptr_in) 1003 { 1004 struct mem_entry *mem_item; 1005 cuda_ptr_key hk; 1006 1007 if (dev == NULL) 1008 return -ENODEV; 1009 1010 hk = get_hash_from_ptr((void *)ptr_in); 1011 1012 mem_item = mem_list_find_item(hk); 1013 if (mem_item == NULL) { 1014 rte_cuda_log(ERR, "Memory address 0x%p not found in driver memory.", ptr_in); 1015 rte_errno = EPERM; 1016 return -rte_errno; 1017 } 1018 1019 if (mem_item->mtype == GPU_REGISTERED) { 1020 if (gdrcopy_unpin(gdrc_h, mem_item->mh, (void *)mem_item->ptr_d, 1021 mem_item->size)) { 1022 rte_cuda_log(ERR, "Error unexposing GPU memory address 0x%p.", ptr_in); 1023 rte_errno = EPERM; 1024 return -rte_errno; 1025 } 1026 1027 mem_item->mtype = GPU_MEM; 1028 } else { 1029 rte_errno = EPERM; 1030 return -rte_errno; 1031 } 1032 1033 return 0; 1034 } 1035 1036 static int 1037 cuda_mem_free(struct rte_gpu *dev, void *ptr) 1038 { 1039 CUresult res; 1040 struct mem_entry *mem_item; 1041 const char *err_string; 1042 cuda_ptr_key hk; 1043 1044 if (dev == NULL) 1045 return -ENODEV; 1046 1047 hk = get_hash_from_ptr((void *)ptr); 1048 1049 mem_item = mem_list_find_item(hk); 1050 if (mem_item == NULL) { 1051 rte_cuda_log(ERR, "Memory address 0x%p not found in driver memory", ptr); 1052 rte_errno = EPERM; 1053 return -rte_errno; 1054 } 1055 1056 /* 1057 * If a GPU memory area that's CPU mapped is being freed 1058 * without calling cpu_unmap, force the unmapping. 1059 */ 1060 if (mem_item->mtype == GPU_REGISTERED) 1061 cuda_mem_cpu_unmap(dev, ptr); 1062 1063 if (mem_item->mtype == GPU_MEM) { 1064 res = pfn_cuMemFree(mem_item->ptr_orig_d); 1065 if (res != 0) { 1066 pfn_cuGetErrorString(res, &(err_string)); 1067 rte_cuda_log(ERR, "cuMemFree current failed with %s", 1068 err_string); 1069 rte_errno = EPERM; 1070 return -rte_errno; 1071 } 1072 1073 return mem_list_del_item(hk); 1074 } 1075 1076 rte_cuda_log(ERR, "Memory type %d not supported", mem_item->mtype); 1077 1078 return -EPERM; 1079 } 1080 1081 static int 1082 cuda_dev_close(struct rte_gpu *dev) 1083 { 1084 if (dev == NULL) 1085 return -EINVAL; 1086 1087 rte_free(dev->mpshared->dev_private); 1088 1089 return 0; 1090 } 1091 1092 static int 1093 cuda_wmb(struct rte_gpu *dev) 1094 { 1095 CUresult res; 1096 const char *err_string; 1097 CUcontext current_ctx; 1098 CUcontext input_ctx; 1099 struct cuda_info *private; 1100 1101 if (dev == NULL) { 1102 rte_errno = ENODEV; 1103 return -rte_errno; 1104 } 1105 1106 private = (struct cuda_info *)dev->mpshared->dev_private; 1107 1108 if (private->gdr_write_ordering != CU_GPU_DIRECT_RDMA_WRITES_ORDERING_NONE) { 1109 /* 1110 * No need to explicitly force the write ordering because 1111 * the device natively supports it 1112 */ 1113 return 0; 1114 } 1115 1116 if (private->gdr_flush_type != CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_HOST) { 1117 /* 1118 * Can't flush GDR writes with cuFlushGPUDirectRDMAWrites CUDA function. 1119 * Application needs to use alternative methods. 1120 */ 1121 rte_cuda_log(WARNING, "Can't flush GDR writes with cuFlushGPUDirectRDMAWrites CUDA function." 1122 "Application needs to use alternative methods."); 1123 1124 rte_errno = ENOTSUP; 1125 return -rte_errno; 1126 } 1127 1128 /* Store current ctx */ 1129 res = pfn_cuCtxGetCurrent(¤t_ctx); 1130 if (res != 0) { 1131 pfn_cuGetErrorString(res, &(err_string)); 1132 rte_cuda_log(ERR, "cuCtxGetCurrent failed with %s", 1133 err_string); 1134 rte_errno = EPERM; 1135 return -rte_errno; 1136 } 1137 1138 /* Set child ctx as current ctx */ 1139 input_ctx = (CUcontext)((uintptr_t)dev->mpshared->info.context); 1140 res = pfn_cuCtxSetCurrent(input_ctx); 1141 if (res != 0) { 1142 pfn_cuGetErrorString(res, &(err_string)); 1143 rte_cuda_log(ERR, "cuCtxSetCurrent input failed with %s", 1144 err_string); 1145 rte_errno = EPERM; 1146 return -rte_errno; 1147 } 1148 1149 res = pfn_cuFlushGPUDirectRDMAWrites(CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TARGET_CURRENT_CTX, 1150 CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_ALL_DEVICES); 1151 if (res != 0) { 1152 pfn_cuGetErrorString(res, &(err_string)); 1153 rte_cuda_log(ERR, "cuFlushGPUDirectRDMAWrites current failed with %s", 1154 err_string); 1155 rte_errno = EPERM; 1156 return -rte_errno; 1157 } 1158 1159 /* Restore original ctx as current ctx */ 1160 res = pfn_cuCtxSetCurrent(current_ctx); 1161 if (res != 0) { 1162 pfn_cuGetErrorString(res, &(err_string)); 1163 rte_cuda_log(ERR, "cuCtxSetCurrent current failed with %s", 1164 err_string); 1165 rte_errno = EPERM; 1166 return -rte_errno; 1167 } 1168 1169 return 0; 1170 } 1171 1172 static int 1173 cuda_gpu_probe(__rte_unused struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) 1174 { 1175 struct rte_gpu *dev = NULL; 1176 CUresult res; 1177 CUdevice cu_dev_id; 1178 CUcontext pctx; 1179 char dev_name[RTE_DEV_NAME_MAX_LEN]; 1180 const char *err_string; 1181 int processor_count = 0; 1182 struct cuda_info *private; 1183 1184 if (pci_dev == NULL) { 1185 rte_cuda_log(ERR, "NULL PCI device"); 1186 rte_errno = ENODEV; 1187 return -rte_errno; 1188 } 1189 1190 rte_pci_device_name(&pci_dev->addr, dev_name, sizeof(dev_name)); 1191 1192 /* Allocate memory to be used privately by drivers */ 1193 dev = rte_gpu_allocate(pci_dev->device.name); 1194 if (dev == NULL) { 1195 rte_errno = ENODEV; 1196 return -rte_errno; 1197 } 1198 1199 /* Initialize values only for the first CUDA driver call */ 1200 if (dev->mpshared->info.dev_id == 0) { 1201 mem_alloc_list_head = NULL; 1202 mem_alloc_list_tail = NULL; 1203 mem_alloc_list_last_elem = 0; 1204 1205 /* Load libcuda.so library */ 1206 if (cuda_loader()) { 1207 rte_cuda_log(ERR, "CUDA Driver library not found"); 1208 rte_errno = ENOTSUP; 1209 return -rte_errno; 1210 } 1211 1212 /* Load initial CUDA functions */ 1213 if (cuda_sym_func_loader()) { 1214 rte_cuda_log(ERR, "CUDA functions not found in library"); 1215 rte_errno = ENOTSUP; 1216 return -rte_errno; 1217 } 1218 1219 /* 1220 * Required to initialize the CUDA Driver. 1221 * Multiple calls of cuInit() will return immediately 1222 * without making any relevant change 1223 */ 1224 sym_cuInit(0); 1225 1226 res = sym_cuDriverGetVersion(&cuda_driver_version); 1227 if (res != 0) { 1228 rte_cuda_log(ERR, "cuDriverGetVersion failed with %d", res); 1229 rte_errno = ENOTSUP; 1230 return -rte_errno; 1231 } 1232 1233 if (cuda_driver_version < CUDA_DRIVER_MIN_VERSION) { 1234 rte_cuda_log(ERR, "CUDA Driver version found is %d. " 1235 "Minimum requirement is %d", 1236 cuda_driver_version, 1237 CUDA_DRIVER_MIN_VERSION); 1238 rte_errno = ENOTSUP; 1239 return -rte_errno; 1240 } 1241 1242 if (cuda_pfn_func_loader()) { 1243 rte_cuda_log(ERR, "CUDA PFN functions not found in library"); 1244 rte_errno = ENOTSUP; 1245 return -rte_errno; 1246 } 1247 1248 gdrc_h = NULL; 1249 } 1250 1251 /* Fill HW specific part of device structure */ 1252 dev->device = &pci_dev->device; 1253 dev->mpshared->info.numa_node = pci_dev->device.numa_node; 1254 1255 /* Get NVIDIA GPU Device descriptor */ 1256 res = pfn_cuDeviceGetByPCIBusId(&cu_dev_id, dev->device->name); 1257 if (res != 0) { 1258 pfn_cuGetErrorString(res, &(err_string)); 1259 rte_cuda_log(ERR, "cuDeviceGetByPCIBusId name %s failed with %d: %s", 1260 dev->device->name, res, err_string); 1261 rte_errno = EPERM; 1262 return -rte_errno; 1263 } 1264 1265 res = pfn_cuDevicePrimaryCtxRetain(&pctx, cu_dev_id); 1266 if (res != 0) { 1267 pfn_cuGetErrorString(res, &(err_string)); 1268 rte_cuda_log(ERR, "cuDevicePrimaryCtxRetain name %s failed with %d: %s", 1269 dev->device->name, res, err_string); 1270 rte_errno = EPERM; 1271 return -rte_errno; 1272 } 1273 1274 res = pfn_cuCtxGetApiVersion(pctx, &cuda_api_version); 1275 if (res != 0) { 1276 rte_cuda_log(ERR, "cuCtxGetApiVersion failed with %d", res); 1277 rte_errno = ENOTSUP; 1278 return -rte_errno; 1279 } 1280 1281 if (cuda_api_version < CUDA_API_MIN_VERSION) { 1282 rte_cuda_log(ERR, "CUDA API version found is %d Minimum requirement is %d", 1283 cuda_api_version, CUDA_API_MIN_VERSION); 1284 rte_errno = ENOTSUP; 1285 return -rte_errno; 1286 } 1287 1288 dev->mpshared->info.context = (uint64_t)pctx; 1289 1290 /* 1291 * GPU Device generic info 1292 */ 1293 1294 /* Processor count */ 1295 res = pfn_cuDeviceGetAttribute(&(processor_count), 1296 CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, 1297 cu_dev_id); 1298 if (res != 0) { 1299 pfn_cuGetErrorString(res, &(err_string)); 1300 rte_cuda_log(ERR, "cuDeviceGetAttribute failed with %s", 1301 err_string); 1302 rte_errno = EPERM; 1303 return -rte_errno; 1304 } 1305 dev->mpshared->info.processor_count = (uint32_t)processor_count; 1306 1307 /* Total memory */ 1308 res = pfn_cuDeviceTotalMem(&dev->mpshared->info.total_memory, cu_dev_id); 1309 if (res != 0) { 1310 pfn_cuGetErrorString(res, &(err_string)); 1311 rte_cuda_log(ERR, "cuDeviceTotalMem failed with %s", 1312 err_string); 1313 rte_errno = EPERM; 1314 return -rte_errno; 1315 } 1316 1317 dev->mpshared->info.page_size = (size_t)GPU_PAGE_SIZE; 1318 1319 /* 1320 * GPU Device private info 1321 */ 1322 dev->mpshared->dev_private = rte_zmalloc(NULL, 1323 sizeof(struct cuda_info), 1324 RTE_CACHE_LINE_SIZE); 1325 if (dev->mpshared->dev_private == NULL) { 1326 rte_cuda_log(ERR, "Failed to allocate memory for GPU process private"); 1327 rte_errno = EPERM; 1328 return -rte_errno; 1329 } 1330 1331 private = (struct cuda_info *)dev->mpshared->dev_private; 1332 private->cu_dev = cu_dev_id; 1333 res = pfn_cuDeviceGetName(private->gpu_name, 1334 RTE_DEV_NAME_MAX_LEN, 1335 cu_dev_id); 1336 if (res != 0) { 1337 pfn_cuGetErrorString(res, &(err_string)); 1338 rte_cuda_log(ERR, "cuDeviceGetName failed with %s", 1339 err_string); 1340 rte_errno = EPERM; 1341 return -rte_errno; 1342 } 1343 1344 res = pfn_cuDeviceGetAttribute(&(private->gdr_supported), 1345 CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED, 1346 cu_dev_id); 1347 if (res != 0) { 1348 pfn_cuGetErrorString(res, &(err_string)); 1349 rte_cuda_log(ERR, "cuDeviceGetAttribute failed with %s", 1350 err_string); 1351 rte_errno = EPERM; 1352 return -rte_errno; 1353 } 1354 1355 if (private->gdr_supported == 0) 1356 rte_cuda_log(WARNING, "GPU %s doesn't support GPUDirect RDMA", 1357 pci_dev->device.name); 1358 1359 res = pfn_cuDeviceGetAttribute(&(private->gdr_write_ordering), 1360 CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING, 1361 cu_dev_id); 1362 if (res != 0) { 1363 pfn_cuGetErrorString(res, &(err_string)); 1364 rte_cuda_log(ERR, 1365 "cuDeviceGetAttribute failed with %s", 1366 err_string); 1367 rte_errno = EPERM; 1368 return -rte_errno; 1369 } 1370 1371 if (private->gdr_write_ordering == CU_GPU_DIRECT_RDMA_WRITES_ORDERING_NONE) { 1372 res = pfn_cuDeviceGetAttribute(&(private->gdr_flush_type), 1373 CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS, 1374 cu_dev_id); 1375 if (res != 0) { 1376 pfn_cuGetErrorString(res, &(err_string)); 1377 rte_cuda_log(ERR, "cuDeviceGetAttribute failed with %s", 1378 err_string); 1379 rte_errno = EPERM; 1380 return -rte_errno; 1381 } 1382 1383 if (private->gdr_flush_type != CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_HOST) 1384 rte_cuda_log(ERR, "GPUDirect RDMA flush writes API is not supported"); 1385 } 1386 1387 dev->ops.dev_info_get = cuda_dev_info_get; 1388 dev->ops.dev_close = cuda_dev_close; 1389 dev->ops.mem_alloc = cuda_mem_alloc; 1390 dev->ops.mem_free = cuda_mem_free; 1391 dev->ops.mem_register = cuda_mem_register; 1392 dev->ops.mem_unregister = cuda_mem_unregister; 1393 dev->ops.mem_cpu_map = cuda_mem_cpu_map; 1394 dev->ops.mem_cpu_unmap = cuda_mem_cpu_unmap; 1395 dev->ops.wmb = cuda_wmb; 1396 1397 rte_gpu_complete_new(dev); 1398 1399 rte_cuda_debug("dev id = %u name = %s", 1400 dev->mpshared->info.dev_id, private->gpu_name); 1401 1402 return 0; 1403 } 1404 1405 static int 1406 cuda_gpu_remove(struct rte_pci_device *pci_dev) 1407 { 1408 struct rte_gpu *dev; 1409 int ret; 1410 uint8_t gpu_id; 1411 1412 if (pci_dev == NULL) { 1413 rte_errno = ENODEV; 1414 return -rte_errno; 1415 } 1416 1417 dev = rte_gpu_get_by_name(pci_dev->device.name); 1418 if (dev == NULL) { 1419 rte_cuda_log(ERR, "Couldn't find HW dev \"%s\" to uninitialise it", 1420 pci_dev->device.name); 1421 rte_errno = ENODEV; 1422 return -rte_errno; 1423 } 1424 gpu_id = dev->mpshared->info.dev_id; 1425 1426 /* release dev from library */ 1427 ret = rte_gpu_release(dev); 1428 if (ret) 1429 rte_cuda_log(ERR, "Device %i failed to uninit: %i", gpu_id, ret); 1430 1431 rte_cuda_debug("Destroyed dev = %u", gpu_id); 1432 1433 return 0; 1434 } 1435 1436 static struct rte_pci_driver rte_cuda_driver = { 1437 .id_table = pci_id_cuda_map, 1438 .drv_flags = RTE_PCI_DRV_WC_ACTIVATE, 1439 .probe = cuda_gpu_probe, 1440 .remove = cuda_gpu_remove, 1441 }; 1442 1443 RTE_PMD_REGISTER_PCI(gpu_cuda, rte_cuda_driver); 1444 RTE_PMD_REGISTER_PCI_TABLE(gpu_cuda, pci_id_cuda_map); 1445 RTE_PMD_REGISTER_KMOD_DEP(gpu_cuda, "* nvidia & (nv_peer_mem | nvpeer_mem)"); 1446