/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2021 NVIDIA Corporation & Affiliates
 */

#include <stdlib.h>
#include <string.h>

#include <rte_eal.h>
#include <rte_tailq.h>
#include <rte_rwlock.h>
#include <rte_string_fns.h>
#include <rte_memzone.h>
#include <rte_malloc.h>
#include <rte_errno.h>
#include <rte_log.h>

#include "rte_gpudev.h"
#include "gpudev_driver.h"

/* Logging */
RTE_LOG_REGISTER_DEFAULT(gpu_logtype, NOTICE);
#define GPU_LOG(level, ...) \
	rte_log(RTE_LOG_ ## level, gpu_logtype, RTE_FMT("gpu: " \
		RTE_FMT_HEAD(__VA_ARGS__, ) "\n", RTE_FMT_TAIL(__VA_ARGS__, )))

/* Set any driver error as EPERM */
#define GPU_DRV_RET(function) \
	((function != 0) ? -(rte_errno = EPERM) : (rte_errno = 0))

/* Array of devices */
static struct rte_gpu *gpus;
/* Maximum number of devices (size of the array) */
static int16_t gpu_max;
/* Number of currently valid devices */
static int16_t gpu_count;

/* Shared memory between processes. */
static const char *GPU_MEMZONE = "rte_gpu_shared";
static struct {
	__extension__ struct rte_gpu_mpshared gpus[0];
} *gpu_shared_mem;

/* Event callback object */
struct rte_gpu_callback {
	TAILQ_ENTRY(rte_gpu_callback) next;
	rte_gpu_callback_t *function;
	void *user_data;
	enum rte_gpu_event event;
};
static rte_rwlock_t gpu_callback_lock = RTE_RWLOCK_INITIALIZER;
static void gpu_free_callbacks(struct rte_gpu *dev);

int
rte_gpu_init(size_t dev_max)
{
	if (dev_max == 0 || dev_max > INT16_MAX) {
		GPU_LOG(ERR, "invalid array size");
		rte_errno = EINVAL;
		return -rte_errno;
	}

	/* No lock, it must be called before or during first probing. */
	if (gpus != NULL) {
		GPU_LOG(ERR, "already initialized");
		rte_errno = EBUSY;
		return -rte_errno;
	}

	gpus = calloc(dev_max, sizeof(struct rte_gpu));
	if (gpus == NULL) {
		GPU_LOG(ERR, "cannot initialize library");
		rte_errno = ENOMEM;
		return -rte_errno;
	}

	gpu_max = dev_max;
	return 0;
}

uint16_t
rte_gpu_count_avail(void)
{
	return gpu_count;
}

bool
rte_gpu_is_valid(int16_t dev_id)
{
	if (dev_id >= 0 && dev_id < gpu_max &&
			gpus[dev_id].process_state == RTE_GPU_STATE_INITIALIZED)
		return true;
	return false;
}

static bool
gpu_match_parent(int16_t dev_id, int16_t parent)
{
	if (parent == RTE_GPU_ID_ANY)
		return true;
	return gpus[dev_id].mpshared->info.parent == parent;
}

int16_t
rte_gpu_find_next(int16_t dev_id, int16_t parent)
{
	if (dev_id < 0)
		dev_id = 0;
	while (dev_id < gpu_max &&
			(gpus[dev_id].process_state == RTE_GPU_STATE_UNUSED ||
			!gpu_match_parent(dev_id, parent)))
		dev_id++;

	if (dev_id >= gpu_max)
		return RTE_GPU_ID_NONE;
	return dev_id;
}

static int16_t
gpu_find_free_id(void)
{
	int16_t dev_id;

	for (dev_id = 0; dev_id < gpu_max; dev_id++) {
		if (gpus[dev_id].process_state == RTE_GPU_STATE_UNUSED)
			return dev_id;
	}
	return RTE_GPU_ID_NONE;
}

static struct rte_gpu *
gpu_get_by_id(int16_t dev_id)
{
	if (!rte_gpu_is_valid(dev_id))
		return NULL;
	return &gpus[dev_id];
}

struct rte_gpu *
rte_gpu_get_by_name(const char *name)
{
	int16_t dev_id;
	struct rte_gpu *dev;

	if (name == NULL) {
		rte_errno = EINVAL;
		return NULL;
	}

	RTE_GPU_FOREACH(dev_id) {
		dev = &gpus[dev_id];
		if (strncmp(name, dev->mpshared->name, RTE_DEV_NAME_MAX_LEN) == 0)
			return dev;
	}
	return NULL;
}

static int
gpu_shared_mem_init(void)
{
	const struct rte_memzone *memzone;

	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
		memzone = rte_memzone_reserve(GPU_MEMZONE,
				sizeof(*gpu_shared_mem) +
				sizeof(*gpu_shared_mem->gpus) * gpu_max,
				SOCKET_ID_ANY, 0);
	} else {
		memzone = rte_memzone_lookup(GPU_MEMZONE);
	}
	if (memzone == NULL) {
		GPU_LOG(ERR, "cannot initialize shared memory");
		rte_errno = ENOMEM;
		return -rte_errno;
	}

	gpu_shared_mem = memzone->addr;
	return 0;
}

struct rte_gpu *
rte_gpu_allocate(const char *name)
{
	int16_t dev_id;
	struct rte_gpu *dev;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
		GPU_LOG(ERR, "only primary process can allocate device");
		rte_errno = EPERM;
		return NULL;
	}
	if (name == NULL) {
		GPU_LOG(ERR, "allocate device without a name");
		rte_errno = EINVAL;
		return NULL;
	}

	/* implicit initialization of library before adding first device */
	if (gpus == NULL && rte_gpu_init(RTE_GPU_DEFAULT_MAX) < 0)
		return NULL;

	/* initialize shared memory before adding first device */
	if (gpu_shared_mem == NULL && gpu_shared_mem_init() < 0)
		return NULL;

	if (rte_gpu_get_by_name(name) != NULL) {
		GPU_LOG(ERR, "device with name %s already exists", name);
		rte_errno = EEXIST;
		return NULL;
	}
	dev_id = gpu_find_free_id();
	if (dev_id == RTE_GPU_ID_NONE) {
		GPU_LOG(ERR, "reached maximum number of devices");
		rte_errno = ENOENT;
		return NULL;
	}

	dev = &gpus[dev_id];
	memset(dev, 0, sizeof(*dev));

	dev->mpshared = &gpu_shared_mem->gpus[dev_id];
	memset(dev->mpshared, 0, sizeof(*dev->mpshared));

	if (rte_strscpy(dev->mpshared->name, name, RTE_DEV_NAME_MAX_LEN) < 0) {
		GPU_LOG(ERR, "device name too long: %s", name);
		rte_errno = ENAMETOOLONG;
		return NULL;
	}
	dev->mpshared->info.name = dev->mpshared->name;
	dev->mpshared->info.dev_id = dev_id;
	dev->mpshared->info.numa_node = -1;
	dev->mpshared->info.parent = RTE_GPU_ID_NONE;
	TAILQ_INIT(&dev->callbacks);
	__atomic_fetch_add(&dev->mpshared->process_refcnt, 1, __ATOMIC_RELAXED);

	gpu_count++;
	GPU_LOG(DEBUG, "new device %s (id %d) of total %d",
			name, dev_id, gpu_count);
	return dev;
}
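
/*
 * Usage sketch (illustrative, not part of the library): how a GPU driver's
 * probe path in the primary process is expected to use rte_gpu_allocate().
 * The names my_gpu_probe, my_gpu_info_get and my_gpu_dev_close are
 * hypothetical; a real driver fills dev->ops with its own handlers and then
 * calls rte_gpu_complete_new(), defined further below.
 *
 *	static int
 *	my_gpu_probe(const char *name)
 *	{
 *		struct rte_gpu *dev;
 *
 *		dev = rte_gpu_allocate(name);
 *		if (dev == NULL)
 *			return -rte_errno;
 *
 *		dev->ops.dev_info_get = my_gpu_info_get;
 *		dev->ops.dev_close = my_gpu_dev_close;
 *
 *		rte_gpu_complete_new(dev);
 *		return 0;
 *	}
 */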

struct rte_gpu *
rte_gpu_attach(const char *name)
{
	int16_t dev_id;
	struct rte_gpu *dev;
	struct rte_gpu_mpshared *shared_dev;

	if (rte_eal_process_type() != RTE_PROC_SECONDARY) {
		GPU_LOG(ERR, "only secondary process can attach device");
		rte_errno = EPERM;
		return NULL;
	}
	if (name == NULL) {
		GPU_LOG(ERR, "attach device without a name");
		rte_errno = EINVAL;
		return NULL;
	}

	/* implicit initialization of library before adding first device */
	if (gpus == NULL && rte_gpu_init(RTE_GPU_DEFAULT_MAX) < 0)
		return NULL;

	/* initialize shared memory before adding first device */
	if (gpu_shared_mem == NULL && gpu_shared_mem_init() < 0)
		return NULL;

	for (dev_id = 0; dev_id < gpu_max; dev_id++) {
		shared_dev = &gpu_shared_mem->gpus[dev_id];
		if (strncmp(name, shared_dev->name, RTE_DEV_NAME_MAX_LEN) == 0)
			break;
	}
	if (dev_id >= gpu_max) {
		GPU_LOG(ERR, "device with name %s not found", name);
		rte_errno = ENOENT;
		return NULL;
	}
	dev = &gpus[dev_id];
	memset(dev, 0, sizeof(*dev));

	TAILQ_INIT(&dev->callbacks);
	dev->mpshared = shared_dev;
	__atomic_fetch_add(&dev->mpshared->process_refcnt, 1, __ATOMIC_RELAXED);

	gpu_count++;
	GPU_LOG(DEBUG, "attached device %s (id %d) of total %d",
			name, dev_id, gpu_count);
	return dev;
}

int16_t
rte_gpu_add_child(const char *name, int16_t parent, uint64_t child_context)
{
	struct rte_gpu *dev;

	if (!rte_gpu_is_valid(parent)) {
		GPU_LOG(ERR, "add child to invalid parent ID %d", parent);
		rte_errno = ENODEV;
		return -rte_errno;
	}

	dev = rte_gpu_allocate(name);
	if (dev == NULL)
		return -rte_errno;

	dev->mpshared->info.parent = parent;
	dev->mpshared->info.context = child_context;

	rte_gpu_complete_new(dev);
	return dev->mpshared->info.dev_id;
}

void
rte_gpu_complete_new(struct rte_gpu *dev)
{
	if (dev == NULL)
		return;

	dev->process_state = RTE_GPU_STATE_INITIALIZED;
	rte_gpu_notify(dev, RTE_GPU_EVENT_NEW);
}

int
rte_gpu_release(struct rte_gpu *dev)
{
	int16_t dev_id, child;

	if (dev == NULL) {
		rte_errno = ENODEV;
		return -rte_errno;
	}
	dev_id = dev->mpshared->info.dev_id;
	RTE_GPU_FOREACH_CHILD(child, dev_id) {
		GPU_LOG(ERR, "cannot release device %d with child %d",
				dev_id, child);
		rte_errno = EBUSY;
		return -rte_errno;
	}

	GPU_LOG(DEBUG, "free device %s (id %d)",
			dev->mpshared->info.name, dev->mpshared->info.dev_id);
	rte_gpu_notify(dev, RTE_GPU_EVENT_DEL);

	gpu_free_callbacks(dev);
	dev->process_state = RTE_GPU_STATE_UNUSED;
	__atomic_fetch_sub(&dev->mpshared->process_refcnt, 1, __ATOMIC_RELAXED);
	gpu_count--;

	return 0;
}

int
rte_gpu_close(int16_t dev_id)
{
	int firsterr, binerr;
	int *lasterr = &firsterr;
	struct rte_gpu *dev;

	dev = gpu_get_by_id(dev_id);
	if (dev == NULL) {
		GPU_LOG(ERR, "close invalid device ID %d", dev_id);
		rte_errno = ENODEV;
		return -rte_errno;
	}

	/* Keep the first error code but still try to release the device. */
	if (dev->ops.dev_close != NULL) {
		*lasterr = GPU_DRV_RET(dev->ops.dev_close(dev));
		if (*lasterr != 0)
			lasterr = &binerr;
	}

	*lasterr = rte_gpu_release(dev);

	rte_errno = -firsterr;
	return firsterr;
}

int
rte_gpu_callback_register(int16_t dev_id, enum rte_gpu_event event,
		rte_gpu_callback_t *function, void *user_data)
{
	int16_t next_dev, last_dev;
	struct rte_gpu_callback_list *callbacks;
	struct rte_gpu_callback *callback;

	if (!rte_gpu_is_valid(dev_id) && dev_id != RTE_GPU_ID_ANY) {
		GPU_LOG(ERR, "register callback of invalid ID %d", dev_id);
		rte_errno = ENODEV;
		return -rte_errno;
	}
	if (function == NULL) {
		GPU_LOG(ERR, "cannot register callback without function");
		rte_errno = EINVAL;
		return -rte_errno;
	}

	if (dev_id == RTE_GPU_ID_ANY) {
		next_dev = 0;
		last_dev = gpu_max - 1;
	} else {
		next_dev = last_dev = dev_id;
	}

	rte_rwlock_write_lock(&gpu_callback_lock);
	do {
		callbacks = &gpus[next_dev].callbacks;

		/* check if not already registered */
		TAILQ_FOREACH(callback, callbacks, next) {
			if (callback->event == event &&
					callback->function == function &&
					callback->user_data == user_data) {
				GPU_LOG(INFO, "callback already registered");
				rte_rwlock_write_unlock(&gpu_callback_lock);
				return 0;
			}
		}

		callback = malloc(sizeof(*callback));
		if (callback == NULL) {
			GPU_LOG(ERR, "cannot allocate callback");
			rte_rwlock_write_unlock(&gpu_callback_lock);
			return -ENOMEM;
		}
		callback->function = function;
		callback->user_data = user_data;
		callback->event = event;
		TAILQ_INSERT_TAIL(callbacks, callback, next);

	} while (++next_dev <= last_dev);
	rte_rwlock_write_unlock(&gpu_callback_lock);

	return 0;
}
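
/*
 * Usage sketch (illustrative, not part of the library): registering an event
 * callback for all devices. The callback prototype follows rte_gpu_callback_t
 * as invoked by rte_gpu_notify() below; the function name and cookie are
 * hypothetical.
 *
 *	static void
 *	my_gpu_event_cb(int16_t dev_id, enum rte_gpu_event event, void *user_data)
 *	{
 *		if (event == RTE_GPU_EVENT_NEW)
 *			printf("new GPU %d (cookie %p)\n", dev_id, user_data);
 *	}
 *
 *	rte_gpu_callback_register(RTE_GPU_ID_ANY, RTE_GPU_EVENT_NEW,
 *			my_gpu_event_cb, my_cookie);
 */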

int
rte_gpu_callback_unregister(int16_t dev_id, enum rte_gpu_event event,
		rte_gpu_callback_t *function, void *user_data)
{
	int16_t next_dev, last_dev;
	struct rte_gpu_callback_list *callbacks;
	struct rte_gpu_callback *callback, *nextcb;

	if (!rte_gpu_is_valid(dev_id) && dev_id != RTE_GPU_ID_ANY) {
		GPU_LOG(ERR, "unregister callback of invalid ID %d", dev_id);
		rte_errno = ENODEV;
		return -rte_errno;
	}
	if (function == NULL) {
		GPU_LOG(ERR, "cannot unregister callback without function");
		rte_errno = EINVAL;
		return -rte_errno;
	}

	if (dev_id == RTE_GPU_ID_ANY) {
		next_dev = 0;
		last_dev = gpu_max - 1;
	} else {
		next_dev = last_dev = dev_id;
	}

	rte_rwlock_write_lock(&gpu_callback_lock);
	do {
		callbacks = &gpus[next_dev].callbacks;
		RTE_TAILQ_FOREACH_SAFE(callback, callbacks, next, nextcb) {
			/* user_data == (void *)-1 matches any registered data */
			if (callback->event != event ||
					callback->function != function ||
					(callback->user_data != user_data &&
					user_data != (void *)-1))
				continue;
			TAILQ_REMOVE(callbacks, callback, next);
			free(callback);
		}
	} while (++next_dev <= last_dev);
	rte_rwlock_write_unlock(&gpu_callback_lock);

	return 0;
}

static void
gpu_free_callbacks(struct rte_gpu *dev)
{
	struct rte_gpu_callback_list *callbacks;
	struct rte_gpu_callback *callback, *nextcb;

	callbacks = &dev->callbacks;
	rte_rwlock_write_lock(&gpu_callback_lock);
	RTE_TAILQ_FOREACH_SAFE(callback, callbacks, next, nextcb) {
		TAILQ_REMOVE(callbacks, callback, next);
		free(callback);
	}
	rte_rwlock_write_unlock(&gpu_callback_lock);
}

void
rte_gpu_notify(struct rte_gpu *dev, enum rte_gpu_event event)
{
	int16_t dev_id;
	struct rte_gpu_callback *callback;

	dev_id = dev->mpshared->info.dev_id;
	rte_rwlock_read_lock(&gpu_callback_lock);
	TAILQ_FOREACH(callback, &dev->callbacks, next) {
		if (callback->event != event || callback->function == NULL)
			continue;
		callback->function(dev_id, event, callback->user_data);
	}
	rte_rwlock_read_unlock(&gpu_callback_lock);
}

int
rte_gpu_info_get(int16_t dev_id, struct rte_gpu_info *info)
{
	struct rte_gpu *dev;

	dev = gpu_get_by_id(dev_id);
	if (dev == NULL) {
		GPU_LOG(ERR, "query invalid device ID %d", dev_id);
		rte_errno = ENODEV;
		return -rte_errno;
	}
	if (info == NULL) {
		GPU_LOG(ERR, "query without storage");
		rte_errno = EINVAL;
		return -rte_errno;
	}

	if (dev->ops.dev_info_get == NULL) {
		*info = dev->mpshared->info;
		return 0;
	}
	return GPU_DRV_RET(dev->ops.dev_info_get(dev, info));
}
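
/*
 * Usage sketch (illustrative, not part of the library): querying every probed
 * device with RTE_GPU_FOREACH() and rte_gpu_info_get(). Only fields touched
 * elsewhere in this file (name, dev_id) are printed; other rte_gpu_info
 * fields depend on the driver.
 *
 *	int16_t dev_id;
 *	struct rte_gpu_info info;
 *
 *	RTE_GPU_FOREACH(dev_id) {
 *		if (rte_gpu_info_get(dev_id, &info) == 0)
 *			printf("GPU %d: %s\n", info.dev_id, info.name);
 *	}
 */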

void *
rte_gpu_mem_alloc(int16_t dev_id, size_t size, unsigned int align)
{
	struct rte_gpu *dev;
	void *ptr;
	int ret;

	dev = gpu_get_by_id(dev_id);
	if (dev == NULL) {
		GPU_LOG(ERR, "alloc mem for invalid device ID %d", dev_id);
		rte_errno = ENODEV;
		return NULL;
	}

	if (dev->ops.mem_alloc == NULL) {
		GPU_LOG(ERR, "mem allocation not supported");
		rte_errno = ENOTSUP;
		return NULL;
	}

	if (size == 0) /* dry-run */
		return NULL;

	if (align && !rte_is_power_of_2(align)) {
		GPU_LOG(ERR, "requested alignment is not a power of two %u", align);
		rte_errno = EINVAL;
		return NULL;
	}

	ret = dev->ops.mem_alloc(dev, size, align, &ptr);

	switch (ret) {
	case 0:
		return ptr;
	case -ENOMEM:
	case -E2BIG:
		rte_errno = -ret;
		return NULL;
	default:
		rte_errno = EPERM;
		return NULL;
	}
}

int
rte_gpu_mem_free(int16_t dev_id, void *ptr)
{
	struct rte_gpu *dev;

	dev = gpu_get_by_id(dev_id);
	if (dev == NULL) {
		GPU_LOG(ERR, "free mem for invalid device ID %d", dev_id);
		rte_errno = ENODEV;
		return -rte_errno;
	}

	if (dev->ops.mem_free == NULL) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}

	if (ptr == NULL) /* dry-run */
		return 0;

	return GPU_DRV_RET(dev->ops.mem_free(dev, ptr));
}

int
rte_gpu_mem_register(int16_t dev_id, size_t size, void *ptr)
{
	struct rte_gpu *dev;

	dev = gpu_get_by_id(dev_id);
	if (dev == NULL) {
		GPU_LOG(ERR, "register mem for invalid device ID %d", dev_id);
		rte_errno = ENODEV;
		return -rte_errno;
	}

	if (dev->ops.mem_register == NULL) {
		GPU_LOG(ERR, "mem registration not supported");
		rte_errno = ENOTSUP;
		return -rte_errno;
	}

	if (ptr == NULL || size == 0) /* dry-run */
		return 0;

	return GPU_DRV_RET(dev->ops.mem_register(dev, size, ptr));
}

int
rte_gpu_mem_unregister(int16_t dev_id, void *ptr)
{
	struct rte_gpu *dev;

	dev = gpu_get_by_id(dev_id);
	if (dev == NULL) {
		GPU_LOG(ERR, "unregister mem for invalid device ID %d", dev_id);
		rte_errno = ENODEV;
		return -rte_errno;
	}

	if (dev->ops.mem_unregister == NULL) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}

	if (ptr == NULL) /* dry-run */
		return 0;

	return GPU_DRV_RET(dev->ops.mem_unregister(dev, ptr));
}

void *
rte_gpu_mem_cpu_map(int16_t dev_id, size_t size, void *ptr)
{
	struct rte_gpu *dev;
	void *ptr_out;
	int ret;

	dev = gpu_get_by_id(dev_id);
	if (dev == NULL) {
		GPU_LOG(ERR, "mem CPU map for invalid device ID %d", dev_id);
		rte_errno = ENODEV;
		return NULL;
	}

	if (dev->ops.mem_cpu_map == NULL) {
		GPU_LOG(ERR, "mem CPU map not supported");
		rte_errno = ENOTSUP;
		return NULL;
	}

	if (ptr == NULL || size == 0) /* dry-run */
		return NULL;

	ret = dev->ops.mem_cpu_map(dev, size, ptr, &ptr_out);

	switch (ret) {
	case 0:
		return ptr_out;
	case -ENOMEM:
	case -E2BIG:
		rte_errno = -ret;
		return NULL;
	default:
		rte_errno = EPERM;
		return NULL;
	}
}

int
rte_gpu_mem_cpu_unmap(int16_t dev_id, void *ptr)
{
	struct rte_gpu *dev;

	dev = gpu_get_by_id(dev_id);
	if (dev == NULL) {
		GPU_LOG(ERR, "cpu_unmap mem for invalid device ID %d", dev_id);
		rte_errno = ENODEV;
		return -rte_errno;
	}

	if (dev->ops.mem_cpu_unmap == NULL) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}

	if (ptr == NULL) /* dry-run */
		return 0;

	return GPU_DRV_RET(dev->ops.mem_cpu_unmap(dev, ptr));
}

int
rte_gpu_wmb(int16_t dev_id)
{
	struct rte_gpu *dev;

	dev = gpu_get_by_id(dev_id);
	if (dev == NULL) {
		GPU_LOG(ERR, "memory barrier for invalid device ID %d", dev_id);
		rte_errno = ENODEV;
		return -rte_errno;
	}

	if (dev->ops.wmb == NULL) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return GPU_DRV_RET(dev->ops.wmb(dev));
}
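
/*
 * Usage sketch (illustrative, not part of the library): allocating device
 * memory and exposing CPU memory to the device. The 1 MB size and 4 kB
 * alignment are arbitrary; error handling mirrors the rte_errno convention
 * used above.
 *
 *	void *gpu_buf, *cpu_buf;
 *
 *	gpu_buf = rte_gpu_mem_alloc(dev_id, 1 << 20, 4096);
 *	if (gpu_buf == NULL)
 *		return -rte_errno;
 *
 *	cpu_buf = rte_malloc(NULL, 1 << 20, 4096);
 *	if (cpu_buf == NULL || rte_gpu_mem_register(dev_id, 1 << 20, cpu_buf) < 0)
 *		return -rte_errno;
 *
 *	// ... use the buffers ...
 *
 *	rte_gpu_mem_unregister(dev_id, cpu_buf);
 *	rte_free(cpu_buf);
 *	rte_gpu_mem_free(dev_id, gpu_buf);
 */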

int
rte_gpu_comm_create_flag(uint16_t dev_id, struct rte_gpu_comm_flag *devflag,
		enum rte_gpu_comm_flag_type mtype)
{
	size_t flag_size;
	int ret;

	if (devflag == NULL) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	if (mtype != RTE_GPU_COMM_FLAG_CPU) {
		rte_errno = EINVAL;
		return -rte_errno;
	}

	flag_size = sizeof(uint32_t);

	devflag->ptr = rte_zmalloc(NULL, flag_size, 0);
	if (devflag->ptr == NULL) {
		rte_errno = ENOMEM;
		return -rte_errno;
	}

	ret = rte_gpu_mem_register(dev_id, flag_size, devflag->ptr);
	if (ret < 0) {
		rte_free(devflag->ptr);
		rte_errno = ENOMEM;
		return -rte_errno;
	}

	devflag->mtype = mtype;
	devflag->dev_id = dev_id;

	return 0;
}

int
rte_gpu_comm_destroy_flag(struct rte_gpu_comm_flag *devflag)
{
	int ret;

	if (devflag == NULL) {
		rte_errno = EINVAL;
		return -rte_errno;
	}

	ret = rte_gpu_mem_unregister(devflag->dev_id, devflag->ptr);
	if (ret < 0) {
		rte_errno = EINVAL;
		return -1;
	}

	rte_free(devflag->ptr);

	return 0;
}

int
rte_gpu_comm_set_flag(struct rte_gpu_comm_flag *devflag, uint32_t val)
{
	if (devflag == NULL) {
		rte_errno = EINVAL;
		return -rte_errno;
	}

	if (devflag->mtype != RTE_GPU_COMM_FLAG_CPU) {
		rte_errno = EINVAL;
		return -rte_errno;
	}

	RTE_GPU_VOLATILE(*devflag->ptr) = val;

	return 0;
}

int
rte_gpu_comm_get_flag_value(struct rte_gpu_comm_flag *devflag, uint32_t *val)
{
	if (devflag == NULL) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	if (devflag->mtype != RTE_GPU_COMM_FLAG_CPU) {
		rte_errno = EINVAL;
		return -rte_errno;
	}

	*val = RTE_GPU_VOLATILE(*devflag->ptr);

	return 0;
}

struct rte_gpu_comm_list *
rte_gpu_comm_create_list(uint16_t dev_id,
		uint32_t num_comm_items)
{
	struct rte_gpu_comm_list *comm_list;
	uint32_t idx_l;
	int ret;
	struct rte_gpu *dev;

	if (num_comm_items == 0) {
		rte_errno = EINVAL;
		return NULL;
	}

	dev = gpu_get_by_id(dev_id);
	if (dev == NULL) {
		GPU_LOG(ERR, "create communication list for invalid device ID %d", dev_id);
		rte_errno = ENODEV;
		return NULL;
	}

	comm_list = rte_zmalloc(NULL,
			sizeof(struct rte_gpu_comm_list) * num_comm_items, 0);
	if (comm_list == NULL) {
		rte_errno = ENOMEM;
		return NULL;
	}

	ret = rte_gpu_mem_register(dev_id,
			sizeof(struct rte_gpu_comm_list) * num_comm_items, comm_list);
	if (ret < 0) {
		rte_errno = ENOMEM;
		return NULL;
	}

	for (idx_l = 0; idx_l < num_comm_items; idx_l++) {
		comm_list[idx_l].pkt_list = rte_zmalloc(NULL,
				sizeof(struct rte_gpu_comm_pkt) * RTE_GPU_COMM_LIST_PKTS_MAX, 0);
		if (comm_list[idx_l].pkt_list == NULL) {
			rte_errno = ENOMEM;
			return NULL;
		}

		ret = rte_gpu_mem_register(dev_id,
				sizeof(struct rte_gpu_comm_pkt) * RTE_GPU_COMM_LIST_PKTS_MAX,
				comm_list[idx_l].pkt_list);
		if (ret < 0) {
			rte_errno = ENOMEM;
			return NULL;
		}

		RTE_GPU_VOLATILE(comm_list[idx_l].status) = RTE_GPU_COMM_LIST_FREE;
		comm_list[idx_l].num_pkts = 0;
		comm_list[idx_l].dev_id = dev_id;

		comm_list[idx_l].mbufs = rte_zmalloc(NULL,
				sizeof(struct rte_mbuf *) * RTE_GPU_COMM_LIST_PKTS_MAX, 0);
		if (comm_list[idx_l].mbufs == NULL) {
			rte_errno = ENOMEM;
			return NULL;
		}
	}

	return comm_list;
}
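
/*
 * Usage sketch (illustrative, not part of the library): CPU-side producer
 * flow for the communication list. A burst of received mbufs is published to
 * the device, and an item is recycled once the device-side consumer has
 * updated its status. The list size (16) and the mbufs/nb_rx variables are
 * hypothetical.
 *
 *	struct rte_gpu_comm_list *comm_list;
 *
 *	comm_list = rte_gpu_comm_create_list(dev_id, 16);
 *	if (comm_list == NULL)
 *		return -rte_errno;
 *
 *	rte_gpu_comm_populate_list_pkts(&comm_list[0], mbufs, nb_rx);
 *
 *	// later, once item 0 is no longer RTE_GPU_COMM_LIST_READY
 *	rte_gpu_comm_cleanup_list(&comm_list[0]);
 *
 *	rte_gpu_comm_destroy_list(comm_list, 16);
 */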

int
rte_gpu_comm_destroy_list(struct rte_gpu_comm_list *comm_list,
		uint32_t num_comm_items)
{
	uint32_t idx_l;
	int ret;
	uint16_t dev_id;

	if (comm_list == NULL) {
		rte_errno = EINVAL;
		return -rte_errno;
	}

	dev_id = comm_list[0].dev_id;

	for (idx_l = 0; idx_l < num_comm_items; idx_l++) {
		ret = rte_gpu_mem_unregister(dev_id, comm_list[idx_l].pkt_list);
		if (ret < 0) {
			rte_errno = EINVAL;
			return -1;
		}

		rte_free(comm_list[idx_l].pkt_list);
		rte_free(comm_list[idx_l].mbufs);
	}

	ret = rte_gpu_mem_unregister(dev_id, comm_list);
	if (ret < 0) {
		rte_errno = EINVAL;
		return -1;
	}

	rte_free(comm_list);

	return 0;
}

int
rte_gpu_comm_populate_list_pkts(struct rte_gpu_comm_list *comm_list_item,
		struct rte_mbuf **mbufs, uint32_t num_mbufs)
{
	uint32_t idx;

	if (comm_list_item == NULL || comm_list_item->pkt_list == NULL ||
			mbufs == NULL || num_mbufs > RTE_GPU_COMM_LIST_PKTS_MAX) {
		rte_errno = EINVAL;
		return -rte_errno;
	}

	for (idx = 0; idx < num_mbufs; idx++) {
		/* support only unchained mbufs */
		if (unlikely((mbufs[idx]->nb_segs > 1) ||
				(mbufs[idx]->next != NULL) ||
				(mbufs[idx]->data_len != mbufs[idx]->pkt_len))) {
			rte_errno = ENOTSUP;
			return -rte_errno;
		}
		comm_list_item->pkt_list[idx].addr =
				rte_pktmbuf_mtod_offset(mbufs[idx], uintptr_t, 0);
		comm_list_item->pkt_list[idx].size = mbufs[idx]->pkt_len;
		comm_list_item->mbufs[idx] = mbufs[idx];
	}

	RTE_GPU_VOLATILE(comm_list_item->num_pkts) = num_mbufs;
	rte_gpu_wmb(comm_list_item->dev_id);
	RTE_GPU_VOLATILE(comm_list_item->status) = RTE_GPU_COMM_LIST_READY;

	return 0;
}

int
rte_gpu_comm_cleanup_list(struct rte_gpu_comm_list *comm_list_item)
{
	uint32_t idx = 0;

	if (comm_list_item == NULL) {
		rte_errno = EINVAL;
		return -rte_errno;
	}

	if (RTE_GPU_VOLATILE(comm_list_item->status) ==
			RTE_GPU_COMM_LIST_READY) {
		GPU_LOG(ERR, "packet list is still in progress");
		rte_errno = EINVAL;
		return -rte_errno;
	}

	for (idx = 0; idx < RTE_GPU_COMM_LIST_PKTS_MAX; idx++) {
		if (comm_list_item->pkt_list[idx].addr == 0)
			break;

		comm_list_item->pkt_list[idx].addr = 0;
		comm_list_item->pkt_list[idx].size = 0;
		comm_list_item->mbufs[idx] = NULL;
	}

	RTE_GPU_VOLATILE(comm_list_item->status) = RTE_GPU_COMM_LIST_FREE;
	RTE_GPU_VOLATILE(comm_list_item->num_pkts) = 0;
	rte_mb();

	return 0;
}