1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2016 RehiveTech. All rights reserved. 3 */ 4 5 #include <string.h> 6 #include <inttypes.h> 7 #include <stdio.h> 8 #include <stdlib.h> 9 #include <stdint.h> 10 #include <stdbool.h> 11 #include <sys/queue.h> 12 13 #include <rte_eal.h> 14 #include <dev_driver.h> 15 #include <bus_driver.h> 16 #include <rte_common.h> 17 #include <rte_devargs.h> 18 #include <rte_memory.h> 19 #include <rte_tailq.h> 20 #include <rte_spinlock.h> 21 #include <rte_string_fns.h> 22 #include <rte_errno.h> 23 24 #include "bus_vdev_driver.h" 25 #include "vdev_logs.h" 26 #include "vdev_private.h" 27 28 #define VDEV_MP_KEY "bus_vdev_mp" 29 30 /* Forward declare to access virtual bus name */ 31 static struct rte_bus rte_vdev_bus; 32 33 34 static TAILQ_HEAD(, rte_vdev_device) vdev_device_list = 35 TAILQ_HEAD_INITIALIZER(vdev_device_list); 36 /* The lock needs to be recursive because a vdev can manage another vdev. */ 37 static rte_spinlock_recursive_t vdev_device_list_lock = 38 RTE_SPINLOCK_RECURSIVE_INITIALIZER; 39 40 static TAILQ_HEAD(, rte_vdev_driver) vdev_driver_list = 41 TAILQ_HEAD_INITIALIZER(vdev_driver_list); 42 43 struct vdev_custom_scan { 44 TAILQ_ENTRY(vdev_custom_scan) next; 45 rte_vdev_scan_callback callback; 46 void *user_arg; 47 }; 48 TAILQ_HEAD(vdev_custom_scans, vdev_custom_scan); 49 static struct vdev_custom_scans vdev_custom_scans = 50 TAILQ_HEAD_INITIALIZER(vdev_custom_scans); 51 static rte_spinlock_t vdev_custom_scan_lock = RTE_SPINLOCK_INITIALIZER; 52 53 /* register a driver */ 54 void 55 rte_vdev_register(struct rte_vdev_driver *driver) 56 { 57 TAILQ_INSERT_TAIL(&vdev_driver_list, driver, next); 58 } 59 60 /* unregister a driver */ 61 void 62 rte_vdev_unregister(struct rte_vdev_driver *driver) 63 { 64 TAILQ_REMOVE(&vdev_driver_list, driver, next); 65 } 66 67 int 68 rte_vdev_add_custom_scan(rte_vdev_scan_callback callback, void *user_arg) 69 { 70 struct vdev_custom_scan *custom_scan; 71 72 rte_spinlock_lock(&vdev_custom_scan_lock); 73 74 /* check if already registered */ 75 TAILQ_FOREACH(custom_scan, &vdev_custom_scans, next) { 76 if (custom_scan->callback == callback && 77 custom_scan->user_arg == user_arg) 78 break; 79 } 80 81 if (custom_scan == NULL) { 82 custom_scan = malloc(sizeof(struct vdev_custom_scan)); 83 if (custom_scan != NULL) { 84 custom_scan->callback = callback; 85 custom_scan->user_arg = user_arg; 86 TAILQ_INSERT_TAIL(&vdev_custom_scans, custom_scan, next); 87 } 88 } 89 90 rte_spinlock_unlock(&vdev_custom_scan_lock); 91 92 return (custom_scan == NULL) ? -1 : 0; 93 } 94 95 int 96 rte_vdev_remove_custom_scan(rte_vdev_scan_callback callback, void *user_arg) 97 { 98 struct vdev_custom_scan *custom_scan, *tmp_scan; 99 100 rte_spinlock_lock(&vdev_custom_scan_lock); 101 RTE_TAILQ_FOREACH_SAFE(custom_scan, &vdev_custom_scans, next, 102 tmp_scan) { 103 if (custom_scan->callback != callback || 104 (custom_scan->user_arg != (void *)-1 && 105 custom_scan->user_arg != user_arg)) 106 continue; 107 TAILQ_REMOVE(&vdev_custom_scans, custom_scan, next); 108 free(custom_scan); 109 } 110 rte_spinlock_unlock(&vdev_custom_scan_lock); 111 112 return 0; 113 } 114 115 static int 116 vdev_parse(const char *name, void *addr) 117 { 118 struct rte_vdev_driver **out = addr; 119 struct rte_vdev_driver *driver = NULL; 120 121 TAILQ_FOREACH(driver, &vdev_driver_list, next) { 122 if (strncmp(driver->driver.name, name, 123 strlen(driver->driver.name)) == 0) 124 break; 125 if (driver->driver.alias && 126 strncmp(driver->driver.alias, name, 127 strlen(driver->driver.alias)) == 0) 128 break; 129 } 130 if (driver != NULL && 131 addr != NULL) 132 *out = driver; 133 return driver == NULL; 134 } 135 136 static int 137 vdev_dma_map(struct rte_device *dev, void *addr, uint64_t iova, size_t len) 138 { 139 struct rte_vdev_device *vdev = RTE_DEV_TO_VDEV(dev); 140 const struct rte_vdev_driver *driver; 141 142 if (!vdev) { 143 rte_errno = EINVAL; 144 return -1; 145 } 146 147 if (!vdev->device.driver) { 148 VDEV_LOG(DEBUG, "no driver attach to device %s", dev->name); 149 return 1; 150 } 151 152 driver = container_of(vdev->device.driver, const struct rte_vdev_driver, 153 driver); 154 155 if (driver->dma_map) 156 return driver->dma_map(vdev, addr, iova, len); 157 158 return 0; 159 } 160 161 static int 162 vdev_dma_unmap(struct rte_device *dev, void *addr, uint64_t iova, size_t len) 163 { 164 struct rte_vdev_device *vdev = RTE_DEV_TO_VDEV(dev); 165 const struct rte_vdev_driver *driver; 166 167 if (!vdev) { 168 rte_errno = EINVAL; 169 return -1; 170 } 171 172 if (!vdev->device.driver) { 173 VDEV_LOG(DEBUG, "no driver attach to device %s", dev->name); 174 return 1; 175 } 176 177 driver = container_of(vdev->device.driver, const struct rte_vdev_driver, 178 driver); 179 180 if (driver->dma_unmap) 181 return driver->dma_unmap(vdev, addr, iova, len); 182 183 return 0; 184 } 185 186 static int 187 vdev_probe_all_drivers(struct rte_vdev_device *dev) 188 { 189 const char *name; 190 struct rte_vdev_driver *driver; 191 enum rte_iova_mode iova_mode; 192 int ret; 193 194 if (rte_dev_is_probed(&dev->device)) 195 return -EEXIST; 196 197 name = rte_vdev_device_name(dev); 198 VDEV_LOG(DEBUG, "Search driver to probe device %s", name); 199 200 if (vdev_parse(name, &driver)) 201 return -1; 202 203 iova_mode = rte_eal_iova_mode(); 204 if ((driver->drv_flags & RTE_VDEV_DRV_NEED_IOVA_AS_VA) && (iova_mode == RTE_IOVA_PA)) { 205 VDEV_LOG(ERR, "%s requires VA IOVA mode but current mode is PA, not initializing", 206 name); 207 return -1; 208 } 209 210 ret = driver->probe(dev); 211 if (ret == 0) 212 dev->device.driver = &driver->driver; 213 return ret; 214 } 215 216 /* The caller shall be responsible for thread-safe */ 217 static struct rte_vdev_device * 218 find_vdev(const char *name) 219 { 220 struct rte_vdev_device *dev; 221 222 if (!name) 223 return NULL; 224 225 TAILQ_FOREACH(dev, &vdev_device_list, next) { 226 const char *devname = rte_vdev_device_name(dev); 227 228 if (!strcmp(devname, name)) 229 return dev; 230 } 231 232 return NULL; 233 } 234 235 static struct rte_devargs * 236 alloc_devargs(const char *name, const char *args) 237 { 238 struct rte_devargs *devargs; 239 int ret; 240 241 devargs = calloc(1, sizeof(*devargs)); 242 if (!devargs) 243 return NULL; 244 245 devargs->bus = &rte_vdev_bus; 246 if (args) 247 devargs->data = strdup(args); 248 else 249 devargs->data = strdup(""); 250 if (devargs->data == NULL) { 251 free(devargs); 252 return NULL; 253 } 254 devargs->args = devargs->data; 255 256 ret = strlcpy(devargs->name, name, sizeof(devargs->name)); 257 if (ret < 0 || ret >= (int)sizeof(devargs->name)) { 258 rte_devargs_reset(devargs); 259 free(devargs); 260 return NULL; 261 } 262 263 return devargs; 264 } 265 266 static struct rte_devargs * 267 vdev_devargs_lookup(const char *name) 268 { 269 struct rte_devargs *devargs; 270 char dev_name[32]; 271 272 RTE_EAL_DEVARGS_FOREACH("vdev", devargs) { 273 devargs->bus->parse(devargs->name, &dev_name); 274 if (strcmp(dev_name, name) == 0) { 275 VDEV_LOG(INFO, "devargs matched %s", dev_name); 276 return devargs; 277 } 278 } 279 return NULL; 280 } 281 282 static int 283 insert_vdev(const char *name, const char *args, 284 struct rte_vdev_device **p_dev, 285 bool init) 286 { 287 struct rte_vdev_device *dev; 288 struct rte_devargs *devargs; 289 int ret; 290 291 if (name == NULL) 292 return -EINVAL; 293 294 if (rte_eal_process_type() == RTE_PROC_PRIMARY) 295 devargs = alloc_devargs(name, args); 296 else 297 devargs = vdev_devargs_lookup(name); 298 299 if (!devargs) 300 return -ENOMEM; 301 302 dev = calloc(1, sizeof(*dev)); 303 if (!dev) { 304 ret = -ENOMEM; 305 goto fail; 306 } 307 308 dev->device.bus = &rte_vdev_bus; 309 dev->device.numa_node = SOCKET_ID_ANY; 310 dev->device.name = devargs->name; 311 312 if (find_vdev(name)) { 313 /* 314 * A vdev is expected to have only one port. 315 * So there is no reason to try probing again, 316 * even with new arguments. 317 */ 318 ret = -EEXIST; 319 goto fail; 320 } 321 322 if (init) 323 rte_devargs_insert(&devargs); 324 dev->device.devargs = devargs; 325 TAILQ_INSERT_TAIL(&vdev_device_list, dev, next); 326 327 if (p_dev) 328 *p_dev = dev; 329 330 return 0; 331 fail: 332 rte_devargs_reset(devargs); 333 free(devargs); 334 free(dev); 335 return ret; 336 } 337 338 int 339 rte_vdev_init(const char *name, const char *args) 340 { 341 struct rte_vdev_device *dev; 342 int ret; 343 344 rte_spinlock_recursive_lock(&vdev_device_list_lock); 345 ret = insert_vdev(name, args, &dev, true); 346 if (ret == 0) { 347 ret = vdev_probe_all_drivers(dev); 348 if (ret) { 349 if (ret > 0) 350 VDEV_LOG(ERR, "no driver found for %s", name); 351 /* If fails, remove it from vdev list */ 352 TAILQ_REMOVE(&vdev_device_list, dev, next); 353 rte_devargs_remove(dev->device.devargs); 354 free(dev); 355 } 356 } 357 rte_spinlock_recursive_unlock(&vdev_device_list_lock); 358 return ret; 359 } 360 361 static int 362 vdev_remove_driver(struct rte_vdev_device *dev) 363 { 364 const char *name = rte_vdev_device_name(dev); 365 const struct rte_vdev_driver *driver; 366 367 if (!dev->device.driver) { 368 VDEV_LOG(DEBUG, "no driver attach to device %s", name); 369 return 1; 370 } 371 372 driver = container_of(dev->device.driver, const struct rte_vdev_driver, 373 driver); 374 return driver->remove(dev); 375 } 376 377 int 378 rte_vdev_uninit(const char *name) 379 { 380 struct rte_vdev_device *dev; 381 int ret; 382 383 if (name == NULL) 384 return -EINVAL; 385 386 rte_spinlock_recursive_lock(&vdev_device_list_lock); 387 388 dev = find_vdev(name); 389 if (!dev) { 390 ret = -ENOENT; 391 goto unlock; 392 } 393 394 ret = vdev_remove_driver(dev); 395 if (ret) 396 goto unlock; 397 398 TAILQ_REMOVE(&vdev_device_list, dev, next); 399 rte_devargs_remove(dev->device.devargs); 400 free(dev); 401 402 unlock: 403 rte_spinlock_recursive_unlock(&vdev_device_list_lock); 404 return ret; 405 } 406 407 struct vdev_param { 408 #define VDEV_SCAN_REQ 1 409 #define VDEV_SCAN_ONE 2 410 #define VDEV_SCAN_REP 3 411 int type; 412 int num; 413 char name[RTE_DEV_NAME_MAX_LEN]; 414 }; 415 416 static int vdev_plug(struct rte_device *dev); 417 418 /** 419 * This function works as the action for both primary and secondary process 420 * for static vdev discovery when a secondary process is booting. 421 * 422 * step 1, secondary process sends a sync request to ask for vdev in primary; 423 * step 2, primary process receives the request, and send vdevs one by one; 424 * step 3, primary process sends back reply, which indicates how many vdevs 425 * are sent. 426 */ 427 static int 428 vdev_action(const struct rte_mp_msg *mp_msg, const void *peer) 429 { 430 struct rte_vdev_device *dev; 431 struct rte_mp_msg mp_resp; 432 struct vdev_param *ou = (struct vdev_param *)&mp_resp.param; 433 const struct vdev_param *in = (const struct vdev_param *)mp_msg->param; 434 const char *devname; 435 int num; 436 int ret; 437 438 strlcpy(mp_resp.name, VDEV_MP_KEY, sizeof(mp_resp.name)); 439 mp_resp.len_param = sizeof(*ou); 440 mp_resp.num_fds = 0; 441 442 switch (in->type) { 443 case VDEV_SCAN_REQ: 444 ou->type = VDEV_SCAN_ONE; 445 ou->num = 1; 446 num = 0; 447 448 rte_spinlock_recursive_lock(&vdev_device_list_lock); 449 TAILQ_FOREACH(dev, &vdev_device_list, next) { 450 devname = rte_vdev_device_name(dev); 451 if (strlen(devname) == 0) { 452 VDEV_LOG(INFO, "vdev with no name is not sent"); 453 continue; 454 } 455 VDEV_LOG(INFO, "send vdev, %s", devname); 456 strlcpy(ou->name, devname, RTE_DEV_NAME_MAX_LEN); 457 if (rte_mp_sendmsg(&mp_resp) < 0) 458 VDEV_LOG(ERR, "send vdev, %s, failed, %s", 459 devname, strerror(rte_errno)); 460 num++; 461 } 462 rte_spinlock_recursive_unlock(&vdev_device_list_lock); 463 464 ou->type = VDEV_SCAN_REP; 465 ou->num = num; 466 if (rte_mp_reply(&mp_resp, peer) < 0) 467 VDEV_LOG(ERR, "Failed to reply a scan request"); 468 break; 469 case VDEV_SCAN_ONE: 470 VDEV_LOG(INFO, "receive vdev, %s", in->name); 471 ret = insert_vdev(in->name, NULL, NULL, false); 472 if (ret == -EEXIST) 473 VDEV_LOG(DEBUG, "device already exist, %s", in->name); 474 else if (ret < 0) 475 VDEV_LOG(ERR, "failed to add vdev, %s", in->name); 476 break; 477 default: 478 VDEV_LOG(ERR, "vdev cannot recognize this message"); 479 } 480 481 return 0; 482 } 483 484 static int 485 vdev_scan(void) 486 { 487 struct rte_vdev_device *dev; 488 struct rte_devargs *devargs; 489 struct vdev_custom_scan *custom_scan; 490 491 if (rte_mp_action_register(VDEV_MP_KEY, vdev_action) < 0 && 492 rte_errno != EEXIST) { 493 /* for primary, unsupported IPC is not an error */ 494 if (rte_eal_process_type() == RTE_PROC_PRIMARY && 495 rte_errno == ENOTSUP) 496 goto scan; 497 VDEV_LOG(ERR, "Failed to add vdev mp action"); 498 return -1; 499 } 500 501 if (rte_eal_process_type() == RTE_PROC_SECONDARY) { 502 struct rte_mp_msg mp_req, *mp_rep; 503 struct rte_mp_reply mp_reply; 504 struct timespec ts = {.tv_sec = 5, .tv_nsec = 0}; 505 struct vdev_param *req = (struct vdev_param *)mp_req.param; 506 struct vdev_param *resp; 507 508 strlcpy(mp_req.name, VDEV_MP_KEY, sizeof(mp_req.name)); 509 mp_req.len_param = sizeof(*req); 510 mp_req.num_fds = 0; 511 req->type = VDEV_SCAN_REQ; 512 if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 && 513 mp_reply.nb_received == 1) { 514 mp_rep = &mp_reply.msgs[0]; 515 resp = (struct vdev_param *)mp_rep->param; 516 VDEV_LOG(INFO, "Received %d vdevs", resp->num); 517 free(mp_reply.msgs); 518 } else 519 VDEV_LOG(ERR, "Failed to request vdev from primary"); 520 521 /* Fall through to allow private vdevs in secondary process */ 522 } 523 524 scan: 525 /* call custom scan callbacks if any */ 526 rte_spinlock_lock(&vdev_custom_scan_lock); 527 TAILQ_FOREACH(custom_scan, &vdev_custom_scans, next) { 528 if (custom_scan->callback != NULL) 529 /* 530 * the callback should update devargs list 531 * by calling rte_devargs_insert() with 532 * devargs.bus = rte_bus_find_by_name("vdev"); 533 * devargs.type = RTE_DEVTYPE_VIRTUAL; 534 * devargs.policy = RTE_DEV_ALLOWED; 535 */ 536 custom_scan->callback(custom_scan->user_arg); 537 } 538 rte_spinlock_unlock(&vdev_custom_scan_lock); 539 540 /* for virtual devices we scan the devargs_list populated via cmdline */ 541 RTE_EAL_DEVARGS_FOREACH("vdev", devargs) { 542 543 dev = calloc(1, sizeof(*dev)); 544 if (!dev) 545 return -1; 546 547 rte_spinlock_recursive_lock(&vdev_device_list_lock); 548 549 if (find_vdev(devargs->name)) { 550 rte_spinlock_recursive_unlock(&vdev_device_list_lock); 551 free(dev); 552 continue; 553 } 554 555 dev->device.bus = &rte_vdev_bus; 556 dev->device.devargs = devargs; 557 dev->device.numa_node = SOCKET_ID_ANY; 558 dev->device.name = devargs->name; 559 560 TAILQ_INSERT_TAIL(&vdev_device_list, dev, next); 561 562 rte_spinlock_recursive_unlock(&vdev_device_list_lock); 563 } 564 565 return 0; 566 } 567 568 static int 569 vdev_probe(void) 570 { 571 struct rte_vdev_device *dev; 572 int r, ret = 0; 573 574 /* call the init function for each virtual device */ 575 TAILQ_FOREACH(dev, &vdev_device_list, next) { 576 /* we don't use the vdev lock here, as it's only used in DPDK 577 * initialization; and we don't want to hold such a lock when 578 * we call each driver probe. 579 */ 580 581 r = vdev_probe_all_drivers(dev); 582 if (r != 0) { 583 if (r == -EEXIST) 584 continue; 585 VDEV_LOG(ERR, "failed to initialize %s device", 586 rte_vdev_device_name(dev)); 587 ret = -1; 588 } 589 } 590 591 return ret; 592 } 593 594 static int 595 vdev_cleanup(void) 596 { 597 struct rte_vdev_device *dev, *tmp_dev; 598 int error = 0; 599 600 RTE_TAILQ_FOREACH_SAFE(dev, &vdev_device_list, next, tmp_dev) { 601 const struct rte_vdev_driver *drv; 602 int ret = 0; 603 604 if (dev->device.driver == NULL) 605 goto free; 606 607 drv = container_of(dev->device.driver, const struct rte_vdev_driver, driver); 608 609 if (drv->remove == NULL) 610 goto free; 611 612 ret = drv->remove(dev); 613 if (ret < 0) 614 error = -1; 615 616 dev->device.driver = NULL; 617 free: 618 free(dev); 619 } 620 621 return error; 622 } 623 624 struct rte_device * 625 rte_vdev_find_device(const struct rte_device *start, rte_dev_cmp_t cmp, 626 const void *data) 627 { 628 const struct rte_vdev_device *vstart; 629 struct rte_vdev_device *dev; 630 631 rte_spinlock_recursive_lock(&vdev_device_list_lock); 632 if (start != NULL) { 633 vstart = RTE_DEV_TO_VDEV_CONST(start); 634 dev = TAILQ_NEXT(vstart, next); 635 } else { 636 dev = TAILQ_FIRST(&vdev_device_list); 637 } 638 while (dev != NULL) { 639 if (cmp(&dev->device, data) == 0) 640 break; 641 dev = TAILQ_NEXT(dev, next); 642 } 643 rte_spinlock_recursive_unlock(&vdev_device_list_lock); 644 645 return dev ? &dev->device : NULL; 646 } 647 648 static int 649 vdev_plug(struct rte_device *dev) 650 { 651 return vdev_probe_all_drivers(RTE_DEV_TO_VDEV(dev)); 652 } 653 654 static int 655 vdev_unplug(struct rte_device *dev) 656 { 657 return rte_vdev_uninit(dev->name); 658 } 659 660 static enum rte_iova_mode 661 vdev_get_iommu_class(void) 662 { 663 const char *name; 664 struct rte_vdev_device *dev; 665 struct rte_vdev_driver *driver; 666 667 TAILQ_FOREACH(dev, &vdev_device_list, next) { 668 name = rte_vdev_device_name(dev); 669 if (vdev_parse(name, &driver)) 670 continue; 671 672 if (driver->drv_flags & RTE_VDEV_DRV_NEED_IOVA_AS_VA) 673 return RTE_IOVA_VA; 674 } 675 676 return RTE_IOVA_DC; 677 } 678 679 static struct rte_bus rte_vdev_bus = { 680 .scan = vdev_scan, 681 .probe = vdev_probe, 682 .cleanup = vdev_cleanup, 683 .find_device = rte_vdev_find_device, 684 .plug = vdev_plug, 685 .unplug = vdev_unplug, 686 .parse = vdev_parse, 687 .dma_map = vdev_dma_map, 688 .dma_unmap = vdev_dma_unmap, 689 .get_iommu_class = vdev_get_iommu_class, 690 .dev_iterate = rte_vdev_dev_iterate, 691 }; 692 693 RTE_REGISTER_BUS(vdev, rte_vdev_bus); 694 RTE_LOG_REGISTER_DEFAULT(vdev_logtype_bus, NOTICE); 695