1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (c) Intel Corporation. 3 * All rights reserved. 4 */ 5 6 #include "env_internal.h" 7 8 #include <rte_alarm.h> 9 #include <rte_devargs.h> 10 #include "spdk/env.h" 11 #include "spdk/log.h" 12 #include "spdk/string.h" 13 14 #define SYSFS_PCI_DRIVERS "/sys/bus/pci/drivers" 15 16 /* Compatibility for versions < 20.11 */ 17 #if RTE_VERSION < RTE_VERSION_NUM(20, 11, 0, 0) 18 #define RTE_DEV_ALLOWED RTE_DEV_WHITELISTED 19 #define RTE_DEV_BLOCKED RTE_DEV_BLACKLISTED 20 #define RTE_BUS_SCAN_ALLOWLIST RTE_BUS_SCAN_WHITELIST 21 #endif 22 23 #define PCI_CFG_SIZE 256 24 #define PCI_EXT_CAP_ID_SN 0x03 25 26 /* DPDK 18.11+ hotplug isn't robust. Multiple apps starting at the same time 27 * might cause the internal IPC to misbehave. Just retry in such case. 28 */ 29 #define DPDK_HOTPLUG_RETRY_COUNT 4 30 31 /* DPDK alarm/interrupt thread */ 32 static pthread_mutex_t g_pci_mutex = PTHREAD_MUTEX_INITIALIZER; 33 static TAILQ_HEAD(, spdk_pci_device) g_pci_devices = TAILQ_HEAD_INITIALIZER(g_pci_devices); 34 /* devices hotplugged on a dpdk thread */ 35 static TAILQ_HEAD(, spdk_pci_device) g_pci_hotplugged_devices = 36 TAILQ_HEAD_INITIALIZER(g_pci_hotplugged_devices); 37 static TAILQ_HEAD(, spdk_pci_driver) g_pci_drivers = TAILQ_HEAD_INITIALIZER(g_pci_drivers); 38 static TAILQ_HEAD(, spdk_pci_device_provider) g_pci_device_providers = 39 TAILQ_HEAD_INITIALIZER(g_pci_device_providers); 40 41 struct env_devargs { 42 struct rte_bus *bus; 43 char name[128]; 44 uint64_t allowed_at; 45 TAILQ_ENTRY(env_devargs) link; 46 }; 47 static TAILQ_HEAD(, env_devargs) g_env_devargs = TAILQ_HEAD_INITIALIZER(g_env_devargs); 48 49 static struct env_devargs * 50 find_env_devargs(struct rte_bus *bus, const char *name) 51 { 52 struct env_devargs *da; 53 54 TAILQ_FOREACH(da, &g_env_devargs, link) { 55 if (bus == da->bus && !strcmp(name, da->name)) { 56 return da; 57 } 58 } 59 60 return NULL; 61 } 62 63 static int 64 map_bar_rte(struct spdk_pci_device *device, uint32_t bar, 65 void **mapped_addr, uint64_t *phys_addr, uint64_t *size) 66 { 67 struct rte_pci_device *dev = device->dev_handle; 68 69 *mapped_addr = dev->mem_resource[bar].addr; 70 *phys_addr = (uint64_t)dev->mem_resource[bar].phys_addr; 71 *size = (uint64_t)dev->mem_resource[bar].len; 72 73 return 0; 74 } 75 76 static int 77 unmap_bar_rte(struct spdk_pci_device *device, uint32_t bar, void *addr) 78 { 79 return 0; 80 } 81 82 static int 83 cfg_read_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset) 84 { 85 int rc; 86 87 rc = rte_pci_read_config(dev->dev_handle, value, len, offset); 88 89 return (rc > 0 && (uint32_t) rc == len) ? 0 : -1; 90 } 91 92 static int 93 cfg_write_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset) 94 { 95 int rc; 96 97 rc = rte_pci_write_config(dev->dev_handle, value, len, offset); 98 99 #ifdef __FreeBSD__ 100 /* DPDK returns 0 on success and -1 on failure */ 101 return rc; 102 #endif 103 return (rc > 0 && (uint32_t) rc == len) ? 0 : -1; 104 } 105 106 static void 107 remove_rte_dev(struct rte_pci_device *rte_dev) 108 { 109 char bdf[32]; 110 int i = 0, rc; 111 112 snprintf(bdf, sizeof(bdf), "%s", rte_dev->device.name); 113 do { 114 rc = rte_eal_hotplug_remove("pci", bdf); 115 } while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT); 116 } 117 118 static void 119 detach_rte_cb(void *_dev) 120 { 121 remove_rte_dev(_dev); 122 } 123 124 /* if it's a physical device we need to deal with DPDK on 125 * a different process and we can't just unset one flag 126 * here. We also want to stop using any device resources 127 * so that the device isn't "in use" by the userspace driver 128 * once we detach it. This would allow attaching the device 129 * to a different process, or to a kernel driver like nvme. 130 */ 131 static void 132 detach_rte(struct spdk_pci_device *dev) 133 { 134 struct rte_pci_device *rte_dev = dev->dev_handle; 135 int i; 136 bool removed; 137 138 if (!spdk_process_is_primary()) { 139 remove_rte_dev(rte_dev); 140 return; 141 } 142 143 pthread_mutex_lock(&g_pci_mutex); 144 dev->internal.attached = false; 145 /* prevent the hotremove notification from removing this device */ 146 dev->internal.pending_removal = true; 147 pthread_mutex_unlock(&g_pci_mutex); 148 149 rte_eal_alarm_set(1, detach_rte_cb, rte_dev); 150 151 /* wait up to 2s for the cb to execute */ 152 for (i = 2000; i > 0; i--) { 153 154 spdk_delay_us(1000); 155 pthread_mutex_lock(&g_pci_mutex); 156 removed = dev->internal.removed; 157 pthread_mutex_unlock(&g_pci_mutex); 158 159 if (removed) { 160 break; 161 } 162 } 163 164 /* besides checking the removed flag, we also need to wait 165 * for the dpdk detach function to unwind, as it's doing some 166 * operations even after calling our detach callback. Simply 167 * cancel the alarm - if it started executing already, this 168 * call will block and wait for it to finish. 169 */ 170 rte_eal_alarm_cancel(detach_rte_cb, rte_dev); 171 172 /* the device could have been finally removed, so just check 173 * it again. 174 */ 175 pthread_mutex_lock(&g_pci_mutex); 176 removed = dev->internal.removed; 177 pthread_mutex_unlock(&g_pci_mutex); 178 if (!removed) { 179 SPDK_ERRLOG("Timeout waiting for DPDK to remove PCI device %s.\n", 180 rte_dev->name); 181 /* If we reach this state, then the device couldn't be removed and most likely 182 a subsequent hot add of a device in the same BDF will fail */ 183 } 184 } 185 186 void 187 spdk_pci_driver_register(const char *name, struct spdk_pci_id *id_table, uint32_t flags) 188 { 189 struct spdk_pci_driver *driver; 190 191 driver = calloc(1, sizeof(*driver)); 192 if (!driver) { 193 /* we can't do any better than bailing atm */ 194 return; 195 } 196 197 driver->name = name; 198 driver->id_table = id_table; 199 driver->drv_flags = flags; 200 TAILQ_INSERT_TAIL(&g_pci_drivers, driver, tailq); 201 } 202 203 struct spdk_pci_driver * 204 spdk_pci_nvme_get_driver(void) 205 { 206 return spdk_pci_get_driver("nvme"); 207 } 208 209 struct spdk_pci_driver * 210 spdk_pci_get_driver(const char *name) 211 { 212 struct spdk_pci_driver *driver; 213 214 TAILQ_FOREACH(driver, &g_pci_drivers, tailq) { 215 if (strcmp(driver->name, name) == 0) { 216 return driver; 217 } 218 } 219 220 return NULL; 221 } 222 223 static void 224 pci_device_rte_dev_event(const char *device_name, 225 enum rte_dev_event_type event, 226 void *cb_arg) 227 { 228 struct spdk_pci_device *dev; 229 bool can_detach = false; 230 231 switch (event) { 232 default: 233 case RTE_DEV_EVENT_ADD: 234 /* Nothing to do here yet. */ 235 break; 236 case RTE_DEV_EVENT_REMOVE: 237 pthread_mutex_lock(&g_pci_mutex); 238 TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { 239 struct rte_pci_device *rte_dev = dev->dev_handle; 240 241 if (strcmp(rte_dev->name, device_name) == 0 && 242 !dev->internal.pending_removal) { 243 can_detach = !dev->internal.attached; 244 /* prevent any further attaches */ 245 dev->internal.pending_removal = true; 246 break; 247 } 248 } 249 pthread_mutex_unlock(&g_pci_mutex); 250 251 if (dev != NULL && can_detach) { 252 /* if device is not attached we can remove it right away. 253 * Otherwise it will be removed at detach. 254 * 255 * Because the user's callback is invoked in eal interrupt 256 * callback, the interrupt callback need to be finished before 257 * it can be unregistered when detaching device. So finish 258 * callback soon and use a deferred removal to detach device 259 * is need. It is a workaround, once the device detaching be 260 * moved into the eal in the future, the deferred removal could 261 * be deleted. 262 */ 263 rte_eal_alarm_set(1, detach_rte_cb, dev->dev_handle); 264 } 265 break; 266 } 267 } 268 269 static void 270 cleanup_pci_devices(void) 271 { 272 struct spdk_pci_device *dev, *tmp; 273 274 pthread_mutex_lock(&g_pci_mutex); 275 /* cleanup removed devices */ 276 TAILQ_FOREACH_SAFE(dev, &g_pci_devices, internal.tailq, tmp) { 277 if (!dev->internal.removed) { 278 continue; 279 } 280 281 vtophys_pci_device_removed(dev->dev_handle); 282 TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq); 283 free(dev); 284 } 285 286 /* add newly-attached devices */ 287 TAILQ_FOREACH_SAFE(dev, &g_pci_hotplugged_devices, internal.tailq, tmp) { 288 TAILQ_REMOVE(&g_pci_hotplugged_devices, dev, internal.tailq); 289 TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq); 290 vtophys_pci_device_added(dev->dev_handle); 291 } 292 pthread_mutex_unlock(&g_pci_mutex); 293 } 294 295 static int scan_pci_bus(bool delay_init); 296 297 /* translate spdk_pci_driver to an rte_pci_driver and register it to dpdk */ 298 static int 299 register_rte_driver(struct spdk_pci_driver *driver) 300 { 301 unsigned pci_id_count = 0; 302 struct rte_pci_id *rte_id_table; 303 char *rte_name; 304 size_t rte_name_len; 305 uint32_t rte_flags; 306 307 assert(driver->id_table); 308 while (driver->id_table[pci_id_count].vendor_id) { 309 pci_id_count++; 310 } 311 assert(pci_id_count > 0); 312 313 rte_id_table = calloc(pci_id_count + 1, sizeof(*rte_id_table)); 314 if (!rte_id_table) { 315 return -ENOMEM; 316 } 317 318 while (pci_id_count > 0) { 319 struct rte_pci_id *rte_id = &rte_id_table[pci_id_count - 1]; 320 const struct spdk_pci_id *spdk_id = &driver->id_table[pci_id_count - 1]; 321 322 rte_id->class_id = spdk_id->class_id; 323 rte_id->vendor_id = spdk_id->vendor_id; 324 rte_id->device_id = spdk_id->device_id; 325 rte_id->subsystem_vendor_id = spdk_id->subvendor_id; 326 rte_id->subsystem_device_id = spdk_id->subdevice_id; 327 pci_id_count--; 328 } 329 330 assert(driver->name); 331 rte_name_len = strlen(driver->name) + strlen("spdk_") + 1; 332 rte_name = calloc(rte_name_len, 1); 333 if (!rte_name) { 334 free(rte_id_table); 335 return -ENOMEM; 336 } 337 338 snprintf(rte_name, rte_name_len, "spdk_%s", driver->name); 339 driver->driver.driver.name = rte_name; 340 driver->driver.id_table = rte_id_table; 341 342 rte_flags = 0; 343 if (driver->drv_flags & SPDK_PCI_DRIVER_NEED_MAPPING) { 344 rte_flags |= RTE_PCI_DRV_NEED_MAPPING; 345 } 346 if (driver->drv_flags & SPDK_PCI_DRIVER_WC_ACTIVATE) { 347 rte_flags |= RTE_PCI_DRV_WC_ACTIVATE; 348 } 349 driver->driver.drv_flags = rte_flags; 350 351 driver->driver.probe = pci_device_init; 352 driver->driver.remove = pci_device_fini; 353 354 rte_pci_register(&driver->driver); 355 return 0; 356 } 357 358 static inline void 359 _pci_env_init(void) 360 { 361 /* We assume devices were present on the bus for more than 2 seconds 362 * before initializing SPDK and there's no need to wait more. We scan 363 * the bus, but we don't block any devices. 364 */ 365 scan_pci_bus(false); 366 367 /* Register a single hotremove callback for all devices. */ 368 if (spdk_process_is_primary()) { 369 rte_dev_event_callback_register(NULL, pci_device_rte_dev_event, NULL); 370 } 371 } 372 373 void 374 pci_env_init(void) 375 { 376 struct spdk_pci_driver *driver; 377 378 TAILQ_FOREACH(driver, &g_pci_drivers, tailq) { 379 register_rte_driver(driver); 380 } 381 382 _pci_env_init(); 383 } 384 385 void 386 pci_env_reinit(void) 387 { 388 /* There is no need to register pci drivers again, since they were 389 * already pre-registered in pci_env_init. 390 */ 391 392 _pci_env_init(); 393 } 394 395 void 396 pci_env_fini(void) 397 { 398 struct spdk_pci_device *dev; 399 char bdf[32]; 400 401 cleanup_pci_devices(); 402 TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { 403 if (dev->internal.attached) { 404 spdk_pci_addr_fmt(bdf, sizeof(bdf), &dev->addr); 405 SPDK_ERRLOG("Device %s is still attached at shutdown!\n", bdf); 406 } 407 } 408 409 if (spdk_process_is_primary()) { 410 rte_dev_event_callback_unregister(NULL, pci_device_rte_dev_event, NULL); 411 } 412 } 413 414 int 415 pci_device_init(struct rte_pci_driver *_drv, 416 struct rte_pci_device *_dev) 417 { 418 struct spdk_pci_driver *driver = (struct spdk_pci_driver *)_drv; 419 struct spdk_pci_device *dev; 420 int rc; 421 422 dev = calloc(1, sizeof(*dev)); 423 if (dev == NULL) { 424 return -1; 425 } 426 427 dev->dev_handle = _dev; 428 429 dev->addr.domain = _dev->addr.domain; 430 dev->addr.bus = _dev->addr.bus; 431 dev->addr.dev = _dev->addr.devid; 432 dev->addr.func = _dev->addr.function; 433 dev->id.class_id = _dev->id.class_id; 434 dev->id.vendor_id = _dev->id.vendor_id; 435 dev->id.device_id = _dev->id.device_id; 436 dev->id.subvendor_id = _dev->id.subsystem_vendor_id; 437 dev->id.subdevice_id = _dev->id.subsystem_device_id; 438 dev->socket_id = _dev->device.numa_node; 439 dev->type = "pci"; 440 441 dev->map_bar = map_bar_rte; 442 dev->unmap_bar = unmap_bar_rte; 443 dev->cfg_read = cfg_read_rte; 444 dev->cfg_write = cfg_write_rte; 445 446 dev->internal.driver = driver; 447 dev->internal.claim_fd = -1; 448 449 if (driver->cb_fn != NULL) { 450 rc = driver->cb_fn(driver->cb_arg, dev); 451 if (rc != 0) { 452 free(dev); 453 return rc; 454 } 455 dev->internal.attached = true; 456 } 457 458 pthread_mutex_lock(&g_pci_mutex); 459 TAILQ_INSERT_TAIL(&g_pci_hotplugged_devices, dev, internal.tailq); 460 pthread_mutex_unlock(&g_pci_mutex); 461 return 0; 462 } 463 464 static void 465 set_allowed_at(struct rte_devargs *rte_da, uint64_t tsc) 466 { 467 struct env_devargs *env_da; 468 469 env_da = find_env_devargs(rte_da->bus, rte_da->name); 470 if (env_da == NULL) { 471 env_da = calloc(1, sizeof(*env_da)); 472 if (env_da == NULL) { 473 SPDK_ERRLOG("could not set_allowed_at for device %s\n", rte_da->name); 474 return; 475 } 476 env_da->bus = rte_da->bus; 477 spdk_strcpy_pad(env_da->name, rte_da->name, sizeof(env_da->name), 0); 478 TAILQ_INSERT_TAIL(&g_env_devargs, env_da, link); 479 } 480 481 env_da->allowed_at = tsc; 482 } 483 484 static uint64_t 485 get_allowed_at(struct rte_devargs *rte_da) 486 { 487 struct env_devargs *env_da; 488 489 env_da = find_env_devargs(rte_da->bus, rte_da->name); 490 if (env_da) { 491 return env_da->allowed_at; 492 } else { 493 return 0; 494 } 495 } 496 497 int 498 pci_device_fini(struct rte_pci_device *_dev) 499 { 500 struct spdk_pci_device *dev; 501 502 pthread_mutex_lock(&g_pci_mutex); 503 TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { 504 if (dev->dev_handle == _dev) { 505 break; 506 } 507 } 508 509 if (dev == NULL || dev->internal.attached) { 510 /* The device might be still referenced somewhere in SPDK. */ 511 pthread_mutex_unlock(&g_pci_mutex); 512 return -EBUSY; 513 } 514 515 /* remove our allowed_at option */ 516 if (_dev->device.devargs) { 517 set_allowed_at(_dev->device.devargs, 0); 518 } 519 520 /* It is possible that removed flag was already set when there is a race 521 * between the remove notification for this process, and another process 522 * that is also detaching from this same device (for example, when using 523 * nvme driver in multi-process mode. So do not assert here. See 524 * #2456 for additional details. 525 */ 526 dev->internal.removed = true; 527 pthread_mutex_unlock(&g_pci_mutex); 528 return 0; 529 530 } 531 532 void 533 spdk_pci_device_detach(struct spdk_pci_device *dev) 534 { 535 struct spdk_pci_device_provider *provider; 536 537 assert(dev->internal.attached); 538 539 if (dev->internal.claim_fd >= 0) { 540 spdk_pci_device_unclaim(dev); 541 } 542 543 TAILQ_FOREACH(provider, &g_pci_device_providers, tailq) { 544 if (strcmp(dev->type, provider->name) == 0) { 545 break; 546 } 547 } 548 549 assert(provider != NULL); 550 dev->internal.attached = false; 551 provider->detach_cb(dev); 552 553 cleanup_pci_devices(); 554 } 555 556 static int 557 scan_pci_bus(bool delay_init) 558 { 559 struct spdk_pci_driver *driver; 560 struct rte_pci_device *rte_dev; 561 uint64_t now; 562 563 rte_bus_scan(); 564 now = spdk_get_ticks(); 565 566 driver = TAILQ_FIRST(&g_pci_drivers); 567 if (!driver) { 568 return 0; 569 } 570 571 TAILQ_FOREACH(rte_dev, &driver->driver.bus->device_list, next) { 572 struct rte_devargs *da; 573 574 da = rte_dev->device.devargs; 575 if (!da) { 576 char devargs_str[128]; 577 578 /* the device was never blocked or allowed */ 579 da = calloc(1, sizeof(*da)); 580 if (!da) { 581 return -1; 582 } 583 584 snprintf(devargs_str, sizeof(devargs_str), "pci:%s", rte_dev->device.name); 585 if (rte_devargs_parse(da, devargs_str) != 0) { 586 free(da); 587 return -1; 588 } 589 590 rte_devargs_insert(&da); 591 rte_dev->device.devargs = da; 592 } 593 594 if (get_allowed_at(da)) { 595 uint64_t allowed_at = get_allowed_at(da); 596 597 /* this device was seen by spdk before... */ 598 if (da->policy == RTE_DEV_BLOCKED && allowed_at <= now) { 599 da->policy = RTE_DEV_ALLOWED; 600 } 601 } else if ((driver->driver.bus->bus.conf.scan_mode == RTE_BUS_SCAN_ALLOWLIST && 602 da->policy == RTE_DEV_ALLOWED) || da->policy != RTE_DEV_BLOCKED) { 603 /* override the policy only if not permanently blocked */ 604 605 if (delay_init) { 606 da->policy = RTE_DEV_BLOCKED; 607 set_allowed_at(da, now + 2 * spdk_get_ticks_hz()); 608 } else { 609 da->policy = RTE_DEV_ALLOWED; 610 set_allowed_at(da, now); 611 } 612 } 613 } 614 615 return 0; 616 } 617 618 static int 619 pci_attach_rte(const struct spdk_pci_addr *addr) 620 { 621 char bdf[32]; 622 int rc, i = 0; 623 624 spdk_pci_addr_fmt(bdf, sizeof(bdf), addr); 625 626 do { 627 rc = rte_eal_hotplug_add("pci", bdf, ""); 628 } while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT); 629 630 if (i > 1 && rc == -EEXIST) { 631 /* Even though the previous request timed out, the device 632 * was attached successfully. 633 */ 634 rc = 0; 635 } 636 637 return rc; 638 } 639 640 static struct spdk_pci_device_provider g_pci_rte_provider = { 641 .name = "pci", 642 .attach_cb = pci_attach_rte, 643 .detach_cb = detach_rte, 644 }; 645 646 SPDK_PCI_REGISTER_DEVICE_PROVIDER(pci, &g_pci_rte_provider); 647 648 int 649 spdk_pci_device_attach(struct spdk_pci_driver *driver, 650 spdk_pci_enum_cb enum_cb, 651 void *enum_ctx, struct spdk_pci_addr *pci_address) 652 { 653 struct spdk_pci_device *dev; 654 struct spdk_pci_device_provider *provider; 655 struct rte_pci_device *rte_dev; 656 struct rte_devargs *da; 657 int rc; 658 659 cleanup_pci_devices(); 660 661 TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { 662 if (spdk_pci_addr_compare(&dev->addr, pci_address) == 0) { 663 break; 664 } 665 } 666 667 if (dev != NULL && dev->internal.driver == driver) { 668 pthread_mutex_lock(&g_pci_mutex); 669 if (dev->internal.attached || dev->internal.pending_removal) { 670 pthread_mutex_unlock(&g_pci_mutex); 671 return -1; 672 } 673 674 rc = enum_cb(enum_ctx, dev); 675 if (rc == 0) { 676 dev->internal.attached = true; 677 } 678 pthread_mutex_unlock(&g_pci_mutex); 679 return rc; 680 } 681 682 driver->cb_fn = enum_cb; 683 driver->cb_arg = enum_ctx; 684 685 rc = -ENODEV; 686 TAILQ_FOREACH(provider, &g_pci_device_providers, tailq) { 687 rc = provider->attach_cb(pci_address); 688 if (rc == 0) { 689 break; 690 } 691 } 692 693 driver->cb_arg = NULL; 694 driver->cb_fn = NULL; 695 696 cleanup_pci_devices(); 697 698 if (rc != 0) { 699 return -1; 700 } 701 702 /* explicit attach ignores the allowlist, so if we blocked this 703 * device before let's enable it now - just for clarity. 704 */ 705 TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { 706 if (spdk_pci_addr_compare(&dev->addr, pci_address) == 0) { 707 break; 708 } 709 } 710 assert(dev != NULL); 711 712 rte_dev = dev->dev_handle; 713 if (rte_dev != NULL) { 714 da = rte_dev->device.devargs; 715 if (da && get_allowed_at(da)) { 716 set_allowed_at(da, spdk_get_ticks()); 717 da->policy = RTE_DEV_ALLOWED; 718 } 719 } 720 721 return 0; 722 } 723 724 /* Note: You can call spdk_pci_enumerate from more than one thread 725 * simultaneously safely, but you cannot call spdk_pci_enumerate 726 * and rte_eal_pci_probe simultaneously. 727 */ 728 int 729 spdk_pci_enumerate(struct spdk_pci_driver *driver, 730 spdk_pci_enum_cb enum_cb, 731 void *enum_ctx) 732 { 733 struct spdk_pci_device *dev; 734 int rc; 735 736 cleanup_pci_devices(); 737 738 pthread_mutex_lock(&g_pci_mutex); 739 TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { 740 if (dev->internal.attached || 741 dev->internal.driver != driver || 742 dev->internal.pending_removal) { 743 continue; 744 } 745 746 rc = enum_cb(enum_ctx, dev); 747 if (rc == 0) { 748 dev->internal.attached = true; 749 } else if (rc < 0) { 750 pthread_mutex_unlock(&g_pci_mutex); 751 return -1; 752 } 753 } 754 pthread_mutex_unlock(&g_pci_mutex); 755 756 if (scan_pci_bus(true) != 0) { 757 return -1; 758 } 759 760 driver->cb_fn = enum_cb; 761 driver->cb_arg = enum_ctx; 762 763 if (rte_bus_probe() != 0) { 764 driver->cb_arg = NULL; 765 driver->cb_fn = NULL; 766 return -1; 767 } 768 769 driver->cb_arg = NULL; 770 driver->cb_fn = NULL; 771 772 cleanup_pci_devices(); 773 return 0; 774 } 775 776 void 777 spdk_pci_for_each_device(void *ctx, void (*fn)(void *ctx, struct spdk_pci_device *dev)) 778 { 779 struct spdk_pci_device *dev, *tmp; 780 781 pthread_mutex_lock(&g_pci_mutex); 782 TAILQ_FOREACH_SAFE(dev, &g_pci_devices, internal.tailq, tmp) { 783 fn(ctx, dev); 784 } 785 pthread_mutex_unlock(&g_pci_mutex); 786 } 787 788 int 789 spdk_pci_device_map_bar(struct spdk_pci_device *dev, uint32_t bar, 790 void **mapped_addr, uint64_t *phys_addr, uint64_t *size) 791 { 792 return dev->map_bar(dev, bar, mapped_addr, phys_addr, size); 793 } 794 795 int 796 spdk_pci_device_unmap_bar(struct spdk_pci_device *dev, uint32_t bar, void *addr) 797 { 798 return dev->unmap_bar(dev, bar, addr); 799 } 800 801 int 802 spdk_pci_device_enable_interrupt(struct spdk_pci_device *dev) 803 { 804 struct rte_pci_device *rte_dev = dev->dev_handle; 805 #if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0) 806 return rte_intr_enable(&rte_dev->intr_handle); 807 #else 808 return rte_intr_enable(rte_dev->intr_handle); 809 #endif 810 } 811 812 int 813 spdk_pci_device_disable_interrupt(struct spdk_pci_device *dev) 814 { 815 struct rte_pci_device *rte_dev = dev->dev_handle; 816 #if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0) 817 return rte_intr_disable(&rte_dev->intr_handle); 818 #else 819 return rte_intr_disable(rte_dev->intr_handle); 820 #endif 821 } 822 823 int 824 spdk_pci_device_get_interrupt_efd(struct spdk_pci_device *dev) 825 { 826 struct rte_pci_device *rte_dev = dev->dev_handle; 827 #if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0) 828 return rte_dev->intr_handle.fd; 829 #else 830 return rte_intr_fd_get(rte_dev->intr_handle); 831 #endif 832 } 833 834 uint32_t 835 spdk_pci_device_get_domain(struct spdk_pci_device *dev) 836 { 837 return dev->addr.domain; 838 } 839 840 uint8_t 841 spdk_pci_device_get_bus(struct spdk_pci_device *dev) 842 { 843 return dev->addr.bus; 844 } 845 846 uint8_t 847 spdk_pci_device_get_dev(struct spdk_pci_device *dev) 848 { 849 return dev->addr.dev; 850 } 851 852 uint8_t 853 spdk_pci_device_get_func(struct spdk_pci_device *dev) 854 { 855 return dev->addr.func; 856 } 857 858 uint16_t 859 spdk_pci_device_get_vendor_id(struct spdk_pci_device *dev) 860 { 861 return dev->id.vendor_id; 862 } 863 864 uint16_t 865 spdk_pci_device_get_device_id(struct spdk_pci_device *dev) 866 { 867 return dev->id.device_id; 868 } 869 870 uint16_t 871 spdk_pci_device_get_subvendor_id(struct spdk_pci_device *dev) 872 { 873 return dev->id.subvendor_id; 874 } 875 876 uint16_t 877 spdk_pci_device_get_subdevice_id(struct spdk_pci_device *dev) 878 { 879 return dev->id.subdevice_id; 880 } 881 882 struct spdk_pci_id 883 spdk_pci_device_get_id(struct spdk_pci_device *dev) 884 { 885 return dev->id; 886 } 887 888 int 889 spdk_pci_device_get_socket_id(struct spdk_pci_device *dev) 890 { 891 return dev->socket_id; 892 } 893 894 int 895 spdk_pci_device_cfg_read(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset) 896 { 897 return dev->cfg_read(dev, value, len, offset); 898 } 899 900 int 901 spdk_pci_device_cfg_write(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset) 902 { 903 return dev->cfg_write(dev, value, len, offset); 904 } 905 906 int 907 spdk_pci_device_cfg_read8(struct spdk_pci_device *dev, uint8_t *value, uint32_t offset) 908 { 909 return spdk_pci_device_cfg_read(dev, value, 1, offset); 910 } 911 912 int 913 spdk_pci_device_cfg_write8(struct spdk_pci_device *dev, uint8_t value, uint32_t offset) 914 { 915 return spdk_pci_device_cfg_write(dev, &value, 1, offset); 916 } 917 918 int 919 spdk_pci_device_cfg_read16(struct spdk_pci_device *dev, uint16_t *value, uint32_t offset) 920 { 921 return spdk_pci_device_cfg_read(dev, value, 2, offset); 922 } 923 924 int 925 spdk_pci_device_cfg_write16(struct spdk_pci_device *dev, uint16_t value, uint32_t offset) 926 { 927 return spdk_pci_device_cfg_write(dev, &value, 2, offset); 928 } 929 930 int 931 spdk_pci_device_cfg_read32(struct spdk_pci_device *dev, uint32_t *value, uint32_t offset) 932 { 933 return spdk_pci_device_cfg_read(dev, value, 4, offset); 934 } 935 936 int 937 spdk_pci_device_cfg_write32(struct spdk_pci_device *dev, uint32_t value, uint32_t offset) 938 { 939 return spdk_pci_device_cfg_write(dev, &value, 4, offset); 940 } 941 942 int 943 spdk_pci_device_get_serial_number(struct spdk_pci_device *dev, char *sn, size_t len) 944 { 945 int err; 946 uint32_t pos, header = 0; 947 uint32_t i, buf[2]; 948 949 if (len < 17) { 950 return -1; 951 } 952 953 err = spdk_pci_device_cfg_read32(dev, &header, PCI_CFG_SIZE); 954 if (err || !header) { 955 return -1; 956 } 957 958 pos = PCI_CFG_SIZE; 959 while (1) { 960 if ((header & 0x0000ffff) == PCI_EXT_CAP_ID_SN) { 961 if (pos) { 962 /* skip the header */ 963 pos += 4; 964 for (i = 0; i < 2; i++) { 965 err = spdk_pci_device_cfg_read32(dev, &buf[i], pos + 4 * i); 966 if (err) { 967 return -1; 968 } 969 } 970 snprintf(sn, len, "%08x%08x", buf[1], buf[0]); 971 return 0; 972 } 973 } 974 pos = (header >> 20) & 0xffc; 975 /* 0 if no other items exist */ 976 if (pos < PCI_CFG_SIZE) { 977 return -1; 978 } 979 err = spdk_pci_device_cfg_read32(dev, &header, pos); 980 if (err) { 981 return -1; 982 } 983 } 984 return -1; 985 } 986 987 struct spdk_pci_addr 988 spdk_pci_device_get_addr(struct spdk_pci_device *dev) 989 { 990 return dev->addr; 991 } 992 993 bool 994 spdk_pci_device_is_removed(struct spdk_pci_device *dev) 995 { 996 return dev->internal.pending_removal; 997 } 998 999 int 1000 spdk_pci_addr_compare(const struct spdk_pci_addr *a1, const struct spdk_pci_addr *a2) 1001 { 1002 if (a1->domain > a2->domain) { 1003 return 1; 1004 } else if (a1->domain < a2->domain) { 1005 return -1; 1006 } else if (a1->bus > a2->bus) { 1007 return 1; 1008 } else if (a1->bus < a2->bus) { 1009 return -1; 1010 } else if (a1->dev > a2->dev) { 1011 return 1; 1012 } else if (a1->dev < a2->dev) { 1013 return -1; 1014 } else if (a1->func > a2->func) { 1015 return 1; 1016 } else if (a1->func < a2->func) { 1017 return -1; 1018 } 1019 1020 return 0; 1021 } 1022 1023 #ifdef __linux__ 1024 int 1025 spdk_pci_device_claim(struct spdk_pci_device *dev) 1026 { 1027 int dev_fd; 1028 char dev_name[64]; 1029 int pid; 1030 void *dev_map; 1031 struct flock pcidev_lock = { 1032 .l_type = F_WRLCK, 1033 .l_whence = SEEK_SET, 1034 .l_start = 0, 1035 .l_len = 0, 1036 }; 1037 1038 snprintf(dev_name, sizeof(dev_name), "/var/tmp/spdk_pci_lock_%04x:%02x:%02x.%x", 1039 dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func); 1040 1041 dev_fd = open(dev_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); 1042 if (dev_fd == -1) { 1043 SPDK_ERRLOG("could not open %s\n", dev_name); 1044 return -errno; 1045 } 1046 1047 if (ftruncate(dev_fd, sizeof(int)) != 0) { 1048 SPDK_ERRLOG("could not truncate %s\n", dev_name); 1049 close(dev_fd); 1050 return -errno; 1051 } 1052 1053 dev_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, 1054 MAP_SHARED, dev_fd, 0); 1055 if (dev_map == MAP_FAILED) { 1056 SPDK_ERRLOG("could not mmap dev %s (%d)\n", dev_name, errno); 1057 close(dev_fd); 1058 return -errno; 1059 } 1060 1061 if (fcntl(dev_fd, F_SETLK, &pcidev_lock) != 0) { 1062 pid = *(int *)dev_map; 1063 SPDK_ERRLOG("Cannot create lock on device %s, probably" 1064 " process %d has claimed it\n", dev_name, pid); 1065 munmap(dev_map, sizeof(int)); 1066 close(dev_fd); 1067 /* F_SETLK returns unspecified errnos, normalize them */ 1068 return -EACCES; 1069 } 1070 1071 *(int *)dev_map = (int)getpid(); 1072 munmap(dev_map, sizeof(int)); 1073 dev->internal.claim_fd = dev_fd; 1074 /* Keep dev_fd open to maintain the lock. */ 1075 return 0; 1076 } 1077 1078 void 1079 spdk_pci_device_unclaim(struct spdk_pci_device *dev) 1080 { 1081 char dev_name[64]; 1082 1083 snprintf(dev_name, sizeof(dev_name), "/var/tmp/spdk_pci_lock_%04x:%02x:%02x.%x", 1084 dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func); 1085 1086 close(dev->internal.claim_fd); 1087 dev->internal.claim_fd = -1; 1088 unlink(dev_name); 1089 } 1090 #else /* !__linux__ */ 1091 int 1092 spdk_pci_device_claim(struct spdk_pci_device *dev) 1093 { 1094 /* TODO */ 1095 return 0; 1096 } 1097 1098 void 1099 spdk_pci_device_unclaim(struct spdk_pci_device *dev) 1100 { 1101 /* TODO */ 1102 } 1103 #endif /* __linux__ */ 1104 1105 int 1106 spdk_pci_addr_parse(struct spdk_pci_addr *addr, const char *bdf) 1107 { 1108 unsigned domain, bus, dev, func; 1109 1110 if (addr == NULL || bdf == NULL) { 1111 return -EINVAL; 1112 } 1113 1114 if ((sscanf(bdf, "%x:%x:%x.%x", &domain, &bus, &dev, &func) == 4) || 1115 (sscanf(bdf, "%x.%x.%x.%x", &domain, &bus, &dev, &func) == 4)) { 1116 /* Matched a full address - all variables are initialized */ 1117 } else if (sscanf(bdf, "%x:%x:%x", &domain, &bus, &dev) == 3) { 1118 func = 0; 1119 } else if ((sscanf(bdf, "%x:%x.%x", &bus, &dev, &func) == 3) || 1120 (sscanf(bdf, "%x.%x.%x", &bus, &dev, &func) == 3)) { 1121 domain = 0; 1122 } else if ((sscanf(bdf, "%x:%x", &bus, &dev) == 2) || 1123 (sscanf(bdf, "%x.%x", &bus, &dev) == 2)) { 1124 domain = 0; 1125 func = 0; 1126 } else { 1127 return -EINVAL; 1128 } 1129 1130 if (bus > 0xFF || dev > 0x1F || func > 7) { 1131 return -EINVAL; 1132 } 1133 1134 addr->domain = domain; 1135 addr->bus = bus; 1136 addr->dev = dev; 1137 addr->func = func; 1138 1139 return 0; 1140 } 1141 1142 int 1143 spdk_pci_addr_fmt(char *bdf, size_t sz, const struct spdk_pci_addr *addr) 1144 { 1145 int rc; 1146 1147 rc = snprintf(bdf, sz, "%04x:%02x:%02x.%x", 1148 addr->domain, addr->bus, 1149 addr->dev, addr->func); 1150 1151 if (rc > 0 && (size_t)rc < sz) { 1152 return 0; 1153 } 1154 1155 return -1; 1156 } 1157 1158 int 1159 spdk_pci_hook_device(struct spdk_pci_driver *drv, struct spdk_pci_device *dev) 1160 { 1161 int rc; 1162 1163 assert(dev->map_bar != NULL); 1164 assert(dev->unmap_bar != NULL); 1165 assert(dev->cfg_read != NULL); 1166 assert(dev->cfg_write != NULL); 1167 dev->internal.driver = drv; 1168 1169 if (drv->cb_fn != NULL) { 1170 rc = drv->cb_fn(drv->cb_arg, dev); 1171 if (rc != 0) { 1172 return -ECANCELED; 1173 } 1174 1175 dev->internal.attached = true; 1176 } 1177 1178 TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq); 1179 1180 return 0; 1181 } 1182 1183 void 1184 spdk_pci_unhook_device(struct spdk_pci_device *dev) 1185 { 1186 assert(!dev->internal.attached); 1187 TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq); 1188 } 1189 1190 void 1191 spdk_pci_register_device_provider(struct spdk_pci_device_provider *provider) 1192 { 1193 TAILQ_INSERT_TAIL(&g_pci_device_providers, provider, tailq); 1194 } 1195 1196 const char * 1197 spdk_pci_device_get_type(const struct spdk_pci_device *dev) 1198 { 1199 return dev->type; 1200 } 1201 1202 int 1203 spdk_pci_device_allow(struct spdk_pci_addr *pci_addr) 1204 { 1205 struct rte_devargs *da; 1206 char devargs_str[128]; 1207 1208 da = calloc(1, sizeof(*da)); 1209 if (da == NULL) { 1210 SPDK_ERRLOG("could not allocate rte_devargs\n"); 1211 return -ENOMEM; 1212 } 1213 1214 snprintf(devargs_str, sizeof(devargs_str), "pci:%04x:%02x:%02x.%x", 1215 pci_addr->domain, pci_addr->bus, pci_addr->dev, pci_addr->func); 1216 if (rte_devargs_parse(da, devargs_str) != 0) { 1217 SPDK_ERRLOG("rte_devargs_parse() failed on '%s'\n", devargs_str); 1218 free(da); 1219 return -EINVAL; 1220 } 1221 da->policy = RTE_DEV_ALLOWED; 1222 /* Note: if a devargs already exists for this device address, it just gets 1223 * overridden. So we do not need to check if the devargs already exists. 1224 * DPDK will take care of memory management for the devargs structure after 1225 * it has been inserted, so there's nothing SPDK needs to track. 1226 */ 1227 if (rte_devargs_insert(&da) != 0) { 1228 SPDK_ERRLOG("rte_devargs_insert() failed on '%s'\n", devargs_str); 1229 free(da); 1230 return -EINVAL; 1231 } 1232 1233 return 0; 1234 } 1235