/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) Intel Corporation.
 * All rights reserved.
 */

#include "env_internal.h"

#include <rte_alarm.h>
#include <rte_bus_pci.h>
#include <rte_devargs.h>
#include "spdk/env.h"
#include "spdk/log.h"
#include "spdk/string.h"

#define SYSFS_PCI_DRIVERS "/sys/bus/pci/drivers"

/* Compatibility for versions < 20.11 */
#if RTE_VERSION < RTE_VERSION_NUM(20, 11, 0, 0)
#define RTE_DEV_ALLOWED RTE_DEV_WHITELISTED
#define RTE_DEV_BLOCKED RTE_DEV_BLACKLISTED
#define RTE_BUS_SCAN_ALLOWLIST RTE_BUS_SCAN_WHITELIST
#endif

/* Size of the standard (non-extended) PCI config space; extended
 * capabilities start at this offset.
 */
#define PCI_CFG_SIZE 256
/* PCIe extended capability ID for the Device Serial Number capability. */
#define PCI_EXT_CAP_ID_SN 0x03

/* DPDK 18.11+ hotplug isn't robust. Multiple apps starting at the same time
 * might cause the internal IPC to misbehave. Just retry in such case.
 */
#define DPDK_HOTPLUG_RETRY_COUNT 4

/* DPDK alarm/interrupt thread */
static pthread_mutex_t g_pci_mutex = PTHREAD_MUTEX_INITIALIZER;
static TAILQ_HEAD(, spdk_pci_device) g_pci_devices = TAILQ_HEAD_INITIALIZER(g_pci_devices);
/* devices hotplugged on a dpdk thread */
static TAILQ_HEAD(, spdk_pci_device) g_pci_hotplugged_devices =
	TAILQ_HEAD_INITIALIZER(g_pci_hotplugged_devices);
static TAILQ_HEAD(, spdk_pci_driver) g_pci_drivers = TAILQ_HEAD_INITIALIZER(g_pci_drivers);
static TAILQ_HEAD(, spdk_pci_device_provider) g_pci_device_providers =
	TAILQ_HEAD_INITIALIZER(g_pci_device_providers);

/* SPDK PCI driver wrapper. The embedded rte_pci_driver MUST stay the first
 * member: pci_device_init() casts the rte_pci_driver pointer it receives
 * from DPDK back to struct spdk_pci_driver.
 */
struct spdk_pci_driver {
	struct rte_pci_driver driver;

	const char *name;
	const struct spdk_pci_id *id_table;
	uint32_t drv_flags;

	spdk_pci_enum_cb cb_fn;
	void *cb_arg;
	TAILQ_ENTRY(spdk_pci_driver) tailq;
};

/* DPDK probe/remove callbacks installed by register_rte_driver(). */
int pci_device_init(struct rte_pci_driver *driver, struct rte_pci_device *device);
int pci_device_fini(struct rte_pci_device *device);

/* Per-device bookkeeping for the delayed-allow logic in scan_pci_bus():
 * allowed_at is the tick count at which the device may be probed.
 */
struct env_devargs {
	struct rte_bus *bus;
	char name[128];
	uint64_t allowed_at;
	TAILQ_ENTRY(env_devargs) link;
};
static TAILQ_HEAD(, env_devargs) g_env_devargs = TAILQ_HEAD_INITIALIZER(g_env_devargs);

/* Find the env_devargs entry tracked for the given bus + device name,
 * or NULL if this device has not been seen before.
 */
static struct env_devargs *
find_env_devargs(struct rte_bus *bus, const char *name)
{
	struct env_devargs *da;

	TAILQ_FOREACH(da, &g_env_devargs, link) {
		if (bus == da->bus && !strcmp(name, da->name)) {
			return da;
		}
	}

	return NULL;
}

/* Report the BAR mapping that DPDK recorded in mem_resource[] during probe;
 * no new mapping is created here.
 */
static int
map_bar_rte(struct spdk_pci_device *device, uint32_t bar,
	    void **mapped_addr, uint64_t *phys_addr, uint64_t *size)
{
	struct rte_pci_device *dev = device->dev_handle;

	*mapped_addr = dev->mem_resource[bar].addr;
	*phys_addr = (uint64_t)dev->mem_resource[bar].phys_addr;
	*size = (uint64_t)dev->mem_resource[bar].len;

	return 0;
}

/* No-op: the mapping belongs to DPDK and is torn down on device removal. */
static int
unmap_bar_rte(struct spdk_pci_device *device, uint32_t bar, void *addr)
{
	return 0;
}

/* Config-space read via DPDK. rte_pci_read_config() returns the number of
 * bytes read on Linux; normalize to 0 on full read, -1 otherwise.
 */
static int
cfg_read_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
{
	int rc;

	rc = rte_pci_read_config(dev->dev_handle, value, len, offset);

	return (rc > 0 && (uint32_t) rc == len) ? 0 : -1;
}

/* Config-space write via DPDK, normalized like cfg_read_rte(). */
static int
cfg_write_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
{
	int rc;

	rc = rte_pci_write_config(dev->dev_handle, value, len, offset);

#ifdef __FreeBSD__
	/* DPDK returns 0 on success and -1 on failure */
	return rc;
#endif
	return (rc > 0 && (uint32_t) rc == len) ? 0 : -1;
}

/* Hot-remove a device from DPDK, retrying on -ENOMSG to work around the
 * multi-process IPC flakiness described at DPDK_HOTPLUG_RETRY_COUNT.
 */
static void
remove_rte_dev(struct rte_pci_device *rte_dev)
{
	char bdf[32];
	int i = 0, rc;

	snprintf(bdf, sizeof(bdf), "%s", rte_dev->device.name);
	do {
		rc = rte_eal_hotplug_remove("pci", bdf);
	} while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT);
}

/* rte_eal_alarm callback wrapper around remove_rte_dev(). */
static void
detach_rte_cb(void *_dev)
{
	remove_rte_dev(_dev);
}

/* if it's a physical device we need to deal with DPDK on
 * a different process and we can't just unset one flag
 * here. We also want to stop using any device resources
 * so that the device isn't "in use" by the userspace driver
 * once we detach it. This would allow attaching the device
 * to a different process, or to a kernel driver like nvme.
 */
static void
detach_rte(struct spdk_pci_device *dev)
{
	struct rte_pci_device *rte_dev = dev->dev_handle;
	int i;
	bool removed;

	if (!spdk_process_is_primary()) {
		remove_rte_dev(rte_dev);
		return;
	}

	pthread_mutex_lock(&g_pci_mutex);
	dev->internal.attached = false;
	/* prevent the hotremove notification from removing this device */
	dev->internal.pending_removal = true;
	pthread_mutex_unlock(&g_pci_mutex);

	/* Schedule the removal on the DPDK interrupt thread; doing it inline
	 * from an arbitrary SPDK thread is not safe.
	 */
	rte_eal_alarm_set(1, detach_rte_cb, rte_dev);

	/* wait up to 2s for the cb to execute */
	for (i = 2000; i > 0; i--) {
		spdk_delay_us(1000);
		pthread_mutex_lock(&g_pci_mutex);
		removed = dev->internal.removed;
		pthread_mutex_unlock(&g_pci_mutex);

		if (removed) {
			break;
		}
	}

	/* besides checking the removed flag, we also need to wait
	 * for the dpdk detach function to unwind, as it's doing some
	 * operations even after calling our detach callback. Simply
	 * cancel the alarm - if it started executing already, this
	 * call will block and wait for it to finish.
	 */
	rte_eal_alarm_cancel(detach_rte_cb, rte_dev);

	/* the device could have been finally removed, so just check
	 * it again.
	 */
	pthread_mutex_lock(&g_pci_mutex);
	removed = dev->internal.removed;
	pthread_mutex_unlock(&g_pci_mutex);
	if (!removed) {
		SPDK_ERRLOG("Timeout waiting for DPDK to remove PCI device %s.\n",
			    rte_dev->name);
		/* If we reach this state, then the device couldn't be removed and most likely
		   a subsequent hot add of a device in the same BDF will fail */
	}
}

/* Register an SPDK PCI driver. The driver is only added to the internal
 * list here; it is translated into an rte_pci_driver and handed to DPDK
 * later, in pci_env_init().
 */
void
spdk_pci_driver_register(const char *name, struct spdk_pci_id *id_table, uint32_t flags)
{
	struct spdk_pci_driver *driver;

	driver = calloc(1, sizeof(*driver));
	if (!driver) {
		/* we can't do any better than bailing atm */
		return;
	}

	driver->name = name;
	driver->id_table = id_table;
	driver->drv_flags = flags;
	TAILQ_INSERT_TAIL(&g_pci_drivers, driver, tailq);
}

/* Convenience lookup for the built-in "nvme" driver. */
struct spdk_pci_driver *
spdk_pci_nvme_get_driver(void)
{
	return spdk_pci_get_driver("nvme");
}

/* Find a registered SPDK PCI driver by name, or NULL. */
struct spdk_pci_driver *
spdk_pci_get_driver(const char *name)
{
	struct spdk_pci_driver *driver;

	TAILQ_FOREACH(driver, &g_pci_drivers, tailq) {
		if (strcmp(driver->name, name) == 0) {
			return driver;
		}
	}

	return NULL;
}

/* DPDK device event callback (runs on the DPDK interrupt thread).
 * On hot-remove, mark the device pending_removal; if nothing is attached
 * to it, schedule the actual removal via an alarm.
 */
static void
pci_device_rte_dev_event(const char *device_name,
			 enum rte_dev_event_type event,
			 void *cb_arg)
{
	struct spdk_pci_device *dev;
	bool can_detach = false;

	switch (event) {
	default:
	case RTE_DEV_EVENT_ADD:
		/* Nothing to do here yet. */
		break;
	case RTE_DEV_EVENT_REMOVE:
		pthread_mutex_lock(&g_pci_mutex);
		TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
			struct rte_pci_device *rte_dev = dev->dev_handle;

			if (strcmp(rte_dev->name, device_name) == 0 &&
			    !dev->internal.pending_removal) {
				can_detach = !dev->internal.attached;
				/* prevent any further attaches */
				dev->internal.pending_removal = true;
				break;
			}
		}
		pthread_mutex_unlock(&g_pci_mutex);

		if (dev != NULL && can_detach) {
			/* if device is not attached we can remove it right away.
			 * Otherwise it will be removed at detach.
			 *
			 * Because the user's callback is invoked in eal interrupt
			 * callback, the interrupt callback need to be finished before
			 * it can be unregistered when detaching device. So finish
			 * callback soon and use a deferred removal to detach device
			 * is need. It is a workaround, once the device detaching be
			 * moved into the eal in the future, the deferred removal could
			 * be deleted.
			 */
			rte_eal_alarm_set(1, detach_rte_cb, dev->dev_handle);
		}
		break;
	}
}

/* Reap devices flagged removed and promote devices that were hotplugged on
 * a DPDK thread into the main g_pci_devices list, updating vtophys maps.
 */
static void
cleanup_pci_devices(void)
{
	struct spdk_pci_device *dev, *tmp;

	pthread_mutex_lock(&g_pci_mutex);
	/* cleanup removed devices */
	TAILQ_FOREACH_SAFE(dev, &g_pci_devices, internal.tailq, tmp) {
		if (!dev->internal.removed) {
			continue;
		}

		vtophys_pci_device_removed(dev->dev_handle);
		TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq);
		free(dev);
	}

	/* add newly-attached devices */
	TAILQ_FOREACH_SAFE(dev, &g_pci_hotplugged_devices, internal.tailq, tmp) {
		TAILQ_REMOVE(&g_pci_hotplugged_devices, dev, internal.tailq);
		TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq);
		vtophys_pci_device_added(dev->dev_handle);
	}
	pthread_mutex_unlock(&g_pci_mutex);
}

static int scan_pci_bus(bool delay_init);

/* translate spdk_pci_driver to an rte_pci_driver and register it to dpdk */
static int
register_rte_driver(struct spdk_pci_driver *driver)
{
	unsigned pci_id_count = 0;
	struct rte_pci_id *rte_id_table;
	char *rte_name;
	size_t rte_name_len;
	uint32_t rte_flags;

	assert(driver->id_table);
	/* the SPDK id table is terminated by a zeroed entry (vendor_id == 0) */
	while (driver->id_table[pci_id_count].vendor_id) {
		pci_id_count++;
	}
	assert(pci_id_count > 0);

	/* +1 for the zeroed terminator entry that DPDK expects as well */
	rte_id_table = calloc(pci_id_count + 1, sizeof(*rte_id_table));
	if (!rte_id_table) {
		return -ENOMEM;
	}

	while (pci_id_count > 0) {
		struct rte_pci_id *rte_id = &rte_id_table[pci_id_count - 1];
		const struct spdk_pci_id *spdk_id = &driver->id_table[pci_id_count - 1];

		rte_id->class_id = spdk_id->class_id;
		rte_id->vendor_id = spdk_id->vendor_id;
		rte_id->device_id = spdk_id->device_id;
		rte_id->subsystem_vendor_id = spdk_id->subvendor_id;
		rte_id->subsystem_device_id = spdk_id->subdevice_id;
		pci_id_count--;
	}

	assert(driver->name);
	rte_name_len = strlen(driver->name) + strlen("spdk_") + 1;
	rte_name = calloc(rte_name_len, 1);
	if (!rte_name) {
		free(rte_id_table);
		return -ENOMEM;
	}

	snprintf(rte_name, rte_name_len, "spdk_%s", driver->name);
	driver->driver.driver.name = rte_name;
	driver->driver.id_table = rte_id_table;

	rte_flags = 0;
	if (driver->drv_flags & SPDK_PCI_DRIVER_NEED_MAPPING) {
		rte_flags |= RTE_PCI_DRV_NEED_MAPPING;
	}
	if (driver->drv_flags & SPDK_PCI_DRIVER_WC_ACTIVATE) {
		rte_flags |= RTE_PCI_DRV_WC_ACTIVATE;
	}
	driver->driver.drv_flags = rte_flags;

	driver->driver.probe = pci_device_init;
	driver->driver.remove = pci_device_fini;

	rte_pci_register(&driver->driver);
	return 0;
}

static inline void
_pci_env_init(void)
{
	/* We assume devices were present on the bus for more than 2 seconds
	 * before initializing SPDK and there's no need to wait more. We scan
	 * the bus, but we don't block any devices.
	 */
	scan_pci_bus(false);

	/* Register a single hotremove callback for all devices. */
	if (spdk_process_is_primary()) {
		rte_dev_event_callback_register(NULL, pci_device_rte_dev_event, NULL);
	}
}

/* Hand all pre-registered SPDK drivers to DPDK and do the initial bus scan. */
void
pci_env_init(void)
{
	struct spdk_pci_driver *driver;

	TAILQ_FOREACH(driver, &g_pci_drivers, tailq) {
		register_rte_driver(driver);
	}

	_pci_env_init();
}

void
pci_env_reinit(void)
{
	/* There is no need to register pci drivers again, since they were
	 * already pre-registered in pci_env_init.
	 */

	_pci_env_init();
}

/* Final cleanup: flush device lists, warn about still-attached devices, and
 * drop the hotremove callback.
 */
void
pci_env_fini(void)
{
	struct spdk_pci_device *dev;
	char bdf[32];

	cleanup_pci_devices();
	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
		if (dev->internal.attached) {
			spdk_pci_addr_fmt(bdf, sizeof(bdf), &dev->addr);
			SPDK_ERRLOG("Device %s is still attached at shutdown!\n", bdf);
		}
	}

	if (spdk_process_is_primary()) {
		rte_dev_event_callback_unregister(NULL, pci_device_rte_dev_event, NULL);
	}
}

/* DPDK probe callback: wrap the rte device in an spdk_pci_device, invoke the
 * driver's enum callback (if an enumeration is in progress), and queue the
 * device for pickup by cleanup_pci_devices() on an SPDK thread.
 * Note: _drv is cast back to struct spdk_pci_driver - see the struct comment.
 */
int
pci_device_init(struct rte_pci_driver *_drv,
		struct rte_pci_device *_dev)
{
	struct spdk_pci_driver *driver = (struct spdk_pci_driver *)_drv;
	struct spdk_pci_device *dev;
	int rc;

	dev = calloc(1, sizeof(*dev));
	if (dev == NULL) {
		return -1;
	}

	dev->dev_handle = _dev;

	dev->addr.domain = _dev->addr.domain;
	dev->addr.bus = _dev->addr.bus;
	dev->addr.dev = _dev->addr.devid;
	dev->addr.func = _dev->addr.function;
	dev->id.class_id = _dev->id.class_id;
	dev->id.vendor_id = _dev->id.vendor_id;
	dev->id.device_id = _dev->id.device_id;
	dev->id.subvendor_id = _dev->id.subsystem_vendor_id;
	dev->id.subdevice_id = _dev->id.subsystem_device_id;
	dev->socket_id = _dev->device.numa_node;
	dev->type = "pci";

	dev->map_bar = map_bar_rte;
	dev->unmap_bar = unmap_bar_rte;
	dev->cfg_read = cfg_read_rte;
	dev->cfg_write = cfg_write_rte;

	dev->internal.driver = driver;
	dev->internal.claim_fd = -1;

	if (driver->cb_fn != NULL) {
		rc = driver->cb_fn(driver->cb_arg, dev);
		if (rc != 0) {
			free(dev);
			return rc;
		}
		dev->internal.attached = true;
	}

	pthread_mutex_lock(&g_pci_mutex);
	TAILQ_INSERT_TAIL(&g_pci_hotplugged_devices, dev, internal.tailq);
	pthread_mutex_unlock(&g_pci_mutex);
	return 0;
}

/* Record (or update) the tick count at which the given device becomes
 * eligible for probing; used by the delayed-init logic in scan_pci_bus().
 */
static void
set_allowed_at(struct rte_devargs *rte_da, uint64_t tsc)
{
	struct env_devargs *env_da;

	env_da = find_env_devargs(rte_da->bus, rte_da->name);
	if (env_da == NULL) {
		env_da = calloc(1, sizeof(*env_da));
		if (env_da == NULL) {
			SPDK_ERRLOG("could not set_allowed_at for device %s\n", rte_da->name);
			return;
		}
		env_da->bus = rte_da->bus;
		spdk_strcpy_pad(env_da->name, rte_da->name, sizeof(env_da->name), 0);
		TAILQ_INSERT_TAIL(&g_env_devargs, env_da, link);
	}

	env_da->allowed_at = tsc;
}

/* Return the recorded allowed_at tick for a device, or 0 if the device
 * was never seen by SPDK.
 */
static uint64_t
get_allowed_at(struct rte_devargs *rte_da)
{
	struct env_devargs *env_da;

	env_da = find_env_devargs(rte_da->bus, rte_da->name);
	if (env_da) {
		return env_da->allowed_at;
	} else {
		return 0;
	}
}

/* DPDK remove callback: flag the matching spdk_pci_device as removed so
 * cleanup_pci_devices() can free it. Fails with -EBUSY while the device is
 * still attached (SPDK may still reference it).
 */
int
pci_device_fini(struct rte_pci_device *_dev)
{
	struct spdk_pci_device *dev;

	pthread_mutex_lock(&g_pci_mutex);
	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
		if (dev->dev_handle == _dev) {
			break;
		}
	}

	if (dev == NULL || dev->internal.attached) {
		/* The device might be still referenced somewhere in SPDK. */
		pthread_mutex_unlock(&g_pci_mutex);
		return -EBUSY;
	}

	/* remove our allowed_at option */
	if (_dev->device.devargs) {
		set_allowed_at(_dev->device.devargs, 0);
	}

	/* It is possible that removed flag was already set when there is a race
	 * between the remove notification for this process, and another process
	 * that is also detaching from this same device (for example, when using
	 * nvme driver in multi-process mode. So do not assert here. See
	 * #2456 for additional details.
	 */
	dev->internal.removed = true;
	pthread_mutex_unlock(&g_pci_mutex);
	return 0;

}

/* Detach an attached device: release any claim, then dispatch to the
 * provider (matched by dev->type) that originally attached it.
 */
void
spdk_pci_device_detach(struct spdk_pci_device *dev)
{
	struct spdk_pci_device_provider *provider;

	assert(dev->internal.attached);

	if (dev->internal.claim_fd >= 0) {
		spdk_pci_device_unclaim(dev);
	}

	TAILQ_FOREACH(provider, &g_pci_device_providers, tailq) {
		if (strcmp(dev->type, provider->name) == 0) {
			break;
		}
	}

	assert(provider != NULL);
	dev->internal.attached = false;
	provider->detach_cb(dev);

	cleanup_pci_devices();
}

/* Scan the PCI bus and adjust per-device allow/block policy. With
 * delay_init, newly-seen devices are blocked for ~2 seconds before being
 * allowed (gives other processes a chance to claim them first).
 */
static int
scan_pci_bus(bool delay_init)
{
	struct rte_dev_iterator it;
	struct rte_device *rte_dev;
	uint64_t now;

	rte_bus_scan();
	now = spdk_get_ticks();

	if (!TAILQ_FIRST(&g_pci_drivers)) {
		return 0;
	}

	RTE_DEV_FOREACH(rte_dev, "bus=pci", &it) {
		struct rte_devargs *da;

		da = rte_dev->devargs;
		if (!da) {
			char devargs_str[128];

			/* the device was never blocked or allowed */
			da = calloc(1, sizeof(*da));
			if (!da) {
				return -1;
			}

			snprintf(devargs_str, sizeof(devargs_str), "pci:%s", rte_dev->name);
			if (rte_devargs_parse(da, devargs_str) != 0) {
				free(da);
				return -1;
			}

			rte_devargs_insert(&da);
			rte_dev->devargs = da;
		}

		if (get_allowed_at(da)) {
			uint64_t allowed_at = get_allowed_at(da);

			/* this device was seen by spdk before... */
			if (da->policy == RTE_DEV_BLOCKED && allowed_at <= now) {
				da->policy = RTE_DEV_ALLOWED;
			}
		} else if ((rte_dev->bus->conf.scan_mode == RTE_BUS_SCAN_ALLOWLIST &&
			    da->policy == RTE_DEV_ALLOWED) || da->policy != RTE_DEV_BLOCKED) {
			/* override the policy only if not permanently blocked */

			if (delay_init) {
				da->policy = RTE_DEV_BLOCKED;
				set_allowed_at(da, now + 2 * spdk_get_ticks_hz());
			} else {
				da->policy = RTE_DEV_ALLOWED;
				set_allowed_at(da, now);
			}
		}
	}

	return 0;
}

/* Provider attach callback for plain PCI: hot-add the device through DPDK,
 * retrying on -ENOMSG (see DPDK_HOTPLUG_RETRY_COUNT).
 */
static int
pci_attach_rte(const struct spdk_pci_addr *addr)
{
	char bdf[32];
	int rc, i = 0;

	spdk_pci_addr_fmt(bdf, sizeof(bdf), addr);

	do {
		rc = rte_eal_hotplug_add("pci", bdf, "");
	} while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT);

	if (i > 1 && rc == -EEXIST) {
		/* Even though the previous request timed out, the device
		 * was attached successfully.
		 */
		rc = 0;
	}

	return rc;
}

static struct spdk_pci_device_provider g_pci_rte_provider = {
	.name = "pci",
	.attach_cb = pci_attach_rte,
	.detach_cb = detach_rte,
};

SPDK_PCI_REGISTER_DEVICE_PROVIDER(pci, &g_pci_rte_provider);

/* Attach a single device by address. If the device is already enumerated,
 * just run the enum callback; otherwise temporarily install the callback on
 * the driver and try each registered provider until one attaches it.
 */
int
spdk_pci_device_attach(struct spdk_pci_driver *driver,
		       spdk_pci_enum_cb enum_cb,
		       void *enum_ctx, struct spdk_pci_addr *pci_address)
{
	struct spdk_pci_device *dev;
	struct spdk_pci_device_provider *provider;
	struct rte_pci_device *rte_dev;
	struct rte_devargs *da;
	int rc;

	cleanup_pci_devices();

	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
		if (spdk_pci_addr_compare(&dev->addr, pci_address) == 0) {
			break;
		}
	}

	if (dev != NULL && dev->internal.driver == driver) {
		pthread_mutex_lock(&g_pci_mutex);
		if (dev->internal.attached || dev->internal.pending_removal) {
			pthread_mutex_unlock(&g_pci_mutex);
			return -1;
		}

		rc = enum_cb(enum_ctx, dev);
		if (rc == 0) {
			dev->internal.attached = true;
		}
		pthread_mutex_unlock(&g_pci_mutex);
		return rc;
	}

	driver->cb_fn = enum_cb;
	driver->cb_arg = enum_ctx;

	rc = -ENODEV;
	TAILQ_FOREACH(provider, &g_pci_device_providers, tailq) {
		rc = provider->attach_cb(pci_address);
		if (rc == 0) {
			break;
		}
	}

	driver->cb_arg = NULL;
	driver->cb_fn = NULL;

	cleanup_pci_devices();

	if (rc != 0) {
		return -1;
	}

	/* explicit attach ignores the allowlist, so if we blocked this
	 * device before let's enable it now - just for clarity.
	 */
	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
		if (spdk_pci_addr_compare(&dev->addr, pci_address) == 0) {
			break;
		}
	}
	assert(dev != NULL);

	rte_dev = dev->dev_handle;
	if (rte_dev != NULL) {
		da = rte_dev->device.devargs;
		if (da && get_allowed_at(da)) {
			set_allowed_at(da, spdk_get_ticks());
			da->policy = RTE_DEV_ALLOWED;
		}
	}

	return 0;
}

/* Note: You can call spdk_pci_enumerate from more than one thread
 * simultaneously safely, but you cannot call spdk_pci_enumerate
 * and rte_eal_pci_probe simultaneously.
 */
int
spdk_pci_enumerate(struct spdk_pci_driver *driver,
		   spdk_pci_enum_cb enum_cb,
		   void *enum_ctx)
{
	struct spdk_pci_device *dev;
	int rc;

	cleanup_pci_devices();

	/* First offer already-known, unattached devices to the callback. */
	pthread_mutex_lock(&g_pci_mutex);
	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
		if (dev->internal.attached ||
		    dev->internal.driver != driver ||
		    dev->internal.pending_removal) {
			continue;
		}

		rc = enum_cb(enum_ctx, dev);
		if (rc == 0) {
			dev->internal.attached = true;
		} else if (rc < 0) {
			pthread_mutex_unlock(&g_pci_mutex);
			return -1;
		}
	}
	pthread_mutex_unlock(&g_pci_mutex);

	/* Then rescan and probe to discover new devices; pci_device_init()
	 * will invoke cb_fn for each newly probed device.
	 */
	if (scan_pci_bus(true) != 0) {
		return -1;
	}

	driver->cb_fn = enum_cb;
	driver->cb_arg = enum_ctx;

	if (rte_bus_probe() != 0) {
		driver->cb_arg = NULL;
		driver->cb_fn = NULL;
		return -1;
	}

	driver->cb_arg = NULL;
	driver->cb_fn = NULL;

	cleanup_pci_devices();
	return 0;
}

/* Invoke fn on every known PCI device while holding the device-list lock. */
void
spdk_pci_for_each_device(void *ctx, void (*fn)(void *ctx, struct spdk_pci_device *dev))
{
	struct spdk_pci_device *dev, *tmp;

	pthread_mutex_lock(&g_pci_mutex);
	TAILQ_FOREACH_SAFE(dev, &g_pci_devices, internal.tailq, tmp) {
		fn(ctx, dev);
	}
	pthread_mutex_unlock(&g_pci_mutex);
}

int
spdk_pci_device_map_bar(struct spdk_pci_device *dev, uint32_t bar,
			void **mapped_addr, uint64_t *phys_addr, uint64_t *size)
{
	return dev->map_bar(dev, bar, mapped_addr, phys_addr, size);
}

int
spdk_pci_device_unmap_bar(struct spdk_pci_device *dev, uint32_t bar, void *addr)
{
	return dev->unmap_bar(dev, bar, addr);
}

/* DPDK 21.11 made rte_intr_handle opaque (pointer member instead of an
 * embedded struct) - hence the version ifdefs below.
 */
int
spdk_pci_device_enable_interrupt(struct spdk_pci_device *dev)
{
	struct rte_pci_device *rte_dev = dev->dev_handle;
#if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0)
	return rte_intr_enable(&rte_dev->intr_handle);
#else
	return rte_intr_enable(rte_dev->intr_handle);
#endif
}

int
spdk_pci_device_disable_interrupt(struct spdk_pci_device *dev)
{
	struct rte_pci_device *rte_dev = dev->dev_handle;
#if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0)
	return rte_intr_disable(&rte_dev->intr_handle);
#else
	return rte_intr_disable(rte_dev->intr_handle);
#endif
}

int
spdk_pci_device_get_interrupt_efd(struct spdk_pci_device *dev)
{
	struct rte_pci_device *rte_dev = dev->dev_handle;
#if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0)
	return rte_dev->intr_handle.fd;
#else
	return rte_intr_fd_get(rte_dev->intr_handle);
#endif
}

uint32_t
spdk_pci_device_get_domain(struct spdk_pci_device *dev)
{
	return dev->addr.domain;
}

uint8_t
spdk_pci_device_get_bus(struct spdk_pci_device *dev)
{
	return dev->addr.bus;
}

uint8_t
spdk_pci_device_get_dev(struct spdk_pci_device *dev)
{
	return dev->addr.dev;
}

uint8_t
spdk_pci_device_get_func(struct spdk_pci_device *dev)
{
	return dev->addr.func;
}

uint16_t
spdk_pci_device_get_vendor_id(struct spdk_pci_device *dev)
{
	return dev->id.vendor_id;
}

uint16_t
spdk_pci_device_get_device_id(struct spdk_pci_device *dev)
{
	return dev->id.device_id;
}

uint16_t
spdk_pci_device_get_subvendor_id(struct spdk_pci_device *dev)
{
	return dev->id.subvendor_id;
}

uint16_t
spdk_pci_device_get_subdevice_id(struct spdk_pci_device *dev)
{
	return dev->id.subdevice_id;
}

struct spdk_pci_id
spdk_pci_device_get_id(struct spdk_pci_device *dev)
{
	return dev->id;
}

int
spdk_pci_device_get_socket_id(struct spdk_pci_device *dev)
{
	return dev->socket_id;
}

int
spdk_pci_device_cfg_read(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
{
	return dev->cfg_read(dev, value, len, offset);
}

int
spdk_pci_device_cfg_write(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
{
	return dev->cfg_write(dev, value, len, offset);
}

int
spdk_pci_device_cfg_read8(struct spdk_pci_device *dev, uint8_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, 1, offset);
}

int
spdk_pci_device_cfg_write8(struct spdk_pci_device *dev, uint8_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, 1, offset);
}

int
spdk_pci_device_cfg_read16(struct spdk_pci_device *dev, uint16_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, 2, offset);
}

int
spdk_pci_device_cfg_write16(struct spdk_pci_device *dev, uint16_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, 2, offset);
}

int
spdk_pci_device_cfg_read32(struct spdk_pci_device *dev, uint32_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, 4, offset);
}

int
spdk_pci_device_cfg_write32(struct spdk_pci_device *dev, uint32_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, 4, offset);
}

/* Walk the PCIe extended capability list looking for the Device Serial
 * Number capability and format it as 16 hex characters into sn.
 * Requires len >= 17 (16 digits + NUL). Returns 0 on success, -1 on any
 * config read failure or if the capability is absent.
 */
int
spdk_pci_device_get_serial_number(struct spdk_pci_device *dev, char *sn, size_t len)
{
	int err;
	uint32_t pos, header = 0;
	uint32_t i, buf[2];

	if (len < 17) {
		return -1;
	}

	err = spdk_pci_device_cfg_read32(dev, &header, PCI_CFG_SIZE);
	if (err || !header) {
		return -1;
	}

	pos = PCI_CFG_SIZE;
	while (1) {
		if ((header & 0x0000ffff) == PCI_EXT_CAP_ID_SN) {
			if (pos) {
				/* skip the header */
				pos += 4;
				/* serial number is two 32-bit dwords, low dword first */
				for (i = 0; i < 2; i++) {
					err = spdk_pci_device_cfg_read32(dev, &buf[i], pos + 4 * i);
					if (err) {
						return -1;
					}
				}
				snprintf(sn, len, "%08x%08x", buf[1], buf[0]);
				return 0;
			}
		}
		/* next capability offset lives in header bits 31:20 */
		pos = (header >> 20) & 0xffc;
		/* 0 if no other items exist */
		if (pos < PCI_CFG_SIZE) {
			return -1;
		}
		err = spdk_pci_device_cfg_read32(dev, &header, pos);
		if (err) {
			return -1;
		}
	}
	return -1;
}

struct spdk_pci_addr
spdk_pci_device_get_addr(struct spdk_pci_device *dev)
{
	return dev->addr;
}

bool
spdk_pci_device_is_removed(struct spdk_pci_device *dev)
{
	return dev->internal.pending_removal;
}

/* Total order on PCI addresses: domain, then bus, dev, func. */
int
spdk_pci_addr_compare(const struct spdk_pci_addr *a1, const struct spdk_pci_addr *a2)
{
	if (a1->domain > a2->domain) {
		return 1;
	} else if (a1->domain < a2->domain) {
		return -1;
	} else if (a1->bus > a2->bus) {
		return 1;
	} else if (a1->bus < a2->bus) {
		return -1;
	} else if (a1->dev > a2->dev) {
		return 1;
	} else if (a1->dev < a2->dev) {
		return -1;
	} else if (a1->func > a2->func) {
		return 1;
	} else if (a1->func < a2->func) {
		return -1;
	}

	return 0;
}

#ifdef __linux__
/* Claim exclusive cross-process ownership of a device via an fcntl write
 * lock on a per-BDF file in /var/tmp. The claiming pid is stored in the
 * file for diagnostics. Returns 0 on success, negative errno on failure.
 */
int
spdk_pci_device_claim(struct spdk_pci_device *dev)
{
	int dev_fd;
	char dev_name[64];
	int pid;
	void *dev_map;
	struct flock pcidev_lock = {
		.l_type = F_WRLCK,
		.l_whence = SEEK_SET,
		.l_start = 0,
		.l_len = 0,
	};

	snprintf(dev_name, sizeof(dev_name), "/var/tmp/spdk_pci_lock_%04x:%02x:%02x.%x",
		 dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func);

	dev_fd = open(dev_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
	if (dev_fd == -1) {
		SPDK_ERRLOG("could not open %s\n", dev_name);
		return -errno;
	}

	if (ftruncate(dev_fd, sizeof(int)) != 0) {
		SPDK_ERRLOG("could not truncate %s\n", dev_name);
		close(dev_fd);
		return -errno;
	}

	dev_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
		       MAP_SHARED, dev_fd, 0);
	if (dev_map == MAP_FAILED) {
		SPDK_ERRLOG("could not mmap dev %s (%d)\n", dev_name, errno);
		close(dev_fd);
		return -errno;
	}

	if (fcntl(dev_fd, F_SETLK, &pcidev_lock) != 0) {
		pid = *(int *)dev_map;
		SPDK_ERRLOG("Cannot create lock on device %s, probably"
			    " process %d has claimed it\n", dev_name, pid);
		munmap(dev_map, sizeof(int));
		close(dev_fd);
		/* F_SETLK returns unspecified errnos, normalize them */
		return -EACCES;
	}

	*(int *)dev_map = (int)getpid();
	munmap(dev_map, sizeof(int));
	dev->internal.claim_fd = dev_fd;
	/* Keep dev_fd open to maintain the lock. */
	return 0;
}

/* Release the claim taken by spdk_pci_device_claim(): closing the fd drops
 * the fcntl lock; then remove the lock file.
 */
void
spdk_pci_device_unclaim(struct spdk_pci_device *dev)
{
	char dev_name[64];

	snprintf(dev_name, sizeof(dev_name), "/var/tmp/spdk_pci_lock_%04x:%02x:%02x.%x",
		 dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func);

	close(dev->internal.claim_fd);
	dev->internal.claim_fd = -1;
	unlink(dev_name);
}
#else /* !__linux__ */
int
spdk_pci_device_claim(struct spdk_pci_device *dev)
{
	/* TODO */
	return 0;
}

void
spdk_pci_device_unclaim(struct spdk_pci_device *dev)
{
	/* TODO */
}
#endif /* __linux__ */

/* Parse a BDF string into an spdk_pci_addr. Accepts DDDD:BB:DD.F,
 * DDDD.BB.DD.F, DDDD:BB:DD (func defaults to 0), BB:DD.F / BB.DD.F
 * (domain defaults to 0), and BB:DD / BB.DD (both default to 0).
 */
int
spdk_pci_addr_parse(struct spdk_pci_addr *addr, const char *bdf)
{
	unsigned domain, bus, dev, func;

	if (addr == NULL || bdf == NULL) {
		return -EINVAL;
	}

	if ((sscanf(bdf, "%x:%x:%x.%x", &domain, &bus, &dev, &func) == 4) ||
	    (sscanf(bdf, "%x.%x.%x.%x", &domain, &bus, &dev, &func) == 4)) {
		/* Matched a full address - all variables are initialized */
	} else if (sscanf(bdf, "%x:%x:%x", &domain, &bus, &dev) == 3) {
		func = 0;
	} else if ((sscanf(bdf, "%x:%x.%x", &bus, &dev, &func) == 3) ||
		   (sscanf(bdf, "%x.%x.%x", &bus, &dev, &func) == 3)) {
		domain = 0;
	} else if ((sscanf(bdf, "%x:%x", &bus, &dev) == 2) ||
		   (sscanf(bdf, "%x.%x", &bus, &dev) == 2)) {
		domain = 0;
		func = 0;
	} else {
		return -EINVAL;
	}

	/* enforce the architectural field widths: 8-bit bus, 5-bit device,
	 * 3-bit function
	 */
	if (bus > 0xFF || dev > 0x1F || func > 7) {
		return -EINVAL;
	}

	addr->domain = domain;
	addr->bus = bus;
	addr->dev = dev;
	addr->func = func;

	return 0;
}

/* Format a PCI address as DDDD:BB:DD.F. Returns 0 on success, -1 if the
 * buffer is too small.
 */
int
spdk_pci_addr_fmt(char *bdf, size_t sz, const struct spdk_pci_addr *addr)
{
	int rc;

	rc = snprintf(bdf, sz, "%04x:%02x:%02x.%x",
		      addr->domain, addr->bus,
		      addr->dev, addr->func);

	if (rc > 0 && (size_t)rc < sz) {
		return 0;
	}

	return -1;
}

/* Insert an externally-managed (non-DPDK) device into the device list,
 * running the driver's enum callback if an enumeration is in progress.
 * The caller must have filled in the dev's function pointers.
 */
int
spdk_pci_hook_device(struct spdk_pci_driver *drv, struct spdk_pci_device *dev)
{
	int rc;

	assert(dev->map_bar != NULL);
	assert(dev->unmap_bar != NULL);
	assert(dev->cfg_read != NULL);
	assert(dev->cfg_write != NULL);
	dev->internal.driver = drv;

	if (drv->cb_fn != NULL) {
		rc = drv->cb_fn(drv->cb_arg, dev);
		if (rc != 0) {
			return -ECANCELED;
		}

		dev->internal.attached = true;
	}

	TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq);

	return 0;
}

/* Remove a device previously added with spdk_pci_hook_device(). */
void
spdk_pci_unhook_device(struct spdk_pci_device *dev)
{
	assert(!dev->internal.attached);
	TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq);
}

/* Register an alternative attach/detach backend (e.g. vfio-user). */
void
spdk_pci_register_device_provider(struct spdk_pci_device_provider *provider)
{
	TAILQ_INSERT_TAIL(&g_pci_device_providers, provider, tailq);
}

const char *
spdk_pci_device_get_type(const struct spdk_pci_device *dev)
{
	return dev->type;
}

/* Mark a device address as allowed in DPDK's devargs so a later probe will
 * pick it up even under allowlist scan mode.
 */
int
spdk_pci_device_allow(struct spdk_pci_addr *pci_addr)
{
	struct rte_devargs *da;
	char devargs_str[128];

	da = calloc(1, sizeof(*da));
	if (da == NULL) {
		SPDK_ERRLOG("could not allocate rte_devargs\n");
		return -ENOMEM;
	}

	snprintf(devargs_str, sizeof(devargs_str), "pci:%04x:%02x:%02x.%x",
		 pci_addr->domain, pci_addr->bus, pci_addr->dev, pci_addr->func);
	if (rte_devargs_parse(da, devargs_str) != 0) {
		SPDK_ERRLOG("rte_devargs_parse() failed on '%s'\n", devargs_str);
		free(da);
		return -EINVAL;
	}
	da->policy = RTE_DEV_ALLOWED;
	/* Note: if a devargs already exists for this device address, it just gets
	 * overridden. So we do not need to check if the devargs already exists.
	 * DPDK will take care of memory management for the devargs structure after
	 * it has been inserted, so there's nothing SPDK needs to track.
	 */
	if (rte_devargs_insert(&da) != 0) {
		SPDK_ERRLOG("rte_devargs_insert() failed on '%s'\n", devargs_str);
		free(da);
		return -EINVAL;
	}

	return 0;
}

/* Translate a virtual address that falls inside one of the device's mapped
 * BARs to the corresponding physical address, or SPDK_VTOPHYS_ERROR if the
 * address is not within any BAR.
 */
uint64_t
dpdk_pci_device_vtophys(struct rte_pci_device *dev, uint64_t vaddr)
{
	struct rte_mem_resource *res;
	uint64_t paddr;
	unsigned r;

	for (r = 0; r < PCI_MAX_RESOURCE; r++) {
		res = &dev->mem_resource[r];
		if (res->phys_addr && vaddr >= (uint64_t)res->addr &&
		    vaddr < (uint64_t)res->addr + res->len) {
			paddr = res->phys_addr + (vaddr - (uint64_t)res->addr);
			return paddr;
		}
	}

	return SPDK_VTOPHYS_ERROR;
}