1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (c) Intel Corporation. 3 * All rights reserved. 4 */ 5 6 #include "env_internal.h" 7 8 #include <rte_alarm.h> 9 #include <rte_devargs.h> 10 #include "spdk/env.h" 11 #include "spdk/log.h" 12 #include "spdk/string.h" 13 14 #define SYSFS_PCI_DRIVERS "/sys/bus/pci/drivers" 15 16 /* Compatibility for versions < 20.11 */ 17 #if RTE_VERSION < RTE_VERSION_NUM(20, 11, 0, 0) 18 #define RTE_DEV_ALLOWED RTE_DEV_WHITELISTED 19 #define RTE_DEV_BLOCKED RTE_DEV_BLACKLISTED 20 #define RTE_BUS_SCAN_ALLOWLIST RTE_BUS_SCAN_WHITELIST 21 #endif 22 23 #define PCI_CFG_SIZE 256 24 #define PCI_EXT_CAP_ID_SN 0x03 25 26 /* DPDK 18.11+ hotplug isn't robust. Multiple apps starting at the same time 27 * might cause the internal IPC to misbehave. Just retry in such case. 28 */ 29 #define DPDK_HOTPLUG_RETRY_COUNT 4 30 31 /* DPDK alarm/interrupt thread */ 32 static pthread_mutex_t g_pci_mutex = PTHREAD_MUTEX_INITIALIZER; 33 static TAILQ_HEAD(, spdk_pci_device) g_pci_devices = TAILQ_HEAD_INITIALIZER(g_pci_devices); 34 /* devices hotplugged on a dpdk thread */ 35 static TAILQ_HEAD(, spdk_pci_device) g_pci_hotplugged_devices = 36 TAILQ_HEAD_INITIALIZER(g_pci_hotplugged_devices); 37 static TAILQ_HEAD(, spdk_pci_driver) g_pci_drivers = TAILQ_HEAD_INITIALIZER(g_pci_drivers); 38 39 struct env_devargs { 40 struct rte_bus *bus; 41 char name[128]; 42 uint64_t allowed_at; 43 TAILQ_ENTRY(env_devargs) link; 44 }; 45 static TAILQ_HEAD(, env_devargs) g_env_devargs = TAILQ_HEAD_INITIALIZER(g_env_devargs); 46 47 static struct env_devargs * 48 find_env_devargs(struct rte_bus *bus, const char *name) 49 { 50 struct env_devargs *da; 51 52 TAILQ_FOREACH(da, &g_env_devargs, link) { 53 if (bus == da->bus && !strcmp(name, da->name)) { 54 return da; 55 } 56 } 57 58 return NULL; 59 } 60 61 static int 62 map_bar_rte(struct spdk_pci_device *device, uint32_t bar, 63 void **mapped_addr, uint64_t *phys_addr, uint64_t *size) 64 { 65 struct rte_pci_device *dev = device->dev_handle; 66 67 *mapped_addr = dev->mem_resource[bar].addr; 68 *phys_addr = (uint64_t)dev->mem_resource[bar].phys_addr; 69 *size = (uint64_t)dev->mem_resource[bar].len; 70 71 return 0; 72 } 73 74 static int 75 unmap_bar_rte(struct spdk_pci_device *device, uint32_t bar, void *addr) 76 { 77 return 0; 78 } 79 80 static int 81 cfg_read_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset) 82 { 83 int rc; 84 85 rc = rte_pci_read_config(dev->dev_handle, value, len, offset); 86 87 return (rc > 0 && (uint32_t) rc == len) ? 0 : -1; 88 } 89 90 static int 91 cfg_write_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset) 92 { 93 int rc; 94 95 rc = rte_pci_write_config(dev->dev_handle, value, len, offset); 96 97 #ifdef __FreeBSD__ 98 /* DPDK returns 0 on success and -1 on failure */ 99 return rc; 100 #endif 101 return (rc > 0 && (uint32_t) rc == len) ? 0 : -1; 102 } 103 104 static void 105 remove_rte_dev(struct rte_pci_device *rte_dev) 106 { 107 char bdf[32]; 108 int i = 0, rc; 109 110 snprintf(bdf, sizeof(bdf), "%s", rte_dev->device.name); 111 do { 112 rc = rte_eal_hotplug_remove("pci", bdf); 113 } while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT); 114 } 115 116 static void 117 detach_rte_cb(void *_dev) 118 { 119 remove_rte_dev(_dev); 120 } 121 122 static void 123 detach_rte(struct spdk_pci_device *dev) 124 { 125 struct rte_pci_device *rte_dev = dev->dev_handle; 126 int i; 127 bool removed; 128 129 if (!spdk_process_is_primary()) { 130 remove_rte_dev(rte_dev); 131 return; 132 } 133 134 pthread_mutex_lock(&g_pci_mutex); 135 dev->internal.attached = false; 136 /* prevent the hotremove notification from removing this device */ 137 dev->internal.pending_removal = true; 138 pthread_mutex_unlock(&g_pci_mutex); 139 140 rte_eal_alarm_set(1, detach_rte_cb, rte_dev); 141 142 /* wait up to 2s for the cb to execute */ 143 for (i = 2000; i > 0; i--) { 144 145 spdk_delay_us(1000); 146 pthread_mutex_lock(&g_pci_mutex); 147 removed = dev->internal.removed; 148 pthread_mutex_unlock(&g_pci_mutex); 149 150 if (removed) { 151 break; 152 } 153 } 154 155 /* besides checking the removed flag, we also need to wait 156 * for the dpdk detach function to unwind, as it's doing some 157 * operations even after calling our detach callback. Simply 158 * cancel the alarm - if it started executing already, this 159 * call will block and wait for it to finish. 160 */ 161 rte_eal_alarm_cancel(detach_rte_cb, rte_dev); 162 163 /* the device could have been finally removed, so just check 164 * it again. 165 */ 166 pthread_mutex_lock(&g_pci_mutex); 167 removed = dev->internal.removed; 168 pthread_mutex_unlock(&g_pci_mutex); 169 if (!removed) { 170 SPDK_ERRLOG("Timeout waiting for DPDK to remove PCI device %s.\n", 171 rte_dev->name); 172 /* If we reach this state, then the device couldn't be removed and most likely 173 a subsequent hot add of a device in the same BDF will fail */ 174 } 175 } 176 177 void 178 spdk_pci_driver_register(const char *name, struct spdk_pci_id *id_table, uint32_t flags) 179 { 180 struct spdk_pci_driver *driver; 181 182 driver = calloc(1, sizeof(*driver)); 183 if (!driver) { 184 /* we can't do any better than bailing atm */ 185 return; 186 } 187 188 driver->name = name; 189 driver->id_table = id_table; 190 driver->drv_flags = flags; 191 TAILQ_INSERT_TAIL(&g_pci_drivers, driver, tailq); 192 } 193 194 struct spdk_pci_driver * 195 spdk_pci_nvme_get_driver(void) 196 { 197 return spdk_pci_get_driver("nvme"); 198 } 199 200 struct spdk_pci_driver * 201 spdk_pci_get_driver(const char *name) 202 { 203 struct spdk_pci_driver *driver; 204 205 TAILQ_FOREACH(driver, &g_pci_drivers, tailq) { 206 if (strcmp(driver->name, name) == 0) { 207 return driver; 208 } 209 } 210 211 return NULL; 212 } 213 214 static void 215 pci_device_rte_dev_event(const char *device_name, 216 enum rte_dev_event_type event, 217 void *cb_arg) 218 { 219 struct spdk_pci_device *dev; 220 bool can_detach = false; 221 222 switch (event) { 223 default: 224 case RTE_DEV_EVENT_ADD: 225 /* Nothing to do here yet. */ 226 break; 227 case RTE_DEV_EVENT_REMOVE: 228 pthread_mutex_lock(&g_pci_mutex); 229 TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { 230 struct rte_pci_device *rte_dev = dev->dev_handle; 231 232 if (strcmp(rte_dev->name, device_name) == 0 && 233 !dev->internal.pending_removal) { 234 can_detach = !dev->internal.attached; 235 /* prevent any further attaches */ 236 dev->internal.pending_removal = true; 237 break; 238 } 239 } 240 pthread_mutex_unlock(&g_pci_mutex); 241 242 if (dev != NULL && can_detach) { 243 /* if device is not attached we can remove it right away. 244 * Otherwise it will be removed at detach. 245 * 246 * Because the user's callback is invoked in eal interrupt 247 * callback, the interrupt callback need to be finished before 248 * it can be unregistered when detaching device. So finish 249 * callback soon and use a deferred removal to detach device 250 * is need. It is a workaround, once the device detaching be 251 * moved into the eal in the future, the deferred removal could 252 * be deleted. 253 */ 254 rte_eal_alarm_set(1, detach_rte_cb, dev->dev_handle); 255 } 256 break; 257 } 258 } 259 260 static void 261 cleanup_pci_devices(void) 262 { 263 struct spdk_pci_device *dev, *tmp; 264 265 pthread_mutex_lock(&g_pci_mutex); 266 /* cleanup removed devices */ 267 TAILQ_FOREACH_SAFE(dev, &g_pci_devices, internal.tailq, tmp) { 268 if (!dev->internal.removed) { 269 continue; 270 } 271 272 vtophys_pci_device_removed(dev->dev_handle); 273 TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq); 274 free(dev); 275 } 276 277 /* add newly-attached devices */ 278 TAILQ_FOREACH_SAFE(dev, &g_pci_hotplugged_devices, internal.tailq, tmp) { 279 TAILQ_REMOVE(&g_pci_hotplugged_devices, dev, internal.tailq); 280 TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq); 281 vtophys_pci_device_added(dev->dev_handle); 282 } 283 pthread_mutex_unlock(&g_pci_mutex); 284 } 285 286 static int scan_pci_bus(bool delay_init); 287 288 /* translate spdk_pci_driver to an rte_pci_driver and register it to dpdk */ 289 static int 290 register_rte_driver(struct spdk_pci_driver *driver) 291 { 292 unsigned pci_id_count = 0; 293 struct rte_pci_id *rte_id_table; 294 char *rte_name; 295 size_t rte_name_len; 296 uint32_t rte_flags; 297 298 assert(driver->id_table); 299 while (driver->id_table[pci_id_count].vendor_id) { 300 pci_id_count++; 301 } 302 assert(pci_id_count > 0); 303 304 rte_id_table = calloc(pci_id_count + 1, sizeof(*rte_id_table)); 305 if (!rte_id_table) { 306 return -ENOMEM; 307 } 308 309 while (pci_id_count > 0) { 310 struct rte_pci_id *rte_id = &rte_id_table[pci_id_count - 1]; 311 const struct spdk_pci_id *spdk_id = &driver->id_table[pci_id_count - 1]; 312 313 rte_id->class_id = spdk_id->class_id; 314 rte_id->vendor_id = spdk_id->vendor_id; 315 rte_id->device_id = spdk_id->device_id; 316 rte_id->subsystem_vendor_id = spdk_id->subvendor_id; 317 rte_id->subsystem_device_id = spdk_id->subdevice_id; 318 pci_id_count--; 319 } 320 321 assert(driver->name); 322 rte_name_len = strlen(driver->name) + strlen("spdk_") + 1; 323 rte_name = calloc(rte_name_len, 1); 324 if (!rte_name) { 325 free(rte_id_table); 326 return -ENOMEM; 327 } 328 329 snprintf(rte_name, rte_name_len, "spdk_%s", driver->name); 330 driver->driver.driver.name = rte_name; 331 driver->driver.id_table = rte_id_table; 332 333 rte_flags = 0; 334 if (driver->drv_flags & SPDK_PCI_DRIVER_NEED_MAPPING) { 335 rte_flags |= RTE_PCI_DRV_NEED_MAPPING; 336 } 337 if (driver->drv_flags & SPDK_PCI_DRIVER_WC_ACTIVATE) { 338 rte_flags |= RTE_PCI_DRV_WC_ACTIVATE; 339 } 340 driver->driver.drv_flags = rte_flags; 341 342 driver->driver.probe = pci_device_init; 343 driver->driver.remove = pci_device_fini; 344 345 rte_pci_register(&driver->driver); 346 return 0; 347 } 348 349 static inline void 350 _pci_env_init(void) 351 { 352 /* We assume devices were present on the bus for more than 2 seconds 353 * before initializing SPDK and there's no need to wait more. We scan 354 * the bus, but we don't block any devices. 355 */ 356 scan_pci_bus(false); 357 358 /* Register a single hotremove callback for all devices. */ 359 if (spdk_process_is_primary()) { 360 rte_dev_event_callback_register(NULL, pci_device_rte_dev_event, NULL); 361 } 362 } 363 364 void 365 pci_env_init(void) 366 { 367 struct spdk_pci_driver *driver; 368 369 TAILQ_FOREACH(driver, &g_pci_drivers, tailq) { 370 register_rte_driver(driver); 371 } 372 373 _pci_env_init(); 374 } 375 376 void 377 pci_env_reinit(void) 378 { 379 /* There is no need to register pci drivers again, since they were 380 * already pre-registered in pci_env_init. 381 */ 382 383 _pci_env_init(); 384 } 385 386 void 387 pci_env_fini(void) 388 { 389 struct spdk_pci_device *dev; 390 char bdf[32]; 391 392 cleanup_pci_devices(); 393 TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { 394 if (dev->internal.attached) { 395 spdk_pci_addr_fmt(bdf, sizeof(bdf), &dev->addr); 396 SPDK_ERRLOG("Device %s is still attached at shutdown!\n", bdf); 397 } 398 } 399 400 if (spdk_process_is_primary()) { 401 rte_dev_event_callback_unregister(NULL, pci_device_rte_dev_event, NULL); 402 } 403 } 404 405 int 406 pci_device_init(struct rte_pci_driver *_drv, 407 struct rte_pci_device *_dev) 408 { 409 struct spdk_pci_driver *driver = (struct spdk_pci_driver *)_drv; 410 struct spdk_pci_device *dev; 411 int rc; 412 413 dev = calloc(1, sizeof(*dev)); 414 if (dev == NULL) { 415 return -1; 416 } 417 418 dev->dev_handle = _dev; 419 420 dev->addr.domain = _dev->addr.domain; 421 dev->addr.bus = _dev->addr.bus; 422 dev->addr.dev = _dev->addr.devid; 423 dev->addr.func = _dev->addr.function; 424 dev->id.class_id = _dev->id.class_id; 425 dev->id.vendor_id = _dev->id.vendor_id; 426 dev->id.device_id = _dev->id.device_id; 427 dev->id.subvendor_id = _dev->id.subsystem_vendor_id; 428 dev->id.subdevice_id = _dev->id.subsystem_device_id; 429 dev->socket_id = _dev->device.numa_node; 430 dev->type = "pci"; 431 432 dev->map_bar = map_bar_rte; 433 dev->unmap_bar = unmap_bar_rte; 434 dev->cfg_read = cfg_read_rte; 435 dev->cfg_write = cfg_write_rte; 436 437 dev->internal.driver = driver; 438 dev->internal.claim_fd = -1; 439 440 if (driver->cb_fn != NULL) { 441 rc = driver->cb_fn(driver->cb_arg, dev); 442 if (rc != 0) { 443 free(dev); 444 return rc; 445 } 446 dev->internal.attached = true; 447 } 448 449 pthread_mutex_lock(&g_pci_mutex); 450 TAILQ_INSERT_TAIL(&g_pci_hotplugged_devices, dev, internal.tailq); 451 pthread_mutex_unlock(&g_pci_mutex); 452 return 0; 453 } 454 455 static void 456 set_allowed_at(struct rte_devargs *rte_da, uint64_t tsc) 457 { 458 struct env_devargs *env_da; 459 460 env_da = find_env_devargs(rte_da->bus, rte_da->name); 461 if (env_da == NULL) { 462 env_da = calloc(1, sizeof(*env_da)); 463 if (env_da == NULL) { 464 SPDK_ERRLOG("could not set_allowed_at for device %s\n", rte_da->name); 465 return; 466 } 467 env_da->bus = rte_da->bus; 468 spdk_strcpy_pad(env_da->name, rte_da->name, sizeof(env_da->name), 0); 469 TAILQ_INSERT_TAIL(&g_env_devargs, env_da, link); 470 } 471 472 env_da->allowed_at = tsc; 473 } 474 475 static uint64_t 476 get_allowed_at(struct rte_devargs *rte_da) 477 { 478 struct env_devargs *env_da; 479 480 env_da = find_env_devargs(rte_da->bus, rte_da->name); 481 if (env_da) { 482 return env_da->allowed_at; 483 } else { 484 return 0; 485 } 486 } 487 488 int 489 pci_device_fini(struct rte_pci_device *_dev) 490 { 491 struct spdk_pci_device *dev; 492 493 pthread_mutex_lock(&g_pci_mutex); 494 TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { 495 if (dev->dev_handle == _dev) { 496 break; 497 } 498 } 499 500 if (dev == NULL || dev->internal.attached) { 501 /* The device might be still referenced somewhere in SPDK. */ 502 pthread_mutex_unlock(&g_pci_mutex); 503 return -EBUSY; 504 } 505 506 /* remove our allowed_at option */ 507 if (_dev->device.devargs) { 508 set_allowed_at(_dev->device.devargs, 0); 509 } 510 511 /* It is possible that removed flag was already set when there is a race 512 * between the remove notification for this process, and another process 513 * that is also detaching from this same device (for example, when using 514 * nvme driver in multi-process mode. So do not assert here. See 515 * #2456 for additional details. 516 */ 517 dev->internal.removed = true; 518 pthread_mutex_unlock(&g_pci_mutex); 519 return 0; 520 521 } 522 523 void 524 spdk_pci_device_detach(struct spdk_pci_device *dev) 525 { 526 assert(dev->internal.attached); 527 528 if (dev->internal.claim_fd >= 0) { 529 spdk_pci_device_unclaim(dev); 530 } 531 532 dev->internal.attached = false; 533 if (strcmp(dev->type, "pci") == 0) { 534 /* if it's a physical device we need to deal with DPDK on 535 * a different process and we can't just unset one flag 536 * here. We also want to stop using any device resources 537 * so that the device isn't "in use" by the userspace driver 538 * once we detach it. This would allow attaching the device 539 * to a different process, or to a kernel driver like nvme. 540 */ 541 detach_rte(dev); 542 } 543 544 cleanup_pci_devices(); 545 } 546 547 static int 548 scan_pci_bus(bool delay_init) 549 { 550 struct spdk_pci_driver *driver; 551 struct rte_pci_device *rte_dev; 552 uint64_t now; 553 554 rte_bus_scan(); 555 now = spdk_get_ticks(); 556 557 driver = TAILQ_FIRST(&g_pci_drivers); 558 if (!driver) { 559 return 0; 560 } 561 562 TAILQ_FOREACH(rte_dev, &driver->driver.bus->device_list, next) { 563 struct rte_devargs *da; 564 565 da = rte_dev->device.devargs; 566 if (!da) { 567 char devargs_str[128]; 568 569 /* the device was never blocked or allowed */ 570 da = calloc(1, sizeof(*da)); 571 if (!da) { 572 return -1; 573 } 574 575 snprintf(devargs_str, sizeof(devargs_str), "pci:%s", rte_dev->device.name); 576 if (rte_devargs_parse(da, devargs_str) != 0) { 577 free(da); 578 return -1; 579 } 580 581 rte_devargs_insert(&da); 582 rte_dev->device.devargs = da; 583 } 584 585 if (get_allowed_at(da)) { 586 uint64_t allowed_at = get_allowed_at(da); 587 588 /* this device was seen by spdk before... */ 589 if (da->policy == RTE_DEV_BLOCKED && allowed_at <= now) { 590 da->policy = RTE_DEV_ALLOWED; 591 } 592 } else if ((driver->driver.bus->bus.conf.scan_mode == RTE_BUS_SCAN_ALLOWLIST && 593 da->policy == RTE_DEV_ALLOWED) || da->policy != RTE_DEV_BLOCKED) { 594 /* override the policy only if not permanently blocked */ 595 596 if (delay_init) { 597 da->policy = RTE_DEV_BLOCKED; 598 set_allowed_at(da, now + 2 * spdk_get_ticks_hz()); 599 } else { 600 da->policy = RTE_DEV_ALLOWED; 601 set_allowed_at(da, now); 602 } 603 } 604 } 605 606 return 0; 607 } 608 609 int 610 spdk_pci_device_attach(struct spdk_pci_driver *driver, 611 spdk_pci_enum_cb enum_cb, 612 void *enum_ctx, struct spdk_pci_addr *pci_address) 613 { 614 struct spdk_pci_device *dev; 615 struct rte_pci_device *rte_dev; 616 struct rte_devargs *da; 617 int rc; 618 char bdf[32]; 619 620 spdk_pci_addr_fmt(bdf, sizeof(bdf), pci_address); 621 622 cleanup_pci_devices(); 623 624 TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { 625 if (spdk_pci_addr_compare(&dev->addr, pci_address) == 0) { 626 break; 627 } 628 } 629 630 if (dev != NULL && dev->internal.driver == driver) { 631 pthread_mutex_lock(&g_pci_mutex); 632 if (dev->internal.attached || dev->internal.pending_removal) { 633 pthread_mutex_unlock(&g_pci_mutex); 634 return -1; 635 } 636 637 rc = enum_cb(enum_ctx, dev); 638 if (rc == 0) { 639 dev->internal.attached = true; 640 } 641 pthread_mutex_unlock(&g_pci_mutex); 642 return rc; 643 } 644 645 driver->cb_fn = enum_cb; 646 driver->cb_arg = enum_ctx; 647 648 int i = 0; 649 650 do { 651 rc = rte_eal_hotplug_add("pci", bdf, ""); 652 } while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT); 653 654 if (i > 1 && rc == -EEXIST) { 655 /* Even though the previous request timed out, the device 656 * was attached successfully. 657 */ 658 rc = 0; 659 } 660 661 driver->cb_arg = NULL; 662 driver->cb_fn = NULL; 663 664 cleanup_pci_devices(); 665 666 if (rc != 0) { 667 return -1; 668 } 669 670 /* explicit attach ignores the allowlist, so if we blocked this 671 * device before let's enable it now - just for clarity. 672 */ 673 TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { 674 if (spdk_pci_addr_compare(&dev->addr, pci_address) == 0) { 675 break; 676 } 677 } 678 assert(dev != NULL); 679 680 rte_dev = dev->dev_handle; 681 da = rte_dev->device.devargs; 682 if (da && get_allowed_at(da)) { 683 set_allowed_at(da, spdk_get_ticks()); 684 da->policy = RTE_DEV_ALLOWED; 685 } 686 687 return 0; 688 } 689 690 /* Note: You can call spdk_pci_enumerate from more than one thread 691 * simultaneously safely, but you cannot call spdk_pci_enumerate 692 * and rte_eal_pci_probe simultaneously. 693 */ 694 int 695 spdk_pci_enumerate(struct spdk_pci_driver *driver, 696 spdk_pci_enum_cb enum_cb, 697 void *enum_ctx) 698 { 699 struct spdk_pci_device *dev; 700 int rc; 701 702 cleanup_pci_devices(); 703 704 pthread_mutex_lock(&g_pci_mutex); 705 TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { 706 if (dev->internal.attached || 707 dev->internal.driver != driver || 708 dev->internal.pending_removal) { 709 continue; 710 } 711 712 rc = enum_cb(enum_ctx, dev); 713 if (rc == 0) { 714 dev->internal.attached = true; 715 } else if (rc < 0) { 716 pthread_mutex_unlock(&g_pci_mutex); 717 return -1; 718 } 719 } 720 pthread_mutex_unlock(&g_pci_mutex); 721 722 if (scan_pci_bus(true) != 0) { 723 return -1; 724 } 725 726 driver->cb_fn = enum_cb; 727 driver->cb_arg = enum_ctx; 728 729 if (rte_bus_probe() != 0) { 730 driver->cb_arg = NULL; 731 driver->cb_fn = NULL; 732 return -1; 733 } 734 735 driver->cb_arg = NULL; 736 driver->cb_fn = NULL; 737 738 cleanup_pci_devices(); 739 return 0; 740 } 741 742 void 743 spdk_pci_for_each_device(void *ctx, void (*fn)(void *ctx, struct spdk_pci_device *dev)) 744 { 745 struct spdk_pci_device *dev; 746 747 pthread_mutex_lock(&g_pci_mutex); 748 TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { 749 fn(ctx, dev); 750 } 751 pthread_mutex_unlock(&g_pci_mutex); 752 } 753 754 int 755 spdk_pci_device_map_bar(struct spdk_pci_device *dev, uint32_t bar, 756 void **mapped_addr, uint64_t *phys_addr, uint64_t *size) 757 { 758 return dev->map_bar(dev, bar, mapped_addr, phys_addr, size); 759 } 760 761 int 762 spdk_pci_device_unmap_bar(struct spdk_pci_device *dev, uint32_t bar, void *addr) 763 { 764 return dev->unmap_bar(dev, bar, addr); 765 } 766 767 int 768 spdk_pci_device_enable_interrupt(struct spdk_pci_device *dev) 769 { 770 struct rte_pci_device *rte_dev = dev->dev_handle; 771 #if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0) 772 return rte_intr_enable(&rte_dev->intr_handle); 773 #else 774 return rte_intr_enable(rte_dev->intr_handle); 775 #endif 776 } 777 778 int 779 spdk_pci_device_disable_interrupt(struct spdk_pci_device *dev) 780 { 781 struct rte_pci_device *rte_dev = dev->dev_handle; 782 #if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0) 783 return rte_intr_disable(&rte_dev->intr_handle); 784 #else 785 return rte_intr_disable(rte_dev->intr_handle); 786 #endif 787 } 788 789 int 790 spdk_pci_device_get_interrupt_efd(struct spdk_pci_device *dev) 791 { 792 struct rte_pci_device *rte_dev = dev->dev_handle; 793 #if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0) 794 return rte_dev->intr_handle.fd; 795 #else 796 return rte_intr_fd_get(rte_dev->intr_handle); 797 #endif 798 } 799 800 uint32_t 801 spdk_pci_device_get_domain(struct spdk_pci_device *dev) 802 { 803 return dev->addr.domain; 804 } 805 806 uint8_t 807 spdk_pci_device_get_bus(struct spdk_pci_device *dev) 808 { 809 return dev->addr.bus; 810 } 811 812 uint8_t 813 spdk_pci_device_get_dev(struct spdk_pci_device *dev) 814 { 815 return dev->addr.dev; 816 } 817 818 uint8_t 819 spdk_pci_device_get_func(struct spdk_pci_device *dev) 820 { 821 return dev->addr.func; 822 } 823 824 uint16_t 825 spdk_pci_device_get_vendor_id(struct spdk_pci_device *dev) 826 { 827 return dev->id.vendor_id; 828 } 829 830 uint16_t 831 spdk_pci_device_get_device_id(struct spdk_pci_device *dev) 832 { 833 return dev->id.device_id; 834 } 835 836 uint16_t 837 spdk_pci_device_get_subvendor_id(struct spdk_pci_device *dev) 838 { 839 return dev->id.subvendor_id; 840 } 841 842 uint16_t 843 spdk_pci_device_get_subdevice_id(struct spdk_pci_device *dev) 844 { 845 return dev->id.subdevice_id; 846 } 847 848 struct spdk_pci_id 849 spdk_pci_device_get_id(struct spdk_pci_device *dev) 850 { 851 return dev->id; 852 } 853 854 int 855 spdk_pci_device_get_socket_id(struct spdk_pci_device *dev) 856 { 857 return dev->socket_id; 858 } 859 860 int 861 spdk_pci_device_cfg_read(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset) 862 { 863 return dev->cfg_read(dev, value, len, offset); 864 } 865 866 int 867 spdk_pci_device_cfg_write(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset) 868 { 869 return dev->cfg_write(dev, value, len, offset); 870 } 871 872 int 873 spdk_pci_device_cfg_read8(struct spdk_pci_device *dev, uint8_t *value, uint32_t offset) 874 { 875 return spdk_pci_device_cfg_read(dev, value, 1, offset); 876 } 877 878 int 879 spdk_pci_device_cfg_write8(struct spdk_pci_device *dev, uint8_t value, uint32_t offset) 880 { 881 return spdk_pci_device_cfg_write(dev, &value, 1, offset); 882 } 883 884 int 885 spdk_pci_device_cfg_read16(struct spdk_pci_device *dev, uint16_t *value, uint32_t offset) 886 { 887 return spdk_pci_device_cfg_read(dev, value, 2, offset); 888 } 889 890 int 891 spdk_pci_device_cfg_write16(struct spdk_pci_device *dev, uint16_t value, uint32_t offset) 892 { 893 return spdk_pci_device_cfg_write(dev, &value, 2, offset); 894 } 895 896 int 897 spdk_pci_device_cfg_read32(struct spdk_pci_device *dev, uint32_t *value, uint32_t offset) 898 { 899 return spdk_pci_device_cfg_read(dev, value, 4, offset); 900 } 901 902 int 903 spdk_pci_device_cfg_write32(struct spdk_pci_device *dev, uint32_t value, uint32_t offset) 904 { 905 return spdk_pci_device_cfg_write(dev, &value, 4, offset); 906 } 907 908 int 909 spdk_pci_device_get_serial_number(struct spdk_pci_device *dev, char *sn, size_t len) 910 { 911 int err; 912 uint32_t pos, header = 0; 913 uint32_t i, buf[2]; 914 915 if (len < 17) { 916 return -1; 917 } 918 919 err = spdk_pci_device_cfg_read32(dev, &header, PCI_CFG_SIZE); 920 if (err || !header) { 921 return -1; 922 } 923 924 pos = PCI_CFG_SIZE; 925 while (1) { 926 if ((header & 0x0000ffff) == PCI_EXT_CAP_ID_SN) { 927 if (pos) { 928 /* skip the header */ 929 pos += 4; 930 for (i = 0; i < 2; i++) { 931 err = spdk_pci_device_cfg_read32(dev, &buf[i], pos + 4 * i); 932 if (err) { 933 return -1; 934 } 935 } 936 snprintf(sn, len, "%08x%08x", buf[1], buf[0]); 937 return 0; 938 } 939 } 940 pos = (header >> 20) & 0xffc; 941 /* 0 if no other items exist */ 942 if (pos < PCI_CFG_SIZE) { 943 return -1; 944 } 945 err = spdk_pci_device_cfg_read32(dev, &header, pos); 946 if (err) { 947 return -1; 948 } 949 } 950 return -1; 951 } 952 953 struct spdk_pci_addr 954 spdk_pci_device_get_addr(struct spdk_pci_device *dev) 955 { 956 return dev->addr; 957 } 958 959 bool 960 spdk_pci_device_is_removed(struct spdk_pci_device *dev) 961 { 962 return dev->internal.pending_removal; 963 } 964 965 int 966 spdk_pci_addr_compare(const struct spdk_pci_addr *a1, const struct spdk_pci_addr *a2) 967 { 968 if (a1->domain > a2->domain) { 969 return 1; 970 } else if (a1->domain < a2->domain) { 971 return -1; 972 } else if (a1->bus > a2->bus) { 973 return 1; 974 } else if (a1->bus < a2->bus) { 975 return -1; 976 } else if (a1->dev > a2->dev) { 977 return 1; 978 } else if (a1->dev < a2->dev) { 979 return -1; 980 } else if (a1->func > a2->func) { 981 return 1; 982 } else if (a1->func < a2->func) { 983 return -1; 984 } 985 986 return 0; 987 } 988 989 #ifdef __linux__ 990 int 991 spdk_pci_device_claim(struct spdk_pci_device *dev) 992 { 993 int dev_fd; 994 char dev_name[64]; 995 int pid; 996 void *dev_map; 997 struct flock pcidev_lock = { 998 .l_type = F_WRLCK, 999 .l_whence = SEEK_SET, 1000 .l_start = 0, 1001 .l_len = 0, 1002 }; 1003 1004 snprintf(dev_name, sizeof(dev_name), "/var/tmp/spdk_pci_lock_%04x:%02x:%02x.%x", 1005 dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func); 1006 1007 dev_fd = open(dev_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); 1008 if (dev_fd == -1) { 1009 SPDK_ERRLOG("could not open %s\n", dev_name); 1010 return -errno; 1011 } 1012 1013 if (ftruncate(dev_fd, sizeof(int)) != 0) { 1014 SPDK_ERRLOG("could not truncate %s\n", dev_name); 1015 close(dev_fd); 1016 return -errno; 1017 } 1018 1019 dev_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, 1020 MAP_SHARED, dev_fd, 0); 1021 if (dev_map == MAP_FAILED) { 1022 SPDK_ERRLOG("could not mmap dev %s (%d)\n", dev_name, errno); 1023 close(dev_fd); 1024 return -errno; 1025 } 1026 1027 if (fcntl(dev_fd, F_SETLK, &pcidev_lock) != 0) { 1028 pid = *(int *)dev_map; 1029 SPDK_ERRLOG("Cannot create lock on device %s, probably" 1030 " process %d has claimed it\n", dev_name, pid); 1031 munmap(dev_map, sizeof(int)); 1032 close(dev_fd); 1033 /* F_SETLK returns unspecified errnos, normalize them */ 1034 return -EACCES; 1035 } 1036 1037 *(int *)dev_map = (int)getpid(); 1038 munmap(dev_map, sizeof(int)); 1039 dev->internal.claim_fd = dev_fd; 1040 /* Keep dev_fd open to maintain the lock. */ 1041 return 0; 1042 } 1043 1044 void 1045 spdk_pci_device_unclaim(struct spdk_pci_device *dev) 1046 { 1047 char dev_name[64]; 1048 1049 snprintf(dev_name, sizeof(dev_name), "/var/tmp/spdk_pci_lock_%04x:%02x:%02x.%x", 1050 dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func); 1051 1052 close(dev->internal.claim_fd); 1053 dev->internal.claim_fd = -1; 1054 unlink(dev_name); 1055 } 1056 #else /* !__linux__ */ 1057 int 1058 spdk_pci_device_claim(struct spdk_pci_device *dev) 1059 { 1060 /* TODO */ 1061 return 0; 1062 } 1063 1064 void 1065 spdk_pci_device_unclaim(struct spdk_pci_device *dev) 1066 { 1067 /* TODO */ 1068 } 1069 #endif /* __linux__ */ 1070 1071 int 1072 spdk_pci_addr_parse(struct spdk_pci_addr *addr, const char *bdf) 1073 { 1074 unsigned domain, bus, dev, func; 1075 1076 if (addr == NULL || bdf == NULL) { 1077 return -EINVAL; 1078 } 1079 1080 if ((sscanf(bdf, "%x:%x:%x.%x", &domain, &bus, &dev, &func) == 4) || 1081 (sscanf(bdf, "%x.%x.%x.%x", &domain, &bus, &dev, &func) == 4)) { 1082 /* Matched a full address - all variables are initialized */ 1083 } else if (sscanf(bdf, "%x:%x:%x", &domain, &bus, &dev) == 3) { 1084 func = 0; 1085 } else if ((sscanf(bdf, "%x:%x.%x", &bus, &dev, &func) == 3) || 1086 (sscanf(bdf, "%x.%x.%x", &bus, &dev, &func) == 3)) { 1087 domain = 0; 1088 } else if ((sscanf(bdf, "%x:%x", &bus, &dev) == 2) || 1089 (sscanf(bdf, "%x.%x", &bus, &dev) == 2)) { 1090 domain = 0; 1091 func = 0; 1092 } else { 1093 return -EINVAL; 1094 } 1095 1096 if (bus > 0xFF || dev > 0x1F || func > 7) { 1097 return -EINVAL; 1098 } 1099 1100 addr->domain = domain; 1101 addr->bus = bus; 1102 addr->dev = dev; 1103 addr->func = func; 1104 1105 return 0; 1106 } 1107 1108 int 1109 spdk_pci_addr_fmt(char *bdf, size_t sz, const struct spdk_pci_addr *addr) 1110 { 1111 int rc; 1112 1113 rc = snprintf(bdf, sz, "%04x:%02x:%02x.%x", 1114 addr->domain, addr->bus, 1115 addr->dev, addr->func); 1116 1117 if (rc > 0 && (size_t)rc < sz) { 1118 return 0; 1119 } 1120 1121 return -1; 1122 } 1123 1124 void 1125 spdk_pci_hook_device(struct spdk_pci_driver *drv, struct spdk_pci_device *dev) 1126 { 1127 assert(dev->map_bar != NULL); 1128 assert(dev->unmap_bar != NULL); 1129 assert(dev->cfg_read != NULL); 1130 assert(dev->cfg_write != NULL); 1131 dev->internal.driver = drv; 1132 TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq); 1133 } 1134 1135 void 1136 spdk_pci_unhook_device(struct spdk_pci_device *dev) 1137 { 1138 assert(!dev->internal.attached); 1139 TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq); 1140 } 1141 1142 const char * 1143 spdk_pci_device_get_type(const struct spdk_pci_device *dev) 1144 { 1145 return dev->type; 1146 } 1147 1148 int 1149 spdk_pci_device_allow(struct spdk_pci_addr *pci_addr) 1150 { 1151 struct rte_devargs *da; 1152 char devargs_str[128]; 1153 1154 da = calloc(1, sizeof(*da)); 1155 if (da == NULL) { 1156 SPDK_ERRLOG("could not allocate rte_devargs\n"); 1157 return -ENOMEM; 1158 } 1159 1160 snprintf(devargs_str, sizeof(devargs_str), "pci:%04x:%02x:%02x.%x", 1161 pci_addr->domain, pci_addr->bus, pci_addr->dev, pci_addr->func); 1162 if (rte_devargs_parse(da, devargs_str) != 0) { 1163 SPDK_ERRLOG("rte_devargs_parse() failed on '%s'\n", devargs_str); 1164 free(da); 1165 return -EINVAL; 1166 } 1167 da->policy = RTE_DEV_ALLOWED; 1168 /* Note: if a devargs already exists for this device address, it just gets 1169 * overridden. So we do not need to check if the devargs already exists. 1170 * DPDK will take care of memory management for the devargs structure after 1171 * it has been inserted, so there's nothing SPDK needs to track. 1172 */ 1173 if (rte_devargs_insert(&da) != 0) { 1174 SPDK_ERRLOG("rte_devargs_insert() failed on '%s'\n", devargs_str); 1175 free(da); 1176 return -EINVAL; 1177 } 1178 1179 return 0; 1180 } 1181