/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) Intel Corporation.
 * All rights reserved.
 */

#include "env_internal.h"
#include "pci_dpdk.h"

#include <rte_alarm.h>
#include <rte_devargs.h>
#include "spdk/env.h"
#include "spdk/log.h"
#include "spdk/string.h"

#define SYSFS_PCI_DRIVERS "/sys/bus/pci/drivers"

/* Compatibility for versions < 20.11 */
#if RTE_VERSION < RTE_VERSION_NUM(20, 11, 0, 0)
#define RTE_DEV_ALLOWED RTE_DEV_WHITELISTED
#define RTE_DEV_BLOCKED RTE_DEV_BLACKLISTED
#define RTE_BUS_SCAN_ALLOWLIST RTE_BUS_SCAN_WHITELIST
#endif

/* Size of the legacy PCI config space; the PCIe extended capability chain is
 * walked starting at this offset (see spdk_pci_device_get_serial_number).
 */
#define PCI_CFG_SIZE 256
/* PCIe extended capability ID: Device Serial Number. */
#define PCI_EXT_CAP_ID_SN 0x03

/* DPDK 18.11+ hotplug isn't robust. Multiple apps starting at the same time
 * might cause the internal IPC to misbehave. Just retry in such case.
 */
#define DPDK_HOTPLUG_RETRY_COUNT 4

/* DPDK alarm/interrupt thread */
static pthread_mutex_t g_pci_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Devices published to the application; guarded by g_pci_mutex. */
static TAILQ_HEAD(, spdk_pci_device) g_pci_devices = TAILQ_HEAD_INITIALIZER(g_pci_devices);
/* devices hotplugged on a dpdk thread */
static TAILQ_HEAD(, spdk_pci_device) g_pci_hotplugged_devices =
	TAILQ_HEAD_INITIALIZER(g_pci_hotplugged_devices);
/* Drivers registered via spdk_pci_driver_register(). */
static TAILQ_HEAD(, spdk_pci_driver) g_pci_drivers = TAILQ_HEAD_INITIALIZER(g_pci_drivers);
/* Providers registered via spdk_pci_register_device_provider(). */
static TAILQ_HEAD(, spdk_pci_device_provider) g_pci_device_providers =
	TAILQ_HEAD_INITIALIZER(g_pci_device_providers);

/* Probe/remove callbacks handed to dpdk_pci_driver_register() in pci_env_init(). */
int pci_device_init(struct rte_pci_driver *driver, struct rte_pci_device *device);
int pci_device_fini(struct rte_pci_device *device);

/* Per-device bookkeeping for the rte_devargs allow/block policy: remembers
 * the tick count at which a device becomes allowed for probing
 * (see set_allowed_at()/get_allowed_at()).
 */
struct env_devargs {
	struct rte_bus *bus;
	char name[128];
	uint64_t allowed_at;
	TAILQ_ENTRY(env_devargs) link;
};
static TAILQ_HEAD(, env_devargs) g_env_devargs = TAILQ_HEAD_INITIALIZER(g_env_devargs);

/* Find the tracking entry for (bus, name), or NULL if this device was never seen. */
static struct env_devargs *
find_env_devargs(struct rte_bus *bus, const char *name)
{
	struct env_devargs *da;

	TAILQ_FOREACH(da, &g_env_devargs, link) {
		if (bus == da->bus && !strcmp(name, da->name)) {
			return da;
		}
	}

	return NULL;
}

/* Default map_bar op for "pci"-type devices: delegate to the DPDK shim. */
static int
map_bar_rte(struct spdk_pci_device *device, uint32_t bar,
	    void **mapped_addr, uint64_t *phys_addr, uint64_t *size)
{
	return dpdk_pci_device_map_bar(device->dev_handle, bar, mapped_addr, phys_addr, size);
}

/* BAR mappings are owned by DPDK for the device's lifetime; nothing to undo here. */
static int
unmap_bar_rte(struct spdk_pci_device *device, uint32_t bar, void *addr)
{
	return 0;
}

/* Config-space read op: delegate to the DPDK shim. */
static int
cfg_read_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
{
	return dpdk_pci_device_read_config(dev->dev_handle, value, len, offset);
}

/* Config-space write op: delegate to the DPDK shim. */
static int
cfg_write_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
{
	return dpdk_pci_device_write_config(dev->dev_handle, value, len, offset);
}

/* Hot-remove a device from DPDK, retrying transient IPC failures (-ENOMSG)
 * up to DPDK_HOTPLUG_RETRY_COUNT times. Any final error is ignored.
 */
static void
remove_rte_dev(struct rte_pci_device *rte_dev)
{
	char bdf[32];
	int i = 0, rc;

	snprintf(bdf, sizeof(bdf), "%s", dpdk_pci_device_get_name(rte_dev));
	do {
		rc = rte_eal_hotplug_remove("pci", bdf);
	} while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT);
}

/* rte_eal_alarm-compatible wrapper around remove_rte_dev(). */
static void
detach_rte_cb(void *_dev)
{
	remove_rte_dev(_dev);
}

/* if it's a physical device we need to deal with DPDK on
 * a different process and we can't just unset one flag
 * here. We also want to stop using any device resources
 * so that the device isn't "in use" by the userspace driver
 * once we detach it. This would allow attaching the device
 * to a different process, or to a kernel driver like nvme.
 */
static void
detach_rte(struct spdk_pci_device *dev)
{
	struct rte_pci_device *rte_dev = dev->dev_handle;
	int i;
	bool removed;

	/* Secondary processes can remove directly; only the primary defers the
	 * removal to DPDK's alarm (interrupt) thread below.
	 */
	if (!spdk_process_is_primary()) {
		remove_rte_dev(rte_dev);
		return;
	}

	pthread_mutex_lock(&g_pci_mutex);
	dev->internal.attached = false;
	/* prevent the hotremove notification from removing this device */
	dev->internal.pending_removal = true;
	pthread_mutex_unlock(&g_pci_mutex);

	rte_eal_alarm_set(1, detach_rte_cb, rte_dev);

	/* wait up to 2s for the cb to execute */
	for (i = 2000; i > 0; i--) {
		spdk_delay_us(1000);
		pthread_mutex_lock(&g_pci_mutex);
		removed = dev->internal.removed;
		pthread_mutex_unlock(&g_pci_mutex);

		if (removed) {
			break;
		}
	}

	/* besides checking the removed flag, we also need to wait
	 * for the dpdk detach function to unwind, as it's doing some
	 * operations even after calling our detach callback. Simply
	 * cancel the alarm - if it started executing already, this
	 * call will block and wait for it to finish.
	 */
	rte_eal_alarm_cancel(detach_rte_cb, rte_dev);

	/* the device could have been finally removed, so just check
	 * it again.
	 */
	pthread_mutex_lock(&g_pci_mutex);
	removed = dev->internal.removed;
	pthread_mutex_unlock(&g_pci_mutex);
	if (!removed) {
		SPDK_ERRLOG("Timeout waiting for DPDK to remove PCI device %s.\n",
			    dpdk_pci_device_get_name(rte_dev));
		/* If we reach this state, then the device couldn't be removed and most likely
		   a subsequent hot add of a device in the same BDF will fail */
	}
}

/* Register a PCI driver for later use with pci_env_init(). Note: on allocation
 * failure the driver is silently not registered - callers cannot detect this.
 */
void
spdk_pci_driver_register(const char *name, struct spdk_pci_id *id_table, uint32_t flags)
{
	struct spdk_pci_driver *driver;

	driver = calloc(1, sizeof(*driver));
	if (!driver) {
		/* we can't do any better than bailing atm */
		return;
	}

	driver->name = name;
	driver->id_table = id_table;
	driver->drv_flags = flags;
	driver->driver = (struct rte_pci_driver *)driver->driver_buf;
	TAILQ_INSERT_TAIL(&g_pci_drivers, driver, tailq);
}

/* Convenience accessor for the registered "nvme" driver. */
struct spdk_pci_driver *
spdk_pci_nvme_get_driver(void)
{
	return spdk_pci_get_driver("nvme");
}

/* Look up a registered driver by name; NULL if not registered. */
struct spdk_pci_driver *
spdk_pci_get_driver(const char *name)
{
	struct spdk_pci_driver *driver;

	TAILQ_FOREACH(driver, &g_pci_drivers, tailq) {
		if (strcmp(driver->name, name) == 0) {
			return driver;
		}
	}

	return NULL;
}

/* DPDK device-event callback (invoked on the EAL interrupt thread). Marks
 * matching tracked devices pending_removal on REMOVE events and schedules a
 * deferred hot-remove for devices nothing has attached to.
 */
static void
pci_device_rte_dev_event(const char *device_name,
			 enum rte_dev_event_type event,
			 void *cb_arg)
{
	struct spdk_pci_device *dev;
	bool can_detach = false;

	switch (event) {
	default:
	case RTE_DEV_EVENT_ADD:
		/* Nothing to do here yet. */
		break;
	case RTE_DEV_EVENT_REMOVE:
		pthread_mutex_lock(&g_pci_mutex);
		TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
			struct rte_pci_device *rte_dev = dev->dev_handle;

			if (strcmp(dpdk_pci_device_get_name(rte_dev), device_name) == 0 &&
			    !dev->internal.pending_removal) {
				can_detach = !dev->internal.attached;
				/* prevent any further attaches */
				dev->internal.pending_removal = true;
				break;
			}
		}
		pthread_mutex_unlock(&g_pci_mutex);

		if (dev != NULL && can_detach) {
			/* if device is not attached we can remove it right away.
			 * Otherwise it will be removed at detach.
			 *
			 * Because the user's callback is invoked in eal interrupt
			 * callback, the interrupt callback need to be finished before
			 * it can be unregistered when detaching device. So finish
			 * callback soon and use a deferred removal to detach device
			 * is need. It is a workaround, once the device detaching be
			 * moved into the eal in the future, the deferred removal could
			 * be deleted.
			 */
			rte_eal_alarm_set(1, detach_rte_cb, dev->dev_handle);
		}
		break;
	}
}

/* Reap devices flagged as removed and publish devices hotplugged on a DPDK
 * thread into g_pci_devices, keeping the vtophys maps in sync. Takes g_pci_mutex.
 */
static void
cleanup_pci_devices(void)
{
	struct spdk_pci_device *dev, *tmp;

	pthread_mutex_lock(&g_pci_mutex);
	/* cleanup removed devices */
	TAILQ_FOREACH_SAFE(dev, &g_pci_devices, internal.tailq, tmp) {
		if (!dev->internal.removed) {
			continue;
		}

		vtophys_pci_device_removed(dev->dev_handle);
		TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq);
		free(dev);
	}

	/* add newly-attached devices */
	TAILQ_FOREACH_SAFE(dev, &g_pci_hotplugged_devices, internal.tailq, tmp) {
		TAILQ_REMOVE(&g_pci_hotplugged_devices, dev, internal.tailq);
		TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq);
		vtophys_pci_device_added(dev->dev_handle);
	}
	pthread_mutex_unlock(&g_pci_mutex);
}

static int scan_pci_bus(bool delay_init);

/* Common (re)init: scan the bus without blocking devices, and register the
 * single process-wide hotremove callback in the primary process.
 */
static inline void
_pci_env_init(void)
{
	/* We assume devices were present on the bus for more than 2 seconds
	 * before initializing SPDK and there's no need to wait more. We scan
	 * the bus, but we don't block any devices.
	 */
	scan_pci_bus(false);

	/* Register a single hotremove callback for all devices. */
	if (spdk_process_is_primary()) {
		rte_dev_event_callback_register(NULL, pci_device_rte_dev_event, NULL);
	}
}

/* First-time init: register all SPDK drivers with DPDK, then do common init. */
void
pci_env_init(void)
{
	struct spdk_pci_driver *driver;

	TAILQ_FOREACH(driver, &g_pci_drivers, tailq) {
		dpdk_pci_driver_register(driver, pci_device_init, pci_device_fini);
	}

	_pci_env_init();
}

void
pci_env_reinit(void)
{
	/* There is no need to register pci drivers again, since they were
	 * already pre-registered in pci_env_init.
	 */

	_pci_env_init();
}

/* Shutdown: flush removed/hotplugged lists, warn about devices still attached,
 * and unregister the hotremove callback (primary process only).
 */
void
pci_env_fini(void)
{
	struct spdk_pci_device *dev;
	char bdf[32];

	cleanup_pci_devices();
	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
		if (dev->internal.attached) {
			spdk_pci_addr_fmt(bdf, sizeof(bdf), &dev->addr);
			SPDK_ERRLOG("Device %s is still attached at shutdown!\n", bdf);
		}
	}

	if (spdk_process_is_primary()) {
		rte_dev_event_callback_unregister(NULL, pci_device_rte_dev_event, NULL);
	}
}

/* DPDK probe callback: wrap the rte device in an spdk_pci_device, run the
 * driver's enum callback if one is in progress, and queue the device on the
 * hotplugged list for publication by cleanup_pci_devices().
 * Returns 0 on success, -1 on OOM, or the enum callback's nonzero rc.
 */
int
pci_device_init(struct rte_pci_driver *_drv,
		struct rte_pci_device *_dev)
{
	struct spdk_pci_driver *driver = (struct spdk_pci_driver *)_drv;
	struct spdk_pci_device *dev;
	int rc;

	dev = calloc(1, sizeof(*dev));
	if (dev == NULL) {
		return -1;
	}

	dev->dev_handle = _dev;

	dpdk_pci_device_copy_identifiers(_dev, dev);
	dev->type = "pci";

	dev->map_bar = map_bar_rte;
	dev->unmap_bar = unmap_bar_rte;
	dev->cfg_read = cfg_read_rte;
	dev->cfg_write = cfg_write_rte;

	dev->internal.driver = driver;
	dev->internal.claim_fd = -1;

	if (driver->cb_fn != NULL) {
		rc = driver->cb_fn(driver->cb_arg, dev);
		if (rc != 0) {
			free(dev);
			return rc;
		}
		dev->internal.attached = true;
	}

	pthread_mutex_lock(&g_pci_mutex);
	TAILQ_INSERT_TAIL(&g_pci_hotplugged_devices, dev, internal.tailq);
	pthread_mutex_unlock(&g_pci_mutex);
	return 0;
}

/* Record the tick count at which this devargs' device becomes allowed for
 * probing. Allocates a tracking entry on first use; logs and gives up on OOM.
 */
static void
set_allowed_at(struct rte_devargs *rte_da, uint64_t tsc)
{
	struct env_devargs *env_da;

	env_da = find_env_devargs(rte_da->bus, rte_da->name);
	if (env_da == NULL) {
		env_da = calloc(1, sizeof(*env_da));
		if (env_da == NULL) {
			SPDK_ERRLOG("could not set_allowed_at for device %s\n", rte_da->name);
			return;
		}
		env_da->bus = rte_da->bus;
		spdk_strcpy_pad(env_da->name, rte_da->name, sizeof(env_da->name), 0);
		TAILQ_INSERT_TAIL(&g_env_devargs, env_da, link);
	}

	env_da->allowed_at = tsc;
}

/* Return the recorded allowed-at tick for this devargs, or 0 if never set. */
static uint64_t
get_allowed_at(struct rte_devargs *rte_da)
{
	struct env_devargs *env_da;

	env_da = find_env_devargs(rte_da->bus, rte_da->name);
	if (env_da) {
		return env_da->allowed_at;
	} else {
		return 0;
	}
}

/* DPDK remove callback: mark our wrapper removed so cleanup_pci_devices()
 * frees it later. Returns -EBUSY while the device is unknown or still
 * attached within SPDK.
 */
int
pci_device_fini(struct rte_pci_device *_dev)
{
	struct spdk_pci_device *dev;

	pthread_mutex_lock(&g_pci_mutex);
	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
		if (dev->dev_handle == _dev) {
			break;
		}
	}

	if (dev == NULL || dev->internal.attached) {
		/* The device might be still referenced somewhere in SPDK. */
		pthread_mutex_unlock(&g_pci_mutex);
		return -EBUSY;
	}

	/* remove our allowed_at option */
	if (dpdk_pci_device_get_devargs(_dev)) {
		set_allowed_at(dpdk_pci_device_get_devargs(_dev), 0);
	}

	/* It is possible that removed flag was already set when there is a race
	 * between the remove notification for this process, and another process
	 * that is also detaching from this same device (for example, when using
	 * nvme driver in multi-process mode. So do not assert here. See
	 * #2456 for additional details.
	 */
	dev->internal.removed = true;
	pthread_mutex_unlock(&g_pci_mutex);
	return 0;

}

/* Detach an attached device: release any claim, then dispatch to the provider
 * matching dev->type (e.g. "pci" -> detach_rte).
 */
void
spdk_pci_device_detach(struct spdk_pci_device *dev)
{
	struct spdk_pci_device_provider *provider;

	assert(dev->internal.attached);

	if (dev->internal.claim_fd >= 0) {
		spdk_pci_device_unclaim(dev);
	}

	TAILQ_FOREACH(provider, &g_pci_device_providers, tailq) {
		if (strcmp(dev->type, provider->name) == 0) {
			break;
		}
	}

	assert(provider != NULL);
	dev->internal.attached = false;
	provider->detach_cb(dev);

	cleanup_pci_devices();
}

/* Scan the PCI bus and manage the per-device devargs allow/block policy.
 * With delay_init set, newly seen devices are blocked and only become allowed
 * ~2 seconds later; without it they are allowed immediately.
 * Returns 0 on success, -1 on allocation/parse failure.
 */
static int
scan_pci_bus(bool delay_init)
{
	struct rte_dev_iterator it;
	struct rte_device *rte_dev;
	uint64_t now;

	dpdk_bus_scan();
	now = spdk_get_ticks();

	if (!TAILQ_FIRST(&g_pci_drivers)) {
		return 0;
	}

	RTE_DEV_FOREACH(rte_dev, "bus=pci", &it) {
		struct rte_devargs *da;

		da = dpdk_device_get_devargs(rte_dev);
		if (!da) {
			char devargs_str[128];

			/* the device was never blocked or allowed */
			da = calloc(1, sizeof(*da));
			if (!da) {
				return -1;
			}

			snprintf(devargs_str, sizeof(devargs_str), "pci:%s", dpdk_device_get_name(rte_dev));
			if (rte_devargs_parse(da, devargs_str) != 0) {
				free(da);
				return -1;
			}

			rte_devargs_insert(&da);
			dpdk_device_set_devargs(rte_dev, da);
		}

		if (get_allowed_at(da)) {
			uint64_t allowed_at = get_allowed_at(da);

			/* this device was seen by spdk before... */
			if (da->policy == RTE_DEV_BLOCKED && allowed_at <= now) {
				da->policy = RTE_DEV_ALLOWED;
			}
		} else if ((dpdk_device_scan_allowed(rte_dev) && da->policy == RTE_DEV_ALLOWED) ||
			   da->policy != RTE_DEV_BLOCKED) {
			/* override the policy only if not permanently blocked */

			if (delay_init) {
				da->policy = RTE_DEV_BLOCKED;
				set_allowed_at(da, now + 2 * spdk_get_ticks_hz());
			} else {
				da->policy = RTE_DEV_ALLOWED;
				set_allowed_at(da, now);
			}
		}
	}

	return 0;
}

/* "pci" provider attach: hot-add the device via DPDK, retrying transient IPC
 * failures. -EEXIST after a retry means an earlier attempt actually succeeded.
 */
static int
pci_attach_rte(const struct spdk_pci_addr *addr)
{
	char bdf[32];
	int rc, i = 0;

	spdk_pci_addr_fmt(bdf, sizeof(bdf), addr);

	do {
		rc = rte_eal_hotplug_add("pci", bdf, "");
	} while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT);

	if (i > 1 && rc == -EEXIST) {
		/* Even though the previous request timed out, the device
		 * was attached successfully.
		 */
		rc = 0;
	}

	return rc;
}

static struct spdk_pci_device_provider g_pci_rte_provider = {
	.name = "pci",
	.attach_cb = pci_attach_rte,
	.detach_cb = detach_rte,
};

SPDK_PCI_REGISTER_DEVICE_PROVIDER(pci, &g_pci_rte_provider);

/* Attach a single device by address. If the device is already known to this
 * driver, just run the enum callback; otherwise try each provider in turn.
 * On success, any previous block of this device in the allowlist is lifted.
 * Returns 0 on success, -1 or the enum callback's error on failure.
 */
int
spdk_pci_device_attach(struct spdk_pci_driver *driver,
		       spdk_pci_enum_cb enum_cb,
		       void *enum_ctx, struct spdk_pci_addr *pci_address)
{
	struct spdk_pci_device *dev;
	struct spdk_pci_device_provider *provider;
	struct rte_pci_device *rte_dev;
	struct rte_devargs *da;
	int rc;

	cleanup_pci_devices();

	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
		if (spdk_pci_addr_compare(&dev->addr, pci_address) == 0) {
			break;
		}
	}

	if (dev != NULL && dev->internal.driver == driver) {
		pthread_mutex_lock(&g_pci_mutex);
		if (dev->internal.attached || dev->internal.pending_removal) {
			pthread_mutex_unlock(&g_pci_mutex);
			return -1;
		}

		rc = enum_cb(enum_ctx, dev);
		if (rc == 0) {
			dev->internal.attached = true;
		}
		pthread_mutex_unlock(&g_pci_mutex);
		return rc;
	}

	/* Expose the callback to pci_device_init(), which DPDK invokes during
	 * the provider's hotplug attach.
	 */
	driver->cb_fn = enum_cb;
	driver->cb_arg = enum_ctx;

	rc = -ENODEV;
	TAILQ_FOREACH(provider, &g_pci_device_providers, tailq) {
		rc = provider->attach_cb(pci_address);
		if (rc == 0) {
			break;
		}
	}

	driver->cb_arg = NULL;
	driver->cb_fn = NULL;

	cleanup_pci_devices();

	if (rc != 0) {
		return -1;
	}

	/* explicit attach ignores the allowlist, so if we blocked this
	 * device before let's enable it now - just for clarity.
	 */
	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
		if (spdk_pci_addr_compare(&dev->addr, pci_address) == 0) {
			break;
		}
	}
	assert(dev != NULL);

	rte_dev = dev->dev_handle;
	if (rte_dev != NULL) {
		da = dpdk_pci_device_get_devargs(rte_dev);
		if (da && get_allowed_at(da)) {
			set_allowed_at(da, spdk_get_ticks());
			da->policy = RTE_DEV_ALLOWED;
		}
	}

	return 0;
}

/* Note: You can call spdk_pci_enumerate from more than one thread
 * simultaneously safely, but you cannot call spdk_pci_enumerate
 * and rte_eal_pci_probe simultaneously.
 */
int
spdk_pci_enumerate(struct spdk_pci_driver *driver,
		   spdk_pci_enum_cb enum_cb,
		   void *enum_ctx)
{
	struct spdk_pci_device *dev;
	int rc;

	cleanup_pci_devices();

	/* First offer already-known, unattached devices to the callback. */
	pthread_mutex_lock(&g_pci_mutex);
	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
		if (dev->internal.attached ||
		    dev->internal.driver != driver ||
		    dev->internal.pending_removal) {
			continue;
		}

		rc = enum_cb(enum_ctx, dev);
		if (rc == 0) {
			dev->internal.attached = true;
		} else if (rc < 0) {
			pthread_mutex_unlock(&g_pci_mutex);
			return -1;
		}
	}
	pthread_mutex_unlock(&g_pci_mutex);

	/* Then re-scan with delayed init so brand-new devices get probed. */
	if (scan_pci_bus(true) != 0) {
		return -1;
	}

	driver->cb_fn = enum_cb;
	driver->cb_arg = enum_ctx;

	if (dpdk_bus_probe() != 0) {
		driver->cb_arg = NULL;
		driver->cb_fn = NULL;
		return -1;
	}

	driver->cb_arg = NULL;
	driver->cb_fn = NULL;

	cleanup_pci_devices();
	return 0;
}

/* Invoke fn on every known device while holding g_pci_mutex. */
void
spdk_pci_for_each_device(void *ctx, void (*fn)(void *ctx, struct spdk_pci_device *dev))
{
	struct spdk_pci_device *dev, *tmp;

	pthread_mutex_lock(&g_pci_mutex);
	TAILQ_FOREACH_SAFE(dev, &g_pci_devices, internal.tailq, tmp) {
		fn(ctx, dev);
	}
	pthread_mutex_unlock(&g_pci_mutex);
}

/* BAR map/unmap: dispatch through the device's ops (map_bar_rte by default). */
int
spdk_pci_device_map_bar(struct spdk_pci_device *dev, uint32_t bar,
			void **mapped_addr, uint64_t *phys_addr, uint64_t *size)
{
	return dev->map_bar(dev, bar, mapped_addr, phys_addr, size);
}

int
spdk_pci_device_unmap_bar(struct spdk_pci_device *dev, uint32_t bar, void *addr)
{
	return dev->unmap_bar(dev, bar, addr);
}

/* Interrupt control: delegate to the DPDK shim. */
int
spdk_pci_device_enable_interrupt(struct spdk_pci_device *dev)
{
	return dpdk_pci_device_enable_interrupt(dev->dev_handle);
}

int
spdk_pci_device_disable_interrupt(struct spdk_pci_device *dev)
{
	return dpdk_pci_device_disable_interrupt(dev->dev_handle);
}

int
spdk_pci_device_get_interrupt_efd(struct spdk_pci_device *dev)
{
	return dpdk_pci_device_get_interrupt_efd(dev->dev_handle);
}

/* Simple accessors for the device's address and identifiers. */
uint32_t
spdk_pci_device_get_domain(struct spdk_pci_device *dev)
{
	return dev->addr.domain;
}

uint8_t
spdk_pci_device_get_bus(struct spdk_pci_device *dev)
{
	return dev->addr.bus;
}

uint8_t
spdk_pci_device_get_dev(struct spdk_pci_device *dev)
{
	return dev->addr.dev;
}

uint8_t
spdk_pci_device_get_func(struct spdk_pci_device *dev)
{
	return dev->addr.func;
}

uint16_t
spdk_pci_device_get_vendor_id(struct spdk_pci_device *dev)
{
	return dev->id.vendor_id;
}

uint16_t
spdk_pci_device_get_device_id(struct spdk_pci_device *dev)
{
	return dev->id.device_id;
}

uint16_t
spdk_pci_device_get_subvendor_id(struct spdk_pci_device *dev)
{
	return dev->id.subvendor_id;
}

uint16_t
spdk_pci_device_get_subdevice_id(struct spdk_pci_device *dev)
{
	return dev->id.subdevice_id;
}

struct spdk_pci_id
spdk_pci_device_get_id(struct spdk_pci_device *dev)
{
	return dev->id;
}

int
spdk_pci_device_get_socket_id(struct spdk_pci_device *dev)
{
	return dev->socket_id;
}

/* Config-space access: dispatch through the device's ops, with fixed-width
 * convenience wrappers below.
 */
int
spdk_pci_device_cfg_read(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
{
	return dev->cfg_read(dev, value, len, offset);
}

int
spdk_pci_device_cfg_write(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
{
	return dev->cfg_write(dev, value, len, offset);
}

int
spdk_pci_device_cfg_read8(struct spdk_pci_device *dev, uint8_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, 1, offset);
}

int
spdk_pci_device_cfg_write8(struct spdk_pci_device *dev, uint8_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, 1, offset);
}

int
spdk_pci_device_cfg_read16(struct spdk_pci_device *dev, uint16_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, 2, offset);
}

int
spdk_pci_device_cfg_write16(struct spdk_pci_device *dev, uint16_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, 2, offset);
}

int
spdk_pci_device_cfg_read32(struct spdk_pci_device *dev, uint32_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, 4, offset);
}

int
spdk_pci_device_cfg_write32(struct spdk_pci_device *dev, uint32_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, 4, offset);
}

/* Read the PCIe Device Serial Number extended capability and format it as a
 * 16-hex-digit string into sn. len must be >= 17 (16 digits + NUL).
 * Returns 0 on success, -1 if the capability is absent or any read fails.
 */
int
spdk_pci_device_get_serial_number(struct spdk_pci_device *dev, char *sn, size_t len)
{
	int err;
	uint32_t pos, header = 0;
	uint32_t i, buf[2];

	if (len < 17) {
		return -1;
	}

	/* Extended capabilities start right after legacy config space. */
	err = spdk_pci_device_cfg_read32(dev, &header, PCI_CFG_SIZE);
	if (err || !header) {
		return -1;
	}

	pos = PCI_CFG_SIZE;
	while (1) {
		/* Capability ID lives in the low 16 bits of the header. */
		if ((header & 0x0000ffff) == PCI_EXT_CAP_ID_SN) {
			if (pos) {
				/* skip the header */
				pos += 4;
				for (i = 0; i < 2; i++) {
					err = spdk_pci_device_cfg_read32(dev, &buf[i], pos + 4 * i);
					if (err) {
						return -1;
					}
				}
				/* High dword printed first, then low dword. */
				snprintf(sn, len, "%08x%08x", buf[1], buf[0]);
				return 0;
			}
		}
		/* Next-capability offset is in the top 12 bits, dword aligned. */
		pos = (header >> 20) & 0xffc;
		/* 0 if no other items exist */
		if (pos < PCI_CFG_SIZE) {
			return -1;
		}
		err = spdk_pci_device_cfg_read32(dev, &header, pos);
		if (err) {
			return -1;
		}
	}
	return -1;
}

struct spdk_pci_addr
spdk_pci_device_get_addr(struct spdk_pci_device *dev)
{
	return dev->addr;
}

/* True once a hot-remove notification marked this device pending removal. */
bool
spdk_pci_device_is_removed(struct spdk_pci_device *dev)
{
	return dev->internal.pending_removal;
}

/* Lexicographic comparison of (domain, bus, dev, func); returns -1/0/1. */
int
spdk_pci_addr_compare(const struct spdk_pci_addr *a1, const struct spdk_pci_addr *a2)
{
	if (a1->domain > a2->domain) {
		return 1;
	} else if (a1->domain < a2->domain) {
		return -1;
	} else if (a1->bus > a2->bus) {
		return 1;
	} else if (a1->bus < a2->bus) {
		return -1;
	} else if (a1->dev > a2->dev) {
		return 1;
	} else if (a1->dev < a2->dev) {
		return -1;
	} else if (a1->func > a2->func) {
		return 1;
	} else if (a1->func < a2->func) {
		return -1;
	}

	return 0;
}

#ifdef __linux__
/* Claim exclusive cross-process ownership of a device by taking an fcntl
 * write lock on a per-BDF file in /var/tmp. The claiming pid is stored in the
 * file so a failed claim can report who holds it. Returns 0 on success,
 * -EACCES if another process holds the claim, or -errno on setup failure.
 */
int
spdk_pci_device_claim(struct spdk_pci_device *dev)
{
	int dev_fd;
	char dev_name[64];
	int pid;
	void *dev_map;
	struct flock pcidev_lock = {
		.l_type = F_WRLCK,
		.l_whence = SEEK_SET,
		.l_start = 0,
		.l_len = 0,
	};

	snprintf(dev_name, sizeof(dev_name), "/var/tmp/spdk_pci_lock_%04x:%02x:%02x.%x",
		 dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func);

	dev_fd = open(dev_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
	if (dev_fd == -1) {
		SPDK_ERRLOG("could not open %s\n", dev_name);
		return -errno;
	}

	if (ftruncate(dev_fd, sizeof(int)) != 0) {
		SPDK_ERRLOG("could not truncate %s\n", dev_name);
		close(dev_fd);
		return -errno;
	}

	dev_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
		       MAP_SHARED, dev_fd, 0);
	if (dev_map == MAP_FAILED) {
		SPDK_ERRLOG("could not mmap dev %s (%d)\n", dev_name, errno);
		close(dev_fd);
		return -errno;
	}

	if (fcntl(dev_fd, F_SETLK, &pcidev_lock) != 0) {
		pid = *(int *)dev_map;
		SPDK_ERRLOG("Cannot create lock on device %s, probably"
			    " process %d has claimed it\n", dev_name, pid);
		munmap(dev_map, sizeof(int));
		close(dev_fd);
		/* F_SETLK returns unspecified errnos, normalize them */
		return -EACCES;
	}

	*(int *)dev_map = (int)getpid();
	munmap(dev_map, sizeof(int));
	dev->internal.claim_fd = dev_fd;
	/* Keep dev_fd open to maintain the lock. */
	return 0;
}

/* Release a claim taken by spdk_pci_device_claim(): closing the fd drops the
 * fcntl lock, then the lock file is removed.
 */
void
spdk_pci_device_unclaim(struct spdk_pci_device *dev)
{
	char dev_name[64];

	snprintf(dev_name, sizeof(dev_name), "/var/tmp/spdk_pci_lock_%04x:%02x:%02x.%x",
		 dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func);

	close(dev->internal.claim_fd);
	dev->internal.claim_fd = -1;
	unlink(dev_name);
}
#else /* !__linux__ */
int
spdk_pci_device_claim(struct spdk_pci_device *dev)
{
	/* TODO */
	return 0;
}

void
spdk_pci_device_unclaim(struct spdk_pci_device *dev)
{
	/* TODO */
}
#endif /* __linux__ */

/* Parse a BDF string into addr. Accepts DDDD:BB:DD.F, DDDD.BB.DD.F,
 * DDDD:BB:DD (func defaults to 0), BB:DD.F / BB.DD.F (domain defaults to 0),
 * and BB:DD / BB.DD (both default to 0). Returns 0 or -EINVAL.
 */
int
spdk_pci_addr_parse(struct spdk_pci_addr *addr, const char *bdf)
{
	unsigned domain, bus, dev, func;

	if (addr == NULL || bdf == NULL) {
		return -EINVAL;
	}

	if ((sscanf(bdf, "%x:%x:%x.%x", &domain, &bus, &dev, &func) == 4) ||
	    (sscanf(bdf, "%x.%x.%x.%x", &domain, &bus, &dev, &func) == 4)) {
		/* Matched a full address - all variables are initialized */
	} else if (sscanf(bdf, "%x:%x:%x", &domain, &bus, &dev) == 3) {
		func = 0;
	} else if ((sscanf(bdf, "%x:%x.%x", &bus, &dev, &func) == 3) ||
		   (sscanf(bdf, "%x.%x.%x", &bus, &dev, &func) == 3)) {
		domain = 0;
	} else if ((sscanf(bdf, "%x:%x", &bus, &dev) == 2) ||
		   (sscanf(bdf, "%x.%x", &bus, &dev) == 2)) {
		domain = 0;
		func = 0;
	} else {
		return -EINVAL;
	}

	/* Enforce the PCI field widths: 8-bit bus, 5-bit device, 3-bit function. */
	if (bus > 0xFF || dev > 0x1F || func > 7) {
		return -EINVAL;
	}

	addr->domain = domain;
	addr->bus = bus;
	addr->dev = dev;
	addr->func = func;

	return 0;
}

/* Format addr as DDDD:BB:DD.F into bdf. Returns 0, or -1 on truncation. */
int
spdk_pci_addr_fmt(char *bdf, size_t sz, const struct spdk_pci_addr *addr)
{
	int rc;

	rc = snprintf(bdf, sz, "%04x:%02x:%02x.%x",
		      addr->domain, addr->bus,
		      addr->dev, addr->func);

	if (rc > 0 && (size_t)rc < sz) {
		return 0;
	}

	return -1;
}

/* Register an externally-constructed device (dev's ops must already be set)
 * with a driver, invoking the driver's enum callback if one is active.
 * Returns 0 on success or -ECANCELED if the callback rejects the device.
 */
int
spdk_pci_hook_device(struct spdk_pci_driver *drv, struct spdk_pci_device *dev)
{
	int rc;

	assert(dev->map_bar != NULL);
	assert(dev->unmap_bar != NULL);
	assert(dev->cfg_read != NULL);
	assert(dev->cfg_write != NULL);
	dev->internal.driver = drv;

	if (drv->cb_fn != NULL) {
		rc = drv->cb_fn(drv->cb_arg, dev);
		if (rc != 0) {
			return -ECANCELED;
		}

		dev->internal.attached = true;
	}

	TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq);

	return 0;
}

/* Remove a hooked device from the device list; it must be detached first. */
void
spdk_pci_unhook_device(struct spdk_pci_device *dev)
{
	assert(!dev->internal.attached);
	TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq);
}

/* Register an alternative device provider (matched by dev->type on detach). */
void
spdk_pci_register_device_provider(struct spdk_pci_device_provider *provider)
{
	TAILQ_INSERT_TAIL(&g_pci_device_providers, provider, tailq);
}

const char *
spdk_pci_device_get_type(const struct spdk_pci_device *dev)
{
	return dev->type;
}

/* Insert an ALLOWED devargs entry for the given address so DPDK will probe it.
 * Returns 0, -ENOMEM, or -EINVAL on parse/insert failure.
 */
int
spdk_pci_device_allow(struct spdk_pci_addr *pci_addr)
{
	struct rte_devargs *da;
	char devargs_str[128];

	da = calloc(1, sizeof(*da));
	if (da == NULL) {
		SPDK_ERRLOG("could not allocate rte_devargs\n");
		return -ENOMEM;
	}

	snprintf(devargs_str, sizeof(devargs_str), "pci:%04x:%02x:%02x.%x",
		 pci_addr->domain, pci_addr->bus, pci_addr->dev, pci_addr->func);
	if (rte_devargs_parse(da, devargs_str) != 0) {
		SPDK_ERRLOG("rte_devargs_parse() failed on '%s'\n", devargs_str);
		free(da);
		return -EINVAL;
	}
	da->policy = RTE_DEV_ALLOWED;
	/* Note: if a devargs already exists for this device address, it just gets
	 * overridden. So we do not need to check if the devargs already exists.
	 * DPDK will take care of memory management for the devargs structure after
	 * it has been inserted, so there's nothing SPDK needs to track.
	 */
	if (rte_devargs_insert(&da) != 0) {
		SPDK_ERRLOG("rte_devargs_insert() failed on '%s'\n", devargs_str);
		free(da);
		return -EINVAL;
	}

	return 0;
}