/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2015 Intel Corporation.
 * All rights reserved.
 */

#include "env_internal.h"
#include "pci_dpdk.h"

#include <rte_alarm.h>
#include <rte_devargs.h>
#include <rte_pci.h>
#include "spdk/env.h"
#include "spdk/log.h"
#include "spdk/string.h"
#include "spdk/memory.h"

#define SYSFS_PCI_DRIVERS "/sys/bus/pci/drivers"

/* Compatibility for versions < 20.11 */
#if RTE_VERSION < RTE_VERSION_NUM(20, 11, 0, 0)
#define RTE_DEV_ALLOWED RTE_DEV_WHITELISTED
#define RTE_DEV_BLOCKED RTE_DEV_BLACKLISTED
#define RTE_BUS_SCAN_ALLOWLIST RTE_BUS_SCAN_WHITELIST
#endif

/* Size of the legacy PCI config space; PCIe extended capabilities begin
 * at this offset.
 */
#define PCI_CFG_SIZE 256
/* PCIe extended capability ID of the Device Serial Number capability. */
#define PCI_EXT_CAP_ID_SN 0x03

/* DPDK 18.11+ hotplug isn't robust. Multiple apps starting at the same time
 * might cause the internal IPC to misbehave. Just retry in such case.
 */
#define DPDK_HOTPLUG_RETRY_COUNT 4

/* DPDK alarm/interrupt thread */
static pthread_mutex_t g_pci_mutex = PTHREAD_MUTEX_INITIALIZER;
/* all PCI devices currently tracked by SPDK; guarded by g_pci_mutex */
static TAILQ_HEAD(, spdk_pci_device) g_pci_devices = TAILQ_HEAD_INITIALIZER(g_pci_devices);
/* devices hotplugged on a dpdk thread */
static TAILQ_HEAD(, spdk_pci_device) g_pci_hotplugged_devices =
	TAILQ_HEAD_INITIALIZER(g_pci_hotplugged_devices);
/* drivers registered via spdk_pci_driver_register() */
static TAILQ_HEAD(, spdk_pci_driver) g_pci_drivers = TAILQ_HEAD_INITIALIZER(g_pci_drivers);
/* device providers registered via spdk_pci_register_device_provider() */
static TAILQ_HEAD(, spdk_pci_device_provider) g_pci_device_providers =
	TAILQ_HEAD_INITIALIZER(g_pci_device_providers);

int pci_device_init(struct rte_pci_driver *driver, struct rte_pci_device *device);
int pci_device_fini(struct rte_pci_device *device);

/* Bookkeeping for a DPDK devargs entry: remembers the tick count at which
 * a device that SPDK temporarily blocked becomes allowed again.
 */
struct env_devargs {
	struct rte_bus *bus;
	char name[128];
	uint64_t allowed_at;
	TAILQ_ENTRY(env_devargs) link;
};
static TAILQ_HEAD(, env_devargs) g_env_devargs = TAILQ_HEAD_INITIALIZER(g_env_devargs);

/* Find the env_devargs entry matching the given bus and device name.
 * Returns NULL if no entry was recorded for this device yet.
 */
static struct env_devargs *
find_env_devargs(struct rte_bus *bus, const char *name)
{
	struct env_devargs *da;

	TAILQ_FOREACH(da, &g_env_devargs, link) {
		if (bus == da->bus && !strcmp(name, da->name)) {
			return da;
		}
	}

	return NULL;
}

/* Report the BAR mapping that DPDK already established for this device.
 * NOTE(review): the result of dpdk_pci_device_get_mem_resource() is
 * dereferenced without a NULL check - presumably it always returns a valid
 * pointer for a probed device; confirm against the pci_dpdk implementation.
 */
static int
map_bar_rte(struct spdk_pci_device *device, uint32_t bar,
	    void **mapped_addr, uint64_t *phys_addr, uint64_t *size)
{
	struct rte_mem_resource *res;

	res = dpdk_pci_device_get_mem_resource(device->dev_handle, bar);
	*mapped_addr = res->addr;
	*phys_addr = (uint64_t)res->phys_addr;
	*size = (uint64_t)res->len;

	return 0;
}

/* No-op: the BAR mapping is owned (and torn down) by DPDK itself. */
static int
unmap_bar_rte(struct spdk_pci_device *device, uint32_t bar, void *addr)
{
	return 0;
}

/* Read 'len' bytes at 'offset' from the device's PCI config space. */
static int
cfg_read_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
{
	return dpdk_pci_device_read_config(dev->dev_handle, value, len, offset);
}

/* Write 'len' bytes at 'offset' into the device's PCI config space. */
static int
cfg_write_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
{
	return dpdk_pci_device_write_config(dev->dev_handle, value, len, offset);
}

/* Ask DPDK to hot-remove the device, retrying on -ENOMSG up to
 * DPDK_HOTPLUG_RETRY_COUNT times (see the comment on that macro).
 */
static void
remove_rte_dev(struct rte_pci_device *rte_dev)
{
	char bdf[32];
	int i = 0, rc;

	snprintf(bdf, sizeof(bdf), "%s", dpdk_pci_device_get_name(rte_dev));
	do {
		rc = rte_eal_hotplug_remove("pci", bdf);
	} while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT);
}

/* rte_eal_alarm callback: thin wrapper around remove_rte_dev(). */
static void
detach_rte_cb(void *_dev)
{
	remove_rte_dev(_dev);
}

/* if it's a physical device we need to deal with DPDK on
 * a different process and we can't just unset one flag
 * here. We also want to stop using any device resources
 * so that the device isn't "in use" by the userspace driver
 * once we detach it. This would allow attaching the device
 * to a different process, or to a kernel driver like nvme.
 */
static void
detach_rte(struct spdk_pci_device *dev)
{
	struct rte_pci_device *rte_dev = dev->dev_handle;
	int i;
	bool removed;

	/* only the primary process performs the actual DPDK removal */
	if (!spdk_process_is_primary()) {
		return;
	}

	pthread_mutex_lock(&g_pci_mutex);
	dev->internal.attached = false;
	/* prevent the hotremove notification from removing this device */
	dev->internal.pending_removal = true;
	pthread_mutex_unlock(&g_pci_mutex);

	/* schedule the removal on the DPDK alarm/interrupt thread */
	rte_eal_alarm_set(1, detach_rte_cb, rte_dev);

	/* wait up to 2s for the cb to execute */
	for (i = 2000; i > 0; i--) {

		spdk_delay_us(1000);
		pthread_mutex_lock(&g_pci_mutex);
		removed = dev->internal.removed;
		pthread_mutex_unlock(&g_pci_mutex);

		if (removed) {
			break;
		}
	}

	/* besides checking the removed flag, we also need to wait
	 * for the dpdk detach function to unwind, as it's doing some
	 * operations even after calling our detach callback. Simply
	 * cancel the alarm - if it started executing already, this
	 * call will block and wait for it to finish.
	 */
	rte_eal_alarm_cancel(detach_rte_cb, rte_dev);

	/* the device could have been finally removed, so just check
	 * it again.
	 */
	pthread_mutex_lock(&g_pci_mutex);
	removed = dev->internal.removed;
	pthread_mutex_unlock(&g_pci_mutex);
	if (!removed) {
		SPDK_ERRLOG("Timeout waiting for DPDK to remove PCI device %s.\n",
			    dpdk_pci_device_get_name(rte_dev));
		/* If we reach this state, then the device couldn't be removed and most likely
		   a subsequent hot add of a device in the same BDF will fail */
	}
}

/* Register an SPDK PCI driver. The driver is appended to g_pci_drivers and
 * later registered with DPDK in pci_env_init().
 * NOTE(review): allocation failure is silently ignored here (no return code
 * in the API), leaving the driver unregistered.
 */
void
spdk_pci_driver_register(const char *name, struct spdk_pci_id *id_table, uint32_t flags)
{
	struct spdk_pci_driver *driver;

	driver = calloc(1, sizeof(*driver));
	if (!driver) {
		/* we can't do any better than bailing atm */
		return;
	}

	driver->name = name;
	driver->id_table = id_table;
	driver->drv_flags = flags;
	driver->driver = (struct rte_pci_driver *)driver->driver_buf;
	TAILQ_INSERT_TAIL(&g_pci_drivers, driver, tailq);
}

/* Convenience accessor for the built-in "nvme" driver. */
struct spdk_pci_driver *
spdk_pci_nvme_get_driver(void)
{
	return spdk_pci_get_driver("nvme");
}

/* Look up a registered SPDK PCI driver by name; NULL if not registered. */
struct spdk_pci_driver *
spdk_pci_get_driver(const char *name)
{
	struct spdk_pci_driver *driver;

	TAILQ_FOREACH(driver, &g_pci_drivers, tailq) {
		if (strcmp(driver->name, name) == 0) {
			return driver;
		}
	}

	return NULL;
}

/* DPDK device event callback (runs in the DPDK interrupt thread context).
 * On REMOVE, marks the matching device as pending removal and, if nothing
 * is attached to it, schedules a deferred detach via rte_eal_alarm_set().
 */
static void
pci_device_rte_dev_event(const char *device_name,
			 enum rte_dev_event_type event,
			 void *cb_arg)
{
	struct spdk_pci_device *dev;
	bool can_detach = false;

	switch (event) {
	default:
	case RTE_DEV_EVENT_ADD:
		/* Nothing to do here yet. */
		break;
	case RTE_DEV_EVENT_REMOVE:
		pthread_mutex_lock(&g_pci_mutex);
		TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
			struct rte_pci_device *rte_dev = dev->dev_handle;

			/* Note: these ERRLOGs are useful for triaging issue #2983. */
			if (dev->internal.pending_removal || dev->internal.removed) {
				SPDK_ERRLOG("Received event for device SPDK already tried to remove\n");
				SPDK_ERRLOG("pending_removal=%d removed=%d\n", dev->internal.pending_removal,
					    dev->internal.removed);
			}

			if (strcmp(dpdk_pci_device_get_name(rte_dev), device_name) == 0 &&
			    !dev->internal.pending_removal) {
				can_detach = !dev->internal.attached;
				/* prevent any further attaches */
				dev->internal.pending_removal = true;
				break;
			}
		}
		pthread_mutex_unlock(&g_pci_mutex);

		/* dev is NULL here if the loop above finished without a break */
		if (dev != NULL && can_detach) {
			/* if device is not attached we can remove it right away.
			 * Otherwise it will be removed at detach.
			 *
			 * Because the user's callback is invoked in eal interrupt
			 * callback, the interrupt callback need to be finished before
			 * it can be unregistered when detaching device. So finish
			 * callback soon and use a deferred removal to detach device
			 * is need. It is a workaround, once the device detaching be
			 * moved into the eal in the future, the deferred removal could
			 * be deleted.
			 */
			rte_eal_alarm_set(1, detach_rte_cb, dev->dev_handle);
		}
		break;
	}
}

/* Reconcile the global device list: free devices flagged as removed and
 * promote devices hotplugged on a DPDK thread into g_pci_devices, keeping
 * the vtophys translation tables in sync.
 */
static void
cleanup_pci_devices(void)
{
	struct spdk_pci_device *dev, *tmp;

	pthread_mutex_lock(&g_pci_mutex);
	/* cleanup removed devices */
	TAILQ_FOREACH_SAFE(dev, &g_pci_devices, internal.tailq, tmp) {
		if (!dev->internal.removed) {
			continue;
		}

		vtophys_pci_device_removed(dev->dev_handle);
		TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq);
		free(dev);
	}

	/* add newly-attached devices */
	TAILQ_FOREACH_SAFE(dev, &g_pci_hotplugged_devices, internal.tailq, tmp) {
		TAILQ_REMOVE(&g_pci_hotplugged_devices, dev, internal.tailq);
		TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq);
		vtophys_pci_device_added(dev->dev_handle);
	}
	pthread_mutex_unlock(&g_pci_mutex);
}

static int scan_pci_bus(bool delay_init);

/* Common tail of pci_env_init()/pci_env_reinit(): scan the bus and (in the
 * primary process) install the hotremove event callback.
 */
static inline void
_pci_env_init(void)
{
	/* We assume devices were present on the bus for more than 2 seconds
	 * before initializing SPDK and there's no need to wait more. We scan
	 * the bus, but we don't block any devices.
	 */
	scan_pci_bus(false);

	/* Register a single hotremove callback for all devices. */
	if (spdk_process_is_primary()) {
		rte_dev_event_callback_register(NULL, pci_device_rte_dev_event, NULL);
	}
}

/* One-time PCI environment initialization: set up the DPDK PCI layer,
 * register all SPDK drivers with it, then scan the bus.
 * Returns 0 on success or the dpdk_pci_init() error code.
 */
int
pci_env_init(void)
{
	struct spdk_pci_driver *driver;
	int rc;

	rc = dpdk_pci_init();
	if (rc) {
		return rc;
	}

	TAILQ_FOREACH(driver, &g_pci_drivers, tailq) {
		dpdk_pci_driver_register(driver, pci_device_init, pci_device_fini);
	}

	_pci_env_init();
	return 0;
}

/* Re-initialization path (e.g. after an env restart). */
void
pci_env_reinit(void)
{
	/* There is no need to register pci drivers again, since they were
	 * already pre-registered in pci_env_init.
	 */

	_pci_env_init();
}

/* Tear down the PCI environment: flush removed/hotplugged devices, warn
 * about anything still attached, and unregister the hotremove callback.
 */
void
pci_env_fini(void)
{
	struct spdk_pci_device *dev;
	char bdf[32];

	cleanup_pci_devices();
	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
		if (dev->internal.attached) {
			spdk_pci_addr_fmt(bdf, sizeof(bdf), &dev->addr);
			SPDK_ERRLOG("Device %s is still attached at shutdown!\n", bdf);
		}
	}

	if (spdk_process_is_primary()) {
		rte_dev_event_callback_unregister(NULL, pci_device_rte_dev_event, NULL);
	}
}

/* DPDK probe callback: wrap the rte_pci_device in an spdk_pci_device,
 * invoke the driver's attach callback (if enumeration is in progress),
 * and queue the device for insertion into g_pci_devices.
 * Returns 0 on success, -1 on allocation failure, or the callback's error.
 */
int
pci_device_init(struct rte_pci_driver *_drv,
		struct rte_pci_device *_dev)
{
	struct spdk_pci_driver *driver = (struct spdk_pci_driver *)_drv;
	struct spdk_pci_device *dev;
	struct rte_pci_addr *addr;
	struct rte_pci_id *id;
	int rc;

	dev = calloc(1, sizeof(*dev));
	if (dev == NULL) {
		return -1;
	}

	dev->dev_handle = _dev;

	addr = dpdk_pci_device_get_addr(_dev);
	dev->addr.domain = addr->domain;
	dev->addr.bus = addr->bus;
	dev->addr.dev = addr->devid;
	dev->addr.func = addr->function;

	id = dpdk_pci_device_get_id(_dev);
	dev->id.class_id = id->class_id;
	dev->id.vendor_id = id->vendor_id;
	dev->id.device_id = id->device_id;
	dev->id.subvendor_id = id->subsystem_vendor_id;
	dev->id.subdevice_id = id->subsystem_device_id;

	dev->socket_id = dpdk_pci_device_get_numa_node(_dev);
	dev->type = "pci";

	/* wire up the rte-backed device ops */
	dev->map_bar = map_bar_rte;
	dev->unmap_bar = unmap_bar_rte;
	dev->cfg_read = cfg_read_rte;
	dev->cfg_write = cfg_write_rte;

	dev->internal.driver = driver;
	dev->internal.claim_fd = -1;

	/* cb_fn is only set while spdk_pci_device_attach()/enumerate() runs */
	if (driver->cb_fn != NULL) {
		rc = driver->cb_fn(driver->cb_arg, dev);
		if (rc != 0) {
			free(dev);
			return rc;
		}
		dev->internal.attached = true;
	}

	pthread_mutex_lock(&g_pci_mutex);
	TAILQ_INSERT_TAIL(&g_pci_hotplugged_devices, dev, internal.tailq);
	pthread_mutex_unlock(&g_pci_mutex);
	return 0;
}

/* Record (or update) the tick count at which the device described by
 * rte_da becomes allowed again. Creates the env_devargs entry on demand.
 */
static void
set_allowed_at(struct rte_devargs *rte_da, uint64_t tsc)
{
	struct env_devargs *env_da;

	env_da = find_env_devargs(rte_da->bus, rte_da->name);
	if (env_da == NULL) {
		env_da = calloc(1, sizeof(*env_da));
		if (env_da == NULL) {
			SPDK_ERRLOG("could not set_allowed_at for device %s\n", rte_da->name);
			return;
		}
		env_da->bus = rte_da->bus;
		spdk_strcpy_pad(env_da->name, rte_da->name, sizeof(env_da->name), 0);
		TAILQ_INSERT_TAIL(&g_env_devargs, env_da, link);
	}

	env_da->allowed_at = tsc;
}

/* Return the allowed_at tick count for the device, or 0 if SPDK has never
 * recorded one (i.e. the device was not seen by SPDK before).
 */
static uint64_t
get_allowed_at(struct rte_devargs *rte_da)
{
	struct env_devargs *env_da;

	env_da = find_env_devargs(rte_da->bus, rte_da->name);
	if (env_da) {
		return env_da->allowed_at;
	} else {
		return 0;
	}
}

/* DPDK remove callback: mark the matching spdk_pci_device as removed so
 * cleanup_pci_devices() can free it. Returns -EBUSY if the device is not
 * tracked or still attached.
 */
int
pci_device_fini(struct rte_pci_device *_dev)
{
	struct spdk_pci_device *dev;

	pthread_mutex_lock(&g_pci_mutex);
	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
		if (dev->dev_handle == _dev) {
			break;
		}
	}

	if (dev == NULL || dev->internal.attached) {
		/* The device might be still referenced somewhere in SPDK. */
		pthread_mutex_unlock(&g_pci_mutex);
		return -EBUSY;
	}

	/* remove our allowed_at option */
	if (dpdk_pci_device_get_devargs(_dev)) {
		set_allowed_at(dpdk_pci_device_get_devargs(_dev), 0);
	}

	/* It is possible that removed flag was already set when there is a race
	 * between the remove notification for this process, and another process
	 * that is also detaching from this same device (for example, when using
	 * nvme driver in multi-process mode. So do not assert here. See
	 * #2456 for additional details.
	 */
	dev->internal.removed = true;
	pthread_mutex_unlock(&g_pci_mutex);
	return 0;

}

/* Detach a device from SPDK: release any claim, delegate to the device's
 * provider detach callback, then reconcile the global device list.
 */
void
spdk_pci_device_detach(struct spdk_pci_device *dev)
{
	struct spdk_pci_device_provider *provider;

	assert(dev->internal.attached);

	if (dev->internal.claim_fd >= 0) {
		spdk_pci_device_unclaim(dev);
	}

	/* find the provider that created this device (matched by type name) */
	TAILQ_FOREACH(provider, &g_pci_device_providers, tailq) {
		if (strcmp(dev->type, provider->name) == 0) {
			break;
		}
	}

	assert(provider != NULL);
	dev->internal.attached = false;
	provider->detach_cb(dev);

	cleanup_pci_devices();
}

/* Scan the PCI bus and adjust per-device devargs policies.
 * With delay_init set, newly seen devices are blocked for ~2 seconds
 * (driven by allowed_at) before becoming eligible for probing - this gives
 * other SPDK processes a window to claim them first.
 * Returns 0 on success, -1 on allocation/parse failure.
 */
static int
scan_pci_bus(bool delay_init)
{
	struct rte_dev_iterator it;
	struct rte_device *rte_dev;
	uint64_t now;

	dpdk_bus_scan();
	now = spdk_get_ticks();

	if (!TAILQ_FIRST(&g_pci_drivers)) {
		return 0;
	}

	RTE_DEV_FOREACH(rte_dev, "bus=pci", &it) {
		struct rte_devargs *da;

		da = dpdk_device_get_devargs(rte_dev);
		if (!da) {
			char devargs_str[128];

			/* the device was never blocked or allowed */
			da = calloc(1, sizeof(*da));
			if (!da) {
				return -1;
			}

			snprintf(devargs_str, sizeof(devargs_str), "pci:%s", dpdk_device_get_name(rte_dev));
			if (rte_devargs_parse(da, devargs_str) != 0) {
				free(da);
				return -1;
			}

			rte_devargs_insert(&da);
			dpdk_device_set_devargs(rte_dev, da);
		}

		if (get_allowed_at(da)) {
			uint64_t allowed_at = get_allowed_at(da);

			/* this device was seen by spdk before... */
			if (da->policy == RTE_DEV_BLOCKED && allowed_at <= now) {
				da->policy = RTE_DEV_ALLOWED;
			}
		} else if ((dpdk_device_scan_allowed(rte_dev) && da->policy == RTE_DEV_ALLOWED) ||
			   da->policy != RTE_DEV_BLOCKED) {
			/* override the policy only if not permanently blocked */

			if (delay_init) {
				da->policy = RTE_DEV_BLOCKED;
				set_allowed_at(da, now + 2 * spdk_get_ticks_hz());
			} else {
				da->policy = RTE_DEV_ALLOWED;
				set_allowed_at(da, now);
			}
		}
	}

	return 0;
}

/* Provider attach callback for plain "pci" devices: hotplug-add through
 * DPDK, retrying on -ENOMSG (see DPDK_HOTPLUG_RETRY_COUNT).
 */
static int
pci_attach_rte(const struct spdk_pci_addr *addr)
{
	char bdf[32];
	int rc, i = 0;

	spdk_pci_addr_fmt(bdf, sizeof(bdf), addr);

	do {
		rc = rte_eal_hotplug_add("pci", bdf, "");
	} while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT);

	if (i > 1 && rc == -EEXIST) {
		/* Even though the previous request timed out, the device
		 * was attached successfully.
		 */
		rc = 0;
	}

	return rc;
}

/* The default, DPDK-backed device provider. */
static struct spdk_pci_device_provider g_pci_rte_provider = {
	.name = "pci",
	.attach_cb = pci_attach_rte,
	.detach_cb = detach_rte,
};

SPDK_PCI_REGISTER_DEVICE_PROVIDER(pci, &g_pci_rte_provider);

/* Attach a single device at pci_address to the given driver. If the device
 * is already tracked, the enum callback is invoked directly; otherwise each
 * registered provider is asked to attach it. Returns 0 on success, -1 (or
 * the callback's positive error) on failure.
 */
int
spdk_pci_device_attach(struct spdk_pci_driver *driver,
		       spdk_pci_enum_cb enum_cb,
		       void *enum_ctx, struct spdk_pci_addr *pci_address)
{
	struct spdk_pci_device *dev;
	struct spdk_pci_device_provider *provider;
	struct rte_pci_device *rte_dev;
	struct rte_devargs *da;
	int rc;

	cleanup_pci_devices();

	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
		if (spdk_pci_addr_compare(&dev->addr, pci_address) == 0) {
			break;
		}
	}

	if (dev != NULL && dev->internal.driver == driver) {
		pthread_mutex_lock(&g_pci_mutex);
		if (dev->internal.attached || dev->internal.pending_removal) {
			pthread_mutex_unlock(&g_pci_mutex);
			return -1;
		}

		rc = enum_cb(enum_ctx, dev);
		if (rc == 0) {
			dev->internal.attached = true;
		}
		pthread_mutex_unlock(&g_pci_mutex);
		return rc;
	}

	/* expose the callback to pci_device_init() during the probe below */
	driver->cb_fn = enum_cb;
	driver->cb_arg = enum_ctx;

	rc = -ENODEV;
	TAILQ_FOREACH(provider, &g_pci_device_providers, tailq) {
		rc = provider->attach_cb(pci_address);
		if (rc == 0) {
			break;
		}
	}

	driver->cb_arg = NULL;
	driver->cb_fn = NULL;

	cleanup_pci_devices();

	if (rc != 0) {
		return -1;
	}

	/* explicit attach ignores the allowlist, so if we blocked this
	 * device before let's enable it now - just for clarity.
	 */
	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
		if (spdk_pci_addr_compare(&dev->addr, pci_address) == 0) {
			break;
		}
	}
	assert(dev != NULL);

	rte_dev = dev->dev_handle;
	if (rte_dev != NULL) {
		da = dpdk_pci_device_get_devargs(rte_dev);
		if (da && get_allowed_at(da)) {
			set_allowed_at(da, spdk_get_ticks());
			da->policy = RTE_DEV_ALLOWED;
		}
	}

	return 0;
}

/* Note: You can call spdk_pci_enumerate from more than one thread
 * simultaneously safely, but you cannot call spdk_pci_enumerate
 * and rte_eal_pci_probe simultaneously.
 */
int
spdk_pci_enumerate(struct spdk_pci_driver *driver,
		   spdk_pci_enum_cb enum_cb,
		   void *enum_ctx)
{
	struct spdk_pci_device *dev;
	int rc;

	cleanup_pci_devices();

	/* first offer already-tracked, unattached devices to the callback */
	pthread_mutex_lock(&g_pci_mutex);
	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
		if (dev->internal.attached ||
		    dev->internal.driver != driver ||
		    dev->internal.pending_removal) {
			continue;
		}

		rc = enum_cb(enum_ctx, dev);
		if (rc == 0) {
			dev->internal.attached = true;
		} else if (rc < 0) {
			pthread_mutex_unlock(&g_pci_mutex);
			return -1;
		}
		/* rc > 0 means "skip this device", continue enumerating */
	}
	pthread_mutex_unlock(&g_pci_mutex);

	if (scan_pci_bus(true) != 0) {
		return -1;
	}

	/* expose the callback to pci_device_init() during the probe */
	driver->cb_fn = enum_cb;
	driver->cb_arg = enum_ctx;

	if (dpdk_bus_probe() != 0) {
		driver->cb_arg = NULL;
		driver->cb_fn = NULL;
		return -1;
	}

	driver->cb_arg = NULL;
	driver->cb_fn = NULL;

	cleanup_pci_devices();
	return 0;
}

/* Invoke fn for every tracked PCI device, holding g_pci_mutex throughout.
 * fn must not call back into functions that take g_pci_mutex.
 */
void
spdk_pci_for_each_device(void *ctx, void (*fn)(void *ctx, struct spdk_pci_device *dev))
{
	struct spdk_pci_device *dev, *tmp;

	pthread_mutex_lock(&g_pci_mutex);
	TAILQ_FOREACH_SAFE(dev, &g_pci_devices, internal.tailq, tmp) {
		fn(ctx, dev);
	}
	pthread_mutex_unlock(&g_pci_mutex);
}

/* Map a BAR and, when running with VFIO + IOMMU, also program the IOMMU
 * so the BAR is DMA-able at an iova matching DPDK's iova mode.
 */
int
spdk_pci_device_map_bar(struct spdk_pci_device *dev, uint32_t bar,
			void **mapped_addr, uint64_t *phys_addr, uint64_t *size)
{
	int rc;

	rc = dev->map_bar(dev, bar, mapped_addr, phys_addr, size);
	if (rc) {
		return rc;
	}

#if VFIO_ENABLED
	/* Automatically map the BAR to the IOMMU */
	if (!spdk_iommu_is_enabled()) {
		return 0;
	}

	if (rte_eal_iova_mode() == RTE_IOVA_VA) {
		/* We'll use the virtual address as the iova to match DPDK. */
		rc = vtophys_iommu_map_dma_bar((uint64_t)(*mapped_addr), (uint64_t) *mapped_addr, *size);
		if (rc) {
			dev->unmap_bar(dev, bar, *mapped_addr);
			return -EFAULT;
		}

		*phys_addr = (uint64_t)(*mapped_addr);
	} else {
		/* We'll use the physical address as the iova to match DPDK. */
		rc = vtophys_iommu_map_dma_bar((uint64_t)(*mapped_addr), *phys_addr, *size);
		if (rc) {
			dev->unmap_bar(dev, bar, *mapped_addr);
			return -EFAULT;
		}
	}
#endif
	return rc;
}

/* Undo spdk_pci_device_map_bar(): drop the IOMMU mapping (if any), then
 * unmap the BAR itself.
 */
int
spdk_pci_device_unmap_bar(struct spdk_pci_device *dev, uint32_t bar, void *addr)
{
#if VFIO_ENABLED
	int rc;

	if (spdk_iommu_is_enabled()) {
		rc = vtophys_iommu_unmap_dma_bar((uint64_t)addr);
		if (rc) {
			return -EFAULT;
		}
	}
#endif

	return dev->unmap_bar(dev, bar, addr);
}

/* Enable the device's interrupt via the DPDK backend. */
int
spdk_pci_device_enable_interrupt(struct spdk_pci_device *dev)
{
	return dpdk_pci_device_enable_interrupt(dev->dev_handle);
}

/* Disable the device's interrupt via the DPDK backend. */
int
spdk_pci_device_disable_interrupt(struct spdk_pci_device *dev)
{
	return dpdk_pci_device_disable_interrupt(dev->dev_handle);
}

/* Get the eventfd associated with the device's interrupt. */
int
spdk_pci_device_get_interrupt_efd(struct spdk_pci_device *dev)
{
	return dpdk_pci_device_get_interrupt_efd(dev->dev_handle);
}

/* PCI address component accessors. */
uint32_t
spdk_pci_device_get_domain(struct spdk_pci_device *dev)
{
	return dev->addr.domain;
}

uint8_t
spdk_pci_device_get_bus(struct spdk_pci_device *dev)
{
	return dev->addr.bus;
}

uint8_t
spdk_pci_device_get_dev(struct spdk_pci_device *dev)
{
	return dev->addr.dev;
}

uint8_t
spdk_pci_device_get_func(struct spdk_pci_device *dev)
{
	return dev->addr.func;
}

/* PCI ID component accessors. */
uint16_t
spdk_pci_device_get_vendor_id(struct spdk_pci_device *dev)
{
	return dev->id.vendor_id;
}

uint16_t
spdk_pci_device_get_device_id(struct spdk_pci_device *dev)
{
	return dev->id.device_id;
}

uint16_t
spdk_pci_device_get_subvendor_id(struct spdk_pci_device *dev)
{
	return dev->id.subvendor_id;
}

uint16_t
spdk_pci_device_get_subdevice_id(struct spdk_pci_device *dev)
{
	return dev->id.subdevice_id;
}

/* Return the full PCI ID (by value). */
struct spdk_pci_id
spdk_pci_device_get_id(struct spdk_pci_device *dev)
{
	return dev->id;
}

/* NUMA node the device is attached to. */
int
spdk_pci_device_get_socket_id(struct spdk_pci_device *dev)
{
	return dev->socket_id;
}

/* Generic config-space read/write, dispatched through the device ops. */
int
spdk_pci_device_cfg_read(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
{
	return dev->cfg_read(dev, value, len, offset);
}

int
spdk_pci_device_cfg_write(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
{
	return dev->cfg_write(dev, value, len, offset);
}

/* Fixed-width convenience wrappers around cfg_read/cfg_write. */
int
spdk_pci_device_cfg_read8(struct spdk_pci_device *dev, uint8_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, 1, offset);
}

int
spdk_pci_device_cfg_write8(struct spdk_pci_device *dev, uint8_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, 1, offset);
}

int
spdk_pci_device_cfg_read16(struct spdk_pci_device *dev, uint16_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, 2, offset);
}

int
spdk_pci_device_cfg_write16(struct spdk_pci_device *dev, uint16_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, 2, offset);
}

int
spdk_pci_device_cfg_read32(struct spdk_pci_device *dev, uint32_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, 4, offset);
}

int
spdk_pci_device_cfg_write32(struct spdk_pci_device *dev, uint32_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, 4, offset);
}

/* Walk the PCIe extended capability list looking for the Device Serial
 * Number capability and format it as a 16-hex-digit string into sn.
 * len must be at least 17 (16 digits + NUL). Returns 0 on success,
 * -1 if the capability is absent or any config read fails.
 */
int
spdk_pci_device_get_serial_number(struct spdk_pci_device *dev, char *sn, size_t len)
{
	int err;
	uint32_t pos, header = 0;
	uint32_t i, buf[2];

	if (len < 17) {
		return -1;
	}

	/* extended capabilities start right after the legacy config space */
	err = spdk_pci_device_cfg_read32(dev, &header, PCI_CFG_SIZE);
	if (err || !header) {
		return -1;
	}

	pos = PCI_CFG_SIZE;
	while (1) {
		/* low 16 bits of the header hold the capability ID */
		if ((header & 0x0000ffff) == PCI_EXT_CAP_ID_SN) {
			if (pos) {
				/* skip the header */
				pos += 4;
				for (i = 0; i < 2; i++) {
					err = spdk_pci_device_cfg_read32(dev, &buf[i], pos + 4 * i);
					if (err) {
						return -1;
					}
				}
				/* serial number is stored low dword first */
				snprintf(sn, len, "%08x%08x", buf[1], buf[0]);
				return 0;
			}
		}
		/* bits 31:20 hold the next-capability offset (dword aligned) */
		pos = (header >> 20) & 0xffc;
		/* 0 if no other items exist */
		if (pos < PCI_CFG_SIZE) {
			return -1;
		}
		err = spdk_pci_device_cfg_read32(dev, &header, pos);
		if (err) {
			return -1;
		}
	}
	return -1;
}

/* Return the device's PCI address (by value). */
struct spdk_pci_addr
spdk_pci_device_get_addr(struct spdk_pci_device *dev)
{
	return dev->addr;
}

/* True if a hot-remove was signaled for the device (it may still be
 * present in the list until finally cleaned up).
 */
bool
spdk_pci_device_is_removed(struct spdk_pci_device *dev)
{
	return dev->internal.pending_removal;
}

/* Three-way comparison of two PCI addresses in domain/bus/dev/func order.
 * Returns -1, 0, or 1, suitable for sorting.
 */
int
spdk_pci_addr_compare(const struct spdk_pci_addr *a1, const struct spdk_pci_addr *a2)
{
	if (a1->domain > a2->domain) {
		return 1;
	} else if (a1->domain < a2->domain) {
		return -1;
	} else if (a1->bus > a2->bus) {
		return 1;
	} else if (a1->bus < a2->bus) {
		return -1;
	} else if (a1->dev > a2->dev) {
		return 1;
	} else if (a1->dev < a2->dev) {
		return -1;
	} else if (a1->func > a2->func) {
		return 1;
	} else if (a1->func < a2->func) {
		return -1;
	}

	return 0;
}

#ifdef __linux__
/* Claim exclusive cross-process ownership of the device using an fcntl
 * write lock on a per-BDF file in /var/tmp. The claiming PID is stored in
 * the file so a conflicting claimer can be reported. Returns 0 on success,
 * -EACCES if another process holds the claim, or -errno on I/O failure.
 */
int
spdk_pci_device_claim(struct spdk_pci_device *dev)
{
	int dev_fd;
	char dev_name[64];
	int pid;
	void *dev_map;
	struct flock pcidev_lock = {
		.l_type = F_WRLCK,
		.l_whence = SEEK_SET,
		.l_start = 0,
		.l_len = 0,
	};

	snprintf(dev_name, sizeof(dev_name), "/var/tmp/spdk_pci_lock_%04x:%02x:%02x.%x",
		 dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func);

	dev_fd = open(dev_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
	if (dev_fd == -1) {
		SPDK_ERRLOG("could not open %s\n", dev_name);
		return -errno;
	}

	if (ftruncate(dev_fd, sizeof(int)) != 0) {
		SPDK_ERRLOG("could not truncate %s\n", dev_name);
		close(dev_fd);
		return -errno;
	}

	dev_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
		       MAP_SHARED, dev_fd, 0);
	if (dev_map == MAP_FAILED) {
		SPDK_ERRLOG("could not mmap dev %s (%d)\n", dev_name, errno);
		close(dev_fd);
		return -errno;
	}

	if (fcntl(dev_fd, F_SETLK, &pcidev_lock) != 0) {
		/* current holder wrote its PID into the mapped file */
		pid = *(int *)dev_map;
		SPDK_ERRLOG("Cannot create lock on device %s, probably"
			    " process %d has claimed it\n", dev_name, pid);
		munmap(dev_map, sizeof(int));
		close(dev_fd);
		/* F_SETLK returns unspecified errnos, normalize them */
		return -EACCES;
	}

	*(int *)dev_map = (int)getpid();
	munmap(dev_map, sizeof(int));
	dev->internal.claim_fd = dev_fd;
	/* Keep dev_fd open to maintain the lock. */
	return 0;
}

/* Release the claim taken by spdk_pci_device_claim(): closing the fd drops
 * the fcntl lock; the lock file is then removed.
 */
void
spdk_pci_device_unclaim(struct spdk_pci_device *dev)
{
	char dev_name[64];

	snprintf(dev_name, sizeof(dev_name), "/var/tmp/spdk_pci_lock_%04x:%02x:%02x.%x",
		 dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func);

	close(dev->internal.claim_fd);
	dev->internal.claim_fd = -1;
	unlink(dev_name);
}
#else /* !__linux__ */
/* Claiming is not implemented on non-Linux platforms; always succeeds. */
int
spdk_pci_device_claim(struct spdk_pci_device *dev)
{
	/* TODO */
	return 0;
}

void
spdk_pci_device_unclaim(struct spdk_pci_device *dev)
{
	/* TODO */
}
#endif /* __linux__ */

/* Parse a BDF string into addr. Accepted forms (hex fields):
 *   domain:bus:dev.func, domain.bus.dev.func, domain:bus:dev (func=0),
 *   bus:dev.func, bus.dev.func (domain=0), bus:dev, bus.dev (domain=func=0).
 * Returns 0 on success, -EINVAL on NULL args, bad syntax, or out-of-range
 * bus/dev/func values.
 */
int
spdk_pci_addr_parse(struct spdk_pci_addr *addr, const char *bdf)
{
	unsigned domain, bus, dev, func;

	if (addr == NULL || bdf == NULL) {
		return -EINVAL;
	}

	if ((sscanf(bdf, "%x:%x:%x.%x", &domain, &bus, &dev, &func) == 4) ||
	    (sscanf(bdf, "%x.%x.%x.%x", &domain, &bus, &dev, &func) == 4)) {
		/* Matched a full address - all variables are initialized */
	} else if (sscanf(bdf, "%x:%x:%x", &domain, &bus, &dev) == 3) {
		func = 0;
	} else if ((sscanf(bdf, "%x:%x.%x", &bus, &dev, &func) == 3) ||
		   (sscanf(bdf, "%x.%x.%x", &bus, &dev, &func) == 3)) {
		domain = 0;
	} else if ((sscanf(bdf, "%x:%x", &bus, &dev) == 2) ||
		   (sscanf(bdf, "%x.%x", &bus, &dev) == 2)) {
		domain = 0;
		func = 0;
	} else {
		return -EINVAL;
	}

	/* bus is 8 bits, device 5 bits, function 3 bits */
	if (bus > 0xFF || dev > 0x1F || func > 7) {
		return -EINVAL;
	}

	addr->domain = domain;
	addr->bus = bus;
	addr->dev = dev;
	addr->func = func;

	return 0;
}

/* Format addr as "dddd:bb:dd.f" into bdf. Returns 0 on success, -1 if the
 * buffer is too small or formatting fails.
 */
int
spdk_pci_addr_fmt(char *bdf, size_t sz, const struct spdk_pci_addr *addr)
{
	int rc;

	rc = snprintf(bdf, sz, "%04x:%02x:%02x.%x",
		      addr->domain, addr->bus,
		      addr->dev, addr->func);

	if (rc > 0 && (size_t)rc < sz) {
		return 0;
	}

	return -1;
}

/* Insert an externally-constructed device (non-DPDK provider) into the
 * tracked device list, invoking the driver's attach callback if enumeration
 * is in progress. Returns 0 on success or -ECANCELED if the callback fails.
 */
int
spdk_pci_hook_device(struct spdk_pci_driver *drv, struct spdk_pci_device *dev)
{
	int rc;

	/* the caller must have populated all device ops */
	assert(dev->map_bar != NULL);
	assert(dev->unmap_bar != NULL);
	assert(dev->cfg_read != NULL);
	assert(dev->cfg_write != NULL);
	dev->internal.driver = drv;

	if (drv->cb_fn != NULL) {
		rc = drv->cb_fn(drv->cb_arg, dev);
		if (rc != 0) {
			return -ECANCELED;
		}

		dev->internal.attached = true;
	}

	TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq);

	return 0;
}

/* Remove a device previously added with spdk_pci_hook_device(). The device
 * must already be detached.
 */
void
spdk_pci_unhook_device(struct spdk_pci_device *dev)
{
	assert(!dev->internal.attached);
	TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq);
}

/* Register an alternative device provider (e.g. vfio-user transports). */
void
spdk_pci_register_device_provider(struct spdk_pci_device_provider *provider)
{
	TAILQ_INSERT_TAIL(&g_pci_device_providers, provider, tailq);
}

/* Name of the provider that created this device ("pci" for rte-backed). */
const char *
spdk_pci_device_get_type(const struct spdk_pci_device *dev)
{
	return dev->type;
}

/* Explicitly allow a device in DPDK's devargs policy so a subsequent probe
 * can pick it up. Returns 0 on success, -ENOMEM or -EINVAL on failure.
 */
int
spdk_pci_device_allow(struct spdk_pci_addr *pci_addr)
{
	struct rte_devargs *da;
	char devargs_str[128];

	da = calloc(1, sizeof(*da));
	if (da == NULL) {
		SPDK_ERRLOG("could not allocate rte_devargs\n");
		return -ENOMEM;
	}

	snprintf(devargs_str, sizeof(devargs_str), "pci:%04x:%02x:%02x.%x",
		 pci_addr->domain, pci_addr->bus, pci_addr->dev, pci_addr->func);
	if (rte_devargs_parse(da, devargs_str) != 0) {
		SPDK_ERRLOG("rte_devargs_parse() failed on '%s'\n", devargs_str);
		free(da);
		return -EINVAL;
	}
	da->policy = RTE_DEV_ALLOWED;
	/* Note: if a devargs already exists for this device address, it just gets
	 * overridden. So we do not need to check if the devargs already exists.
	 * DPDK will take care of memory management for the devargs structure after
	 * it has been inserted, so there's nothing SPDK needs to track.
	 */
	if (rte_devargs_insert(&da) != 0) {
		SPDK_ERRLOG("rte_devargs_insert() failed on '%s'\n", devargs_str);
		free(da);
		return -EINVAL;
	}

	return 0;
}