/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "env_internal.h"

#include <rte_alarm.h>
#include <rte_devargs.h>
#include "spdk/env.h"
#include "spdk/log.h"
#include "spdk/string.h"

#define SYSFS_PCI_DRIVERS	"/sys/bus/pci/drivers"

/* Compatibility for versions < 20.11 */
#if RTE_VERSION < RTE_VERSION_NUM(20, 11, 0, 0)
#define RTE_DEV_ALLOWED RTE_DEV_WHITELISTED
#define RTE_DEV_BLOCKED RTE_DEV_BLACKLISTED
#define RTE_BUS_SCAN_ALLOWLIST RTE_BUS_SCAN_WHITELIST
#endif
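
/* PCIe extended capabilities start right after the 256-byte legacy config
 * space, and 0x03 is the Device Serial Number extended capability ID. Both
 * values are used by spdk_pci_device_get_serial_number() below.
 */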
#define PCI_CFG_SIZE		256
#define PCI_EXT_CAP_ID_SN	0x03

/* DPDK 18.11+ hotplug isn't robust. Multiple apps starting at the same time
 * might cause the internal IPC to misbehave. Just retry in such a case.
 */
#define DPDK_HOTPLUG_RETRY_COUNT 4

/* These globals are accessed both from SPDK threads and from the DPDK
 * alarm/interrupt thread, hence the mutex.
 */
static pthread_mutex_t g_pci_mutex = PTHREAD_MUTEX_INITIALIZER;
static TAILQ_HEAD(, spdk_pci_device) g_pci_devices = TAILQ_HEAD_INITIALIZER(g_pci_devices);
/* devices hotplugged on a dpdk thread */
static TAILQ_HEAD(, spdk_pci_device) g_pci_hotplugged_devices =
	TAILQ_HEAD_INITIALIZER(g_pci_hotplugged_devices);
static TAILQ_HEAD(, spdk_pci_driver) g_pci_drivers = TAILQ_HEAD_INITIALIZER(g_pci_drivers);

struct env_devargs {
	struct rte_bus	*bus;
	char		name[128];
	uint64_t	allowed_at;
	TAILQ_ENTRY(env_devargs) link;
};
static TAILQ_HEAD(, env_devargs) g_env_devargs = TAILQ_HEAD_INITIALIZER(g_env_devargs);

static struct env_devargs *
find_env_devargs(struct rte_bus *bus, const char *name)
{
	struct env_devargs *da;

	TAILQ_FOREACH(da, &g_env_devargs, link) {
		if (bus == da->bus && !strcmp(name, da->name)) {
			return da;
		}
	}

	return NULL;
}

static int
map_bar_rte(struct spdk_pci_device *device, uint32_t bar,
	    void **mapped_addr, uint64_t *phys_addr, uint64_t *size)
{
	struct rte_pci_device *dev = device->dev_handle;

	*mapped_addr = dev->mem_resource[bar].addr;
	*phys_addr = (uint64_t)dev->mem_resource[bar].phys_addr;
	*size = (uint64_t)dev->mem_resource[bar].len;

	return 0;
}

static int
unmap_bar_rte(struct spdk_pci_device *device, uint32_t bar, void *addr)
{
	return 0;
}

static int
cfg_read_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
{
	int rc;

	rc = rte_pci_read_config(dev->dev_handle, value, len, offset);

	return (rc > 0 && (uint32_t) rc == len) ? 0 : -1;
}

static int
cfg_write_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
{
	int rc;

	rc = rte_pci_write_config(dev->dev_handle, value, len, offset);

#ifdef __FreeBSD__
	/* DPDK returns 0 on success and -1 on failure */
	return rc;
#endif
	return (rc > 0 && (uint32_t) rc == len) ? 0 : -1;
}

static void
remove_rte_dev(struct rte_pci_device *rte_dev)
{
	char bdf[32];
	int i = 0, rc;

	snprintf(bdf, sizeof(bdf), "%s", rte_dev->device.name);
	do {
		rc = rte_eal_hotplug_remove("pci", bdf);
	} while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT);
}

static void
detach_rte_cb(void *_dev)
{
	remove_rte_dev(_dev);
}
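
/* Detach a physical (rte) device. In a secondary process, just issue the DPDK
 * hot-remove directly. In the primary process, mark the device as pending
 * removal, schedule the actual rte_eal_hotplug_remove() on the DPDK
 * alarm/interrupt thread, wait up to 2 seconds for the removal to complete,
 * and then cancel (or wait out) the alarm.
 */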
static void
detach_rte(struct spdk_pci_device *dev)
{
	struct rte_pci_device *rte_dev = dev->dev_handle;
	int i;
	bool removed;

	if (!spdk_process_is_primary()) {
		remove_rte_dev(rte_dev);
		return;
	}

	pthread_mutex_lock(&g_pci_mutex);
	dev->internal.attached = false;
	/* prevent the hotremove notification from removing this device */
	dev->internal.pending_removal = true;
	pthread_mutex_unlock(&g_pci_mutex);

	rte_eal_alarm_set(1, detach_rte_cb, rte_dev);

	/* wait up to 2s for the cb to execute */
	for (i = 2000; i > 0; i--) {
		spdk_delay_us(1000);
		pthread_mutex_lock(&g_pci_mutex);
		removed = dev->internal.removed;
		pthread_mutex_unlock(&g_pci_mutex);

		if (removed) {
			break;
		}
	}

	/* besides checking the removed flag, we also need to wait
	 * for the dpdk detach function to unwind, as it's doing some
	 * operations even after calling our detach callback. Simply
	 * cancel the alarm - if it started executing already, this
	 * call will block and wait for it to finish.
	 */
	rte_eal_alarm_cancel(detach_rte_cb, rte_dev);

	/* the device could have been finally removed, so just check
	 * it again.
	 */
	pthread_mutex_lock(&g_pci_mutex);
	removed = dev->internal.removed;
	pthread_mutex_unlock(&g_pci_mutex);
	if (!removed) {
		SPDK_ERRLOG("Timeout waiting for DPDK to remove PCI device %s.\n",
			    rte_dev->name);
		/* If we reach this state, then the device couldn't be removed and most likely
		 * a subsequent hot add of a device in the same BDF will fail.
		 */
	}
}

void
spdk_pci_driver_register(const char *name, struct spdk_pci_id *id_table, uint32_t flags)
{
	struct spdk_pci_driver *driver;

	driver = calloc(1, sizeof(*driver));
	if (!driver) {
		/* we can't do any better than bailing atm */
		return;
	}

	driver->name = name;
	driver->id_table = id_table;
	driver->drv_flags = flags;
	TAILQ_INSERT_TAIL(&g_pci_drivers, driver, tailq);
}
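
/* Illustrative sketch of a driver registration, assuming the wildcard macros
 * from spdk/pci_ids.h and a made-up 0x1234/0x5678 vendor/device pair. The
 * id_table must be zero-terminated, because register_rte_driver() counts
 * entries until it finds vendor_id == 0. Drivers are normally registered from
 * a constructor (e.g. via the SPDK_PCI_DRIVER_REGISTER() macro) so that they
 * already exist when pci_env_init() walks g_pci_drivers.
 *
 *	static struct spdk_pci_id my_id_table[] = {
 *		{ .class_id = SPDK_PCI_CLASS_ANY_ID, .vendor_id = 0x1234, .device_id = 0x5678,
 *		  .subvendor_id = SPDK_PCI_ANY_ID, .subdevice_id = SPDK_PCI_ANY_ID },
 *		{ .vendor_id = 0 }
 *	};
 *
 *	spdk_pci_driver_register("my_driver", my_id_table, SPDK_PCI_DRIVER_NEED_MAPPING);
 *
 * The registered name can later be looked up with spdk_pci_get_driver("my_driver").
 */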

struct spdk_pci_driver *
spdk_pci_nvme_get_driver(void)
{
	return spdk_pci_get_driver("nvme");
}

struct spdk_pci_driver *
spdk_pci_get_driver(const char *name)
{
	struct spdk_pci_driver *driver;

	TAILQ_FOREACH(driver, &g_pci_drivers, tailq) {
		if (strcmp(driver->name, name) == 0) {
			return driver;
		}
	}

	return NULL;
}

static void
pci_device_rte_dev_event(const char *device_name,
			 enum rte_dev_event_type event,
			 void *cb_arg)
{
	struct spdk_pci_device *dev;
	bool can_detach = false;

	switch (event) {
	default:
	case RTE_DEV_EVENT_ADD:
		/* Nothing to do here yet. */
		break;
	case RTE_DEV_EVENT_REMOVE:
		pthread_mutex_lock(&g_pci_mutex);
		TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
			struct rte_pci_device *rte_dev = dev->dev_handle;

			if (strcmp(rte_dev->name, device_name) == 0 &&
			    !dev->internal.pending_removal) {
				can_detach = !dev->internal.attached;
				/* prevent any further attaches */
				dev->internal.pending_removal = true;
				break;
			}
		}
		pthread_mutex_unlock(&g_pci_mutex);

		if (dev != NULL && can_detach) {
			/* If the device is not attached, we can remove it right away.
			 * Otherwise it will be removed at detach.
			 *
			 * Because this function is itself invoked from the EAL
			 * interrupt-handling context, and an interrupt callback must
			 * return before it can be unregistered, the device cannot be
			 * detached synchronously from here. Instead, finish this
			 * callback quickly and defer the removal to an EAL alarm.
			 * This is a workaround - once device detaching is handled
			 * inside the EAL itself, the deferred removal can be dropped.
			 */
			rte_eal_alarm_set(1, detach_rte_cb, dev->dev_handle);
		}
		break;
	}
}
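
/* Reconcile the global device list: drop devices that DPDK has already
 * removed (notifying the vtophys code via vtophys_pci_device_removed()) and
 * move devices hotplugged on a DPDK thread from g_pci_hotplugged_devices
 * into g_pci_devices (notifying vtophys as well).
 */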
static void
cleanup_pci_devices(void)
{
	struct spdk_pci_device *dev, *tmp;

	pthread_mutex_lock(&g_pci_mutex);
	/* cleanup removed devices */
	TAILQ_FOREACH_SAFE(dev, &g_pci_devices, internal.tailq, tmp) {
		if (!dev->internal.removed) {
			continue;
		}

		vtophys_pci_device_removed(dev->dev_handle);
		TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq);
		free(dev);
	}

	/* add newly-attached devices */
	TAILQ_FOREACH_SAFE(dev, &g_pci_hotplugged_devices, internal.tailq, tmp) {
		TAILQ_REMOVE(&g_pci_hotplugged_devices, dev, internal.tailq);
		TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq);
		vtophys_pci_device_added(dev->dev_handle);
	}
	pthread_mutex_unlock(&g_pci_mutex);
}

static int scan_pci_bus(bool delay_init);

/* translate spdk_pci_driver to an rte_pci_driver and register it with DPDK */
static int
register_rte_driver(struct spdk_pci_driver *driver)
{
	unsigned pci_id_count = 0;
	struct rte_pci_id *rte_id_table;
	char *rte_name;
	size_t rte_name_len;
	uint32_t rte_flags;

	assert(driver->id_table);
	while (driver->id_table[pci_id_count].vendor_id) {
		pci_id_count++;
	}
	assert(pci_id_count > 0);

	rte_id_table = calloc(pci_id_count + 1, sizeof(*rte_id_table));
	if (!rte_id_table) {
		return -ENOMEM;
	}

	while (pci_id_count > 0) {
		struct rte_pci_id *rte_id = &rte_id_table[pci_id_count - 1];
		const struct spdk_pci_id *spdk_id = &driver->id_table[pci_id_count - 1];

		rte_id->class_id = spdk_id->class_id;
		rte_id->vendor_id = spdk_id->vendor_id;
		rte_id->device_id = spdk_id->device_id;
		rte_id->subsystem_vendor_id = spdk_id->subvendor_id;
		rte_id->subsystem_device_id = spdk_id->subdevice_id;
		pci_id_count--;
	}

	assert(driver->name);
	rte_name_len = strlen(driver->name) + strlen("spdk_") + 1;
	rte_name = calloc(rte_name_len, 1);
	if (!rte_name) {
		free(rte_id_table);
		return -ENOMEM;
	}

	snprintf(rte_name, rte_name_len, "spdk_%s", driver->name);
	driver->driver.driver.name = rte_name;
	driver->driver.id_table = rte_id_table;

	rte_flags = 0;
	if (driver->drv_flags & SPDK_PCI_DRIVER_NEED_MAPPING) {
		rte_flags |= RTE_PCI_DRV_NEED_MAPPING;
	}
	if (driver->drv_flags & SPDK_PCI_DRIVER_WC_ACTIVATE) {
		rte_flags |= RTE_PCI_DRV_WC_ACTIVATE;
	}
	driver->driver.drv_flags = rte_flags;

	driver->driver.probe = pci_device_init;
	driver->driver.remove = pci_device_fini;

	rte_pci_register(&driver->driver);
	return 0;
}

static inline void
_pci_env_init(void)
{
	/* We assume devices were present on the bus for more than 2 seconds
	 * before initializing SPDK and there's no need to wait any longer.
	 * We scan the bus, but we don't block any devices.
	 */
	scan_pci_bus(false);

	/* Register a single hotremove callback for all devices. */
	if (spdk_process_is_primary()) {
		rte_dev_event_callback_register(NULL, pci_device_rte_dev_event, NULL);
	}
}

void
pci_env_init(void)
{
	struct spdk_pci_driver *driver;

	TAILQ_FOREACH(driver, &g_pci_drivers, tailq) {
		register_rte_driver(driver);
	}

	_pci_env_init();
}

void
pci_env_reinit(void)
{
	/* There is no need to register pci drivers again, since they were
	 * already pre-registered in pci_env_init.
	 */

	_pci_env_init();
}

void
pci_env_fini(void)
{
	struct spdk_pci_device *dev;
	char bdf[32];

	cleanup_pci_devices();
	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
		if (dev->internal.attached) {
			spdk_pci_addr_fmt(bdf, sizeof(bdf), &dev->addr);
			SPDK_ERRLOG("Device %s is still attached at shutdown!\n", bdf);
		}
	}

	if (spdk_process_is_primary()) {
		rte_dev_event_callback_unregister(NULL, pci_device_rte_dev_event, NULL);
	}
}

int
pci_device_init(struct rte_pci_driver *_drv,
		struct rte_pci_device *_dev)
{
	struct spdk_pci_driver *driver = (struct spdk_pci_driver *)_drv;
	struct spdk_pci_device *dev;
	int rc;

	dev = calloc(1, sizeof(*dev));
	if (dev == NULL) {
		return -1;
	}

	dev->dev_handle = _dev;

	dev->addr.domain = _dev->addr.domain;
	dev->addr.bus = _dev->addr.bus;
	dev->addr.dev = _dev->addr.devid;
	dev->addr.func = _dev->addr.function;
	dev->id.class_id = _dev->id.class_id;
	dev->id.vendor_id = _dev->id.vendor_id;
	dev->id.device_id = _dev->id.device_id;
	dev->id.subvendor_id = _dev->id.subsystem_vendor_id;
	dev->id.subdevice_id = _dev->id.subsystem_device_id;
	dev->socket_id = _dev->device.numa_node;
	dev->type = "pci";

	dev->map_bar = map_bar_rte;
	dev->unmap_bar = unmap_bar_rte;
	dev->cfg_read = cfg_read_rte;
	dev->cfg_write = cfg_write_rte;

	dev->internal.driver = driver;
	dev->internal.claim_fd = -1;

	if (driver->cb_fn != NULL) {
		rc = driver->cb_fn(driver->cb_arg, dev);
		if (rc != 0) {
			free(dev);
			return rc;
		}
		dev->internal.attached = true;
	}

	pthread_mutex_lock(&g_pci_mutex);
	TAILQ_INSERT_TAIL(&g_pci_hotplugged_devices, dev, internal.tailq);
	pthread_mutex_unlock(&g_pci_mutex);
	return 0;
}
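
/* Bookkeeping behind scan_pci_bus(): for every rte_devargs SPDK has touched
 * we remember an "allowed_at" tick value. A device first seen during a
 * delayed scan is blocked and given allowed_at roughly two seconds in the
 * future; later scans flip it back to allowed once that time has passed, so
 * freshly hot-inserted devices are not probed immediately.
 */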
static void
set_allowed_at(struct rte_devargs *rte_da, uint64_t tsc)
{
	struct env_devargs *env_da;

	env_da = find_env_devargs(rte_da->bus, rte_da->name);
	if (env_da == NULL) {
		env_da = calloc(1, sizeof(*env_da));
		if (env_da == NULL) {
			SPDK_ERRLOG("could not set_allowed_at for device %s\n", rte_da->name);
			return;
		}
		env_da->bus = rte_da->bus;
		spdk_strcpy_pad(env_da->name, rte_da->name, sizeof(env_da->name), 0);
		TAILQ_INSERT_TAIL(&g_env_devargs, env_da, link);
	}

	env_da->allowed_at = tsc;
}

static uint64_t
get_allowed_at(struct rte_devargs *rte_da)
{
	struct env_devargs *env_da;

	env_da = find_env_devargs(rte_da->bus, rte_da->name);
	if (env_da) {
		return env_da->allowed_at;
	} else {
		return 0;
	}
}

int
pci_device_fini(struct rte_pci_device *_dev)
{
	struct spdk_pci_device *dev;

	pthread_mutex_lock(&g_pci_mutex);
	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
		if (dev->dev_handle == _dev) {
			break;
		}
	}

	if (dev == NULL || dev->internal.attached) {
		/* The device might still be referenced somewhere in SPDK. */
		pthread_mutex_unlock(&g_pci_mutex);
		return -EBUSY;
	}

	/* remove our allowed_at option */
	if (_dev->device.devargs) {
		set_allowed_at(_dev->device.devargs, 0);
	}

	/* The removed flag may already be set when there is a race between the
	 * remove notification for this process and another process that is also
	 * detaching from this same device (for example, when using the nvme
	 * driver in multi-process mode). So do not assert here. See #2456 for
	 * additional details.
	 */
	dev->internal.removed = true;
	pthread_mutex_unlock(&g_pci_mutex);
	return 0;
}

void
spdk_pci_device_detach(struct spdk_pci_device *dev)
{
	assert(dev->internal.attached);

	if (dev->internal.claim_fd >= 0) {
		spdk_pci_device_unclaim(dev);
	}

	dev->internal.attached = false;
	if (strcmp(dev->type, "pci") == 0) {
		/* If it's a physical device we need to deal with DPDK on
		 * a different process and we can't just unset one flag
		 * here. We also want to stop using any device resources
		 * so that the device isn't "in use" by the userspace driver
		 * once we detach it. This would allow attaching the device
		 * to a different process, or to a kernel driver like nvme.
		 */
		detach_rte(dev);
	}

	cleanup_pci_devices();
}

static int
scan_pci_bus(bool delay_init)
{
	struct spdk_pci_driver *driver;
	struct rte_pci_device *rte_dev;
	uint64_t now;

	rte_bus_scan();
	now = spdk_get_ticks();

	driver = TAILQ_FIRST(&g_pci_drivers);
	if (!driver) {
		return 0;
	}

	TAILQ_FOREACH(rte_dev, &driver->driver.bus->device_list, next) {
		struct rte_devargs *da;

		da = rte_dev->device.devargs;
		if (!da) {
			char devargs_str[128];

			/* the device was never blocked or allowed */
			da = calloc(1, sizeof(*da));
			if (!da) {
				return -1;
			}

			snprintf(devargs_str, sizeof(devargs_str), "pci:%s", rte_dev->device.name);
			if (rte_devargs_parse(da, devargs_str) != 0) {
				free(da);
				return -1;
			}

			rte_devargs_insert(&da);
			rte_dev->device.devargs = da;
		}

		if (get_allowed_at(da)) {
			uint64_t allowed_at = get_allowed_at(da);

			/* this device was seen by spdk before... */
			if (da->policy == RTE_DEV_BLOCKED && allowed_at <= now) {
				da->policy = RTE_DEV_ALLOWED;
			}
		} else if ((driver->driver.bus->bus.conf.scan_mode == RTE_BUS_SCAN_ALLOWLIST &&
			    da->policy == RTE_DEV_ALLOWED) || da->policy != RTE_DEV_BLOCKED) {
			/* override the policy only if not permanently blocked */

			if (delay_init) {
				da->policy = RTE_DEV_BLOCKED;
				set_allowed_at(da, now + 2 * spdk_get_ticks_hz());
			} else {
				da->policy = RTE_DEV_ALLOWED;
				set_allowed_at(da, now);
			}
		}
	}

	return 0;
}
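
/* Illustrative usage sketch (the callback and context names are made up):
 * attach a single device at a known BDF to the NVMe driver. The enum
 * callback should return 0 to take ownership of the device; any other
 * return value makes the attach fail.
 *
 *	struct spdk_pci_addr addr;
 *
 *	if (spdk_pci_addr_parse(&addr, "0000:01:00.0") == 0) {
 *		spdk_pci_device_attach(spdk_pci_nvme_get_driver(), my_enum_cb, my_ctx, &addr);
 *	}
 */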
int
spdk_pci_device_attach(struct spdk_pci_driver *driver,
		       spdk_pci_enum_cb enum_cb,
		       void *enum_ctx, struct spdk_pci_addr *pci_address)
{
	struct spdk_pci_device *dev;
	struct rte_pci_device *rte_dev;
	struct rte_devargs *da;
	int rc;
	char bdf[32];

	spdk_pci_addr_fmt(bdf, sizeof(bdf), pci_address);

	cleanup_pci_devices();

	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
		if (spdk_pci_addr_compare(&dev->addr, pci_address) == 0) {
			break;
		}
	}

	if (dev != NULL && dev->internal.driver == driver) {
		pthread_mutex_lock(&g_pci_mutex);
		if (dev->internal.attached || dev->internal.pending_removal) {
			pthread_mutex_unlock(&g_pci_mutex);
			return -1;
		}

		rc = enum_cb(enum_ctx, dev);
		if (rc == 0) {
			dev->internal.attached = true;
		}
		pthread_mutex_unlock(&g_pci_mutex);
		return rc;
	}

	driver->cb_fn = enum_cb;
	driver->cb_arg = enum_ctx;

	int i = 0;

	do {
		rc = rte_eal_hotplug_add("pci", bdf, "");
	} while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT);

	if (i > 1 && rc == -EEXIST) {
		/* Even though the previous request timed out, the device
		 * was attached successfully.
		 */
		rc = 0;
	}

	driver->cb_arg = NULL;
	driver->cb_fn = NULL;

	cleanup_pci_devices();

	if (rc != 0) {
		return -1;
	}

	/* explicit attach ignores the allowlist, so if we blocked this
	 * device before let's enable it now - just for clarity.
	 */
	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
		if (spdk_pci_addr_compare(&dev->addr, pci_address) == 0) {
			break;
		}
	}
	assert(dev != NULL);

	rte_dev = dev->dev_handle;
	da = rte_dev->device.devargs;
	if (da && get_allowed_at(da)) {
		set_allowed_at(da, spdk_get_ticks());
		da->policy = RTE_DEV_ALLOWED;
	}

	return 0;
}
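
/* Illustrative sketch of an enumeration callback (names are made up). For
 * devices already known to SPDK, returning 0 from the callback marks the
 * device as attached, a positive value skips it, and a negative value aborts
 * the enumeration.
 *
 *	static int
 *	my_enum_cb(void *ctx, struct spdk_pci_device *dev)
 *	{
 *		if (!device_is_interesting(ctx, dev)) {
 *			return 1;
 *		}
 *		return setup_device(ctx, dev);
 *	}
 *
 *	spdk_pci_enumerate(spdk_pci_nvme_get_driver(), my_enum_cb, ctx);
 */
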
/* Note: You can call spdk_pci_enumerate from more than one thread
 * simultaneously safely, but you cannot call spdk_pci_enumerate
 * and rte_eal_pci_probe simultaneously.
 */
int
spdk_pci_enumerate(struct spdk_pci_driver *driver,
		   spdk_pci_enum_cb enum_cb,
		   void *enum_ctx)
{
	struct spdk_pci_device *dev;
	int rc;

	cleanup_pci_devices();

	pthread_mutex_lock(&g_pci_mutex);
	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
		if (dev->internal.attached ||
		    dev->internal.driver != driver ||
		    dev->internal.pending_removal) {
			continue;
		}

		rc = enum_cb(enum_ctx, dev);
		if (rc == 0) {
			dev->internal.attached = true;
		} else if (rc < 0) {
			pthread_mutex_unlock(&g_pci_mutex);
			return -1;
		}
	}
	pthread_mutex_unlock(&g_pci_mutex);

	if (scan_pci_bus(true) != 0) {
		return -1;
	}

	driver->cb_fn = enum_cb;
	driver->cb_arg = enum_ctx;

	if (rte_bus_probe() != 0) {
		driver->cb_arg = NULL;
		driver->cb_fn = NULL;
		return -1;
	}

	driver->cb_arg = NULL;
	driver->cb_fn = NULL;

	cleanup_pci_devices();
	return 0;
}

void
spdk_pci_for_each_device(void *ctx, void (*fn)(void *ctx, struct spdk_pci_device *dev))
{
	struct spdk_pci_device *dev;

	pthread_mutex_lock(&g_pci_mutex);
	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
		fn(ctx, dev);
	}
	pthread_mutex_unlock(&g_pci_mutex);
}

int
spdk_pci_device_map_bar(struct spdk_pci_device *dev, uint32_t bar,
			void **mapped_addr, uint64_t *phys_addr, uint64_t *size)
{
	return dev->map_bar(dev, bar, mapped_addr, phys_addr, size);
}

int
spdk_pci_device_unmap_bar(struct spdk_pci_device *dev, uint32_t bar, void *addr)
{
	return dev->unmap_bar(dev, bar, addr);
}

int
spdk_pci_device_enable_interrupt(struct spdk_pci_device *dev)
{
	struct rte_pci_device *rte_dev = dev->dev_handle;
#if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0)
	return rte_intr_enable(&rte_dev->intr_handle);
#else
	return rte_intr_enable(rte_dev->intr_handle);
#endif
}

int
spdk_pci_device_disable_interrupt(struct spdk_pci_device *dev)
{
	struct rte_pci_device *rte_dev = dev->dev_handle;
#if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0)
	return rte_intr_disable(&rte_dev->intr_handle);
#else
	return rte_intr_disable(rte_dev->intr_handle);
#endif
}

int
spdk_pci_device_get_interrupt_efd(struct spdk_pci_device *dev)
{
	struct rte_pci_device *rte_dev = dev->dev_handle;
#if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0)
	return rte_dev->intr_handle.fd;
#else
	return rte_intr_fd_get(rte_dev->intr_handle);
#endif
}

uint32_t
spdk_pci_device_get_domain(struct spdk_pci_device *dev)
{
	return dev->addr.domain;
}

uint8_t
spdk_pci_device_get_bus(struct spdk_pci_device *dev)
{
	return dev->addr.bus;
}

uint8_t
spdk_pci_device_get_dev(struct spdk_pci_device *dev)
{
	return dev->addr.dev;
}

uint8_t
spdk_pci_device_get_func(struct spdk_pci_device *dev)
{
	return dev->addr.func;
}

uint16_t
spdk_pci_device_get_vendor_id(struct spdk_pci_device *dev)
{
	return dev->id.vendor_id;
}

uint16_t
spdk_pci_device_get_device_id(struct spdk_pci_device *dev)
{
	return dev->id.device_id;
}

uint16_t
spdk_pci_device_get_subvendor_id(struct spdk_pci_device *dev)
{
	return dev->id.subvendor_id;
}

uint16_t
spdk_pci_device_get_subdevice_id(struct spdk_pci_device *dev)
{
	return dev->id.subdevice_id;
}

struct spdk_pci_id
spdk_pci_device_get_id(struct spdk_pci_device *dev)
{
	return dev->id;
}

int
spdk_pci_device_get_socket_id(struct spdk_pci_device *dev)
{
	return dev->socket_id;
}

int
spdk_pci_device_cfg_read(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
{
	return dev->cfg_read(dev, value, len, offset);
}

int
spdk_pci_device_cfg_write(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
{
	return dev->cfg_write(dev, value, len, offset);
}

int
spdk_pci_device_cfg_read8(struct spdk_pci_device *dev, uint8_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, 1, offset);
}

int
spdk_pci_device_cfg_write8(struct spdk_pci_device *dev, uint8_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, 1, offset);
}

int
spdk_pci_device_cfg_read16(struct spdk_pci_device *dev, uint16_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, 2, offset);
}

int
spdk_pci_device_cfg_write16(struct spdk_pci_device *dev, uint16_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, 2, offset);
}

int
spdk_pci_device_cfg_read32(struct spdk_pci_device *dev, uint32_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, 4, offset);
}

int
spdk_pci_device_cfg_write32(struct spdk_pci_device *dev, uint32_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, 4, offset);
}
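
/* Walk the PCIe extended capability list looking for the Device Serial
 * Number capability and format the 64-bit serial number as 16 hex digits.
 * The output buffer must therefore hold at least 17 bytes, including the
 * terminating NUL.
 */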
int
spdk_pci_device_get_serial_number(struct spdk_pci_device *dev, char *sn, size_t len)
{
	int err;
	uint32_t pos, header = 0;
	uint32_t i, buf[2];

	if (len < 17) {
		return -1;
	}

	err = spdk_pci_device_cfg_read32(dev, &header, PCI_CFG_SIZE);
	if (err || !header) {
		return -1;
	}

	pos = PCI_CFG_SIZE;
	while (1) {
		if ((header & 0x0000ffff) == PCI_EXT_CAP_ID_SN) {
			if (pos) {
				/* skip the header */
				pos += 4;
				for (i = 0; i < 2; i++) {
					err = spdk_pci_device_cfg_read32(dev, &buf[i], pos + 4 * i);
					if (err) {
						return -1;
					}
				}
				snprintf(sn, len, "%08x%08x", buf[1], buf[0]);
				return 0;
			}
		}
		pos = (header >> 20) & 0xffc;
		/* 0 if no other items exist */
		if (pos < PCI_CFG_SIZE) {
			return -1;
		}
		err = spdk_pci_device_cfg_read32(dev, &header, pos);
		if (err) {
			return -1;
		}
	}
	return -1;
}

struct spdk_pci_addr
spdk_pci_device_get_addr(struct spdk_pci_device *dev)
{
	return dev->addr;
}

bool
spdk_pci_device_is_removed(struct spdk_pci_device *dev)
{
	return dev->internal.pending_removal;
}

int
spdk_pci_addr_compare(const struct spdk_pci_addr *a1, const struct spdk_pci_addr *a2)
{
	if (a1->domain > a2->domain) {
		return 1;
	} else if (a1->domain < a2->domain) {
		return -1;
	} else if (a1->bus > a2->bus) {
		return 1;
	} else if (a1->bus < a2->bus) {
		return -1;
	} else if (a1->dev > a2->dev) {
		return 1;
	} else if (a1->dev < a2->dev) {
		return -1;
	} else if (a1->func > a2->func) {
		return 1;
	} else if (a1->func < a2->func) {
		return -1;
	}

	return 0;
}
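
/* Device claiming: on Linux, exclusive ownership of a device across SPDK
 * processes is established by taking a write lock (fcntl F_SETLK) on a
 * per-BDF lock file under /var/tmp and storing the owner's PID in it. On
 * other platforms claiming is currently a no-op.
 */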
#ifdef __linux__
int
spdk_pci_device_claim(struct spdk_pci_device *dev)
{
	int dev_fd;
	char dev_name[64];
	int pid;
	void *dev_map;
	struct flock pcidev_lock = {
		.l_type = F_WRLCK,
		.l_whence = SEEK_SET,
		.l_start = 0,
		.l_len = 0,
	};

	snprintf(dev_name, sizeof(dev_name), "/var/tmp/spdk_pci_lock_%04x:%02x:%02x.%x",
		 dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func);

	dev_fd = open(dev_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
	if (dev_fd == -1) {
		SPDK_ERRLOG("could not open %s\n", dev_name);
		return -errno;
	}

	if (ftruncate(dev_fd, sizeof(int)) != 0) {
		SPDK_ERRLOG("could not truncate %s\n", dev_name);
		close(dev_fd);
		return -errno;
	}

	dev_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
		       MAP_SHARED, dev_fd, 0);
	if (dev_map == MAP_FAILED) {
		SPDK_ERRLOG("could not mmap dev %s (%d)\n", dev_name, errno);
		close(dev_fd);
		return -errno;
	}

	if (fcntl(dev_fd, F_SETLK, &pcidev_lock) != 0) {
		pid = *(int *)dev_map;
		SPDK_ERRLOG("Cannot create lock on device %s, probably"
			    " process %d has claimed it\n", dev_name, pid);
		munmap(dev_map, sizeof(int));
		close(dev_fd);
		/* F_SETLK returns unspecified errnos, normalize them */
		return -EACCES;
	}

	*(int *)dev_map = (int)getpid();
	munmap(dev_map, sizeof(int));
	dev->internal.claim_fd = dev_fd;
	/* Keep dev_fd open to maintain the lock. */
	return 0;
}

void
spdk_pci_device_unclaim(struct spdk_pci_device *dev)
{
	char dev_name[64];

	snprintf(dev_name, sizeof(dev_name), "/var/tmp/spdk_pci_lock_%04x:%02x:%02x.%x",
		 dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func);

	close(dev->internal.claim_fd);
	dev->internal.claim_fd = -1;
	unlink(dev_name);
}
#else /* !__linux__ */
int
spdk_pci_device_claim(struct spdk_pci_device *dev)
{
	/* TODO */
	return 0;
}

void
spdk_pci_device_unclaim(struct spdk_pci_device *dev)
{
	/* TODO */
}
#endif /* __linux__ */
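
/* Parse a PCI address in any of the forms accepted below:
 * "domain:bus:dev.func", "domain.bus.dev.func", "domain:bus:dev",
 * "bus:dev.func", "bus.dev.func", "bus:dev" or "bus.dev"; omitted
 * fields default to 0.
 */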
int
spdk_pci_addr_parse(struct spdk_pci_addr *addr, const char *bdf)
{
	unsigned domain, bus, dev, func;

	if (addr == NULL || bdf == NULL) {
		return -EINVAL;
	}

	if ((sscanf(bdf, "%x:%x:%x.%x", &domain, &bus, &dev, &func) == 4) ||
	    (sscanf(bdf, "%x.%x.%x.%x", &domain, &bus, &dev, &func) == 4)) {
		/* Matched a full address - all variables are initialized */
	} else if (sscanf(bdf, "%x:%x:%x", &domain, &bus, &dev) == 3) {
		func = 0;
	} else if ((sscanf(bdf, "%x:%x.%x", &bus, &dev, &func) == 3) ||
		   (sscanf(bdf, "%x.%x.%x", &bus, &dev, &func) == 3)) {
		domain = 0;
	} else if ((sscanf(bdf, "%x:%x", &bus, &dev) == 2) ||
		   (sscanf(bdf, "%x.%x", &bus, &dev) == 2)) {
		domain = 0;
		func = 0;
	} else {
		return -EINVAL;
	}

	if (bus > 0xFF || dev > 0x1F || func > 7) {
		return -EINVAL;
	}

	addr->domain = domain;
	addr->bus = bus;
	addr->dev = dev;
	addr->func = func;

	return 0;
}

int
spdk_pci_addr_fmt(char *bdf, size_t sz, const struct spdk_pci_addr *addr)
{
	int rc;

	rc = snprintf(bdf, sz, "%04x:%02x:%02x.%x",
		      addr->domain, addr->bus,
		      addr->dev, addr->func);

	if (rc > 0 && (size_t)rc < sz) {
		return 0;
	}

	return -1;
}

void
spdk_pci_hook_device(struct spdk_pci_driver *drv, struct spdk_pci_device *dev)
{
	assert(dev->map_bar != NULL);
	assert(dev->unmap_bar != NULL);
	assert(dev->cfg_read != NULL);
	assert(dev->cfg_write != NULL);
	dev->internal.driver = drv;
	TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq);
}

void
spdk_pci_unhook_device(struct spdk_pci_device *dev)
{
	assert(!dev->internal.attached);
	TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq);
}

const char *
spdk_pci_device_get_type(const struct spdk_pci_device *dev)
{
	return dev->type;
}

int
spdk_pci_device_allow(struct spdk_pci_addr *pci_addr)
{
	struct rte_devargs *da;
	char devargs_str[128];

	da = calloc(1, sizeof(*da));
	if (da == NULL) {
		SPDK_ERRLOG("could not allocate rte_devargs\n");
		return -ENOMEM;
	}

	snprintf(devargs_str, sizeof(devargs_str), "pci:%04x:%02x:%02x.%x",
		 pci_addr->domain, pci_addr->bus, pci_addr->dev, pci_addr->func);
	if (rte_devargs_parse(da, devargs_str) != 0) {
		SPDK_ERRLOG("rte_devargs_parse() failed on '%s'\n", devargs_str);
		free(da);
		return -EINVAL;
	}
	da->policy = RTE_DEV_ALLOWED;
	/* Note: if a devargs already exists for this device address, it just gets
	 * overridden. So we do not need to check if the devargs already exists.
	 * DPDK will take care of memory management for the devargs structure after
	 * it has been inserted, so there's nothing SPDK needs to track.
	 */
	if (rte_devargs_insert(&da) != 0) {
		SPDK_ERRLOG("rte_devargs_insert() failed on '%s'\n", devargs_str);
		free(da);
		return -EINVAL;
	}

	return 0;
}