1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include "env_internal.h" 35 36 #include <rte_alarm.h> 37 #include <rte_devargs.h> 38 #include "spdk/env.h" 39 #include "spdk/log.h" 40 #include "spdk/string.h" 41 42 #define SYSFS_PCI_DRIVERS "/sys/bus/pci/drivers" 43 44 /* Compatibility for versions < 20.11 */ 45 #if RTE_VERSION < RTE_VERSION_NUM(20, 11, 0, 0) 46 #define RTE_DEV_ALLOWED RTE_DEV_WHITELISTED 47 #define RTE_DEV_BLOCKED RTE_DEV_BLACKLISTED 48 #define RTE_BUS_SCAN_ALLOWLIST RTE_BUS_SCAN_WHITELIST 49 #endif 50 51 #define PCI_CFG_SIZE 256 52 #define PCI_EXT_CAP_ID_SN 0x03 53 54 /* DPDK 18.11+ hotplug isn't robust. Multiple apps starting at the same time 55 * might cause the internal IPC to misbehave. Just retry in such case. 56 */ 57 #define DPDK_HOTPLUG_RETRY_COUNT 4 58 59 /* DPDK alarm/interrupt thread */ 60 static pthread_mutex_t g_pci_mutex = PTHREAD_MUTEX_INITIALIZER; 61 static TAILQ_HEAD(, spdk_pci_device) g_pci_devices = TAILQ_HEAD_INITIALIZER(g_pci_devices); 62 /* devices hotplugged on a dpdk thread */ 63 static TAILQ_HEAD(, spdk_pci_device) g_pci_hotplugged_devices = 64 TAILQ_HEAD_INITIALIZER(g_pci_hotplugged_devices); 65 static TAILQ_HEAD(, spdk_pci_driver) g_pci_drivers = TAILQ_HEAD_INITIALIZER(g_pci_drivers); 66 67 struct env_devargs { 68 struct rte_bus *bus; 69 char name[128]; 70 uint64_t allowed_at; 71 TAILQ_ENTRY(env_devargs) link; 72 }; 73 static TAILQ_HEAD(, env_devargs) g_env_devargs = TAILQ_HEAD_INITIALIZER(g_env_devargs); 74 75 static struct env_devargs * 76 find_env_devargs(struct rte_bus *bus, const char *name) 77 { 78 struct env_devargs *da; 79 80 TAILQ_FOREACH(da, &g_env_devargs, link) { 81 if (bus == da->bus && !strcmp(name, da->name)) { 82 return da; 83 } 84 } 85 86 return NULL; 87 } 88 89 static int 90 map_bar_rte(struct spdk_pci_device *device, uint32_t bar, 91 void **mapped_addr, uint64_t *phys_addr, uint64_t *size) 92 { 93 struct rte_pci_device *dev = device->dev_handle; 94 95 *mapped_addr = dev->mem_resource[bar].addr; 96 *phys_addr = (uint64_t)dev->mem_resource[bar].phys_addr; 97 *size = (uint64_t)dev->mem_resource[bar].len; 98 99 return 0; 100 } 101 102 static int 103 unmap_bar_rte(struct spdk_pci_device *device, uint32_t bar, void *addr) 104 { 105 return 0; 106 } 107 108 static int 109 cfg_read_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset) 110 { 111 int rc; 112 113 rc = rte_pci_read_config(dev->dev_handle, value, len, offset); 114 115 return (rc > 0 && (uint32_t) rc == len) ? 0 : -1; 116 } 117 118 static int 119 cfg_write_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset) 120 { 121 int rc; 122 123 rc = rte_pci_write_config(dev->dev_handle, value, len, offset); 124 125 #ifdef __FreeBSD__ 126 /* DPDK returns 0 on success and -1 on failure */ 127 return rc; 128 #endif 129 return (rc > 0 && (uint32_t) rc == len) ? 0 : -1; 130 } 131 132 static void 133 remove_rte_dev(struct rte_pci_device *rte_dev) 134 { 135 char bdf[32]; 136 int i = 0, rc; 137 138 snprintf(bdf, sizeof(bdf), "%s", rte_dev->device.name); 139 do { 140 rc = rte_eal_hotplug_remove("pci", bdf); 141 } while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT); 142 } 143 144 static void 145 detach_rte_cb(void *_dev) 146 { 147 remove_rte_dev(_dev); 148 } 149 150 static void 151 detach_rte(struct spdk_pci_device *dev) 152 { 153 struct rte_pci_device *rte_dev = dev->dev_handle; 154 int i; 155 bool removed; 156 157 if (!spdk_process_is_primary()) { 158 remove_rte_dev(rte_dev); 159 return; 160 } 161 162 pthread_mutex_lock(&g_pci_mutex); 163 dev->internal.attached = false; 164 /* prevent the hotremove notification from removing this device */ 165 dev->internal.pending_removal = true; 166 pthread_mutex_unlock(&g_pci_mutex); 167 168 rte_eal_alarm_set(1, detach_rte_cb, rte_dev); 169 170 /* wait up to 2s for the cb to execute */ 171 for (i = 2000; i > 0; i--) { 172 173 spdk_delay_us(1000); 174 pthread_mutex_lock(&g_pci_mutex); 175 removed = dev->internal.removed; 176 pthread_mutex_unlock(&g_pci_mutex); 177 178 if (removed) { 179 break; 180 } 181 } 182 183 /* besides checking the removed flag, we also need to wait 184 * for the dpdk detach function to unwind, as it's doing some 185 * operations even after calling our detach callback. Simply 186 * cancel the alarm - if it started executing already, this 187 * call will block and wait for it to finish. 188 */ 189 rte_eal_alarm_cancel(detach_rte_cb, rte_dev); 190 191 /* the device could have been finally removed, so just check 192 * it again. 193 */ 194 pthread_mutex_lock(&g_pci_mutex); 195 removed = dev->internal.removed; 196 pthread_mutex_unlock(&g_pci_mutex); 197 if (!removed) { 198 SPDK_ERRLOG("Timeout waiting for DPDK to remove PCI device %s.\n", 199 rte_dev->name); 200 /* If we reach this state, then the device couldn't be removed and most likely 201 a subsequent hot add of a device in the same BDF will fail */ 202 } 203 } 204 205 void 206 spdk_pci_driver_register(const char *name, struct spdk_pci_id *id_table, uint32_t flags) 207 { 208 struct spdk_pci_driver *driver; 209 210 driver = calloc(1, sizeof(*driver)); 211 if (!driver) { 212 /* we can't do any better than bailing atm */ 213 return; 214 } 215 216 driver->name = name; 217 driver->id_table = id_table; 218 driver->drv_flags = flags; 219 TAILQ_INSERT_TAIL(&g_pci_drivers, driver, tailq); 220 } 221 222 struct spdk_pci_driver * 223 spdk_pci_nvme_get_driver(void) 224 { 225 return spdk_pci_get_driver("nvme"); 226 } 227 228 struct spdk_pci_driver * 229 spdk_pci_get_driver(const char *name) 230 { 231 struct spdk_pci_driver *driver; 232 233 TAILQ_FOREACH(driver, &g_pci_drivers, tailq) { 234 if (strcmp(driver->name, name) == 0) { 235 return driver; 236 } 237 } 238 239 return NULL; 240 } 241 242 static void 243 pci_device_rte_dev_event(const char *device_name, 244 enum rte_dev_event_type event, 245 void *cb_arg) 246 { 247 struct spdk_pci_device *dev; 248 bool can_detach = false; 249 250 switch (event) { 251 default: 252 case RTE_DEV_EVENT_ADD: 253 /* Nothing to do here yet. */ 254 break; 255 case RTE_DEV_EVENT_REMOVE: 256 pthread_mutex_lock(&g_pci_mutex); 257 TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { 258 struct rte_pci_device *rte_dev = dev->dev_handle; 259 260 if (strcmp(rte_dev->name, device_name) == 0 && 261 !dev->internal.pending_removal) { 262 can_detach = !dev->internal.attached; 263 /* prevent any further attaches */ 264 dev->internal.pending_removal = true; 265 break; 266 } 267 } 268 pthread_mutex_unlock(&g_pci_mutex); 269 270 if (dev != NULL && can_detach) { 271 /* if device is not attached we can remove it right away. 272 * Otherwise it will be removed at detach. 273 * 274 * Because the user's callback is invoked in eal interrupt 275 * callback, the interrupt callback need to be finished before 276 * it can be unregistered when detaching device. So finish 277 * callback soon and use a deferred removal to detach device 278 * is need. It is a workaround, once the device detaching be 279 * moved into the eal in the future, the deferred removal could 280 * be deleted. 281 */ 282 rte_eal_alarm_set(1, detach_rte_cb, dev->dev_handle); 283 } 284 break; 285 } 286 } 287 288 static void 289 cleanup_pci_devices(void) 290 { 291 struct spdk_pci_device *dev, *tmp; 292 293 pthread_mutex_lock(&g_pci_mutex); 294 /* cleanup removed devices */ 295 TAILQ_FOREACH_SAFE(dev, &g_pci_devices, internal.tailq, tmp) { 296 if (!dev->internal.removed) { 297 continue; 298 } 299 300 vtophys_pci_device_removed(dev->dev_handle); 301 TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq); 302 free(dev); 303 } 304 305 /* add newly-attached devices */ 306 TAILQ_FOREACH_SAFE(dev, &g_pci_hotplugged_devices, internal.tailq, tmp) { 307 TAILQ_REMOVE(&g_pci_hotplugged_devices, dev, internal.tailq); 308 TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq); 309 vtophys_pci_device_added(dev->dev_handle); 310 } 311 pthread_mutex_unlock(&g_pci_mutex); 312 } 313 314 static int scan_pci_bus(bool delay_init); 315 316 /* translate spdk_pci_driver to an rte_pci_driver and register it to dpdk */ 317 static int 318 register_rte_driver(struct spdk_pci_driver *driver) 319 { 320 unsigned pci_id_count = 0; 321 struct rte_pci_id *rte_id_table; 322 char *rte_name; 323 size_t rte_name_len; 324 uint32_t rte_flags; 325 326 assert(driver->id_table); 327 while (driver->id_table[pci_id_count].vendor_id) { 328 pci_id_count++; 329 } 330 assert(pci_id_count > 0); 331 332 rte_id_table = calloc(pci_id_count + 1, sizeof(*rte_id_table)); 333 if (!rte_id_table) { 334 return -ENOMEM; 335 } 336 337 while (pci_id_count > 0) { 338 struct rte_pci_id *rte_id = &rte_id_table[pci_id_count - 1]; 339 const struct spdk_pci_id *spdk_id = &driver->id_table[pci_id_count - 1]; 340 341 rte_id->class_id = spdk_id->class_id; 342 rte_id->vendor_id = spdk_id->vendor_id; 343 rte_id->device_id = spdk_id->device_id; 344 rte_id->subsystem_vendor_id = spdk_id->subvendor_id; 345 rte_id->subsystem_device_id = spdk_id->subdevice_id; 346 pci_id_count--; 347 } 348 349 assert(driver->name); 350 rte_name_len = strlen(driver->name) + strlen("spdk_") + 1; 351 rte_name = calloc(rte_name_len, 1); 352 if (!rte_name) { 353 free(rte_id_table); 354 return -ENOMEM; 355 } 356 357 snprintf(rte_name, rte_name_len, "spdk_%s", driver->name); 358 driver->driver.driver.name = rte_name; 359 driver->driver.id_table = rte_id_table; 360 361 rte_flags = 0; 362 if (driver->drv_flags & SPDK_PCI_DRIVER_NEED_MAPPING) { 363 rte_flags |= RTE_PCI_DRV_NEED_MAPPING; 364 } 365 if (driver->drv_flags & SPDK_PCI_DRIVER_WC_ACTIVATE) { 366 rte_flags |= RTE_PCI_DRV_WC_ACTIVATE; 367 } 368 driver->driver.drv_flags = rte_flags; 369 370 driver->driver.probe = pci_device_init; 371 driver->driver.remove = pci_device_fini; 372 373 rte_pci_register(&driver->driver); 374 return 0; 375 } 376 377 static inline void 378 _pci_env_init(void) 379 { 380 /* We assume devices were present on the bus for more than 2 seconds 381 * before initializing SPDK and there's no need to wait more. We scan 382 * the bus, but we don't block any devices. 383 */ 384 scan_pci_bus(false); 385 386 /* Register a single hotremove callback for all devices. */ 387 if (spdk_process_is_primary()) { 388 rte_dev_event_callback_register(NULL, pci_device_rte_dev_event, NULL); 389 } 390 } 391 392 void 393 pci_env_init(void) 394 { 395 struct spdk_pci_driver *driver; 396 397 TAILQ_FOREACH(driver, &g_pci_drivers, tailq) { 398 register_rte_driver(driver); 399 } 400 401 _pci_env_init(); 402 } 403 404 void 405 pci_env_reinit(void) 406 { 407 /* There is no need to register pci drivers again, since they were 408 * already pre-registered in pci_env_init. 409 */ 410 411 _pci_env_init(); 412 } 413 414 void 415 pci_env_fini(void) 416 { 417 struct spdk_pci_device *dev; 418 char bdf[32]; 419 420 cleanup_pci_devices(); 421 TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { 422 if (dev->internal.attached) { 423 spdk_pci_addr_fmt(bdf, sizeof(bdf), &dev->addr); 424 SPDK_ERRLOG("Device %s is still attached at shutdown!\n", bdf); 425 } 426 } 427 428 if (spdk_process_is_primary()) { 429 rte_dev_event_callback_unregister(NULL, pci_device_rte_dev_event, NULL); 430 } 431 } 432 433 int 434 pci_device_init(struct rte_pci_driver *_drv, 435 struct rte_pci_device *_dev) 436 { 437 struct spdk_pci_driver *driver = (struct spdk_pci_driver *)_drv; 438 struct spdk_pci_device *dev; 439 int rc; 440 441 dev = calloc(1, sizeof(*dev)); 442 if (dev == NULL) { 443 return -1; 444 } 445 446 dev->dev_handle = _dev; 447 448 dev->addr.domain = _dev->addr.domain; 449 dev->addr.bus = _dev->addr.bus; 450 dev->addr.dev = _dev->addr.devid; 451 dev->addr.func = _dev->addr.function; 452 dev->id.class_id = _dev->id.class_id; 453 dev->id.vendor_id = _dev->id.vendor_id; 454 dev->id.device_id = _dev->id.device_id; 455 dev->id.subvendor_id = _dev->id.subsystem_vendor_id; 456 dev->id.subdevice_id = _dev->id.subsystem_device_id; 457 dev->socket_id = _dev->device.numa_node; 458 dev->type = "pci"; 459 460 dev->map_bar = map_bar_rte; 461 dev->unmap_bar = unmap_bar_rte; 462 dev->cfg_read = cfg_read_rte; 463 dev->cfg_write = cfg_write_rte; 464 465 dev->internal.driver = driver; 466 dev->internal.claim_fd = -1; 467 468 if (driver->cb_fn != NULL) { 469 rc = driver->cb_fn(driver->cb_arg, dev); 470 if (rc != 0) { 471 free(dev); 472 return rc; 473 } 474 dev->internal.attached = true; 475 } 476 477 pthread_mutex_lock(&g_pci_mutex); 478 TAILQ_INSERT_TAIL(&g_pci_hotplugged_devices, dev, internal.tailq); 479 pthread_mutex_unlock(&g_pci_mutex); 480 return 0; 481 } 482 483 static void 484 set_allowed_at(struct rte_devargs *rte_da, uint64_t tsc) 485 { 486 struct env_devargs *env_da; 487 488 env_da = find_env_devargs(rte_da->bus, rte_da->name); 489 if (env_da == NULL) { 490 env_da = calloc(1, sizeof(*env_da)); 491 if (env_da == NULL) { 492 SPDK_ERRLOG("could not set_allowed_at for device %s\n", rte_da->name); 493 return; 494 } 495 env_da->bus = rte_da->bus; 496 spdk_strcpy_pad(env_da->name, rte_da->name, sizeof(env_da->name), 0); 497 TAILQ_INSERT_TAIL(&g_env_devargs, env_da, link); 498 } 499 500 env_da->allowed_at = tsc; 501 } 502 503 static uint64_t 504 get_allowed_at(struct rte_devargs *rte_da) 505 { 506 struct env_devargs *env_da; 507 508 env_da = find_env_devargs(rte_da->bus, rte_da->name); 509 if (env_da) { 510 return env_da->allowed_at; 511 } else { 512 return 0; 513 } 514 } 515 516 int 517 pci_device_fini(struct rte_pci_device *_dev) 518 { 519 struct spdk_pci_device *dev; 520 521 pthread_mutex_lock(&g_pci_mutex); 522 TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { 523 if (dev->dev_handle == _dev) { 524 break; 525 } 526 } 527 528 if (dev == NULL || dev->internal.attached) { 529 /* The device might be still referenced somewhere in SPDK. */ 530 pthread_mutex_unlock(&g_pci_mutex); 531 return -EBUSY; 532 } 533 534 /* remove our allowed_at option */ 535 if (_dev->device.devargs) { 536 set_allowed_at(_dev->device.devargs, 0); 537 } 538 539 assert(!dev->internal.removed); 540 dev->internal.removed = true; 541 pthread_mutex_unlock(&g_pci_mutex); 542 return 0; 543 544 } 545 546 void 547 spdk_pci_device_detach(struct spdk_pci_device *dev) 548 { 549 assert(dev->internal.attached); 550 551 if (dev->internal.claim_fd >= 0) { 552 spdk_pci_device_unclaim(dev); 553 } 554 555 dev->internal.attached = false; 556 if (strcmp(dev->type, "pci") == 0) { 557 /* if it's a physical device we need to deal with DPDK on 558 * a different process and we can't just unset one flag 559 * here. We also want to stop using any device resources 560 * so that the device isn't "in use" by the userspace driver 561 * once we detach it. This would allow attaching the device 562 * to a different process, or to a kernel driver like nvme. 563 */ 564 detach_rte(dev); 565 } 566 567 cleanup_pci_devices(); 568 } 569 570 static int 571 scan_pci_bus(bool delay_init) 572 { 573 struct spdk_pci_driver *driver; 574 struct rte_pci_device *rte_dev; 575 uint64_t now; 576 577 rte_bus_scan(); 578 now = spdk_get_ticks(); 579 580 driver = TAILQ_FIRST(&g_pci_drivers); 581 if (!driver) { 582 return 0; 583 } 584 585 TAILQ_FOREACH(rte_dev, &driver->driver.bus->device_list, next) { 586 struct rte_devargs *da; 587 588 da = rte_dev->device.devargs; 589 if (!da) { 590 char devargs_str[128]; 591 592 /* the device was never blocked or allowed */ 593 da = calloc(1, sizeof(*da)); 594 if (!da) { 595 return -1; 596 } 597 598 snprintf(devargs_str, sizeof(devargs_str), "pci:%s", rte_dev->device.name); 599 if (rte_devargs_parse(da, devargs_str) != 0) { 600 free(da); 601 return -1; 602 } 603 604 rte_devargs_insert(&da); 605 rte_dev->device.devargs = da; 606 } 607 608 if (get_allowed_at(da)) { 609 uint64_t allowed_at = get_allowed_at(da); 610 611 /* this device was seen by spdk before... */ 612 if (da->policy == RTE_DEV_BLOCKED && allowed_at <= now) { 613 da->policy = RTE_DEV_ALLOWED; 614 } 615 } else if ((driver->driver.bus->bus.conf.scan_mode == RTE_BUS_SCAN_ALLOWLIST && 616 da->policy == RTE_DEV_ALLOWED) || da->policy != RTE_DEV_BLOCKED) { 617 /* override the policy only if not permanently blocked */ 618 619 if (delay_init) { 620 da->policy = RTE_DEV_BLOCKED; 621 set_allowed_at(da, now + 2 * spdk_get_ticks_hz()); 622 } else { 623 da->policy = RTE_DEV_ALLOWED; 624 set_allowed_at(da, now); 625 } 626 } 627 } 628 629 return 0; 630 } 631 632 int 633 spdk_pci_device_attach(struct spdk_pci_driver *driver, 634 spdk_pci_enum_cb enum_cb, 635 void *enum_ctx, struct spdk_pci_addr *pci_address) 636 { 637 struct spdk_pci_device *dev; 638 struct rte_pci_device *rte_dev; 639 struct rte_devargs *da; 640 int rc; 641 char bdf[32]; 642 643 spdk_pci_addr_fmt(bdf, sizeof(bdf), pci_address); 644 645 cleanup_pci_devices(); 646 647 TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { 648 if (spdk_pci_addr_compare(&dev->addr, pci_address) == 0) { 649 break; 650 } 651 } 652 653 if (dev != NULL && dev->internal.driver == driver) { 654 pthread_mutex_lock(&g_pci_mutex); 655 if (dev->internal.attached || dev->internal.pending_removal) { 656 pthread_mutex_unlock(&g_pci_mutex); 657 return -1; 658 } 659 660 rc = enum_cb(enum_ctx, dev); 661 if (rc == 0) { 662 dev->internal.attached = true; 663 } 664 pthread_mutex_unlock(&g_pci_mutex); 665 return rc; 666 } 667 668 driver->cb_fn = enum_cb; 669 driver->cb_arg = enum_ctx; 670 671 int i = 0; 672 673 do { 674 rc = rte_eal_hotplug_add("pci", bdf, ""); 675 } while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT); 676 677 if (i > 1 && rc == -EEXIST) { 678 /* Even though the previous request timed out, the device 679 * was attached successfully. 680 */ 681 rc = 0; 682 } 683 684 driver->cb_arg = NULL; 685 driver->cb_fn = NULL; 686 687 cleanup_pci_devices(); 688 689 if (rc != 0) { 690 return -1; 691 } 692 693 /* explicit attach ignores the allowlist, so if we blocked this 694 * device before let's enable it now - just for clarity. 695 */ 696 TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { 697 if (spdk_pci_addr_compare(&dev->addr, pci_address) == 0) { 698 break; 699 } 700 } 701 assert(dev != NULL); 702 703 rte_dev = dev->dev_handle; 704 da = rte_dev->device.devargs; 705 if (da && get_allowed_at(da)) { 706 set_allowed_at(da, spdk_get_ticks()); 707 da->policy = RTE_DEV_ALLOWED; 708 } 709 710 return 0; 711 } 712 713 /* Note: You can call spdk_pci_enumerate from more than one thread 714 * simultaneously safely, but you cannot call spdk_pci_enumerate 715 * and rte_eal_pci_probe simultaneously. 716 */ 717 int 718 spdk_pci_enumerate(struct spdk_pci_driver *driver, 719 spdk_pci_enum_cb enum_cb, 720 void *enum_ctx) 721 { 722 struct spdk_pci_device *dev; 723 int rc; 724 725 cleanup_pci_devices(); 726 727 pthread_mutex_lock(&g_pci_mutex); 728 TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { 729 if (dev->internal.attached || 730 dev->internal.driver != driver || 731 dev->internal.pending_removal) { 732 continue; 733 } 734 735 rc = enum_cb(enum_ctx, dev); 736 if (rc == 0) { 737 dev->internal.attached = true; 738 } else if (rc < 0) { 739 pthread_mutex_unlock(&g_pci_mutex); 740 return -1; 741 } 742 } 743 pthread_mutex_unlock(&g_pci_mutex); 744 745 if (scan_pci_bus(true) != 0) { 746 return -1; 747 } 748 749 driver->cb_fn = enum_cb; 750 driver->cb_arg = enum_ctx; 751 752 if (rte_bus_probe() != 0) { 753 driver->cb_arg = NULL; 754 driver->cb_fn = NULL; 755 return -1; 756 } 757 758 driver->cb_arg = NULL; 759 driver->cb_fn = NULL; 760 761 cleanup_pci_devices(); 762 return 0; 763 } 764 765 struct spdk_pci_device * 766 spdk_pci_get_first_device(void) 767 { 768 return TAILQ_FIRST(&g_pci_devices); 769 } 770 771 struct spdk_pci_device * 772 spdk_pci_get_next_device(struct spdk_pci_device *prev) 773 { 774 return TAILQ_NEXT(prev, internal.tailq); 775 } 776 777 int 778 spdk_pci_device_map_bar(struct spdk_pci_device *dev, uint32_t bar, 779 void **mapped_addr, uint64_t *phys_addr, uint64_t *size) 780 { 781 return dev->map_bar(dev, bar, mapped_addr, phys_addr, size); 782 } 783 784 int 785 spdk_pci_device_unmap_bar(struct spdk_pci_device *dev, uint32_t bar, void *addr) 786 { 787 return dev->unmap_bar(dev, bar, addr); 788 } 789 790 uint32_t 791 spdk_pci_device_get_domain(struct spdk_pci_device *dev) 792 { 793 return dev->addr.domain; 794 } 795 796 uint8_t 797 spdk_pci_device_get_bus(struct spdk_pci_device *dev) 798 { 799 return dev->addr.bus; 800 } 801 802 uint8_t 803 spdk_pci_device_get_dev(struct spdk_pci_device *dev) 804 { 805 return dev->addr.dev; 806 } 807 808 uint8_t 809 spdk_pci_device_get_func(struct spdk_pci_device *dev) 810 { 811 return dev->addr.func; 812 } 813 814 uint16_t 815 spdk_pci_device_get_vendor_id(struct spdk_pci_device *dev) 816 { 817 return dev->id.vendor_id; 818 } 819 820 uint16_t 821 spdk_pci_device_get_device_id(struct spdk_pci_device *dev) 822 { 823 return dev->id.device_id; 824 } 825 826 uint16_t 827 spdk_pci_device_get_subvendor_id(struct spdk_pci_device *dev) 828 { 829 return dev->id.subvendor_id; 830 } 831 832 uint16_t 833 spdk_pci_device_get_subdevice_id(struct spdk_pci_device *dev) 834 { 835 return dev->id.subdevice_id; 836 } 837 838 struct spdk_pci_id 839 spdk_pci_device_get_id(struct spdk_pci_device *dev) 840 { 841 return dev->id; 842 } 843 844 int 845 spdk_pci_device_get_socket_id(struct spdk_pci_device *dev) 846 { 847 return dev->socket_id; 848 } 849 850 int 851 spdk_pci_device_cfg_read(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset) 852 { 853 return dev->cfg_read(dev, value, len, offset); 854 } 855 856 int 857 spdk_pci_device_cfg_write(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset) 858 { 859 return dev->cfg_write(dev, value, len, offset); 860 } 861 862 int 863 spdk_pci_device_cfg_read8(struct spdk_pci_device *dev, uint8_t *value, uint32_t offset) 864 { 865 return spdk_pci_device_cfg_read(dev, value, 1, offset); 866 } 867 868 int 869 spdk_pci_device_cfg_write8(struct spdk_pci_device *dev, uint8_t value, uint32_t offset) 870 { 871 return spdk_pci_device_cfg_write(dev, &value, 1, offset); 872 } 873 874 int 875 spdk_pci_device_cfg_read16(struct spdk_pci_device *dev, uint16_t *value, uint32_t offset) 876 { 877 return spdk_pci_device_cfg_read(dev, value, 2, offset); 878 } 879 880 int 881 spdk_pci_device_cfg_write16(struct spdk_pci_device *dev, uint16_t value, uint32_t offset) 882 { 883 return spdk_pci_device_cfg_write(dev, &value, 2, offset); 884 } 885 886 int 887 spdk_pci_device_cfg_read32(struct spdk_pci_device *dev, uint32_t *value, uint32_t offset) 888 { 889 return spdk_pci_device_cfg_read(dev, value, 4, offset); 890 } 891 892 int 893 spdk_pci_device_cfg_write32(struct spdk_pci_device *dev, uint32_t value, uint32_t offset) 894 { 895 return spdk_pci_device_cfg_write(dev, &value, 4, offset); 896 } 897 898 int 899 spdk_pci_device_get_serial_number(struct spdk_pci_device *dev, char *sn, size_t len) 900 { 901 int err; 902 uint32_t pos, header = 0; 903 uint32_t i, buf[2]; 904 905 if (len < 17) { 906 return -1; 907 } 908 909 err = spdk_pci_device_cfg_read32(dev, &header, PCI_CFG_SIZE); 910 if (err || !header) { 911 return -1; 912 } 913 914 pos = PCI_CFG_SIZE; 915 while (1) { 916 if ((header & 0x0000ffff) == PCI_EXT_CAP_ID_SN) { 917 if (pos) { 918 /* skip the header */ 919 pos += 4; 920 for (i = 0; i < 2; i++) { 921 err = spdk_pci_device_cfg_read32(dev, &buf[i], pos + 4 * i); 922 if (err) { 923 return -1; 924 } 925 } 926 snprintf(sn, len, "%08x%08x", buf[1], buf[0]); 927 return 0; 928 } 929 } 930 pos = (header >> 20) & 0xffc; 931 /* 0 if no other items exist */ 932 if (pos < PCI_CFG_SIZE) { 933 return -1; 934 } 935 err = spdk_pci_device_cfg_read32(dev, &header, pos); 936 if (err) { 937 return -1; 938 } 939 } 940 return -1; 941 } 942 943 struct spdk_pci_addr 944 spdk_pci_device_get_addr(struct spdk_pci_device *dev) 945 { 946 return dev->addr; 947 } 948 949 bool 950 spdk_pci_device_is_removed(struct spdk_pci_device *dev) 951 { 952 return dev->internal.pending_removal; 953 } 954 955 int 956 spdk_pci_addr_compare(const struct spdk_pci_addr *a1, const struct spdk_pci_addr *a2) 957 { 958 if (a1->domain > a2->domain) { 959 return 1; 960 } else if (a1->domain < a2->domain) { 961 return -1; 962 } else if (a1->bus > a2->bus) { 963 return 1; 964 } else if (a1->bus < a2->bus) { 965 return -1; 966 } else if (a1->dev > a2->dev) { 967 return 1; 968 } else if (a1->dev < a2->dev) { 969 return -1; 970 } else if (a1->func > a2->func) { 971 return 1; 972 } else if (a1->func < a2->func) { 973 return -1; 974 } 975 976 return 0; 977 } 978 979 #ifdef __linux__ 980 int 981 spdk_pci_device_claim(struct spdk_pci_device *dev) 982 { 983 int dev_fd; 984 char dev_name[64]; 985 int pid; 986 void *dev_map; 987 struct flock pcidev_lock = { 988 .l_type = F_WRLCK, 989 .l_whence = SEEK_SET, 990 .l_start = 0, 991 .l_len = 0, 992 }; 993 994 snprintf(dev_name, sizeof(dev_name), "/var/tmp/spdk_pci_lock_%04x:%02x:%02x.%x", 995 dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func); 996 997 dev_fd = open(dev_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); 998 if (dev_fd == -1) { 999 SPDK_ERRLOG("could not open %s\n", dev_name); 1000 return -errno; 1001 } 1002 1003 if (ftruncate(dev_fd, sizeof(int)) != 0) { 1004 SPDK_ERRLOG("could not truncate %s\n", dev_name); 1005 close(dev_fd); 1006 return -errno; 1007 } 1008 1009 dev_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, 1010 MAP_SHARED, dev_fd, 0); 1011 if (dev_map == MAP_FAILED) { 1012 SPDK_ERRLOG("could not mmap dev %s (%d)\n", dev_name, errno); 1013 close(dev_fd); 1014 return -errno; 1015 } 1016 1017 if (fcntl(dev_fd, F_SETLK, &pcidev_lock) != 0) { 1018 pid = *(int *)dev_map; 1019 SPDK_ERRLOG("Cannot create lock on device %s, probably" 1020 " process %d has claimed it\n", dev_name, pid); 1021 munmap(dev_map, sizeof(int)); 1022 close(dev_fd); 1023 /* F_SETLK returns unspecified errnos, normalize them */ 1024 return -EACCES; 1025 } 1026 1027 *(int *)dev_map = (int)getpid(); 1028 munmap(dev_map, sizeof(int)); 1029 dev->internal.claim_fd = dev_fd; 1030 /* Keep dev_fd open to maintain the lock. */ 1031 return 0; 1032 } 1033 1034 void 1035 spdk_pci_device_unclaim(struct spdk_pci_device *dev) 1036 { 1037 char dev_name[64]; 1038 1039 snprintf(dev_name, sizeof(dev_name), "/var/tmp/spdk_pci_lock_%04x:%02x:%02x.%x", 1040 dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func); 1041 1042 close(dev->internal.claim_fd); 1043 dev->internal.claim_fd = -1; 1044 unlink(dev_name); 1045 } 1046 #else /* !__linux__ */ 1047 int 1048 spdk_pci_device_claim(struct spdk_pci_device *dev) 1049 { 1050 /* TODO */ 1051 return 0; 1052 } 1053 1054 void 1055 spdk_pci_device_unclaim(struct spdk_pci_device *dev) 1056 { 1057 /* TODO */ 1058 } 1059 #endif /* __linux__ */ 1060 1061 int 1062 spdk_pci_addr_parse(struct spdk_pci_addr *addr, const char *bdf) 1063 { 1064 unsigned domain, bus, dev, func; 1065 1066 if (addr == NULL || bdf == NULL) { 1067 return -EINVAL; 1068 } 1069 1070 if ((sscanf(bdf, "%x:%x:%x.%x", &domain, &bus, &dev, &func) == 4) || 1071 (sscanf(bdf, "%x.%x.%x.%x", &domain, &bus, &dev, &func) == 4)) { 1072 /* Matched a full address - all variables are initialized */ 1073 } else if (sscanf(bdf, "%x:%x:%x", &domain, &bus, &dev) == 3) { 1074 func = 0; 1075 } else if ((sscanf(bdf, "%x:%x.%x", &bus, &dev, &func) == 3) || 1076 (sscanf(bdf, "%x.%x.%x", &bus, &dev, &func) == 3)) { 1077 domain = 0; 1078 } else if ((sscanf(bdf, "%x:%x", &bus, &dev) == 2) || 1079 (sscanf(bdf, "%x.%x", &bus, &dev) == 2)) { 1080 domain = 0; 1081 func = 0; 1082 } else { 1083 return -EINVAL; 1084 } 1085 1086 if (bus > 0xFF || dev > 0x1F || func > 7) { 1087 return -EINVAL; 1088 } 1089 1090 addr->domain = domain; 1091 addr->bus = bus; 1092 addr->dev = dev; 1093 addr->func = func; 1094 1095 return 0; 1096 } 1097 1098 int 1099 spdk_pci_addr_fmt(char *bdf, size_t sz, const struct spdk_pci_addr *addr) 1100 { 1101 int rc; 1102 1103 rc = snprintf(bdf, sz, "%04x:%02x:%02x.%x", 1104 addr->domain, addr->bus, 1105 addr->dev, addr->func); 1106 1107 if (rc > 0 && (size_t)rc < sz) { 1108 return 0; 1109 } 1110 1111 return -1; 1112 } 1113 1114 void 1115 spdk_pci_hook_device(struct spdk_pci_driver *drv, struct spdk_pci_device *dev) 1116 { 1117 assert(dev->map_bar != NULL); 1118 assert(dev->unmap_bar != NULL); 1119 assert(dev->cfg_read != NULL); 1120 assert(dev->cfg_write != NULL); 1121 dev->internal.driver = drv; 1122 TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq); 1123 } 1124 1125 void 1126 spdk_pci_unhook_device(struct spdk_pci_device *dev) 1127 { 1128 assert(!dev->internal.attached); 1129 TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq); 1130 } 1131 1132 const char * 1133 spdk_pci_device_get_type(const struct spdk_pci_device *dev) 1134 { 1135 return dev->type; 1136 } 1137 1138 int 1139 spdk_pci_device_allow(struct spdk_pci_addr *pci_addr) 1140 { 1141 struct rte_devargs *da; 1142 char devargs_str[128]; 1143 1144 da = calloc(1, sizeof(*da)); 1145 if (da == NULL) { 1146 SPDK_ERRLOG("could not allocate rte_devargs\n"); 1147 return -ENOMEM; 1148 } 1149 1150 snprintf(devargs_str, sizeof(devargs_str), "pci:%04x:%02x:%02x.%x", 1151 pci_addr->domain, pci_addr->bus, pci_addr->dev, pci_addr->func); 1152 if (rte_devargs_parse(da, devargs_str) != 0) { 1153 SPDK_ERRLOG("rte_devargs_parse() failed on '%s'\n", devargs_str); 1154 free(da); 1155 return -EINVAL; 1156 } 1157 da->policy = RTE_DEV_ALLOWED; 1158 /* Note: if a devargs already exists for this device address, it just gets 1159 * overridden. So we do not need to check if the devargs already exists. 1160 * DPDK will take care of memory management for the devargs structure after 1161 * it has been inserted, so there's nothing SPDK needs to track. 1162 */ 1163 if (rte_devargs_insert(&da) != 0) { 1164 SPDK_ERRLOG("rte_devargs_insert() failed on '%s'\n", devargs_str); 1165 free(da); 1166 return -EINVAL; 1167 } 1168 1169 return 0; 1170 } 1171