1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 */

#include "env_internal.h"

#include <rte_alarm.h>
#include <rte_devargs.h>
#include "spdk/env.h"
#include "spdk/log.h"
#include "spdk/string.h"

#define SYSFS_PCI_DRIVERS "/sys/bus/pci/drivers"

/* Compatibility for versions < 20.11 */
#if RTE_VERSION < RTE_VERSION_NUM(20, 11, 0, 0)
#define RTE_DEV_ALLOWED RTE_DEV_WHITELISTED
#define RTE_DEV_BLOCKED RTE_DEV_BLACKLISTED
#define RTE_BUS_SCAN_ALLOWLIST RTE_BUS_SCAN_WHITELIST
#endif

/* Size of the legacy PCI config space; extended capabilities start at this offset. */
#define PCI_CFG_SIZE 256
/* PCIe extended capability ID of the Device Serial Number capability. */
#define PCI_EXT_CAP_ID_SN 0x03

/* DPDK 18.11+ hotplug isn't robust. Multiple apps starting at the same time
 * might cause the internal IPC to misbehave. Just retry in such case.
 */
#define DPDK_HOTPLUG_RETRY_COUNT 4

/* DPDK alarm/interrupt thread */
static pthread_mutex_t g_pci_mutex = PTHREAD_MUTEX_INITIALIZER;
static TAILQ_HEAD(, spdk_pci_device) g_pci_devices = TAILQ_HEAD_INITIALIZER(g_pci_devices);
/* devices hotplugged on a dpdk thread */
static TAILQ_HEAD(, spdk_pci_device) g_pci_hotplugged_devices =
	TAILQ_HEAD_INITIALIZER(g_pci_hotplugged_devices);
static TAILQ_HEAD(, spdk_pci_driver) g_pci_drivers = TAILQ_HEAD_INITIALIZER(g_pci_drivers);

/* SPDK-side bookkeeping attached to a DPDK devargs entry. allowed_at records
 * the tick count after which the device may be probed; 0 means the device was
 * never tracked (see get_allowed_at()).
 */
struct env_devargs {
	struct rte_bus *bus;
	char name[128];
	uint64_t allowed_at;
	TAILQ_ENTRY(env_devargs) link;
};
static TAILQ_HEAD(, env_devargs) g_env_devargs = TAILQ_HEAD_INITIALIZER(g_env_devargs);

/* Find our bookkeeping entry for the given bus + device name, or NULL. */
static struct env_devargs *
find_env_devargs(struct rte_bus *bus, const char *name)
{
	struct env_devargs *da;

	TAILQ_FOREACH(da, &g_env_devargs, link) {
		if (bus == da->bus && !strcmp(name, da->name)) {
			return da;
		}
	}

	return NULL;
}

/* map_bar callback for rte devices: the BAR was already mapped by DPDK at
 * probe time, so just report the existing mapping.
 */
static int
map_bar_rte(struct spdk_pci_device *device, uint32_t bar,
	    void **mapped_addr, uint64_t *phys_addr, uint64_t *size)
{
	struct rte_pci_device *dev = device->dev_handle;

	*mapped_addr = dev->mem_resource[bar].addr;
	*phys_addr = (uint64_t)dev->mem_resource[bar].phys_addr;
	*size = (uint64_t)dev->mem_resource[bar].len;

	return 0;
}

/* unmap_bar callback: nothing to do - DPDK owns the mapping. */
static int
unmap_bar_rte(struct spdk_pci_device *device, uint32_t bar, void *addr)
{
	return 0;
}

/* cfg_read callback: rte_pci_read_config() returns a byte count on Linux;
 * succeed only if exactly len bytes were read.
 */
static int
cfg_read_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
{
	int rc;

	rc = rte_pci_read_config(dev->dev_handle, value, len, offset);

	return (rc > 0 && (uint32_t) rc == len) ? 0 : -1;
}

/* cfg_write callback: same convention as cfg_read_rte(), except FreeBSD DPDK
 * reports plain 0/-1 instead of a byte count.
 */
static int
cfg_write_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
{
	int rc;

	rc = rte_pci_write_config(dev->dev_handle, value, len, offset);

#ifdef __FreeBSD__
	/* DPDK returns 0 on success and -1 on failure */
	return rc;
#endif
	return (rc > 0 && (uint32_t) rc == len) ? 0 : -1;
}

/* Hot-remove a device from DPDK, retrying on -ENOMSG IPC failures
 * (see DPDK_HOTPLUG_RETRY_COUNT above).
 */
static void
remove_rte_dev(struct rte_pci_device *rte_dev)
{
	char bdf[32];
	int i = 0, rc;

	snprintf(bdf, sizeof(bdf), "%s", rte_dev->device.name);
	do {
		rc = rte_eal_hotplug_remove("pci", bdf);
	} while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT);
}

/* rte_eal_alarm callback wrapper around remove_rte_dev(). */
static void
detach_rte_cb(void *_dev)
{
	remove_rte_dev(_dev);
}

/* Detach a physical device. In the primary process: flag the device as
 * pending-removal, schedule the actual DPDK hot-remove on the alarm
 * (interrupt) thread, and wait up to 2s for it to complete. Secondary
 * processes just do the removal inline.
 */
static void
detach_rte(struct spdk_pci_device *dev)
{
	struct rte_pci_device *rte_dev = dev->dev_handle;
	int i;
	bool removed;

	if (!spdk_process_is_primary()) {
		remove_rte_dev(rte_dev);
		return;
	}

	pthread_mutex_lock(&g_pci_mutex);
	dev->internal.attached = false;
	/* prevent the hotremove notification from removing this device */
	dev->internal.pending_removal = true;
	pthread_mutex_unlock(&g_pci_mutex);

	rte_eal_alarm_set(1, detach_rte_cb, rte_dev);

	/* wait up to 2s for the cb to execute */
	for (i = 2000; i > 0; i--) {

		spdk_delay_us(1000);
		pthread_mutex_lock(&g_pci_mutex);
		removed = dev->internal.removed;
		pthread_mutex_unlock(&g_pci_mutex);

		if (removed) {
			break;
		}
	}

	/* besides checking the removed flag, we also need to wait
	 * for the dpdk detach function to unwind, as it's doing some
	 * operations even after calling our detach callback. Simply
	 * cancel the alarm - if it started executing already, this
	 * call will block and wait for it to finish.
	 */
	rte_eal_alarm_cancel(detach_rte_cb, rte_dev);

	/* the device could have been finally removed, so just check
	 * it again.
	 */
	pthread_mutex_lock(&g_pci_mutex);
	removed = dev->internal.removed;
	pthread_mutex_unlock(&g_pci_mutex);
	if (!removed) {
		SPDK_ERRLOG("Timeout waiting for DPDK to remove PCI device %s.\n",
			    rte_dev->name);
		/* If we reach this state, then the device couldn't be removed and most likely
		   a subsequent hot add of a device in the same BDF will fail */
	}
}

/* Register an SPDK PCI driver by name and ID table. Called before
 * pci_env_init(), which translates the entry into an rte_pci_driver.
 * Allocation failure is silently ignored (nothing better to do here).
 */
void
spdk_pci_driver_register(const char *name, struct spdk_pci_id *id_table, uint32_t flags)
{
	struct spdk_pci_driver *driver;

	driver = calloc(1, sizeof(*driver));
	if (!driver) {
		/* we can't do any better than bailing atm */
		return;
	}

	driver->name = name;
	driver->id_table = id_table;
	driver->drv_flags = flags;
	TAILQ_INSERT_TAIL(&g_pci_drivers, driver, tailq);
}

struct spdk_pci_driver *
spdk_pci_nvme_get_driver(void)
{
	return spdk_pci_get_driver("nvme");
}

/* Find a registered SPDK PCI driver by name, or NULL. */
struct spdk_pci_driver *
spdk_pci_get_driver(const char *name)
{
	struct spdk_pci_driver *driver;

	TAILQ_FOREACH(driver, &g_pci_drivers, tailq) {
		if (strcmp(driver->name, name) == 0) {
			return driver;
		}
	}

	return NULL;
}

/* DPDK device-event callback; runs on the DPDK interrupt thread. On a REMOVE
 * event, mark the matching device pending-removal and, if nobody holds it
 * attached, schedule a deferred hot-remove.
 */
static void
pci_device_rte_dev_event(const char *device_name,
			 enum rte_dev_event_type event,
			 void *cb_arg)
{
	struct spdk_pci_device *dev;
	bool can_detach = false;

	switch (event) {
	default:
	case RTE_DEV_EVENT_ADD:
		/* Nothing to do here yet. */
		break;
	case RTE_DEV_EVENT_REMOVE:
		pthread_mutex_lock(&g_pci_mutex);
		TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
			struct rte_pci_device *rte_dev = dev->dev_handle;

			if (strcmp(rte_dev->name, device_name) == 0 &&
			    !dev->internal.pending_removal) {
				can_detach = !dev->internal.attached;
				/* prevent any further attaches */
				dev->internal.pending_removal = true;
				break;
			}
		}
		pthread_mutex_unlock(&g_pci_mutex);

		if (dev != NULL && can_detach) {
			/* if device is not attached we can remove it right away.
			 * Otherwise it will be removed at detach.
			 *
			 * Because the user's callback is invoked in eal interrupt
			 * callback, the interrupt callback need to be finished before
			 * it can be unregistered when detaching device. So finish
			 * callback soon and use a deferred removal to detach device
			 * is need. It is a workaround, once the device detaching be
			 * moved into the eal in the future, the deferred removal could
			 * be deleted.
			 */
			rte_eal_alarm_set(1, detach_rte_cb, dev->dev_handle);
		}
		break;
	}
}

/* Reap devices flagged as removed and publish freshly hotplugged ones,
 * keeping the vtophys PCI maps in sync.
 */
static void
cleanup_pci_devices(void)
{
	struct spdk_pci_device *dev, *tmp;

	pthread_mutex_lock(&g_pci_mutex);
	/* cleanup removed devices */
	TAILQ_FOREACH_SAFE(dev, &g_pci_devices, internal.tailq, tmp) {
		if (!dev->internal.removed) {
			continue;
		}

		vtophys_pci_device_removed(dev->dev_handle);
		TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq);
		free(dev);
	}

	/* add newly-attached devices */
	TAILQ_FOREACH_SAFE(dev, &g_pci_hotplugged_devices, internal.tailq, tmp) {
		TAILQ_REMOVE(&g_pci_hotplugged_devices, dev, internal.tailq);
		TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq);
		vtophys_pci_device_added(dev->dev_handle);
	}
	pthread_mutex_unlock(&g_pci_mutex);
}

static int scan_pci_bus(bool delay_init);

/* translate spdk_pci_driver to an rte_pci_driver and register it to dpdk */
static int
register_rte_driver(struct spdk_pci_driver *driver)
{
	unsigned pci_id_count = 0;
	struct rte_pci_id *rte_id_table;
	char *rte_name;
	size_t rte_name_len;
	uint32_t rte_flags;

	assert(driver->id_table);
	/* the SPDK ID table is terminated by a zero vendor_id entry */
	while (driver->id_table[pci_id_count].vendor_id) {
		pci_id_count++;
	}
	assert(pci_id_count > 0);

	/* +1 keeps a zeroed terminator entry for the rte table as well */
	rte_id_table = calloc(pci_id_count + 1, sizeof(*rte_id_table));
	if (!rte_id_table) {
		return -ENOMEM;
	}

	while (pci_id_count > 0) {
		struct rte_pci_id *rte_id = &rte_id_table[pci_id_count - 1];
		const struct spdk_pci_id *spdk_id = &driver->id_table[pci_id_count - 1];

		rte_id->class_id = spdk_id->class_id;
		rte_id->vendor_id = spdk_id->vendor_id;
		rte_id->device_id = spdk_id->device_id;
		rte_id->subsystem_vendor_id = spdk_id->subvendor_id;
		rte_id->subsystem_device_id = spdk_id->subdevice_id;
		pci_id_count--;
	}

	assert(driver->name);
	rte_name_len = strlen(driver->name) + strlen("spdk_") + 1;
	rte_name = calloc(rte_name_len, 1);
	if (!rte_name) {
		free(rte_id_table);
		return -ENOMEM;
	}

	snprintf(rte_name, rte_name_len, "spdk_%s", driver->name);
	driver->driver.driver.name = rte_name;
	driver->driver.id_table = rte_id_table;

	rte_flags = 0;
	if (driver->drv_flags & SPDK_PCI_DRIVER_NEED_MAPPING) {
		rte_flags |= RTE_PCI_DRV_NEED_MAPPING;
	}
	if (driver->drv_flags & SPDK_PCI_DRIVER_WC_ACTIVATE) {
		rte_flags |= RTE_PCI_DRV_WC_ACTIVATE;
	}
	driver->driver.drv_flags = rte_flags;

	driver->driver.probe = pci_device_init;
	driver->driver.remove = pci_device_fini;

	rte_pci_register(&driver->driver);
	return 0;
}

static inline void
_pci_env_init(void)
{
	/* We assume devices were present on the bus for more than 2 seconds
	 * before initializing SPDK and there's no need to wait more. We scan
	 * the bus, but we don't block any devices.
	 */
	scan_pci_bus(false);

	/* Register a single hotremove callback for all devices. */
	if (spdk_process_is_primary()) {
		rte_dev_event_callback_register(NULL, pci_device_rte_dev_event, NULL);
	}
}

/* One-time PCI env initialization: push all pre-registered SPDK drivers into
 * DPDK, then do the common scan/callback setup.
 */
void
pci_env_init(void)
{
	struct spdk_pci_driver *driver;

	TAILQ_FOREACH(driver, &g_pci_drivers, tailq) {
		register_rte_driver(driver);
	}

	_pci_env_init();
}

void
pci_env_reinit(void)
{
	/* There is no need to register pci drivers again, since they were
	 * already pre-registered in pci_env_init.
	 */

	_pci_env_init();
}

/* PCI env teardown: flush removed/hotplugged device lists, warn about devices
 * still attached, and unregister the hotremove callback.
 */
void
pci_env_fini(void)
{
	struct spdk_pci_device *dev;
	char bdf[32];

	cleanup_pci_devices();
	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
		if (dev->internal.attached) {
			spdk_pci_addr_fmt(bdf, sizeof(bdf), &dev->addr);
			SPDK_ERRLOG("Device %s is still attached at shutdown!\n", bdf);
		}
	}

	if (spdk_process_is_primary()) {
		rte_dev_event_callback_unregister(NULL, pci_device_rte_dev_event, NULL);
	}
}

/* DPDK probe callback: wrap the rte device in an spdk_pci_device and, if an
 * enumerate/attach is in progress (driver->cb_fn set), hand it to the caller
 * right away. The device is queued on g_pci_hotplugged_devices and published
 * by the next cleanup_pci_devices() call.
 */
int
pci_device_init(struct rte_pci_driver *_drv,
		struct rte_pci_device *_dev)
{
	struct spdk_pci_driver *driver = (struct spdk_pci_driver *)_drv;
	struct spdk_pci_device *dev;
	int rc;

	dev = calloc(1, sizeof(*dev));
	if (dev == NULL) {
		return -1;
	}

	dev->dev_handle = _dev;

	dev->addr.domain = _dev->addr.domain;
	dev->addr.bus = _dev->addr.bus;
	dev->addr.dev = _dev->addr.devid;
	dev->addr.func = _dev->addr.function;
	dev->id.class_id = _dev->id.class_id;
	dev->id.vendor_id = _dev->id.vendor_id;
	dev->id.device_id = _dev->id.device_id;
	dev->id.subvendor_id = _dev->id.subsystem_vendor_id;
	dev->id.subdevice_id = _dev->id.subsystem_device_id;
	dev->socket_id = _dev->device.numa_node;
	dev->type = "pci";

	dev->map_bar = map_bar_rte;
	dev->unmap_bar = unmap_bar_rte;
	dev->cfg_read = cfg_read_rte;
	dev->cfg_write = cfg_write_rte;

	dev->internal.driver = driver;
	dev->internal.claim_fd = -1;

	if (driver->cb_fn != NULL) {
		rc = driver->cb_fn(driver->cb_arg, dev);
		if (rc != 0) {
			free(dev);
			return rc;
		}
		dev->internal.attached = true;
	}

	pthread_mutex_lock(&g_pci_mutex);
	TAILQ_INSERT_TAIL(&g_pci_hotplugged_devices, dev, internal.tailq);
	pthread_mutex_unlock(&g_pci_mutex);
	return 0;
}

/* Record the tick count at which the given devargs becomes eligible for
 * probing, creating a bookkeeping entry on first use.
 */
static void
set_allowed_at(struct rte_devargs *rte_da, uint64_t tsc)
{
	struct env_devargs *env_da;

	env_da = find_env_devargs(rte_da->bus, rte_da->name);
	if (env_da == NULL) {
		env_da = calloc(1, sizeof(*env_da));
		if (env_da == NULL) {
			SPDK_ERRLOG("could not set_allowed_at for device %s\n", rte_da->name);
			return;
		}
		env_da->bus = rte_da->bus;
		spdk_strcpy_pad(env_da->name, rte_da->name, sizeof(env_da->name), 0);
		TAILQ_INSERT_TAIL(&g_env_devargs, env_da, link);
	}

	env_da->allowed_at = tsc;
}

/* Return the recorded allowed_at tick for the devargs, or 0 if never tracked. */
static uint64_t
get_allowed_at(struct rte_devargs *rte_da)
{
	struct env_devargs *env_da;

	env_da = find_env_devargs(rte_da->bus, rte_da->name);
	if (env_da) {
		return env_da->allowed_at;
	} else {
		return 0;
	}
}

/* DPDK remove callback: flag the matching spdk device as removed so that
 * cleanup_pci_devices() can free it. Fails (-1) if the device is still
 * attached or unknown.
 */
int
pci_device_fini(struct rte_pci_device *_dev)
{
	struct spdk_pci_device *dev;

	pthread_mutex_lock(&g_pci_mutex);
	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
		if (dev->dev_handle == _dev) {
			break;
		}
	}

	if (dev == NULL || dev->internal.attached) {
		/* The device might be still referenced somewhere in SPDK. */
		pthread_mutex_unlock(&g_pci_mutex);
		return -1;
	}

	/* remove our allowed_at option */
	if (_dev->device.devargs) {
		set_allowed_at(_dev->device.devargs, 0);
	}

	assert(!dev->internal.removed);
	dev->internal.removed = true;
	pthread_mutex_unlock(&g_pci_mutex);
	return 0;

}

/* Release an attached device: drop any claim lock and either hand it back to
 * DPDK (physical "pci" devices) or simply clear the attached flag.
 */
void
spdk_pci_device_detach(struct spdk_pci_device *dev)
{
	assert(dev->internal.attached);

	if (dev->internal.claim_fd >= 0) {
		spdk_pci_device_unclaim(dev);
	}

	if (strcmp(dev->type, "pci") == 0) {
		/* if it's a physical device we need to deal with DPDK on
		 * a different process and we can't just unset one flag
		 * here. We also want to stop using any device resources
		 * so that the device isn't "in use" by the userspace driver
		 * once we detach it. This would allow attaching the device
		 * to a different process, or to a kernel driver like nvme.
		 */
		detach_rte(dev);
	} else {
		dev->internal.attached = false;
	}

	cleanup_pci_devices();
}

/* Scan the PCI bus and adjust each device's devargs probe policy. With
 * delay_init set, devices not seen before are blocked and only allowed
 * ~2 seconds later (via allowed_at), so a subsequent scan can re-enable them.
 */
static int
scan_pci_bus(bool delay_init)
{
	struct spdk_pci_driver *driver;
	struct rte_pci_device *rte_dev;
	uint64_t now;

	rte_bus_scan();
	now = spdk_get_ticks();

	/* all registered drivers share the same rte bus; any one will do */
	driver = TAILQ_FIRST(&g_pci_drivers);
	if (!driver) {
		return 0;
	}

	TAILQ_FOREACH(rte_dev, &driver->driver.bus->device_list, next) {
		struct rte_devargs *da;

		da = rte_dev->device.devargs;
		if (!da) {
			char devargs_str[128];

			/* the device was never blocked or allowed */
			da = calloc(1, sizeof(*da));
			if (!da) {
				return -1;
			}

			snprintf(devargs_str, sizeof(devargs_str), "pci:%s", rte_dev->device.name);
			if (rte_devargs_parse(da, devargs_str) != 0) {
				free(da);
				return -1;
			}

			rte_devargs_insert(&da);
			rte_dev->device.devargs = da;
		}

		if (get_allowed_at(da)) {
			uint64_t allowed_at = get_allowed_at(da);

			/* this device was seen by spdk before... */
			if (da->policy == RTE_DEV_BLOCKED && allowed_at <= now) {
				da->policy = RTE_DEV_ALLOWED;
			}
		} else if ((driver->driver.bus->bus.conf.scan_mode == RTE_BUS_SCAN_ALLOWLIST &&
			    da->policy == RTE_DEV_ALLOWED) || da->policy != RTE_DEV_BLOCKED) {
			/* override the policy only if not permanently blocked */

			if (delay_init) {
				da->policy = RTE_DEV_BLOCKED;
				set_allowed_at(da, now + 2 * spdk_get_ticks_hz());
			} else {
				da->policy = RTE_DEV_ALLOWED;
				set_allowed_at(da, now);
			}
		}
	}

	return 0;
}

/* Attach the device at pci_address to the given driver. If SPDK already
 * tracks the device, enum_cb is invoked directly; otherwise the device is
 * hot-plugged into DPDK (with IPC retries) and probed via pci_device_init().
 */
int
spdk_pci_device_attach(struct spdk_pci_driver *driver,
		       spdk_pci_enum_cb enum_cb,
		       void *enum_ctx, struct spdk_pci_addr *pci_address)
{
	struct spdk_pci_device *dev;
	struct rte_pci_device *rte_dev;
	struct rte_devargs *da;
	int rc;
	char bdf[32];

	spdk_pci_addr_fmt(bdf, sizeof(bdf), pci_address);

	cleanup_pci_devices();

	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
		if (spdk_pci_addr_compare(&dev->addr, pci_address) == 0) {
			break;
		}
	}

	if (dev != NULL && dev->internal.driver == driver) {
		pthread_mutex_lock(&g_pci_mutex);
		if (dev->internal.attached || dev->internal.pending_removal) {
			pthread_mutex_unlock(&g_pci_mutex);
			return -1;
		}

		rc = enum_cb(enum_ctx, dev);
		if (rc == 0) {
			dev->internal.attached = true;
		}
		pthread_mutex_unlock(&g_pci_mutex);
		return rc;
	}

	/* the probe callback (pci_device_init) picks these up */
	driver->cb_fn = enum_cb;
	driver->cb_arg = enum_ctx;

	int i = 0;

	do {
		rc = rte_eal_hotplug_add("pci", bdf, "");
	} while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT);

	if (i > 1 && rc == -EEXIST) {
		/* Even though the previous request timed out, the device
		 * was attached successfully.
		 */
		rc = 0;
	}

	driver->cb_arg = NULL;
	driver->cb_fn = NULL;

	cleanup_pci_devices();

	if (rc != 0) {
		return -1;
	}

	/* explicit attach ignores the allowlist, so if we blocked this
	 * device before let's enable it now - just for clarity.
	 */
	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
		if (spdk_pci_addr_compare(&dev->addr, pci_address) == 0) {
			break;
		}
	}
	assert(dev != NULL);

	rte_dev = dev->dev_handle;
	da = rte_dev->device.devargs;
	if (da && get_allowed_at(da)) {
		set_allowed_at(da, spdk_get_ticks());
		da->policy = RTE_DEV_ALLOWED;
	}

	return 0;
}

/* Note: You can call spdk_pci_enumerate from more than one thread
 * simultaneously safely, but you cannot call spdk_pci_enumerate
 * and rte_eal_pci_probe simultaneously.
 */
int
spdk_pci_enumerate(struct spdk_pci_driver *driver,
		   spdk_pci_enum_cb enum_cb,
		   void *enum_ctx)
{
	struct spdk_pci_device *dev;
	int rc;

	cleanup_pci_devices();

	/* first offer already-known, unattached devices to the callback */
	pthread_mutex_lock(&g_pci_mutex);
	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
		if (dev->internal.attached ||
		    dev->internal.driver != driver ||
		    dev->internal.pending_removal) {
			continue;
		}

		rc = enum_cb(enum_ctx, dev);
		if (rc == 0) {
			dev->internal.attached = true;
		} else if (rc < 0) {
			pthread_mutex_unlock(&g_pci_mutex);
			return -1;
		}
	}
	pthread_mutex_unlock(&g_pci_mutex);

	/* then rescan the bus and probe anything newly allowed */
	if (scan_pci_bus(true) != 0) {
		return -1;
	}

	driver->cb_fn = enum_cb;
	driver->cb_arg = enum_ctx;

	if (rte_bus_probe() != 0) {
		driver->cb_arg = NULL;
		driver->cb_fn = NULL;
		return -1;
	}

	driver->cb_arg = NULL;
	driver->cb_fn = NULL;

	cleanup_pci_devices();
	return 0;
}

struct spdk_pci_device *
spdk_pci_get_first_device(void)
{
	return TAILQ_FIRST(&g_pci_devices);
}

struct spdk_pci_device *
spdk_pci_get_next_device(struct spdk_pci_device *prev)
{
	return TAILQ_NEXT(prev, internal.tailq);
}

int
spdk_pci_device_map_bar(struct spdk_pci_device *dev, uint32_t bar,
			void **mapped_addr, uint64_t *phys_addr, uint64_t *size)
{
	return dev->map_bar(dev, bar, mapped_addr, phys_addr, size);
}

int
spdk_pci_device_unmap_bar(struct spdk_pci_device *dev, uint32_t bar, void *addr)
{
	return dev->unmap_bar(dev, bar, addr);
}

uint32_t
spdk_pci_device_get_domain(struct spdk_pci_device *dev)
{
	return dev->addr.domain;
}

uint8_t
spdk_pci_device_get_bus(struct spdk_pci_device *dev)
{
	return dev->addr.bus;
}

uint8_t
spdk_pci_device_get_dev(struct spdk_pci_device *dev)
{
	return dev->addr.dev;
}

uint8_t
spdk_pci_device_get_func(struct spdk_pci_device *dev)
{
	return dev->addr.func;
}

uint16_t
spdk_pci_device_get_vendor_id(struct spdk_pci_device *dev)
{
	return dev->id.vendor_id;
}

uint16_t
spdk_pci_device_get_device_id(struct spdk_pci_device *dev)
{
	return dev->id.device_id;
}

uint16_t
spdk_pci_device_get_subvendor_id(struct spdk_pci_device *dev)
{
	return dev->id.subvendor_id;
}

uint16_t
spdk_pci_device_get_subdevice_id(struct spdk_pci_device *dev)
{
	return dev->id.subdevice_id;
}

struct spdk_pci_id
spdk_pci_device_get_id(struct spdk_pci_device *dev)
{
	return dev->id;
}

int
spdk_pci_device_get_socket_id(struct spdk_pci_device *dev)
{
	return dev->socket_id;
}

int
spdk_pci_device_cfg_read(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
{
	return dev->cfg_read(dev, value, len, offset);
}

int
spdk_pci_device_cfg_write(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
{
	return dev->cfg_write(dev, value, len, offset);
}

int
spdk_pci_device_cfg_read8(struct spdk_pci_device *dev, uint8_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, 1, offset);
}

int
spdk_pci_device_cfg_write8(struct spdk_pci_device *dev, uint8_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, 1, offset);
}

int
spdk_pci_device_cfg_read16(struct spdk_pci_device *dev, uint16_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, 2, offset);
}

int
spdk_pci_device_cfg_write16(struct spdk_pci_device *dev, uint16_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, 2, offset);
}

int
spdk_pci_device_cfg_read32(struct spdk_pci_device *dev, uint32_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, 4, offset);
}

int
spdk_pci_device_cfg_write32(struct spdk_pci_device *dev, uint32_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, 4, offset);
}

/* Walk the PCIe extended capability list looking for the Device Serial Number
 * capability and format its 8 bytes into sn as 16 hex digits. Requires
 * len >= 17 (room for the NUL terminator); returns -1 on any failure.
 */
int
spdk_pci_device_get_serial_number(struct spdk_pci_device *dev, char *sn, size_t len)
{
	int err;
	uint32_t pos, header = 0;
	uint32_t i, buf[2];

	if (len < 17) {
		return -1;
	}

	err = spdk_pci_device_cfg_read32(dev, &header, PCI_CFG_SIZE);
	if (err || !header) {
		return -1;
	}

	pos = PCI_CFG_SIZE;
	while (1) {
		if ((header & 0x0000ffff) == PCI_EXT_CAP_ID_SN) {
			if (pos) {
				/* skip the header */
				pos += 4;
				for (i = 0; i < 2; i++) {
					err = spdk_pci_device_cfg_read32(dev, &buf[i], pos + 4 * i);
					if (err) {
						return -1;
					}
				}
				/* upper dword first, per the little-endian config reads */
				snprintf(sn, len, "%08x%08x", buf[1], buf[0]);
				return 0;
			}
		}
		/* the next-capability offset lives in the header's top 12 bits */
		pos = (header >> 20) & 0xffc;
		/* 0 if no other items exist */
		if (pos < PCI_CFG_SIZE) {
			return -1;
		}
		err = spdk_pci_device_cfg_read32(dev, &header, pos);
		if (err) {
			return -1;
		}
	}
	return -1;
}

struct spdk_pci_addr
spdk_pci_device_get_addr(struct spdk_pci_device *dev)
{
	return dev->addr;
}

/* True once a hotremove was observed for this device (pending_removal). */
bool
spdk_pci_device_is_removed(struct spdk_pci_device *dev)
{
	return dev->internal.pending_removal;
}

/* Lexicographic comparison of two PCI addresses: domain, bus, dev, func.
 * Returns -1/0/1 like strcmp.
 */
int
spdk_pci_addr_compare(const struct spdk_pci_addr *a1, const struct spdk_pci_addr *a2)
{
	if (a1->domain > a2->domain) {
		return 1;
	} else if (a1->domain < a2->domain) {
		return -1;
	} else if (a1->bus > a2->bus) {
		return 1;
	} else if (a1->bus < a2->bus) {
		return -1;
	} else if (a1->dev > a2->dev) {
		return 1;
	} else if (a1->dev < a2->dev) {
		return -1;
	} else if (a1->func > a2->func) {
		return 1;
	} else if (a1->func < a2->func) {
		return -1;
	}

	return 0;
}

#ifdef __linux__
/* Take an exclusive cross-process claim on the device using an fcntl write
 * lock on a /var/tmp lock file; the claiming PID is stored in the file.
 * NOTE(review): SPDK_ERRLOG()/close() may clobber errno before the -errno
 * returns in the ftruncate/mmap failure paths — verify if exact errno matters.
 */
int
spdk_pci_device_claim(struct spdk_pci_device *dev)
{
	int dev_fd;
	char dev_name[64];
	int pid;
	void *dev_map;
	struct flock pcidev_lock = {
		.l_type = F_WRLCK,
		.l_whence = SEEK_SET,
		.l_start = 0,
		.l_len = 0,
	};

	snprintf(dev_name, sizeof(dev_name), "/var/tmp/spdk_pci_lock_%04x:%02x:%02x.%x",
		 dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func);

	dev_fd = open(dev_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
	if (dev_fd == -1) {
		SPDK_ERRLOG("could not open %s\n", dev_name);
		return -errno;
	}

	if (ftruncate(dev_fd, sizeof(int)) != 0) {
		SPDK_ERRLOG("could not truncate %s\n", dev_name);
		close(dev_fd);
		return -errno;
	}

	dev_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
		       MAP_SHARED, dev_fd, 0);
	if (dev_map == MAP_FAILED) {
		SPDK_ERRLOG("could not mmap dev %s (%d)\n", dev_name, errno);
		close(dev_fd);
		return -errno;
	}

	if (fcntl(dev_fd, F_SETLK, &pcidev_lock) != 0) {
		pid = *(int *)dev_map;
		SPDK_ERRLOG("Cannot create lock on device %s, probably"
			    " process %d has claimed it\n", dev_name, pid);
		munmap(dev_map, sizeof(int));
		close(dev_fd);
		/* F_SETLK returns unspecified errnos, normalize them */
		return -EACCES;
	}

	*(int *)dev_map = (int)getpid();
	munmap(dev_map, sizeof(int));
	dev->internal.claim_fd = dev_fd;
	/* Keep dev_fd open to maintain the lock. */
	return 0;
}

/* Drop the claim: closing the fd releases the fcntl lock, then remove the
 * lock file.
 */
void
spdk_pci_device_unclaim(struct spdk_pci_device *dev)
{
	char dev_name[64];

	snprintf(dev_name, sizeof(dev_name), "/var/tmp/spdk_pci_lock_%04x:%02x:%02x.%x",
		 dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func);

	close(dev->internal.claim_fd);
	dev->internal.claim_fd = -1;
	unlink(dev_name);
}
#else /* !__linux__ */
int
spdk_pci_device_claim(struct spdk_pci_device *dev)
{
	/* TODO */
	return 0;
}

void
spdk_pci_device_unclaim(struct spdk_pci_device *dev)
{
	/* TODO */
}
#endif /* __linux__ */

/* Parse a BDF string into addr. Accepted forms (hex fields):
 * D:B:d.f / D.B.d.f / D:B:d / B:d.f / B.d.f / B:d / B.d,
 * with omitted domain/function defaulting to 0. Returns -EINVAL on bad input
 * or out-of-range bus/dev/func.
 */
int
spdk_pci_addr_parse(struct spdk_pci_addr *addr, const char *bdf)
{
	unsigned domain, bus, dev, func;

	if (addr == NULL || bdf == NULL) {
		return -EINVAL;
	}

	if ((sscanf(bdf, "%x:%x:%x.%x", &domain, &bus, &dev, &func) == 4) ||
	    (sscanf(bdf, "%x.%x.%x.%x", &domain, &bus, &dev, &func) == 4)) {
		/* Matched a full address - all variables are initialized */
	} else if (sscanf(bdf, "%x:%x:%x", &domain, &bus, &dev) == 3) {
		func = 0;
	} else if ((sscanf(bdf, "%x:%x.%x", &bus, &dev, &func) == 3) ||
		   (sscanf(bdf, "%x.%x.%x", &bus, &dev, &func) == 3)) {
		domain = 0;
	} else if ((sscanf(bdf, "%x:%x", &bus, &dev) == 2) ||
		   (sscanf(bdf, "%x.%x", &bus, &dev) == 2)) {
		domain = 0;
		func = 0;
	} else {
		return -EINVAL;
	}

	if (bus > 0xFF || dev > 0x1F || func > 7) {
		return -EINVAL;
	}

	addr->domain = domain;
	addr->bus = bus;
	addr->dev = dev;
	addr->func = func;

	return 0;
}

/* Format addr into bdf as "DDDD:BB:DD.F"; returns -1 if sz is too small. */
int
spdk_pci_addr_fmt(char *bdf, size_t sz, const struct spdk_pci_addr *addr)
{
	int rc;

	rc = snprintf(bdf, sz, "%04x:%02x:%02x.%x",
		      addr->domain, addr->bus,
		      addr->dev, addr->func);

	if (rc > 0 && (size_t)rc < sz) {
		return 0;
	}

	return -1;
}

/* Insert an externally-managed (non-DPDK) device into the global device list.
 * The caller must have populated all four accessor callbacks.
 */
void
spdk_pci_hook_device(struct spdk_pci_driver *drv, struct spdk_pci_device *dev)
{
	assert(dev->map_bar != NULL);
	assert(dev->unmap_bar != NULL);
	assert(dev->cfg_read != NULL);
	assert(dev->cfg_write != NULL);
	dev->internal.driver = drv;
	TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq);
}

/* Remove a hooked device from the global list; it must be detached first. */
void
spdk_pci_unhook_device(struct spdk_pci_device *dev)
{
	assert(!dev->internal.attached);
	TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq);
}

const char *
spdk_pci_device_get_type(const struct spdk_pci_device *dev)
{
	return dev->type;
}

/* Insert an ALLOWED-policy devargs entry for the given address so DPDK will
 * probe the device even under an allowlist scan mode.
 */
int
spdk_pci_device_allow(struct spdk_pci_addr *pci_addr)
{
	struct rte_devargs *da;
	char devargs_str[128];

	da = calloc(1, sizeof(*da));
	if (da == NULL) {
		SPDK_ERRLOG("could not allocate rte_devargs\n");
		return -ENOMEM;
	}

	snprintf(devargs_str, sizeof(devargs_str), "pci:%04x:%02x:%02x.%x",
		 pci_addr->domain, pci_addr->bus, pci_addr->dev, pci_addr->func);
	if (rte_devargs_parse(da, devargs_str) != 0) {
		SPDK_ERRLOG("rte_devargs_parse() failed on '%s'\n", devargs_str);
		free(da);
		return -EINVAL;
	}
	da->policy = RTE_DEV_ALLOWED;
	/* Note: if a devargs already exists for this device address, it just gets
	 * overridden. So we do not need to check if the devargs already exists.
	 * DPDK will take care of memory management for the devargs structure after
	 * it has been inserted, so there's nothing SPDK needs to track.
	 */
	if (rte_devargs_insert(&da) != 0) {
		SPDK_ERRLOG("rte_devargs_insert() failed on '%s'\n", devargs_str);
		free(da);
		return -EINVAL;
	}

	return 0;
}