1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include "env_internal.h" 35 36 #include <rte_alarm.h> 37 #include <rte_devargs.h> 38 #include "spdk/env.h" 39 #include "spdk/log.h" 40 41 #define SYSFS_PCI_DRIVERS "/sys/bus/pci/drivers" 42 43 /* Compatibility for versions < 20.11 */ 44 #if RTE_VERSION < RTE_VERSION_NUM(20, 11, 0, 0) 45 #define RTE_DEV_ALLOWED RTE_DEV_WHITELISTED 46 #define RTE_DEV_BLOCKED RTE_DEV_BLACKLISTED 47 #define RTE_BUS_SCAN_ALLOWLIST RTE_BUS_SCAN_WHITELIST 48 #endif 49 50 #define PCI_CFG_SIZE 256 51 #define PCI_EXT_CAP_ID_SN 0x03 52 53 /* DPDK 18.11+ hotplug isn't robust. Multiple apps starting at the same time 54 * might cause the internal IPC to misbehave. Just retry in such case. 55 */ 56 #define DPDK_HOTPLUG_RETRY_COUNT 4 57 58 /* DPDK alarm/interrupt thread */ 59 static pthread_mutex_t g_pci_mutex = PTHREAD_MUTEX_INITIALIZER; 60 static TAILQ_HEAD(, spdk_pci_device) g_pci_devices = TAILQ_HEAD_INITIALIZER(g_pci_devices); 61 /* devices hotplugged on a dpdk thread */ 62 static TAILQ_HEAD(, spdk_pci_device) g_pci_hotplugged_devices = 63 TAILQ_HEAD_INITIALIZER(g_pci_hotplugged_devices); 64 static TAILQ_HEAD(, spdk_pci_driver) g_pci_drivers = TAILQ_HEAD_INITIALIZER(g_pci_drivers); 65 66 static int 67 map_bar_rte(struct spdk_pci_device *device, uint32_t bar, 68 void **mapped_addr, uint64_t *phys_addr, uint64_t *size) 69 { 70 struct rte_pci_device *dev = device->dev_handle; 71 72 *mapped_addr = dev->mem_resource[bar].addr; 73 *phys_addr = (uint64_t)dev->mem_resource[bar].phys_addr; 74 *size = (uint64_t)dev->mem_resource[bar].len; 75 76 return 0; 77 } 78 79 static int 80 unmap_bar_rte(struct spdk_pci_device *device, uint32_t bar, void *addr) 81 { 82 return 0; 83 } 84 85 static int 86 cfg_read_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset) 87 { 88 int rc; 89 90 rc = rte_pci_read_config(dev->dev_handle, value, len, offset); 91 92 return (rc > 0 && (uint32_t) rc == len) ? 0 : -1; 93 } 94 95 static int 96 cfg_write_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset) 97 { 98 int rc; 99 100 rc = rte_pci_write_config(dev->dev_handle, value, len, offset); 101 102 #ifdef __FreeBSD__ 103 /* DPDK returns 0 on success and -1 on failure */ 104 return rc; 105 #endif 106 return (rc > 0 && (uint32_t) rc == len) ? 0 : -1; 107 } 108 109 static void 110 remove_rte_dev(struct rte_pci_device *rte_dev) 111 { 112 char bdf[32]; 113 int i = 0, rc; 114 115 snprintf(bdf, sizeof(bdf), "%s", rte_dev->device.name); 116 do { 117 rc = rte_eal_hotplug_remove("pci", bdf); 118 } while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT); 119 } 120 121 static void 122 detach_rte_cb(void *_dev) 123 { 124 remove_rte_dev(_dev); 125 } 126 127 static void 128 detach_rte(struct spdk_pci_device *dev) 129 { 130 struct rte_pci_device *rte_dev = dev->dev_handle; 131 int i; 132 bool removed; 133 134 if (!spdk_process_is_primary()) { 135 remove_rte_dev(rte_dev); 136 return; 137 } 138 139 pthread_mutex_lock(&g_pci_mutex); 140 dev->internal.attached = false; 141 /* prevent the hotremove notification from removing this device */ 142 dev->internal.pending_removal = true; 143 pthread_mutex_unlock(&g_pci_mutex); 144 145 rte_eal_alarm_set(1, detach_rte_cb, rte_dev); 146 147 /* wait up to 2s for the cb to execute */ 148 for (i = 2000; i > 0; i--) { 149 150 spdk_delay_us(1000); 151 pthread_mutex_lock(&g_pci_mutex); 152 removed = dev->internal.removed; 153 pthread_mutex_unlock(&g_pci_mutex); 154 155 if (removed) { 156 break; 157 } 158 } 159 160 /* besides checking the removed flag, we also need to wait 161 * for the dpdk detach function to unwind, as it's doing some 162 * operations even after calling our detach callback. Simply 163 * cancel the alarm - if it started executing already, this 164 * call will block and wait for it to finish. 165 */ 166 rte_eal_alarm_cancel(detach_rte_cb, rte_dev); 167 168 /* the device could have been finally removed, so just check 169 * it again. 170 */ 171 pthread_mutex_lock(&g_pci_mutex); 172 removed = dev->internal.removed; 173 pthread_mutex_unlock(&g_pci_mutex); 174 if (!removed) { 175 SPDK_ERRLOG("Timeout waiting for DPDK to remove PCI device %s.\n", 176 rte_dev->name); 177 /* If we reach this state, then the device couldn't be removed and most likely 178 a subsequent hot add of a device in the same BDF will fail */ 179 } 180 } 181 182 void 183 spdk_pci_driver_register(const char *name, struct spdk_pci_id *id_table, uint32_t flags) 184 { 185 struct spdk_pci_driver *driver; 186 187 driver = calloc(1, sizeof(*driver)); 188 if (!driver) { 189 /* we can't do any better than bailing atm */ 190 return; 191 } 192 193 driver->name = name; 194 driver->id_table = id_table; 195 driver->drv_flags = flags; 196 TAILQ_INSERT_TAIL(&g_pci_drivers, driver, tailq); 197 } 198 199 struct spdk_pci_driver * 200 spdk_pci_nvme_get_driver(void) 201 { 202 return spdk_pci_get_driver("nvme"); 203 } 204 205 struct spdk_pci_driver * 206 spdk_pci_get_driver(const char *name) 207 { 208 struct spdk_pci_driver *driver; 209 210 TAILQ_FOREACH(driver, &g_pci_drivers, tailq) { 211 if (strcmp(driver->name, name) == 0) { 212 return driver; 213 } 214 } 215 216 return NULL; 217 } 218 219 static void 220 pci_device_rte_dev_event(const char *device_name, 221 enum rte_dev_event_type event, 222 void *cb_arg) 223 { 224 struct spdk_pci_device *dev; 225 bool can_detach = false; 226 227 switch (event) { 228 default: 229 case RTE_DEV_EVENT_ADD: 230 /* Nothing to do here yet. */ 231 break; 232 case RTE_DEV_EVENT_REMOVE: 233 pthread_mutex_lock(&g_pci_mutex); 234 TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { 235 struct rte_pci_device *rte_dev = dev->dev_handle; 236 237 if (strcmp(rte_dev->name, device_name) == 0 && 238 !dev->internal.pending_removal) { 239 can_detach = !dev->internal.attached; 240 /* prevent any further attaches */ 241 dev->internal.pending_removal = true; 242 break; 243 } 244 } 245 pthread_mutex_unlock(&g_pci_mutex); 246 247 if (dev != NULL && can_detach) { 248 /* if device is not attached we can remove it right away. 249 * Otherwise it will be removed at detach. */ 250 remove_rte_dev(dev->dev_handle); 251 } 252 break; 253 } 254 } 255 256 static void 257 cleanup_pci_devices(void) 258 { 259 struct spdk_pci_device *dev, *tmp; 260 261 pthread_mutex_lock(&g_pci_mutex); 262 /* cleanup removed devices */ 263 TAILQ_FOREACH_SAFE(dev, &g_pci_devices, internal.tailq, tmp) { 264 if (!dev->internal.removed) { 265 continue; 266 } 267 268 vtophys_pci_device_removed(dev->dev_handle); 269 TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq); 270 free(dev); 271 } 272 273 /* add newly-attached devices */ 274 TAILQ_FOREACH_SAFE(dev, &g_pci_hotplugged_devices, internal.tailq, tmp) { 275 TAILQ_REMOVE(&g_pci_hotplugged_devices, dev, internal.tailq); 276 TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq); 277 vtophys_pci_device_added(dev->dev_handle); 278 } 279 pthread_mutex_unlock(&g_pci_mutex); 280 } 281 282 static int scan_pci_bus(bool delay_init); 283 284 /* translate spdk_pci_driver to an rte_pci_driver and register it to dpdk */ 285 static int 286 register_rte_driver(struct spdk_pci_driver *driver) 287 { 288 unsigned pci_id_count = 0; 289 struct rte_pci_id *rte_id_table; 290 char *rte_name; 291 size_t rte_name_len; 292 uint32_t rte_flags; 293 294 assert(driver->id_table); 295 while (driver->id_table[pci_id_count].vendor_id) { 296 pci_id_count++; 297 } 298 assert(pci_id_count > 0); 299 300 rte_id_table = calloc(pci_id_count + 1, sizeof(*rte_id_table)); 301 if (!rte_id_table) { 302 return -ENOMEM; 303 } 304 305 while (pci_id_count > 0) { 306 struct rte_pci_id *rte_id = &rte_id_table[pci_id_count - 1]; 307 const struct spdk_pci_id *spdk_id = &driver->id_table[pci_id_count - 1]; 308 309 rte_id->class_id = spdk_id->class_id; 310 rte_id->vendor_id = spdk_id->vendor_id; 311 rte_id->device_id = spdk_id->device_id; 312 rte_id->subsystem_vendor_id = spdk_id->subvendor_id; 313 rte_id->subsystem_device_id = spdk_id->subdevice_id; 314 pci_id_count--; 315 } 316 317 assert(driver->name); 318 rte_name_len = strlen(driver->name) + strlen("spdk_") + 1; 319 rte_name = calloc(rte_name_len, 1); 320 if (!rte_name) { 321 free(rte_id_table); 322 return -ENOMEM; 323 } 324 325 snprintf(rte_name, rte_name_len, "spdk_%s", driver->name); 326 driver->driver.driver.name = rte_name; 327 driver->driver.id_table = rte_id_table; 328 329 rte_flags = 0; 330 if (driver->drv_flags & SPDK_PCI_DRIVER_NEED_MAPPING) { 331 rte_flags |= RTE_PCI_DRV_NEED_MAPPING; 332 } 333 if (driver->drv_flags & SPDK_PCI_DRIVER_WC_ACTIVATE) { 334 rte_flags |= RTE_PCI_DRV_WC_ACTIVATE; 335 } 336 driver->driver.drv_flags = rte_flags; 337 338 driver->driver.probe = pci_device_init; 339 driver->driver.remove = pci_device_fini; 340 341 rte_pci_register(&driver->driver); 342 return 0; 343 } 344 345 static inline void 346 _pci_env_init(void) 347 { 348 /* We assume devices were present on the bus for more than 2 seconds 349 * before initializing SPDK and there's no need to wait more. We scan 350 * the bus, but we don't block any devices. 351 */ 352 scan_pci_bus(false); 353 354 /* Register a single hotremove callback for all devices. */ 355 if (spdk_process_is_primary()) { 356 rte_dev_event_callback_register(NULL, pci_device_rte_dev_event, NULL); 357 } 358 } 359 360 void 361 pci_env_init(void) 362 { 363 struct spdk_pci_driver *driver; 364 365 TAILQ_FOREACH(driver, &g_pci_drivers, tailq) { 366 register_rte_driver(driver); 367 } 368 369 _pci_env_init(); 370 } 371 372 void 373 pci_env_reinit(void) 374 { 375 /* There is no need to register pci drivers again, since they were 376 * already pre-registered in pci_env_init. 377 */ 378 379 _pci_env_init(); 380 } 381 382 void 383 pci_env_fini(void) 384 { 385 struct spdk_pci_device *dev; 386 char bdf[32]; 387 388 cleanup_pci_devices(); 389 TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { 390 if (dev->internal.attached) { 391 spdk_pci_addr_fmt(bdf, sizeof(bdf), &dev->addr); 392 SPDK_ERRLOG("Device %s is still attached at shutdown!\n", bdf); 393 } 394 } 395 396 if (spdk_process_is_primary()) { 397 rte_dev_event_callback_unregister(NULL, pci_device_rte_dev_event, NULL); 398 } 399 } 400 401 int 402 pci_device_init(struct rte_pci_driver *_drv, 403 struct rte_pci_device *_dev) 404 { 405 struct spdk_pci_driver *driver = (struct spdk_pci_driver *)_drv; 406 struct spdk_pci_device *dev; 407 int rc; 408 409 dev = calloc(1, sizeof(*dev)); 410 if (dev == NULL) { 411 return -1; 412 } 413 414 dev->dev_handle = _dev; 415 416 dev->addr.domain = _dev->addr.domain; 417 dev->addr.bus = _dev->addr.bus; 418 dev->addr.dev = _dev->addr.devid; 419 dev->addr.func = _dev->addr.function; 420 dev->id.class_id = _dev->id.class_id; 421 dev->id.vendor_id = _dev->id.vendor_id; 422 dev->id.device_id = _dev->id.device_id; 423 dev->id.subvendor_id = _dev->id.subsystem_vendor_id; 424 dev->id.subdevice_id = _dev->id.subsystem_device_id; 425 dev->socket_id = _dev->device.numa_node; 426 dev->type = "pci"; 427 428 dev->map_bar = map_bar_rte; 429 dev->unmap_bar = unmap_bar_rte; 430 dev->cfg_read = cfg_read_rte; 431 dev->cfg_write = cfg_write_rte; 432 433 dev->internal.driver = driver; 434 dev->internal.claim_fd = -1; 435 436 if (driver->cb_fn != NULL) { 437 rc = driver->cb_fn(driver->cb_arg, dev); 438 if (rc != 0) { 439 free(dev); 440 return rc; 441 } 442 dev->internal.attached = true; 443 } 444 445 pthread_mutex_lock(&g_pci_mutex); 446 TAILQ_INSERT_TAIL(&g_pci_hotplugged_devices, dev, internal.tailq); 447 pthread_mutex_unlock(&g_pci_mutex); 448 return 0; 449 } 450 451 int 452 pci_device_fini(struct rte_pci_device *_dev) 453 { 454 struct spdk_pci_device *dev; 455 456 pthread_mutex_lock(&g_pci_mutex); 457 TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { 458 if (dev->dev_handle == _dev) { 459 break; 460 } 461 } 462 463 if (dev == NULL || dev->internal.attached) { 464 /* The device might be still referenced somewhere in SPDK. */ 465 pthread_mutex_unlock(&g_pci_mutex); 466 return -1; 467 } 468 469 /* remove our allowed_at option */ 470 if (_dev->device.devargs) { 471 _dev->device.devargs->data = NULL; 472 } 473 474 assert(!dev->internal.removed); 475 dev->internal.removed = true; 476 pthread_mutex_unlock(&g_pci_mutex); 477 return 0; 478 479 } 480 481 void 482 spdk_pci_device_detach(struct spdk_pci_device *dev) 483 { 484 assert(dev->internal.attached); 485 486 if (dev->internal.claim_fd >= 0) { 487 spdk_pci_device_unclaim(dev); 488 } 489 490 if (strcmp(dev->type, "pci") == 0) { 491 /* if it's a physical device we need to deal with DPDK on 492 * a different process and we can't just unset one flag 493 * here. We also want to stop using any device resources 494 * so that the device isn't "in use" by the userspace driver 495 * once we detach it. This would allow attaching the device 496 * to a different process, or to a kernel driver like nvme. 497 */ 498 detach_rte(dev); 499 } else { 500 dev->internal.attached = false; 501 } 502 503 cleanup_pci_devices(); 504 } 505 506 static int 507 scan_pci_bus(bool delay_init) 508 { 509 struct spdk_pci_driver *driver; 510 struct rte_pci_device *rte_dev; 511 uint64_t now; 512 513 rte_bus_scan(); 514 now = spdk_get_ticks(); 515 516 driver = TAILQ_FIRST(&g_pci_drivers); 517 if (!driver) { 518 return 0; 519 } 520 521 TAILQ_FOREACH(rte_dev, &driver->driver.bus->device_list, next) { 522 struct rte_devargs *da; 523 524 da = rte_dev->device.devargs; 525 if (!da) { 526 char devargs_str[128]; 527 528 /* the device was never blocked or allowed */ 529 da = calloc(1, sizeof(*da)); 530 if (!da) { 531 return -1; 532 } 533 534 snprintf(devargs_str, sizeof(devargs_str), "pci:%s", rte_dev->device.name); 535 if (rte_devargs_parse(da, devargs_str) != 0) { 536 free(da); 537 return -1; 538 } 539 540 rte_devargs_insert(&da); 541 rte_dev->device.devargs = da; 542 } 543 544 if (da->data) { 545 uint64_t allowed_at = (uint64_t)(uintptr_t)da->data; 546 547 /* this device was seen by spdk before... */ 548 if (da->policy == RTE_DEV_BLOCKED && allowed_at <= now) { 549 da->policy = RTE_DEV_ALLOWED; 550 } 551 } else if ((driver->driver.bus->bus.conf.scan_mode == RTE_BUS_SCAN_ALLOWLIST && 552 da->policy == RTE_DEV_ALLOWED) || da->policy != RTE_DEV_BLOCKED) { 553 /* override the policy only if not permanently blocked */ 554 555 if (delay_init) { 556 da->policy = RTE_DEV_BLOCKED; 557 da->data = (void *)(now + 2 * spdk_get_ticks_hz()); 558 } else { 559 da->policy = RTE_DEV_ALLOWED; 560 da->data = (void *)(uintptr_t)now; 561 } 562 } 563 } 564 565 return 0; 566 } 567 568 int 569 spdk_pci_device_attach(struct spdk_pci_driver *driver, 570 spdk_pci_enum_cb enum_cb, 571 void *enum_ctx, struct spdk_pci_addr *pci_address) 572 { 573 struct spdk_pci_device *dev; 574 struct rte_pci_device *rte_dev; 575 struct rte_devargs *da; 576 int rc; 577 char bdf[32]; 578 579 spdk_pci_addr_fmt(bdf, sizeof(bdf), pci_address); 580 581 cleanup_pci_devices(); 582 583 TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { 584 if (spdk_pci_addr_compare(&dev->addr, pci_address) == 0) { 585 break; 586 } 587 } 588 589 if (dev != NULL && dev->internal.driver == driver) { 590 pthread_mutex_lock(&g_pci_mutex); 591 if (dev->internal.attached || dev->internal.pending_removal) { 592 pthread_mutex_unlock(&g_pci_mutex); 593 return -1; 594 } 595 596 rc = enum_cb(enum_ctx, dev); 597 if (rc == 0) { 598 dev->internal.attached = true; 599 } 600 pthread_mutex_unlock(&g_pci_mutex); 601 return rc; 602 } 603 604 driver->cb_fn = enum_cb; 605 driver->cb_arg = enum_ctx; 606 607 int i = 0; 608 609 do { 610 rc = rte_eal_hotplug_add("pci", bdf, ""); 611 } while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT); 612 613 if (i > 1 && rc == -EEXIST) { 614 /* Even though the previous request timed out, the device 615 * was attached successfully. 616 */ 617 rc = 0; 618 } 619 620 driver->cb_arg = NULL; 621 driver->cb_fn = NULL; 622 623 cleanup_pci_devices(); 624 625 if (rc != 0) { 626 return -1; 627 } 628 629 /* explicit attach ignores the allowlist, so if we blocked this 630 * device before let's enable it now - just for clarity. 631 */ 632 TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { 633 if (spdk_pci_addr_compare(&dev->addr, pci_address) == 0) { 634 break; 635 } 636 } 637 assert(dev != NULL); 638 639 rte_dev = dev->dev_handle; 640 da = rte_dev->device.devargs; 641 if (da && da->data) { 642 da->data = (void *)(uintptr_t)spdk_get_ticks(); 643 da->policy = RTE_DEV_ALLOWED; 644 } 645 646 return 0; 647 } 648 649 /* Note: You can call spdk_pci_enumerate from more than one thread 650 * simultaneously safely, but you cannot call spdk_pci_enumerate 651 * and rte_eal_pci_probe simultaneously. 652 */ 653 int 654 spdk_pci_enumerate(struct spdk_pci_driver *driver, 655 spdk_pci_enum_cb enum_cb, 656 void *enum_ctx) 657 { 658 struct spdk_pci_device *dev; 659 int rc; 660 661 cleanup_pci_devices(); 662 663 pthread_mutex_lock(&g_pci_mutex); 664 TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { 665 if (dev->internal.attached || 666 dev->internal.driver != driver || 667 dev->internal.pending_removal) { 668 continue; 669 } 670 671 rc = enum_cb(enum_ctx, dev); 672 if (rc == 0) { 673 dev->internal.attached = true; 674 } else if (rc < 0) { 675 pthread_mutex_unlock(&g_pci_mutex); 676 return -1; 677 } 678 } 679 pthread_mutex_unlock(&g_pci_mutex); 680 681 if (scan_pci_bus(true) != 0) { 682 return -1; 683 } 684 685 driver->cb_fn = enum_cb; 686 driver->cb_arg = enum_ctx; 687 688 if (rte_bus_probe() != 0) { 689 driver->cb_arg = NULL; 690 driver->cb_fn = NULL; 691 return -1; 692 } 693 694 driver->cb_arg = NULL; 695 driver->cb_fn = NULL; 696 697 cleanup_pci_devices(); 698 return 0; 699 } 700 701 struct spdk_pci_device * 702 spdk_pci_get_first_device(void) 703 { 704 return TAILQ_FIRST(&g_pci_devices); 705 } 706 707 struct spdk_pci_device * 708 spdk_pci_get_next_device(struct spdk_pci_device *prev) 709 { 710 return TAILQ_NEXT(prev, internal.tailq); 711 } 712 713 int 714 spdk_pci_device_map_bar(struct spdk_pci_device *dev, uint32_t bar, 715 void **mapped_addr, uint64_t *phys_addr, uint64_t *size) 716 { 717 return dev->map_bar(dev, bar, mapped_addr, phys_addr, size); 718 } 719 720 int 721 spdk_pci_device_unmap_bar(struct spdk_pci_device *dev, uint32_t bar, void *addr) 722 { 723 return dev->unmap_bar(dev, bar, addr); 724 } 725 726 uint32_t 727 spdk_pci_device_get_domain(struct spdk_pci_device *dev) 728 { 729 return dev->addr.domain; 730 } 731 732 uint8_t 733 spdk_pci_device_get_bus(struct spdk_pci_device *dev) 734 { 735 return dev->addr.bus; 736 } 737 738 uint8_t 739 spdk_pci_device_get_dev(struct spdk_pci_device *dev) 740 { 741 return dev->addr.dev; 742 } 743 744 uint8_t 745 spdk_pci_device_get_func(struct spdk_pci_device *dev) 746 { 747 return dev->addr.func; 748 } 749 750 uint16_t 751 spdk_pci_device_get_vendor_id(struct spdk_pci_device *dev) 752 { 753 return dev->id.vendor_id; 754 } 755 756 uint16_t 757 spdk_pci_device_get_device_id(struct spdk_pci_device *dev) 758 { 759 return dev->id.device_id; 760 } 761 762 uint16_t 763 spdk_pci_device_get_subvendor_id(struct spdk_pci_device *dev) 764 { 765 return dev->id.subvendor_id; 766 } 767 768 uint16_t 769 spdk_pci_device_get_subdevice_id(struct spdk_pci_device *dev) 770 { 771 return dev->id.subdevice_id; 772 } 773 774 struct spdk_pci_id 775 spdk_pci_device_get_id(struct spdk_pci_device *dev) 776 { 777 return dev->id; 778 } 779 780 int 781 spdk_pci_device_get_socket_id(struct spdk_pci_device *dev) 782 { 783 return dev->socket_id; 784 } 785 786 int 787 spdk_pci_device_cfg_read(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset) 788 { 789 return dev->cfg_read(dev, value, len, offset); 790 } 791 792 int 793 spdk_pci_device_cfg_write(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset) 794 { 795 return dev->cfg_write(dev, value, len, offset); 796 } 797 798 int 799 spdk_pci_device_cfg_read8(struct spdk_pci_device *dev, uint8_t *value, uint32_t offset) 800 { 801 return spdk_pci_device_cfg_read(dev, value, 1, offset); 802 } 803 804 int 805 spdk_pci_device_cfg_write8(struct spdk_pci_device *dev, uint8_t value, uint32_t offset) 806 { 807 return spdk_pci_device_cfg_write(dev, &value, 1, offset); 808 } 809 810 int 811 spdk_pci_device_cfg_read16(struct spdk_pci_device *dev, uint16_t *value, uint32_t offset) 812 { 813 return spdk_pci_device_cfg_read(dev, value, 2, offset); 814 } 815 816 int 817 spdk_pci_device_cfg_write16(struct spdk_pci_device *dev, uint16_t value, uint32_t offset) 818 { 819 return spdk_pci_device_cfg_write(dev, &value, 2, offset); 820 } 821 822 int 823 spdk_pci_device_cfg_read32(struct spdk_pci_device *dev, uint32_t *value, uint32_t offset) 824 { 825 return spdk_pci_device_cfg_read(dev, value, 4, offset); 826 } 827 828 int 829 spdk_pci_device_cfg_write32(struct spdk_pci_device *dev, uint32_t value, uint32_t offset) 830 { 831 return spdk_pci_device_cfg_write(dev, &value, 4, offset); 832 } 833 834 int 835 spdk_pci_device_get_serial_number(struct spdk_pci_device *dev, char *sn, size_t len) 836 { 837 int err; 838 uint32_t pos, header = 0; 839 uint32_t i, buf[2]; 840 841 if (len < 17) { 842 return -1; 843 } 844 845 err = spdk_pci_device_cfg_read32(dev, &header, PCI_CFG_SIZE); 846 if (err || !header) { 847 return -1; 848 } 849 850 pos = PCI_CFG_SIZE; 851 while (1) { 852 if ((header & 0x0000ffff) == PCI_EXT_CAP_ID_SN) { 853 if (pos) { 854 /* skip the header */ 855 pos += 4; 856 for (i = 0; i < 2; i++) { 857 err = spdk_pci_device_cfg_read32(dev, &buf[i], pos + 4 * i); 858 if (err) { 859 return -1; 860 } 861 } 862 snprintf(sn, len, "%08x%08x", buf[1], buf[0]); 863 return 0; 864 } 865 } 866 pos = (header >> 20) & 0xffc; 867 /* 0 if no other items exist */ 868 if (pos < PCI_CFG_SIZE) { 869 return -1; 870 } 871 err = spdk_pci_device_cfg_read32(dev, &header, pos); 872 if (err) { 873 return -1; 874 } 875 } 876 return -1; 877 } 878 879 struct spdk_pci_addr 880 spdk_pci_device_get_addr(struct spdk_pci_device *dev) 881 { 882 return dev->addr; 883 } 884 885 bool 886 spdk_pci_device_is_removed(struct spdk_pci_device *dev) 887 { 888 return dev->internal.pending_removal; 889 } 890 891 int 892 spdk_pci_addr_compare(const struct spdk_pci_addr *a1, const struct spdk_pci_addr *a2) 893 { 894 if (a1->domain > a2->domain) { 895 return 1; 896 } else if (a1->domain < a2->domain) { 897 return -1; 898 } else if (a1->bus > a2->bus) { 899 return 1; 900 } else if (a1->bus < a2->bus) { 901 return -1; 902 } else if (a1->dev > a2->dev) { 903 return 1; 904 } else if (a1->dev < a2->dev) { 905 return -1; 906 } else if (a1->func > a2->func) { 907 return 1; 908 } else if (a1->func < a2->func) { 909 return -1; 910 } 911 912 return 0; 913 } 914 915 #ifdef __linux__ 916 int 917 spdk_pci_device_claim(struct spdk_pci_device *dev) 918 { 919 int dev_fd; 920 char dev_name[64]; 921 int pid; 922 void *dev_map; 923 struct flock pcidev_lock = { 924 .l_type = F_WRLCK, 925 .l_whence = SEEK_SET, 926 .l_start = 0, 927 .l_len = 0, 928 }; 929 930 snprintf(dev_name, sizeof(dev_name), "/var/tmp/spdk_pci_lock_%04x:%02x:%02x.%x", 931 dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func); 932 933 dev_fd = open(dev_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); 934 if (dev_fd == -1) { 935 SPDK_ERRLOG("could not open %s\n", dev_name); 936 return -errno; 937 } 938 939 if (ftruncate(dev_fd, sizeof(int)) != 0) { 940 SPDK_ERRLOG("could not truncate %s\n", dev_name); 941 close(dev_fd); 942 return -errno; 943 } 944 945 dev_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, 946 MAP_SHARED, dev_fd, 0); 947 if (dev_map == MAP_FAILED) { 948 SPDK_ERRLOG("could not mmap dev %s (%d)\n", dev_name, errno); 949 close(dev_fd); 950 return -errno; 951 } 952 953 if (fcntl(dev_fd, F_SETLK, &pcidev_lock) != 0) { 954 pid = *(int *)dev_map; 955 SPDK_ERRLOG("Cannot create lock on device %s, probably" 956 " process %d has claimed it\n", dev_name, pid); 957 munmap(dev_map, sizeof(int)); 958 close(dev_fd); 959 /* F_SETLK returns unspecified errnos, normalize them */ 960 return -EACCES; 961 } 962 963 *(int *)dev_map = (int)getpid(); 964 munmap(dev_map, sizeof(int)); 965 dev->internal.claim_fd = dev_fd; 966 /* Keep dev_fd open to maintain the lock. */ 967 return 0; 968 } 969 970 void 971 spdk_pci_device_unclaim(struct spdk_pci_device *dev) 972 { 973 char dev_name[64]; 974 975 snprintf(dev_name, sizeof(dev_name), "/var/tmp/spdk_pci_lock_%04x:%02x:%02x.%x", 976 dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func); 977 978 close(dev->internal.claim_fd); 979 dev->internal.claim_fd = -1; 980 unlink(dev_name); 981 } 982 #endif /* __linux__ */ 983 984 #ifdef __FreeBSD__ 985 int 986 spdk_pci_device_claim(struct spdk_pci_device *dev) 987 { 988 /* TODO */ 989 return 0; 990 } 991 992 void 993 spdk_pci_device_unclaim(struct spdk_pci_device *dev) 994 { 995 /* TODO */ 996 } 997 #endif /* __FreeBSD__ */ 998 999 int 1000 spdk_pci_addr_parse(struct spdk_pci_addr *addr, const char *bdf) 1001 { 1002 unsigned domain, bus, dev, func; 1003 1004 if (addr == NULL || bdf == NULL) { 1005 return -EINVAL; 1006 } 1007 1008 if ((sscanf(bdf, "%x:%x:%x.%x", &domain, &bus, &dev, &func) == 4) || 1009 (sscanf(bdf, "%x.%x.%x.%x", &domain, &bus, &dev, &func) == 4)) { 1010 /* Matched a full address - all variables are initialized */ 1011 } else if (sscanf(bdf, "%x:%x:%x", &domain, &bus, &dev) == 3) { 1012 func = 0; 1013 } else if ((sscanf(bdf, "%x:%x.%x", &bus, &dev, &func) == 3) || 1014 (sscanf(bdf, "%x.%x.%x", &bus, &dev, &func) == 3)) { 1015 domain = 0; 1016 } else if ((sscanf(bdf, "%x:%x", &bus, &dev) == 2) || 1017 (sscanf(bdf, "%x.%x", &bus, &dev) == 2)) { 1018 domain = 0; 1019 func = 0; 1020 } else { 1021 return -EINVAL; 1022 } 1023 1024 if (bus > 0xFF || dev > 0x1F || func > 7) { 1025 return -EINVAL; 1026 } 1027 1028 addr->domain = domain; 1029 addr->bus = bus; 1030 addr->dev = dev; 1031 addr->func = func; 1032 1033 return 0; 1034 } 1035 1036 int 1037 spdk_pci_addr_fmt(char *bdf, size_t sz, const struct spdk_pci_addr *addr) 1038 { 1039 int rc; 1040 1041 rc = snprintf(bdf, sz, "%04x:%02x:%02x.%x", 1042 addr->domain, addr->bus, 1043 addr->dev, addr->func); 1044 1045 if (rc > 0 && (size_t)rc < sz) { 1046 return 0; 1047 } 1048 1049 return -1; 1050 } 1051 1052 void 1053 spdk_pci_hook_device(struct spdk_pci_driver *drv, struct spdk_pci_device *dev) 1054 { 1055 assert(dev->map_bar != NULL); 1056 assert(dev->unmap_bar != NULL); 1057 assert(dev->cfg_read != NULL); 1058 assert(dev->cfg_write != NULL); 1059 dev->internal.driver = drv; 1060 TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq); 1061 } 1062 1063 void 1064 spdk_pci_unhook_device(struct spdk_pci_device *dev) 1065 { 1066 assert(!dev->internal.attached); 1067 TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq); 1068 } 1069 1070 const char * 1071 spdk_pci_device_get_type(const struct spdk_pci_device *dev) 1072 { 1073 return dev->type; 1074 } 1075