1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include "env_internal.h" 35 36 #include <rte_alarm.h> 37 #include <rte_devargs.h> 38 #include "spdk/env.h" 39 #include "spdk/log.h" 40 41 #define SYSFS_PCI_DRIVERS "/sys/bus/pci/drivers" 42 43 /* Compatibility for versions < 20.11 */ 44 #if RTE_VERSION < RTE_VERSION_NUM(20, 11, 0, 0) 45 #define RTE_DEV_ALLOWED RTE_DEV_WHITELISTED 46 #define RTE_DEV_BLOCKED RTE_DEV_BLACKLISTED 47 #define RTE_BUS_SCAN_ALLOWLIST RTE_BUS_SCAN_WHITELIST 48 #endif 49 50 #define PCI_CFG_SIZE 256 51 #define PCI_EXT_CAP_ID_SN 0x03 52 53 /* DPDK 18.11+ hotplug isn't robust. Multiple apps starting at the same time 54 * might cause the internal IPC to misbehave. Just retry in such case. 55 */ 56 #define DPDK_HOTPLUG_RETRY_COUNT 4 57 58 /* DPDK alarm/interrupt thread */ 59 static pthread_mutex_t g_pci_mutex = PTHREAD_MUTEX_INITIALIZER; 60 static TAILQ_HEAD(, spdk_pci_device) g_pci_devices = TAILQ_HEAD_INITIALIZER(g_pci_devices); 61 /* devices hotplugged on a dpdk thread */ 62 static TAILQ_HEAD(, spdk_pci_device) g_pci_hotplugged_devices = 63 TAILQ_HEAD_INITIALIZER(g_pci_hotplugged_devices); 64 static TAILQ_HEAD(, spdk_pci_driver) g_pci_drivers = TAILQ_HEAD_INITIALIZER(g_pci_drivers); 65 66 static int 67 map_bar_rte(struct spdk_pci_device *device, uint32_t bar, 68 void **mapped_addr, uint64_t *phys_addr, uint64_t *size) 69 { 70 struct rte_pci_device *dev = device->dev_handle; 71 72 *mapped_addr = dev->mem_resource[bar].addr; 73 *phys_addr = (uint64_t)dev->mem_resource[bar].phys_addr; 74 *size = (uint64_t)dev->mem_resource[bar].len; 75 76 return 0; 77 } 78 79 static int 80 unmap_bar_rte(struct spdk_pci_device *device, uint32_t bar, void *addr) 81 { 82 return 0; 83 } 84 85 static int 86 cfg_read_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset) 87 { 88 int rc; 89 90 rc = rte_pci_read_config(dev->dev_handle, value, len, offset); 91 92 return (rc > 0 && (uint32_t) rc == len) ? 0 : -1; 93 } 94 95 static int 96 cfg_write_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset) 97 { 98 int rc; 99 100 rc = rte_pci_write_config(dev->dev_handle, value, len, offset); 101 102 #ifdef __FreeBSD__ 103 /* DPDK returns 0 on success and -1 on failure */ 104 return rc; 105 #endif 106 return (rc > 0 && (uint32_t) rc == len) ? 0 : -1; 107 } 108 109 static void 110 remove_rte_dev(struct rte_pci_device *rte_dev) 111 { 112 char bdf[32]; 113 int i = 0, rc; 114 115 snprintf(bdf, sizeof(bdf), "%s", rte_dev->device.name); 116 do { 117 rc = rte_eal_hotplug_remove("pci", bdf); 118 } while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT); 119 } 120 121 static void 122 detach_rte_cb(void *_dev) 123 { 124 remove_rte_dev(_dev); 125 } 126 127 static void 128 detach_rte(struct spdk_pci_device *dev) 129 { 130 struct rte_pci_device *rte_dev = dev->dev_handle; 131 int i; 132 bool removed; 133 134 if (!spdk_process_is_primary()) { 135 remove_rte_dev(rte_dev); 136 return; 137 } 138 139 pthread_mutex_lock(&g_pci_mutex); 140 dev->internal.attached = false; 141 /* prevent the hotremove notification from removing this device */ 142 dev->internal.pending_removal = true; 143 pthread_mutex_unlock(&g_pci_mutex); 144 145 rte_eal_alarm_set(1, detach_rte_cb, rte_dev); 146 147 /* wait up to 2s for the cb to execute */ 148 for (i = 2000; i > 0; i--) { 149 150 spdk_delay_us(1000); 151 pthread_mutex_lock(&g_pci_mutex); 152 removed = dev->internal.removed; 153 pthread_mutex_unlock(&g_pci_mutex); 154 155 if (removed) { 156 break; 157 } 158 } 159 160 /* besides checking the removed flag, we also need to wait 161 * for the dpdk detach function to unwind, as it's doing some 162 * operations even after calling our detach callback. Simply 163 * cancel the alarm - if it started executing already, this 164 * call will block and wait for it to finish. 165 */ 166 rte_eal_alarm_cancel(detach_rte_cb, rte_dev); 167 168 /* the device could have been finally removed, so just check 169 * it again. 170 */ 171 pthread_mutex_lock(&g_pci_mutex); 172 removed = dev->internal.removed; 173 pthread_mutex_unlock(&g_pci_mutex); 174 if (!removed) { 175 SPDK_ERRLOG("Timeout waiting for DPDK to remove PCI device %s.\n", 176 rte_dev->name); 177 /* If we reach this state, then the device couldn't be removed and most likely 178 a subsequent hot add of a device in the same BDF will fail */ 179 } 180 } 181 182 void 183 spdk_pci_driver_register(const char *name, struct spdk_pci_id *id_table, uint32_t flags) 184 { 185 struct spdk_pci_driver *driver; 186 187 driver = calloc(1, sizeof(*driver)); 188 if (!driver) { 189 /* we can't do any better than bailing atm */ 190 return; 191 } 192 193 driver->name = name; 194 driver->id_table = id_table; 195 driver->drv_flags = flags; 196 TAILQ_INSERT_TAIL(&g_pci_drivers, driver, tailq); 197 } 198 199 struct spdk_pci_driver * 200 spdk_pci_nvme_get_driver(void) 201 { 202 return spdk_pci_get_driver("nvme"); 203 } 204 205 struct spdk_pci_driver * 206 spdk_pci_get_driver(const char *name) 207 { 208 struct spdk_pci_driver *driver; 209 210 TAILQ_FOREACH(driver, &g_pci_drivers, tailq) { 211 if (strcmp(driver->name, name) == 0) { 212 return driver; 213 } 214 } 215 216 return NULL; 217 } 218 219 static void 220 pci_device_rte_dev_event(const char *device_name, 221 enum rte_dev_event_type event, 222 void *cb_arg) 223 { 224 struct spdk_pci_device *dev; 225 bool can_detach = false; 226 227 switch (event) { 228 default: 229 case RTE_DEV_EVENT_ADD: 230 /* Nothing to do here yet. */ 231 break; 232 case RTE_DEV_EVENT_REMOVE: 233 pthread_mutex_lock(&g_pci_mutex); 234 TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { 235 struct rte_pci_device *rte_dev = dev->dev_handle; 236 237 if (strcmp(rte_dev->name, device_name) == 0 && 238 !dev->internal.pending_removal) { 239 can_detach = !dev->internal.attached; 240 /* prevent any further attaches */ 241 dev->internal.pending_removal = true; 242 break; 243 } 244 } 245 pthread_mutex_unlock(&g_pci_mutex); 246 247 if (dev != NULL && can_detach) { 248 /* if device is not attached we can remove it right away. 249 * Otherwise it will be removed at detach. 250 * 251 * Because the user's callback is invoked in eal interrupt 252 * callback, the interrupt callback need to be finished before 253 * it can be unregistered when detaching device. So finish 254 * callback soon and use a deferred removal to detach device 255 * is need. It is a workaround, once the device detaching be 256 * moved into the eal in the future, the deferred removal could 257 * be deleted. 258 */ 259 rte_eal_alarm_set(1, detach_rte_cb, dev->dev_handle); 260 } 261 break; 262 } 263 } 264 265 static void 266 cleanup_pci_devices(void) 267 { 268 struct spdk_pci_device *dev, *tmp; 269 270 pthread_mutex_lock(&g_pci_mutex); 271 /* cleanup removed devices */ 272 TAILQ_FOREACH_SAFE(dev, &g_pci_devices, internal.tailq, tmp) { 273 if (!dev->internal.removed) { 274 continue; 275 } 276 277 vtophys_pci_device_removed(dev->dev_handle); 278 TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq); 279 free(dev); 280 } 281 282 /* add newly-attached devices */ 283 TAILQ_FOREACH_SAFE(dev, &g_pci_hotplugged_devices, internal.tailq, tmp) { 284 TAILQ_REMOVE(&g_pci_hotplugged_devices, dev, internal.tailq); 285 TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq); 286 vtophys_pci_device_added(dev->dev_handle); 287 } 288 pthread_mutex_unlock(&g_pci_mutex); 289 } 290 291 static int scan_pci_bus(bool delay_init); 292 293 /* translate spdk_pci_driver to an rte_pci_driver and register it to dpdk */ 294 static int 295 register_rte_driver(struct spdk_pci_driver *driver) 296 { 297 unsigned pci_id_count = 0; 298 struct rte_pci_id *rte_id_table; 299 char *rte_name; 300 size_t rte_name_len; 301 uint32_t rte_flags; 302 303 assert(driver->id_table); 304 while (driver->id_table[pci_id_count].vendor_id) { 305 pci_id_count++; 306 } 307 assert(pci_id_count > 0); 308 309 rte_id_table = calloc(pci_id_count + 1, sizeof(*rte_id_table)); 310 if (!rte_id_table) { 311 return -ENOMEM; 312 } 313 314 while (pci_id_count > 0) { 315 struct rte_pci_id *rte_id = &rte_id_table[pci_id_count - 1]; 316 const struct spdk_pci_id *spdk_id = &driver->id_table[pci_id_count - 1]; 317 318 rte_id->class_id = spdk_id->class_id; 319 rte_id->vendor_id = spdk_id->vendor_id; 320 rte_id->device_id = spdk_id->device_id; 321 rte_id->subsystem_vendor_id = spdk_id->subvendor_id; 322 rte_id->subsystem_device_id = spdk_id->subdevice_id; 323 pci_id_count--; 324 } 325 326 assert(driver->name); 327 rte_name_len = strlen(driver->name) + strlen("spdk_") + 1; 328 rte_name = calloc(rte_name_len, 1); 329 if (!rte_name) { 330 free(rte_id_table); 331 return -ENOMEM; 332 } 333 334 snprintf(rte_name, rte_name_len, "spdk_%s", driver->name); 335 driver->driver.driver.name = rte_name; 336 driver->driver.id_table = rte_id_table; 337 338 rte_flags = 0; 339 if (driver->drv_flags & SPDK_PCI_DRIVER_NEED_MAPPING) { 340 rte_flags |= RTE_PCI_DRV_NEED_MAPPING; 341 } 342 if (driver->drv_flags & SPDK_PCI_DRIVER_WC_ACTIVATE) { 343 rte_flags |= RTE_PCI_DRV_WC_ACTIVATE; 344 } 345 driver->driver.drv_flags = rte_flags; 346 347 driver->driver.probe = pci_device_init; 348 driver->driver.remove = pci_device_fini; 349 350 rte_pci_register(&driver->driver); 351 return 0; 352 } 353 354 static inline void 355 _pci_env_init(void) 356 { 357 /* We assume devices were present on the bus for more than 2 seconds 358 * before initializing SPDK and there's no need to wait more. We scan 359 * the bus, but we don't block any devices. 360 */ 361 scan_pci_bus(false); 362 363 /* Register a single hotremove callback for all devices. */ 364 if (spdk_process_is_primary()) { 365 rte_dev_event_callback_register(NULL, pci_device_rte_dev_event, NULL); 366 } 367 } 368 369 void 370 pci_env_init(void) 371 { 372 struct spdk_pci_driver *driver; 373 374 TAILQ_FOREACH(driver, &g_pci_drivers, tailq) { 375 register_rte_driver(driver); 376 } 377 378 _pci_env_init(); 379 } 380 381 void 382 pci_env_reinit(void) 383 { 384 /* There is no need to register pci drivers again, since they were 385 * already pre-registered in pci_env_init. 386 */ 387 388 _pci_env_init(); 389 } 390 391 void 392 pci_env_fini(void) 393 { 394 struct spdk_pci_device *dev; 395 char bdf[32]; 396 397 cleanup_pci_devices(); 398 TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { 399 if (dev->internal.attached) { 400 spdk_pci_addr_fmt(bdf, sizeof(bdf), &dev->addr); 401 SPDK_ERRLOG("Device %s is still attached at shutdown!\n", bdf); 402 } 403 } 404 405 if (spdk_process_is_primary()) { 406 rte_dev_event_callback_unregister(NULL, pci_device_rte_dev_event, NULL); 407 } 408 } 409 410 int 411 pci_device_init(struct rte_pci_driver *_drv, 412 struct rte_pci_device *_dev) 413 { 414 struct spdk_pci_driver *driver = (struct spdk_pci_driver *)_drv; 415 struct spdk_pci_device *dev; 416 int rc; 417 418 dev = calloc(1, sizeof(*dev)); 419 if (dev == NULL) { 420 return -1; 421 } 422 423 dev->dev_handle = _dev; 424 425 dev->addr.domain = _dev->addr.domain; 426 dev->addr.bus = _dev->addr.bus; 427 dev->addr.dev = _dev->addr.devid; 428 dev->addr.func = _dev->addr.function; 429 dev->id.class_id = _dev->id.class_id; 430 dev->id.vendor_id = _dev->id.vendor_id; 431 dev->id.device_id = _dev->id.device_id; 432 dev->id.subvendor_id = _dev->id.subsystem_vendor_id; 433 dev->id.subdevice_id = _dev->id.subsystem_device_id; 434 dev->socket_id = _dev->device.numa_node; 435 dev->type = "pci"; 436 437 dev->map_bar = map_bar_rte; 438 dev->unmap_bar = unmap_bar_rte; 439 dev->cfg_read = cfg_read_rte; 440 dev->cfg_write = cfg_write_rte; 441 442 dev->internal.driver = driver; 443 dev->internal.claim_fd = -1; 444 445 if (driver->cb_fn != NULL) { 446 rc = driver->cb_fn(driver->cb_arg, dev); 447 if (rc != 0) { 448 free(dev); 449 return rc; 450 } 451 dev->internal.attached = true; 452 } 453 454 pthread_mutex_lock(&g_pci_mutex); 455 TAILQ_INSERT_TAIL(&g_pci_hotplugged_devices, dev, internal.tailq); 456 pthread_mutex_unlock(&g_pci_mutex); 457 return 0; 458 } 459 460 int 461 pci_device_fini(struct rte_pci_device *_dev) 462 { 463 struct spdk_pci_device *dev; 464 465 pthread_mutex_lock(&g_pci_mutex); 466 TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { 467 if (dev->dev_handle == _dev) { 468 break; 469 } 470 } 471 472 if (dev == NULL || dev->internal.attached) { 473 /* The device might be still referenced somewhere in SPDK. */ 474 pthread_mutex_unlock(&g_pci_mutex); 475 return -1; 476 } 477 478 /* remove our allowed_at option */ 479 if (_dev->device.devargs) { 480 _dev->device.devargs->data = NULL; 481 } 482 483 assert(!dev->internal.removed); 484 dev->internal.removed = true; 485 pthread_mutex_unlock(&g_pci_mutex); 486 return 0; 487 488 } 489 490 void 491 spdk_pci_device_detach(struct spdk_pci_device *dev) 492 { 493 assert(dev->internal.attached); 494 495 if (dev->internal.claim_fd >= 0) { 496 spdk_pci_device_unclaim(dev); 497 } 498 499 if (strcmp(dev->type, "pci") == 0) { 500 /* if it's a physical device we need to deal with DPDK on 501 * a different process and we can't just unset one flag 502 * here. We also want to stop using any device resources 503 * so that the device isn't "in use" by the userspace driver 504 * once we detach it. This would allow attaching the device 505 * to a different process, or to a kernel driver like nvme. 506 */ 507 detach_rte(dev); 508 } else { 509 dev->internal.attached = false; 510 } 511 512 cleanup_pci_devices(); 513 } 514 515 static int 516 scan_pci_bus(bool delay_init) 517 { 518 struct spdk_pci_driver *driver; 519 struct rte_pci_device *rte_dev; 520 uint64_t now; 521 522 rte_bus_scan(); 523 now = spdk_get_ticks(); 524 525 driver = TAILQ_FIRST(&g_pci_drivers); 526 if (!driver) { 527 return 0; 528 } 529 530 TAILQ_FOREACH(rte_dev, &driver->driver.bus->device_list, next) { 531 struct rte_devargs *da; 532 533 da = rte_dev->device.devargs; 534 if (!da) { 535 char devargs_str[128]; 536 537 /* the device was never blocked or allowed */ 538 da = calloc(1, sizeof(*da)); 539 if (!da) { 540 return -1; 541 } 542 543 snprintf(devargs_str, sizeof(devargs_str), "pci:%s", rte_dev->device.name); 544 if (rte_devargs_parse(da, devargs_str) != 0) { 545 free(da); 546 return -1; 547 } 548 549 rte_devargs_insert(&da); 550 rte_dev->device.devargs = da; 551 } 552 553 if (da->data) { 554 uint64_t allowed_at = (uint64_t)(uintptr_t)da->data; 555 556 /* this device was seen by spdk before... */ 557 if (da->policy == RTE_DEV_BLOCKED && allowed_at <= now) { 558 da->policy = RTE_DEV_ALLOWED; 559 } 560 } else if ((driver->driver.bus->bus.conf.scan_mode == RTE_BUS_SCAN_ALLOWLIST && 561 da->policy == RTE_DEV_ALLOWED) || da->policy != RTE_DEV_BLOCKED) { 562 /* override the policy only if not permanently blocked */ 563 564 if (delay_init) { 565 da->policy = RTE_DEV_BLOCKED; 566 da->data = (void *)(now + 2 * spdk_get_ticks_hz()); 567 } else { 568 da->policy = RTE_DEV_ALLOWED; 569 da->data = (void *)(uintptr_t)now; 570 } 571 } 572 } 573 574 return 0; 575 } 576 577 int 578 spdk_pci_device_attach(struct spdk_pci_driver *driver, 579 spdk_pci_enum_cb enum_cb, 580 void *enum_ctx, struct spdk_pci_addr *pci_address) 581 { 582 struct spdk_pci_device *dev; 583 struct rte_pci_device *rte_dev; 584 struct rte_devargs *da; 585 int rc; 586 char bdf[32]; 587 588 spdk_pci_addr_fmt(bdf, sizeof(bdf), pci_address); 589 590 cleanup_pci_devices(); 591 592 TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { 593 if (spdk_pci_addr_compare(&dev->addr, pci_address) == 0) { 594 break; 595 } 596 } 597 598 if (dev != NULL && dev->internal.driver == driver) { 599 pthread_mutex_lock(&g_pci_mutex); 600 if (dev->internal.attached || dev->internal.pending_removal) { 601 pthread_mutex_unlock(&g_pci_mutex); 602 return -1; 603 } 604 605 rc = enum_cb(enum_ctx, dev); 606 if (rc == 0) { 607 dev->internal.attached = true; 608 } 609 pthread_mutex_unlock(&g_pci_mutex); 610 return rc; 611 } 612 613 driver->cb_fn = enum_cb; 614 driver->cb_arg = enum_ctx; 615 616 int i = 0; 617 618 do { 619 rc = rte_eal_hotplug_add("pci", bdf, ""); 620 } while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT); 621 622 if (i > 1 && rc == -EEXIST) { 623 /* Even though the previous request timed out, the device 624 * was attached successfully. 625 */ 626 rc = 0; 627 } 628 629 driver->cb_arg = NULL; 630 driver->cb_fn = NULL; 631 632 cleanup_pci_devices(); 633 634 if (rc != 0) { 635 return -1; 636 } 637 638 /* explicit attach ignores the allowlist, so if we blocked this 639 * device before let's enable it now - just for clarity. 640 */ 641 TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { 642 if (spdk_pci_addr_compare(&dev->addr, pci_address) == 0) { 643 break; 644 } 645 } 646 assert(dev != NULL); 647 648 rte_dev = dev->dev_handle; 649 da = rte_dev->device.devargs; 650 if (da && da->data) { 651 da->data = (void *)(uintptr_t)spdk_get_ticks(); 652 da->policy = RTE_DEV_ALLOWED; 653 } 654 655 return 0; 656 } 657 658 /* Note: You can call spdk_pci_enumerate from more than one thread 659 * simultaneously safely, but you cannot call spdk_pci_enumerate 660 * and rte_eal_pci_probe simultaneously. 661 */ 662 int 663 spdk_pci_enumerate(struct spdk_pci_driver *driver, 664 spdk_pci_enum_cb enum_cb, 665 void *enum_ctx) 666 { 667 struct spdk_pci_device *dev; 668 int rc; 669 670 cleanup_pci_devices(); 671 672 pthread_mutex_lock(&g_pci_mutex); 673 TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { 674 if (dev->internal.attached || 675 dev->internal.driver != driver || 676 dev->internal.pending_removal) { 677 continue; 678 } 679 680 rc = enum_cb(enum_ctx, dev); 681 if (rc == 0) { 682 dev->internal.attached = true; 683 } else if (rc < 0) { 684 pthread_mutex_unlock(&g_pci_mutex); 685 return -1; 686 } 687 } 688 pthread_mutex_unlock(&g_pci_mutex); 689 690 if (scan_pci_bus(true) != 0) { 691 return -1; 692 } 693 694 driver->cb_fn = enum_cb; 695 driver->cb_arg = enum_ctx; 696 697 if (rte_bus_probe() != 0) { 698 driver->cb_arg = NULL; 699 driver->cb_fn = NULL; 700 return -1; 701 } 702 703 driver->cb_arg = NULL; 704 driver->cb_fn = NULL; 705 706 cleanup_pci_devices(); 707 return 0; 708 } 709 710 struct spdk_pci_device * 711 spdk_pci_get_first_device(void) 712 { 713 return TAILQ_FIRST(&g_pci_devices); 714 } 715 716 struct spdk_pci_device * 717 spdk_pci_get_next_device(struct spdk_pci_device *prev) 718 { 719 return TAILQ_NEXT(prev, internal.tailq); 720 } 721 722 int 723 spdk_pci_device_map_bar(struct spdk_pci_device *dev, uint32_t bar, 724 void **mapped_addr, uint64_t *phys_addr, uint64_t *size) 725 { 726 return dev->map_bar(dev, bar, mapped_addr, phys_addr, size); 727 } 728 729 int 730 spdk_pci_device_unmap_bar(struct spdk_pci_device *dev, uint32_t bar, void *addr) 731 { 732 return dev->unmap_bar(dev, bar, addr); 733 } 734 735 uint32_t 736 spdk_pci_device_get_domain(struct spdk_pci_device *dev) 737 { 738 return dev->addr.domain; 739 } 740 741 uint8_t 742 spdk_pci_device_get_bus(struct spdk_pci_device *dev) 743 { 744 return dev->addr.bus; 745 } 746 747 uint8_t 748 spdk_pci_device_get_dev(struct spdk_pci_device *dev) 749 { 750 return dev->addr.dev; 751 } 752 753 uint8_t 754 spdk_pci_device_get_func(struct spdk_pci_device *dev) 755 { 756 return dev->addr.func; 757 } 758 759 uint16_t 760 spdk_pci_device_get_vendor_id(struct spdk_pci_device *dev) 761 { 762 return dev->id.vendor_id; 763 } 764 765 uint16_t 766 spdk_pci_device_get_device_id(struct spdk_pci_device *dev) 767 { 768 return dev->id.device_id; 769 } 770 771 uint16_t 772 spdk_pci_device_get_subvendor_id(struct spdk_pci_device *dev) 773 { 774 return dev->id.subvendor_id; 775 } 776 777 uint16_t 778 spdk_pci_device_get_subdevice_id(struct spdk_pci_device *dev) 779 { 780 return dev->id.subdevice_id; 781 } 782 783 struct spdk_pci_id 784 spdk_pci_device_get_id(struct spdk_pci_device *dev) 785 { 786 return dev->id; 787 } 788 789 int 790 spdk_pci_device_get_socket_id(struct spdk_pci_device *dev) 791 { 792 return dev->socket_id; 793 } 794 795 int 796 spdk_pci_device_cfg_read(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset) 797 { 798 return dev->cfg_read(dev, value, len, offset); 799 } 800 801 int 802 spdk_pci_device_cfg_write(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset) 803 { 804 return dev->cfg_write(dev, value, len, offset); 805 } 806 807 int 808 spdk_pci_device_cfg_read8(struct spdk_pci_device *dev, uint8_t *value, uint32_t offset) 809 { 810 return spdk_pci_device_cfg_read(dev, value, 1, offset); 811 } 812 813 int 814 spdk_pci_device_cfg_write8(struct spdk_pci_device *dev, uint8_t value, uint32_t offset) 815 { 816 return spdk_pci_device_cfg_write(dev, &value, 1, offset); 817 } 818 819 int 820 spdk_pci_device_cfg_read16(struct spdk_pci_device *dev, uint16_t *value, uint32_t offset) 821 { 822 return spdk_pci_device_cfg_read(dev, value, 2, offset); 823 } 824 825 int 826 spdk_pci_device_cfg_write16(struct spdk_pci_device *dev, uint16_t value, uint32_t offset) 827 { 828 return spdk_pci_device_cfg_write(dev, &value, 2, offset); 829 } 830 831 int 832 spdk_pci_device_cfg_read32(struct spdk_pci_device *dev, uint32_t *value, uint32_t offset) 833 { 834 return spdk_pci_device_cfg_read(dev, value, 4, offset); 835 } 836 837 int 838 spdk_pci_device_cfg_write32(struct spdk_pci_device *dev, uint32_t value, uint32_t offset) 839 { 840 return spdk_pci_device_cfg_write(dev, &value, 4, offset); 841 } 842 843 int 844 spdk_pci_device_get_serial_number(struct spdk_pci_device *dev, char *sn, size_t len) 845 { 846 int err; 847 uint32_t pos, header = 0; 848 uint32_t i, buf[2]; 849 850 if (len < 17) { 851 return -1; 852 } 853 854 err = spdk_pci_device_cfg_read32(dev, &header, PCI_CFG_SIZE); 855 if (err || !header) { 856 return -1; 857 } 858 859 pos = PCI_CFG_SIZE; 860 while (1) { 861 if ((header & 0x0000ffff) == PCI_EXT_CAP_ID_SN) { 862 if (pos) { 863 /* skip the header */ 864 pos += 4; 865 for (i = 0; i < 2; i++) { 866 err = spdk_pci_device_cfg_read32(dev, &buf[i], pos + 4 * i); 867 if (err) { 868 return -1; 869 } 870 } 871 snprintf(sn, len, "%08x%08x", buf[1], buf[0]); 872 return 0; 873 } 874 } 875 pos = (header >> 20) & 0xffc; 876 /* 0 if no other items exist */ 877 if (pos < PCI_CFG_SIZE) { 878 return -1; 879 } 880 err = spdk_pci_device_cfg_read32(dev, &header, pos); 881 if (err) { 882 return -1; 883 } 884 } 885 return -1; 886 } 887 888 struct spdk_pci_addr 889 spdk_pci_device_get_addr(struct spdk_pci_device *dev) 890 { 891 return dev->addr; 892 } 893 894 bool 895 spdk_pci_device_is_removed(struct spdk_pci_device *dev) 896 { 897 return dev->internal.pending_removal; 898 } 899 900 int 901 spdk_pci_addr_compare(const struct spdk_pci_addr *a1, const struct spdk_pci_addr *a2) 902 { 903 if (a1->domain > a2->domain) { 904 return 1; 905 } else if (a1->domain < a2->domain) { 906 return -1; 907 } else if (a1->bus > a2->bus) { 908 return 1; 909 } else if (a1->bus < a2->bus) { 910 return -1; 911 } else if (a1->dev > a2->dev) { 912 return 1; 913 } else if (a1->dev < a2->dev) { 914 return -1; 915 } else if (a1->func > a2->func) { 916 return 1; 917 } else if (a1->func < a2->func) { 918 return -1; 919 } 920 921 return 0; 922 } 923 924 #ifdef __linux__ 925 int 926 spdk_pci_device_claim(struct spdk_pci_device *dev) 927 { 928 int dev_fd; 929 char dev_name[64]; 930 int pid; 931 void *dev_map; 932 struct flock pcidev_lock = { 933 .l_type = F_WRLCK, 934 .l_whence = SEEK_SET, 935 .l_start = 0, 936 .l_len = 0, 937 }; 938 939 snprintf(dev_name, sizeof(dev_name), "/var/tmp/spdk_pci_lock_%04x:%02x:%02x.%x", 940 dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func); 941 942 dev_fd = open(dev_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); 943 if (dev_fd == -1) { 944 SPDK_ERRLOG("could not open %s\n", dev_name); 945 return -errno; 946 } 947 948 if (ftruncate(dev_fd, sizeof(int)) != 0) { 949 SPDK_ERRLOG("could not truncate %s\n", dev_name); 950 close(dev_fd); 951 return -errno; 952 } 953 954 dev_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, 955 MAP_SHARED, dev_fd, 0); 956 if (dev_map == MAP_FAILED) { 957 SPDK_ERRLOG("could not mmap dev %s (%d)\n", dev_name, errno); 958 close(dev_fd); 959 return -errno; 960 } 961 962 if (fcntl(dev_fd, F_SETLK, &pcidev_lock) != 0) { 963 pid = *(int *)dev_map; 964 SPDK_ERRLOG("Cannot create lock on device %s, probably" 965 " process %d has claimed it\n", dev_name, pid); 966 munmap(dev_map, sizeof(int)); 967 close(dev_fd); 968 /* F_SETLK returns unspecified errnos, normalize them */ 969 return -EACCES; 970 } 971 972 *(int *)dev_map = (int)getpid(); 973 munmap(dev_map, sizeof(int)); 974 dev->internal.claim_fd = dev_fd; 975 /* Keep dev_fd open to maintain the lock. */ 976 return 0; 977 } 978 979 void 980 spdk_pci_device_unclaim(struct spdk_pci_device *dev) 981 { 982 char dev_name[64]; 983 984 snprintf(dev_name, sizeof(dev_name), "/var/tmp/spdk_pci_lock_%04x:%02x:%02x.%x", 985 dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func); 986 987 close(dev->internal.claim_fd); 988 dev->internal.claim_fd = -1; 989 unlink(dev_name); 990 } 991 #else /* !__linux__ */ 992 int 993 spdk_pci_device_claim(struct spdk_pci_device *dev) 994 { 995 /* TODO */ 996 return 0; 997 } 998 999 void 1000 spdk_pci_device_unclaim(struct spdk_pci_device *dev) 1001 { 1002 /* TODO */ 1003 } 1004 #endif /* __linux__ */ 1005 1006 int 1007 spdk_pci_addr_parse(struct spdk_pci_addr *addr, const char *bdf) 1008 { 1009 unsigned domain, bus, dev, func; 1010 1011 if (addr == NULL || bdf == NULL) { 1012 return -EINVAL; 1013 } 1014 1015 if ((sscanf(bdf, "%x:%x:%x.%x", &domain, &bus, &dev, &func) == 4) || 1016 (sscanf(bdf, "%x.%x.%x.%x", &domain, &bus, &dev, &func) == 4)) { 1017 /* Matched a full address - all variables are initialized */ 1018 } else if (sscanf(bdf, "%x:%x:%x", &domain, &bus, &dev) == 3) { 1019 func = 0; 1020 } else if ((sscanf(bdf, "%x:%x.%x", &bus, &dev, &func) == 3) || 1021 (sscanf(bdf, "%x.%x.%x", &bus, &dev, &func) == 3)) { 1022 domain = 0; 1023 } else if ((sscanf(bdf, "%x:%x", &bus, &dev) == 2) || 1024 (sscanf(bdf, "%x.%x", &bus, &dev) == 2)) { 1025 domain = 0; 1026 func = 0; 1027 } else { 1028 return -EINVAL; 1029 } 1030 1031 if (bus > 0xFF || dev > 0x1F || func > 7) { 1032 return -EINVAL; 1033 } 1034 1035 addr->domain = domain; 1036 addr->bus = bus; 1037 addr->dev = dev; 1038 addr->func = func; 1039 1040 return 0; 1041 } 1042 1043 int 1044 spdk_pci_addr_fmt(char *bdf, size_t sz, const struct spdk_pci_addr *addr) 1045 { 1046 int rc; 1047 1048 rc = snprintf(bdf, sz, "%04x:%02x:%02x.%x", 1049 addr->domain, addr->bus, 1050 addr->dev, addr->func); 1051 1052 if (rc > 0 && (size_t)rc < sz) { 1053 return 0; 1054 } 1055 1056 return -1; 1057 } 1058 1059 void 1060 spdk_pci_hook_device(struct spdk_pci_driver *drv, struct spdk_pci_device *dev) 1061 { 1062 assert(dev->map_bar != NULL); 1063 assert(dev->unmap_bar != NULL); 1064 assert(dev->cfg_read != NULL); 1065 assert(dev->cfg_write != NULL); 1066 dev->internal.driver = drv; 1067 TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq); 1068 } 1069 1070 void 1071 spdk_pci_unhook_device(struct spdk_pci_device *dev) 1072 { 1073 assert(!dev->internal.attached); 1074 TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq); 1075 } 1076 1077 const char * 1078 spdk_pci_device_get_type(const struct spdk_pci_device *dev) 1079 { 1080 return dev->type; 1081 } 1082 1083 int 1084 spdk_pci_device_allow(struct spdk_pci_addr *pci_addr) 1085 { 1086 struct rte_devargs *da; 1087 char devargs_str[128]; 1088 1089 da = calloc(1, sizeof(*da)); 1090 if (da == NULL) { 1091 SPDK_ERRLOG("could not allocate rte_devargs\n"); 1092 return -ENOMEM; 1093 } 1094 1095 snprintf(devargs_str, sizeof(devargs_str), "pci:%04x:%02x:%02x.%x", 1096 pci_addr->domain, pci_addr->bus, pci_addr->dev, pci_addr->func); 1097 if (rte_devargs_parse(da, devargs_str) != 0) { 1098 SPDK_ERRLOG("rte_devargs_parse() failed on '%s'\n", devargs_str); 1099 free(da); 1100 return -EINVAL; 1101 } 1102 da->policy = RTE_DEV_ALLOWED; 1103 /* Note: if a devargs already exists for this device address, it just gets 1104 * overridden. So we do not need to check if the devargs already exists. 1105 * DPDK will take care of memory management for the devargs structure after 1106 * it has been inserted, so there's nothing SPDK needs to track. 1107 */ 1108 if (rte_devargs_insert(&da) != 0) { 1109 SPDK_ERRLOG("rte_devargs_insert() failed on '%s'\n", devargs_str); 1110 free(da); 1111 return -EINVAL; 1112 } 1113 1114 return 0; 1115 } 1116