/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2015 Intel Corporation.
 * All rights reserved.
 */

#include "env_internal.h"
#include "pci_dpdk.h"

#include <rte_alarm.h>
#include <rte_devargs.h>
#include <rte_pci.h>
#include "spdk/env.h"
#include "spdk/log.h"
#include "spdk/string.h"
#include "spdk/memory.h"

#define SYSFS_PCI_DRIVERS "/sys/bus/pci/drivers"

/* Compatibility for versions < 20.11 */
#if RTE_VERSION < RTE_VERSION_NUM(20, 11, 0, 0)
#define RTE_DEV_ALLOWED RTE_DEV_WHITELISTED
#define RTE_DEV_BLOCKED RTE_DEV_BLACKLISTED
#define RTE_BUS_SCAN_ALLOWLIST RTE_BUS_SCAN_WHITELIST
#endif

/* Size of the standard PCI config space; extended capabilities start here. */
#define PCI_CFG_SIZE 256
/* PCIe extended capability ID for the Device Serial Number capability. */
#define PCI_EXT_CAP_ID_SN 0x03

/* DPDK 18.11+ hotplug isn't robust. Multiple apps starting at the same time
 * might cause the internal IPC to misbehave. Just retry in such case.
 */
#define DPDK_HOTPLUG_RETRY_COUNT 4

/* DPDK alarm/interrupt thread */
static pthread_mutex_t g_pci_mutex = PTHREAD_MUTEX_INITIALIZER;
static TAILQ_HEAD(, spdk_pci_device) g_pci_devices = TAILQ_HEAD_INITIALIZER(g_pci_devices);
/* devices hotplugged on a dpdk thread */
static TAILQ_HEAD(, spdk_pci_device) g_pci_hotplugged_devices =
	TAILQ_HEAD_INITIALIZER(g_pci_hotplugged_devices);
static TAILQ_HEAD(, spdk_pci_driver) g_pci_drivers = TAILQ_HEAD_INITIALIZER(g_pci_drivers);
static TAILQ_HEAD(, spdk_pci_device_provider) g_pci_device_providers =
	TAILQ_HEAD_INITIALIZER(g_pci_device_providers);

int pci_device_init(struct rte_pci_driver *driver, struct rte_pci_device *device);
int pci_device_fini(struct rte_pci_device *device);

/* Per-device bookkeeping for the allow/block (devargs) policy. allowed_at is
 * the tick count at which the device becomes eligible for probing.
 */
struct env_devargs {
	struct rte_bus *bus;
	char name[128];
	uint64_t allowed_at;
	TAILQ_ENTRY(env_devargs) link;
};
static TAILQ_HEAD(, env_devargs) g_env_devargs = TAILQ_HEAD_INITIALIZER(g_env_devargs);

/* Find the env_devargs entry matching the given bus and device name,
 * or NULL if this device has never been recorded.
 */
static struct env_devargs *
find_env_devargs(struct rte_bus *bus, const char *name)
{
	struct env_devargs *da;

	TAILQ_FOREACH(da, &g_env_devargs, link) {
		if (bus == da->bus && !strcmp(name, da->name)) {
			return da;
		}
	}

	return NULL;
}

/* Report the BAR mapping that DPDK already established for this device.
 * No new mapping is created here - we only read it back from the
 * rte_mem_resource.
 */
static int
map_bar_rte(struct spdk_pci_device *device, uint32_t bar,
	    void **mapped_addr, uint64_t *phys_addr, uint64_t *size)
{
	struct rte_mem_resource *res;

	res = dpdk_pci_device_get_mem_resource(device->dev_handle, bar);
	*mapped_addr = res->addr;
	*phys_addr = (uint64_t)res->phys_addr;
	*size = (uint64_t)res->len;

	return 0;
}

/* Nothing to undo - the BAR mapping is owned by DPDK, not by us. */
static int
unmap_bar_rte(struct spdk_pci_device *device, uint32_t bar, void *addr)
{
	return 0;
}

/* Read `len` bytes of PCI config space at `offset` via DPDK. */
static int
cfg_read_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
{
	return dpdk_pci_device_read_config(dev->dev_handle, value, len, offset);
}

/* Write `len` bytes of PCI config space at `offset` via DPDK. */
static int
cfg_write_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
{
	return dpdk_pci_device_write_config(dev->dev_handle, value, len, offset);
}

/* Hot-remove the rte device, retrying on -ENOMSG to work around flaky
 * DPDK hotplug IPC (see DPDK_HOTPLUG_RETRY_COUNT above).
 */
static void
remove_rte_dev(struct rte_pci_device *rte_dev)
{
	char bdf[32];
	int i = 0, rc;

	snprintf(bdf, sizeof(bdf), "%s", dpdk_pci_device_get_name(rte_dev));
	do {
		rc = rte_eal_hotplug_remove("pci", bdf);
	} while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT);
}

/* rte_eal_alarm callback; runs on the DPDK interrupt thread. */
static void
detach_rte_cb(void *_dev)
{
	remove_rte_dev(_dev);
}

/* if it's a physical device we need to deal with DPDK on
 * a different process and we can't just unset one flag
 * here. We also want to stop using any device resources
 * so that the device isn't "in use" by the userspace driver
 * once we detach it. This would allow attaching the device
 * to a different process, or to a kernel driver like nvme.
 */
static void
detach_rte(struct spdk_pci_device *dev)
{
	struct rte_pci_device *rte_dev = dev->dev_handle;
	int i;
	bool removed;

	if (!spdk_process_is_primary()) {
		remove_rte_dev(rte_dev);
		return;
	}

	pthread_mutex_lock(&g_pci_mutex);
	dev->internal.attached = false;
	/* prevent the hotremove notification from removing this device */
	dev->internal.pending_removal = true;
	pthread_mutex_unlock(&g_pci_mutex);

	/* The actual removal runs on the DPDK interrupt thread via an alarm;
	 * we poll for completion below.
	 */
	rte_eal_alarm_set(1, detach_rte_cb, rte_dev);

	/* wait up to 2s for the cb to execute */
	for (i = 2000; i > 0; i--) {

		spdk_delay_us(1000);
		pthread_mutex_lock(&g_pci_mutex);
		removed = dev->internal.removed;
		pthread_mutex_unlock(&g_pci_mutex);

		if (removed) {
			break;
		}
	}

	/* besides checking the removed flag, we also need to wait
	 * for the dpdk detach function to unwind, as it's doing some
	 * operations even after calling our detach callback. Simply
	 * cancel the alarm - if it started executing already, this
	 * call will block and wait for it to finish.
	 */
	rte_eal_alarm_cancel(detach_rte_cb, rte_dev);

	/* the device could have been finally removed, so just check
	 * it again.
	 */
	pthread_mutex_lock(&g_pci_mutex);
	removed = dev->internal.removed;
	pthread_mutex_unlock(&g_pci_mutex);
	if (!removed) {
		SPDK_ERRLOG("Timeout waiting for DPDK to remove PCI device %s.\n",
			    dpdk_pci_device_get_name(rte_dev));
		/* If we reach this state, then the device couldn't be removed and most likely
		   a subsequent hot add of a device in the same BDF will fail */
	}
}

/* Register an SPDK PCI driver by name and ID table. Called at startup
 * (typically from driver constructor functions), before pci_env_init
 * registers the accumulated drivers with DPDK.
 */
void
spdk_pci_driver_register(const char *name, struct spdk_pci_id *id_table, uint32_t flags)
{
	struct spdk_pci_driver *driver;

	driver = calloc(1, sizeof(*driver));
	if (!driver) {
		/* we can't do any better than bailing atm */
		return;
	}

	driver->name = name;
	driver->id_table = id_table;
	driver->drv_flags = flags;
	/* The embedded buffer doubles as the rte_pci_driver storage. */
	driver->driver = (struct rte_pci_driver *)driver->driver_buf;
	TAILQ_INSERT_TAIL(&g_pci_drivers, driver, tailq);
}

/* Convenience accessor for the pre-registered "nvme" driver. */
struct spdk_pci_driver *
spdk_pci_nvme_get_driver(void)
{
	return spdk_pci_get_driver("nvme");
}

/* Look up a registered SPDK PCI driver by name; NULL if not found. */
struct spdk_pci_driver *
spdk_pci_get_driver(const char *name)
{
	struct spdk_pci_driver *driver;

	TAILQ_FOREACH(driver, &g_pci_drivers, tailq) {
		if (strcmp(driver->name, name) == 0) {
			return driver;
		}
	}

	return NULL;
}

/* DPDK device-event callback; handles hot-remove notifications by marking
 * the matching device pending_removal and, if it isn't attached, scheduling
 * its removal on the interrupt thread.
 */
static void
pci_device_rte_dev_event(const char *device_name,
			 enum rte_dev_event_type event,
			 void *cb_arg)
{
	struct spdk_pci_device *dev;
	bool can_detach = false;

	switch (event) {
	default:
	case RTE_DEV_EVENT_ADD:
		/* Nothing to do here yet. */
		break;
	case RTE_DEV_EVENT_REMOVE:
		pthread_mutex_lock(&g_pci_mutex);
		TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
			struct rte_pci_device *rte_dev = dev->dev_handle;

			if (strcmp(dpdk_pci_device_get_name(rte_dev), device_name) == 0 &&
			    !dev->internal.pending_removal) {
				can_detach = !dev->internal.attached;
				/* prevent any further attaches */
				dev->internal.pending_removal = true;
				break;
			}
		}
		pthread_mutex_unlock(&g_pci_mutex);

		if (dev != NULL && can_detach) {
			/* if device is not attached we can remove it right away.
			 * Otherwise it will be removed at detach.
			 *
			 * Because the user's callback is invoked in eal interrupt
			 * callback, the interrupt callback need to be finished before
			 * it can be unregistered when detaching device. So finish
			 * callback soon and use a deferred removal to detach device
			 * is need. It is a workaround, once the device detaching be
			 * moved into the eal in the future, the deferred removal could
			 * be deleted.
			 */
			rte_eal_alarm_set(1, detach_rte_cb, dev->dev_handle);
		}
		break;
	}
}

/* Reap devices marked removed and promote devices hotplugged on a DPDK
 * thread into the main device list, updating the vtophys maps accordingly.
 */
static void
cleanup_pci_devices(void)
{
	struct spdk_pci_device *dev, *tmp;

	pthread_mutex_lock(&g_pci_mutex);
	/* cleanup removed devices */
	TAILQ_FOREACH_SAFE(dev, &g_pci_devices, internal.tailq, tmp) {
		if (!dev->internal.removed) {
			continue;
		}

		vtophys_pci_device_removed(dev->dev_handle);
		TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq);
		free(dev);
	}

	/* add newly-attached devices */
	TAILQ_FOREACH_SAFE(dev, &g_pci_hotplugged_devices, internal.tailq, tmp) {
		TAILQ_REMOVE(&g_pci_hotplugged_devices, dev, internal.tailq);
		TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq);
		vtophys_pci_device_added(dev->dev_handle);
	}
	pthread_mutex_unlock(&g_pci_mutex);
}

static int scan_pci_bus(bool delay_init);

/* Shared tail of pci_env_init/pci_env_reinit: scan the bus and, in the
 * primary process, hook up the hot-remove notification.
 */
static inline void
_pci_env_init(void)
{
	/* We assume devices were present on the bus for more than 2 seconds
	 * before initializing SPDK and there's no need to wait more. We scan
	 * the bus, but we don't block any devices.
	 */
	scan_pci_bus(false);

	/* Register a single hotremove callback for all devices. */
	if (spdk_process_is_primary()) {
		rte_dev_event_callback_register(NULL, pci_device_rte_dev_event, NULL);
	}
}

/* One-time PCI environment setup: initialize the DPDK PCI shim, register
 * all SPDK drivers accumulated in g_pci_drivers with DPDK, then scan.
 * Returns 0 on success or the dpdk_pci_init() error code.
 */
int
pci_env_init(void)
{
	struct spdk_pci_driver *driver;
	int rc;

	rc = dpdk_pci_init();
	if (rc) {
		return rc;
	}

	TAILQ_FOREACH(driver, &g_pci_drivers, tailq) {
		dpdk_pci_driver_register(driver, pci_device_init, pci_device_fini);
	}

	_pci_env_init();
	return 0;
}

void
pci_env_reinit(void)
{
	/* There is no need to register pci drivers again, since they were
	 * already pre-registered in pci_env_init.
	 */

	_pci_env_init();
}

/* Shutdown-time cleanup: reap removed devices, warn about devices still
 * attached, and unregister the hot-remove callback (primary process only).
 */
void
pci_env_fini(void)
{
	struct spdk_pci_device *dev;
	char bdf[32];

	cleanup_pci_devices();
	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
		if (dev->internal.attached) {
			spdk_pci_addr_fmt(bdf, sizeof(bdf), &dev->addr);
			SPDK_ERRLOG("Device %s is still attached at shutdown!\n", bdf);
		}
	}

	if (spdk_process_is_primary()) {
		rte_dev_event_callback_unregister(NULL, pci_device_rte_dev_event, NULL);
	}
}

/* DPDK probe callback: wrap the rte device in an spdk_pci_device, invoke
 * the driver's enum callback (if armed by spdk_pci_device_attach/enumerate),
 * and queue the device for promotion by cleanup_pci_devices(). May run on a
 * DPDK thread, hence the hotplugged list rather than g_pci_devices directly.
 * Returns 0 on success, -1 on allocation failure, or the callback's error.
 */
int
pci_device_init(struct rte_pci_driver *_drv,
		struct rte_pci_device *_dev)
{
	/* Safe because spdk_pci_driver embeds the rte_pci_driver first. */
	struct spdk_pci_driver *driver = (struct spdk_pci_driver *)_drv;
	struct spdk_pci_device *dev;
	struct rte_pci_addr *addr;
	struct rte_pci_id *id;
	int rc;

	dev = calloc(1, sizeof(*dev));
	if (dev == NULL) {
		return -1;
	}

	dev->dev_handle = _dev;

	addr = dpdk_pci_device_get_addr(_dev);
	dev->addr.domain = addr->domain;
	dev->addr.bus = addr->bus;
	dev->addr.dev = addr->devid;
	dev->addr.func = addr->function;

	id = dpdk_pci_device_get_id(_dev);
	dev->id.class_id = id->class_id;
	dev->id.vendor_id = id->vendor_id;
	dev->id.device_id = id->device_id;
	dev->id.subvendor_id = id->subsystem_vendor_id;
	dev->id.subdevice_id = id->subsystem_device_id;

	dev->socket_id = dpdk_pci_device_get_numa_node(_dev);
	dev->type = "pci";

	dev->map_bar = map_bar_rte;
	dev->unmap_bar = unmap_bar_rte;
	dev->cfg_read = cfg_read_rte;
	dev->cfg_write = cfg_write_rte;

	dev->internal.driver = driver;
	dev->internal.claim_fd = -1;

	if (driver->cb_fn != NULL) {
		rc = driver->cb_fn(driver->cb_arg, dev);
		if (rc != 0) {
			free(dev);
			return rc;
		}
		dev->internal.attached = true;
	}

	pthread_mutex_lock(&g_pci_mutex);
	TAILQ_INSERT_TAIL(&g_pci_hotplugged_devices, dev, internal.tailq);
	pthread_mutex_unlock(&g_pci_mutex);
	return 0;
}
/* Record (or update) the tick count at which the given devargs' device
 * becomes allowed. Allocates a tracking entry on first use; logs and
 * returns silently if allocation fails.
 */
static void
set_allowed_at(struct rte_devargs *rte_da, uint64_t tsc)
{
	struct env_devargs *env_da;

	env_da = find_env_devargs(rte_da->bus, rte_da->name);
	if (env_da == NULL) {
		env_da = calloc(1, sizeof(*env_da));
		if (env_da == NULL) {
			SPDK_ERRLOG("could not set_allowed_at for device %s\n", rte_da->name);
			return;
		}
		env_da->bus = rte_da->bus;
		spdk_strcpy_pad(env_da->name, rte_da->name, sizeof(env_da->name), 0);
		TAILQ_INSERT_TAIL(&g_env_devargs, env_da, link);
	}

	env_da->allowed_at = tsc;
}

/* Return the allowed-at tick for this devargs' device, or 0 if the device
 * was never seen before (no tracking entry exists).
 */
static uint64_t
get_allowed_at(struct rte_devargs *rte_da)
{
	struct env_devargs *env_da;

	env_da = find_env_devargs(rte_da->bus, rte_da->name);
	if (env_da) {
		return env_da->allowed_at;
	} else {
		return 0;
	}
}

/* DPDK remove callback: mark the matching spdk_pci_device as removed so
 * cleanup_pci_devices() can reap it. Returns -EBUSY while the device is
 * still attached (or unknown) so DPDK will not tear it down under us.
 */
int
pci_device_fini(struct rte_pci_device *_dev)
{
	struct spdk_pci_device *dev;

	pthread_mutex_lock(&g_pci_mutex);
	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
		if (dev->dev_handle == _dev) {
			break;
		}
	}

	if (dev == NULL || dev->internal.attached) {
		/* The device might be still referenced somewhere in SPDK. */
		pthread_mutex_unlock(&g_pci_mutex);
		return -EBUSY;
	}

	/* remove our allowed_at option */
	if (dpdk_pci_device_get_devargs(_dev)) {
		set_allowed_at(dpdk_pci_device_get_devargs(_dev), 0);
	}

	/* It is possible that removed flag was already set when there is a race
	 * between the remove notification for this process, and another process
	 * that is also detaching from this same device (for example, when using
	 * nvme driver in multi-process mode. So do not assert here. See
	 * #2456 for additional details.
	 */
	dev->internal.removed = true;
	pthread_mutex_unlock(&g_pci_mutex);
	return 0;

}

/* Detach a previously attached device: release any claim lock, dispatch to
 * the provider that owns this device type ("pci" for rte devices), then reap
 * removed devices.
 */
void
spdk_pci_device_detach(struct spdk_pci_device *dev)
{
	struct spdk_pci_device_provider *provider;

	assert(dev->internal.attached);

	if (dev->internal.claim_fd >= 0) {
		spdk_pci_device_unclaim(dev);
	}

	TAILQ_FOREACH(provider, &g_pci_device_providers, tailq) {
		if (strcmp(dev->type, provider->name) == 0) {
			break;
		}
	}

	assert(provider != NULL);
	dev->internal.attached = false;
	provider->detach_cb(dev);

	cleanup_pci_devices();
}

/* Scan the PCI bus and manage per-device allow/block policy. With
 * delay_init, newly seen devices are blocked for ~2 seconds (see
 * set_allowed_at) before becoming probe-able - this is the hotplug
 * settle delay. Returns 0 on success, -1 on allocation/parse failure.
 */
static int
scan_pci_bus(bool delay_init)
{
	struct rte_dev_iterator it;
	struct rte_device *rte_dev;
	uint64_t now;

	dpdk_bus_scan();
	now = spdk_get_ticks();

	if (!TAILQ_FIRST(&g_pci_drivers)) {
		return 0;
	}

	RTE_DEV_FOREACH(rte_dev, "bus=pci", &it) {
		struct rte_devargs *da;

		da = dpdk_device_get_devargs(rte_dev);
		if (!da) {
			char devargs_str[128];

			/* the device was never blocked or allowed */
			da = calloc(1, sizeof(*da));
			if (!da) {
				return -1;
			}

			snprintf(devargs_str, sizeof(devargs_str), "pci:%s", dpdk_device_get_name(rte_dev));
			if (rte_devargs_parse(da, devargs_str) != 0) {
				free(da);
				return -1;
			}

			/* DPDK owns the devargs memory after insertion. */
			rte_devargs_insert(&da);
			dpdk_device_set_devargs(rte_dev, da);
		}

		if (get_allowed_at(da)) {
			uint64_t allowed_at = get_allowed_at(da);

			/* this device was seen by spdk before... */
			if (da->policy == RTE_DEV_BLOCKED && allowed_at <= now) {
				da->policy = RTE_DEV_ALLOWED;
			}
		} else if ((dpdk_device_scan_allowed(rte_dev) && da->policy == RTE_DEV_ALLOWED) ||
			   da->policy != RTE_DEV_BLOCKED) {
			/* override the policy only if not permanently blocked */

			if (delay_init) {
				da->policy = RTE_DEV_BLOCKED;
				set_allowed_at(da, now + 2 * spdk_get_ticks_hz());
			} else {
				da->policy = RTE_DEV_ALLOWED;
				set_allowed_at(da, now);
			}
		}
	}

	return 0;
}

/* "pci" provider attach: hotplug-add the device through DPDK, retrying on
 * -ENOMSG (flaky hotplug IPC). A retry that hits -EEXIST means an earlier
 * timed-out attempt actually succeeded, so treat it as success.
 */
static int
pci_attach_rte(const struct spdk_pci_addr *addr)
{
	char bdf[32];
	int rc, i = 0;

	spdk_pci_addr_fmt(bdf, sizeof(bdf), addr);

	do {
		rc = rte_eal_hotplug_add("pci", bdf, "");
	} while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT);

	if (i > 1 && rc == -EEXIST) {
		/* Even though the previous request timed out, the device
		 * was attached successfully.
		 */
		rc = 0;
	}

	return rc;
}

/* Default device provider backed by DPDK's PCI bus. */
static struct spdk_pci_device_provider g_pci_rte_provider = {
	.name = "pci",
	.attach_cb = pci_attach_rte,
	.detach_cb = detach_rte,
};

SPDK_PCI_REGISTER_DEVICE_PROVIDER(pci, &g_pci_rte_provider);

/* Attach a single device at pci_address to the given driver. If the device
 * is already known, just invoke enum_cb on it; otherwise arm the driver's
 * callback and try each registered provider's attach_cb in turn. Explicit
 * attach overrides any block policy. Returns 0 on success, negative on
 * failure (-1 if already attached/pending removal or no provider succeeded).
 */
int
spdk_pci_device_attach(struct spdk_pci_driver *driver,
		       spdk_pci_enum_cb enum_cb,
		       void *enum_ctx, struct spdk_pci_addr *pci_address)
{
	struct spdk_pci_device *dev;
	struct spdk_pci_device_provider *provider;
	struct rte_pci_device *rte_dev;
	struct rte_devargs *da;
	int rc;

	cleanup_pci_devices();

	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
		if (spdk_pci_addr_compare(&dev->addr, pci_address) == 0) {
			break;
		}
	}

	if (dev != NULL && dev->internal.driver == driver) {
		pthread_mutex_lock(&g_pci_mutex);
		if (dev->internal.attached || dev->internal.pending_removal) {
			pthread_mutex_unlock(&g_pci_mutex);
			return -1;
		}

		rc = enum_cb(enum_ctx, dev);
		if (rc == 0) {
			dev->internal.attached = true;
		}
		pthread_mutex_unlock(&g_pci_mutex);
		return rc;
	}

	/* Arm the callback; pci_device_init() will invoke it during probe. */
	driver->cb_fn = enum_cb;
	driver->cb_arg = enum_ctx;

	rc = -ENODEV;
	TAILQ_FOREACH(provider, &g_pci_device_providers, tailq) {
		rc = provider->attach_cb(pci_address);
		if (rc == 0) {
			break;
		}
	}

	driver->cb_arg = NULL;
	driver->cb_fn = NULL;

	cleanup_pci_devices();

	if (rc != 0) {
		return -1;
	}

	/* explicit attach ignores the allowlist, so if we blocked this
	 * device before let's enable it now - just for clarity.
	 */
	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
		if (spdk_pci_addr_compare(&dev->addr, pci_address) == 0) {
			break;
		}
	}
	assert(dev != NULL);

	rte_dev = dev->dev_handle;
	if (rte_dev != NULL) {
		da = dpdk_pci_device_get_devargs(rte_dev);
		if (da && get_allowed_at(da)) {
			set_allowed_at(da, spdk_get_ticks());
			da->policy = RTE_DEV_ALLOWED;
		}
	}

	return 0;
}

/* Note: You can call spdk_pci_enumerate from more than one thread
 * simultaneously safely, but you cannot call spdk_pci_enumerate
 * and rte_eal_pci_probe simultaneously.
 */
int
spdk_pci_enumerate(struct spdk_pci_driver *driver,
		   spdk_pci_enum_cb enum_cb,
		   void *enum_ctx)
{
	struct spdk_pci_device *dev;
	int rc;

	cleanup_pci_devices();

	/* First offer already-known, unattached devices to the callback. */
	pthread_mutex_lock(&g_pci_mutex);
	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
		if (dev->internal.attached ||
		    dev->internal.driver != driver ||
		    dev->internal.pending_removal) {
			continue;
		}

		rc = enum_cb(enum_ctx, dev);
		if (rc == 0) {
			dev->internal.attached = true;
		} else if (rc < 0) {
			pthread_mutex_unlock(&g_pci_mutex);
			return -1;
		}
	}
	pthread_mutex_unlock(&g_pci_mutex);

	/* Then rescan (with the hotplug settle delay) and probe new devices,
	 * which reach the callback through pci_device_init().
	 */
	if (scan_pci_bus(true) != 0) {
		return -1;
	}

	driver->cb_fn = enum_cb;
	driver->cb_arg = enum_ctx;

	if (dpdk_bus_probe() != 0) {
		driver->cb_arg = NULL;
		driver->cb_fn = NULL;
		return -1;
	}

	driver->cb_arg = NULL;
	driver->cb_fn = NULL;

	cleanup_pci_devices();
	return 0;
}

/* Invoke fn on every known device, under g_pci_mutex. FOREACH_SAFE so fn
 * may remove the current device; fn must not attempt to lock g_pci_mutex.
 */
void
spdk_pci_for_each_device(void *ctx, void (*fn)(void *ctx, struct spdk_pci_device *dev))
{
	struct spdk_pci_device *dev, *tmp;

	pthread_mutex_lock(&g_pci_mutex);
	TAILQ_FOREACH_SAFE(dev, &g_pci_devices, internal.tailq, tmp) {
		fn(ctx, dev);
	}
	pthread_mutex_unlock(&g_pci_mutex);
}

/* Map a BAR and, when vfio/IOMMU is active, also program a DMA mapping for
 * it so peer-to-peer style access matches DPDK's IOVA mode: in VA mode the
 * iova is the virtual address (and *phys_addr is rewritten to it), in PA
 * mode the iova is the physical address.
 */
int
spdk_pci_device_map_bar(struct spdk_pci_device *dev, uint32_t bar,
			void **mapped_addr, uint64_t *phys_addr, uint64_t *size)
{
	int rc;

	rc = dev->map_bar(dev, bar, mapped_addr, phys_addr, size);
	if (rc) {
		return rc;
	}

#if VFIO_ENABLED
	/* Automatically map the BAR to the IOMMU */
	if (!spdk_iommu_is_enabled()) {
		return 0;
	}

	if (rte_eal_iova_mode() == RTE_IOVA_VA) {
		/* We'll use the virtual address as the iova to match DPDK. */
		rc = vtophys_iommu_map_dma_bar((uint64_t)(*mapped_addr), (uint64_t) * mapped_addr, *size);
		if (rc) {
			dev->unmap_bar(dev, bar, *mapped_addr);
			return -EFAULT;
		}

		*phys_addr = (uint64_t)(*mapped_addr);
	} else {
		/* We'll use the physical address as the iova to match DPDK. */
		rc = vtophys_iommu_map_dma_bar((uint64_t)(*mapped_addr), *phys_addr, *size);
		if (rc) {
			dev->unmap_bar(dev, bar, *mapped_addr);
			return -EFAULT;
		}
	}
#endif
	return rc;
}

/* Undo spdk_pci_device_map_bar: drop the IOMMU DMA mapping (if any), then
 * unmap through the device's unmap_bar op.
 */
int
spdk_pci_device_unmap_bar(struct spdk_pci_device *dev, uint32_t bar, void *addr)
{
#if VFIO_ENABLED
	int rc;

	if (spdk_iommu_is_enabled()) {
		rc = vtophys_iommu_unmap_dma_bar((uint64_t)addr);
		if (rc) {
			return -EFAULT;
		}
	}
#endif

	return dev->unmap_bar(dev, bar, addr);
}

/* Thin wrappers delegating interrupt control to the DPDK shim. */
int
spdk_pci_device_enable_interrupt(struct spdk_pci_device *dev)
{
	return dpdk_pci_device_enable_interrupt(dev->dev_handle);
}

int
spdk_pci_device_disable_interrupt(struct spdk_pci_device *dev)
{
	return dpdk_pci_device_disable_interrupt(dev->dev_handle);
}

int
spdk_pci_device_get_interrupt_efd(struct spdk_pci_device *dev)
{
	return dpdk_pci_device_get_interrupt_efd(dev->dev_handle);
}

/* Simple accessors for the cached address and ID fields captured in
 * pci_device_init().
 */
uint32_t
spdk_pci_device_get_domain(struct spdk_pci_device *dev)
{
	return dev->addr.domain;
}

uint8_t
spdk_pci_device_get_bus(struct spdk_pci_device *dev)
{
	return dev->addr.bus;
}

uint8_t
spdk_pci_device_get_dev(struct spdk_pci_device *dev)
{
	return dev->addr.dev;
}

uint8_t
spdk_pci_device_get_func(struct spdk_pci_device *dev)
{
	return dev->addr.func;
}

uint16_t
spdk_pci_device_get_vendor_id(struct spdk_pci_device *dev)
{
	return dev->id.vendor_id;
}

uint16_t
spdk_pci_device_get_device_id(struct spdk_pci_device *dev)
{
	return dev->id.device_id;
}

uint16_t
spdk_pci_device_get_subvendor_id(struct spdk_pci_device *dev)
{
	return dev->id.subvendor_id;
}

uint16_t
spdk_pci_device_get_subdevice_id(struct spdk_pci_device *dev)
{
	return dev->id.subdevice_id;
}

struct spdk_pci_id
spdk_pci_device_get_id(struct spdk_pci_device *dev)
{
	return dev->id;
}

int
spdk_pci_device_get_socket_id(struct spdk_pci_device *dev)
{
	return dev->socket_id;
}

/* Config-space access via the device's cfg ops; fixed-width helpers below
 * just pick the access length.
 */
int
spdk_pci_device_cfg_read(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
{
	return dev->cfg_read(dev, value, len, offset);
}

int
spdk_pci_device_cfg_write(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
{
	return dev->cfg_write(dev, value, len, offset);
}

int
spdk_pci_device_cfg_read8(struct spdk_pci_device *dev, uint8_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, 1, offset);
}

int
spdk_pci_device_cfg_write8(struct spdk_pci_device *dev, uint8_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, 1, offset);
}

int
spdk_pci_device_cfg_read16(struct spdk_pci_device *dev, uint16_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, 2, offset);
}

int
spdk_pci_device_cfg_write16(struct spdk_pci_device *dev, uint16_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, 2, offset);
}

int
spdk_pci_device_cfg_read32(struct spdk_pci_device *dev, uint32_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, 4, offset);
}

int
spdk_pci_device_cfg_write32(struct spdk_pci_device *dev, uint32_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, 4, offset);
}

/* Walk the PCIe extended capability list looking for the Device Serial
 * Number capability (PCI_EXT_CAP_ID_SN) and format its 64-bit value as a
 * 16-hex-digit string into sn. len must be >= 17 (16 digits + NUL).
 * Returns 0 on success, -1 on error or if the capability is absent.
 */
int
spdk_pci_device_get_serial_number(struct spdk_pci_device *dev, char *sn, size_t len)
{
	int err;
	uint32_t pos, header = 0;
	uint32_t i, buf[2];

	if (len < 17) {
		return -1;
	}

	err = spdk_pci_device_cfg_read32(dev, &header, PCI_CFG_SIZE);
	if (err || !header) {
		return -1;
	}

	pos = PCI_CFG_SIZE;
	while (1) {
		if ((header & 0x0000ffff) == PCI_EXT_CAP_ID_SN) {
			if (pos) {
				/* skip the header */
				pos += 4;
				for (i = 0; i < 2; i++) {
					err = spdk_pci_device_cfg_read32(dev, &buf[i], pos + 4 * i);
					if (err) {
						return -1;
					}
				}
				/* buf[1] holds the upper dword of the serial number. */
				snprintf(sn, len, "%08x%08x", buf[1], buf[0]);
				return 0;
			}
		}
		/* Next-capability offset lives in header bits 31:20. */
		pos = (header >> 20) & 0xffc;
		/* 0 if no other items exist */
		if (pos < PCI_CFG_SIZE) {
			return -1;
		}
		err = spdk_pci_device_cfg_read32(dev, &header, pos);
		if (err) {
			return -1;
		}
	}
	return -1;
}

struct spdk_pci_addr
spdk_pci_device_get_addr(struct spdk_pci_device *dev)
{
	return dev->addr;
}

/* True once a hot-remove has been observed for this device (it may not be
 * fully torn down yet - this reports pending_removal, not removed).
 */
bool
spdk_pci_device_is_removed(struct spdk_pci_device *dev)
{
	return dev->internal.pending_removal;
}

/* Three-way comparison of PCI addresses ordered by domain, bus, dev, func.
 * Returns <0, 0, or >0 like strcmp.
 */
int
spdk_pci_addr_compare(const struct spdk_pci_addr *a1, const struct spdk_pci_addr *a2)
{
	if (a1->domain > a2->domain) {
		return 1;
	} else if (a1->domain < a2->domain) {
		return -1;
	} else if (a1->bus > a2->bus) {
		return 1;
	} else if (a1->bus < a2->bus) {
		return -1;
	} else if (a1->dev > a2->dev) {
		return 1;
	} else if (a1->dev < a2->dev) {
		return -1;
	} else if (a1->func > a2->func) {
		return 1;
	} else if (a1->func < a2->func) {
		return -1;
	}

	return 0;
}

#ifdef __linux__
/* Claim exclusive cross-process ownership of a device by taking an fcntl
 * write lock on a per-BDF file in /var/tmp, and record our pid in it so a
 * failed claim can report the current owner. Returns 0 on success,
 * -EACCES if another process holds the lock, or -errno on I/O failure.
 */
int
spdk_pci_device_claim(struct spdk_pci_device *dev)
{
	int dev_fd;
	char dev_name[64];
	int pid;
	void *dev_map;
	struct flock pcidev_lock = {
		.l_type = F_WRLCK,
		.l_whence = SEEK_SET,
		.l_start = 0,
		.l_len = 0,
	};

	snprintf(dev_name, sizeof(dev_name), "/var/tmp/spdk_pci_lock_%04x:%02x:%02x.%x",
		 dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func);

	dev_fd = open(dev_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
	if (dev_fd == -1) {
		SPDK_ERRLOG("could not open %s\n", dev_name);
		return -errno;
	}

	if (ftruncate(dev_fd, sizeof(int)) != 0) {
		SPDK_ERRLOG("could not truncate %s\n", dev_name);
		close(dev_fd);
		return -errno;
	}

	dev_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
		       MAP_SHARED, dev_fd, 0);
	if (dev_map == MAP_FAILED) {
		SPDK_ERRLOG("could not mmap dev %s (%d)\n", dev_name, errno);
		close(dev_fd);
		return -errno;
	}

	if (fcntl(dev_fd, F_SETLK, &pcidev_lock) != 0) {
		pid = *(int *)dev_map;
		SPDK_ERRLOG("Cannot create lock on device %s, probably"
			    " process %d has claimed it\n", dev_name, pid);
		munmap(dev_map, sizeof(int));
		close(dev_fd);
		/* F_SETLK returns unspecified errnos, normalize them */
		return -EACCES;
	}

	*(int *)dev_map = (int)getpid();
	munmap(dev_map, sizeof(int));
	dev->internal.claim_fd = dev_fd;
	/* Keep dev_fd open to maintain the lock. */
	return 0;
}

/* Release the claim: closing the fd drops the fcntl lock, then the lock
 * file itself is unlinked.
 */
void
spdk_pci_device_unclaim(struct spdk_pci_device *dev)
{
	char dev_name[64];

	snprintf(dev_name, sizeof(dev_name), "/var/tmp/spdk_pci_lock_%04x:%02x:%02x.%x",
		 dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func);

	close(dev->internal.claim_fd);
	dev->internal.claim_fd = -1;
	unlink(dev_name);
}
#else /* !__linux__ */
int
spdk_pci_device_claim(struct spdk_pci_device *dev)
{
	/* TODO */
	return 0;
}

void
spdk_pci_device_unclaim(struct spdk_pci_device *dev)
{
	/* TODO */
}
#endif /* __linux__ */

/* Parse a BDF string into addr. Accepts DDDD:BB:DD.F (and dot-separated
 * variants), BB:DD.F (domain defaults to 0), DDDD:BB:DD (function defaults
 * to 0), and BB:DD / BB.DD (both default to 0). Returns 0 on success,
 * -EINVAL on NULL arguments, unparsable input, or out-of-range fields.
 */
int
spdk_pci_addr_parse(struct spdk_pci_addr *addr, const char *bdf)
{
	unsigned domain, bus, dev, func;

	if (addr == NULL || bdf == NULL) {
		return -EINVAL;
	}

	if ((sscanf(bdf, "%x:%x:%x.%x", &domain, &bus, &dev, &func) == 4) ||
	    (sscanf(bdf, "%x.%x.%x.%x", &domain, &bus, &dev, &func) == 4)) {
		/* Matched a full address - all variables are initialized */
	} else if (sscanf(bdf, "%x:%x:%x", &domain, &bus, &dev) == 3) {
		func = 0;
	} else if ((sscanf(bdf, "%x:%x.%x", &bus, &dev, &func) == 3) ||
		   (sscanf(bdf, "%x.%x.%x", &bus, &dev, &func) == 3)) {
		domain = 0;
	} else if ((sscanf(bdf, "%x:%x", &bus, &dev) == 2) ||
		   (sscanf(bdf, "%x.%x", &bus, &dev) == 2)) {
		domain = 0;
		func = 0;
	} else {
		return -EINVAL;
	}

	if (bus > 0xFF || dev > 0x1F || func > 7) {
		return -EINVAL;
	}

	addr->domain = domain;
	addr->bus = bus;
	addr->dev = dev;
	addr->func = func;

	return 0;
}

/* Format addr as "DDDD:BB:DD.F" into bdf. Returns 0 on success, -1 if the
 * buffer is too small or formatting fails.
 */
int
spdk_pci_addr_fmt(char *bdf, size_t sz, const struct spdk_pci_addr *addr)
{
	int rc;

	rc = snprintf(bdf, sz, "%04x:%02x:%02x.%x",
		      addr->domain, addr->bus,
		      addr->dev, addr->func);

	if (rc > 0 && (size_t)rc < sz) {
		return 0;
	}

	return -1;
}

/* Insert an externally-constructed device (with its ops already filled in)
 * into the device list, invoking the driver's enum callback if armed.
 * Returns 0 on success or -ECANCELED if the callback rejects the device.
 */
int
spdk_pci_hook_device(struct spdk_pci_driver *drv, struct spdk_pci_device *dev)
{
	int rc;

	assert(dev->map_bar != NULL);
	assert(dev->unmap_bar != NULL);
	assert(dev->cfg_read != NULL);
	assert(dev->cfg_write != NULL);
	dev->internal.driver = drv;

	if (drv->cb_fn != NULL) {
		rc = drv->cb_fn(drv->cb_arg, dev);
		if (rc != 0) {
			return -ECANCELED;
		}

		dev->internal.attached = true;
	}

	TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq);

	return 0;
}

/* Remove a hooked device from the device list. The caller must have
 * detached it first.
 */
void
spdk_pci_unhook_device(struct spdk_pci_device *dev)
{
	assert(!dev->internal.attached);
	TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq);
}

/* Register an additional device provider (e.g. via
 * SPDK_PCI_REGISTER_DEVICE_PROVIDER).
 */
void
spdk_pci_register_device_provider(struct spdk_pci_device_provider *provider)
{
	TAILQ_INSERT_TAIL(&g_pci_device_providers, provider, tailq);
}

const char *
spdk_pci_device_get_type(const struct spdk_pci_device *dev)
{
	return dev->type;
}

/* Explicitly allow a device by inserting an RTE_DEV_ALLOWED devargs entry
 * for its address. Returns 0 on success, -ENOMEM/-EINVAL on failure.
 */
int
spdk_pci_device_allow(struct spdk_pci_addr *pci_addr)
{
	struct rte_devargs *da;
	char devargs_str[128];

	da = calloc(1, sizeof(*da));
	if (da == NULL) {
		SPDK_ERRLOG("could not allocate rte_devargs\n");
		return -ENOMEM;
	}

	snprintf(devargs_str, sizeof(devargs_str), "pci:%04x:%02x:%02x.%x",
		 pci_addr->domain, pci_addr->bus, pci_addr->dev, pci_addr->func);
	if (rte_devargs_parse(da, devargs_str) != 0) {
		SPDK_ERRLOG("rte_devargs_parse() failed on '%s'\n", devargs_str);
		free(da);
		return -EINVAL;
	}
	da->policy = RTE_DEV_ALLOWED;
	/* Note: if a devargs already exists for this device address, it just gets
	 * overridden. So we do not need to check if the devargs already exists.
	 * DPDK will take care of memory management for the devargs structure after
	 * it has been inserted, so there's nothing SPDK needs to track.
	 */
	if (rte_devargs_insert(&da) != 0) {
		SPDK_ERRLOG("rte_devargs_insert() failed on '%s'\n", devargs_str);
		free(da);
		return -EINVAL;
	}

	return 0;
}