1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include "env_internal.h" 35 36 #include "spdk/env.h" 37 38 #define SYSFS_PCI_DRIVERS "/sys/bus/pci/drivers" 39 40 #define PCI_CFG_SIZE 256 41 #define PCI_EXT_CAP_ID_SN 0x03 42 43 /* DPDK 18.11+ hotplug isn't robust. Multiple apps starting at the same time 44 * might cause the internal IPC to misbehave. Just retry in such case. 45 */ 46 #define DPDK_HOTPLUG_RETRY_COUNT 4 47 48 static pthread_mutex_t g_pci_mutex = PTHREAD_MUTEX_INITIALIZER; 49 static TAILQ_HEAD(, spdk_pci_device) g_pci_devices = TAILQ_HEAD_INITIALIZER(g_pci_devices); 50 static TAILQ_HEAD(, spdk_pci_driver) g_pci_drivers = TAILQ_HEAD_INITIALIZER(g_pci_drivers); 51 52 static int 53 spdk_map_bar_rte(struct spdk_pci_device *device, uint32_t bar, 54 void **mapped_addr, uint64_t *phys_addr, uint64_t *size) 55 { 56 struct rte_pci_device *dev = device->dev_handle; 57 58 *mapped_addr = dev->mem_resource[bar].addr; 59 *phys_addr = (uint64_t)dev->mem_resource[bar].phys_addr; 60 *size = (uint64_t)dev->mem_resource[bar].len; 61 62 return 0; 63 } 64 65 static int 66 spdk_unmap_bar_rte(struct spdk_pci_device *device, uint32_t bar, void *addr) 67 { 68 return 0; 69 } 70 71 static int 72 spdk_cfg_read_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset) 73 { 74 int rc; 75 76 rc = rte_pci_read_config(dev->dev_handle, value, len, offset); 77 78 #if defined(__FreeBSD__) && RTE_VERSION < RTE_VERSION_NUM(18, 11, 0, 0) 79 /* Older DPDKs return 0 on success and -1 on failure */ 80 return rc; 81 #endif 82 return (rc > 0 && (uint32_t) rc == len) ? 0 : -1; 83 } 84 85 static int 86 spdk_cfg_write_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset) 87 { 88 int rc; 89 90 rc = rte_pci_write_config(dev->dev_handle, value, len, offset); 91 92 #ifdef __FreeBSD__ 93 /* DPDK returns 0 on success and -1 on failure */ 94 return rc; 95 #endif 96 return (rc > 0 && (uint32_t) rc == len) ? 0 : -1; 97 } 98 99 static void 100 spdk_detach_rte(struct spdk_pci_device *dev) 101 { 102 struct rte_pci_device *rte_dev = dev->dev_handle; 103 104 #if RTE_VERSION >= RTE_VERSION_NUM(18, 11, 0, 0) 105 char bdf[32]; 106 int i = 0, rc; 107 108 snprintf(bdf, sizeof(bdf), "%s", rte_dev->device.name); 109 do { 110 rc = rte_eal_hotplug_remove("pci", bdf); 111 } while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT); 112 #else 113 rte_eal_dev_detach(&rte_dev->device); 114 #endif 115 } 116 117 void 118 spdk_pci_driver_register(struct spdk_pci_driver *driver) 119 { 120 TAILQ_INSERT_TAIL(&g_pci_drivers, driver, tailq); 121 } 122 123 void 124 spdk_pci_init(void) 125 { 126 #if RTE_VERSION >= RTE_VERSION_NUM(18, 11, 0, 0) 127 struct spdk_pci_driver *driver; 128 129 /* We need to pre-register pci drivers for the pci devices to be 130 * attachable in multi-process with DPDK 18.11+. 131 * 132 * DPDK 18.11+ does its best to ensure all devices are equally 133 * attached or detached in all processes within a shared memory group. 134 * For SPDK it means that if a device is hotplugged in the primary, 135 * then DPDK will automatically send an IPC hotplug request to all other 136 * processes. Those other processes may not have the same SPDK PCI 137 * driver registered and may fail to attach the device. DPDK will send 138 * back the failure status, and the the primary process will also fail 139 * to hotplug the device. To prevent that, we need to pre-register the 140 * pci drivers here. 141 */ 142 TAILQ_FOREACH(driver, &g_pci_drivers, tailq) { 143 assert(!driver->is_registered); 144 driver->is_registered = true; 145 rte_pci_register(&driver->driver); 146 } 147 #endif 148 } 149 150 void 151 spdk_pci_fini(void) 152 { 153 struct spdk_pci_device *dev; 154 char bdf[32]; 155 156 TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { 157 if (dev->internal.attached) { 158 spdk_pci_addr_fmt(bdf, sizeof(bdf), &dev->addr); 159 fprintf(stderr, "Device %s is still attached at shutdown!\n", bdf); 160 } 161 } 162 } 163 164 int 165 spdk_pci_device_init(struct rte_pci_driver *_drv, 166 struct rte_pci_device *_dev) 167 { 168 struct spdk_pci_driver *driver = (struct spdk_pci_driver *)_drv; 169 struct spdk_pci_device *dev; 170 int rc; 171 172 #if RTE_VERSION < RTE_VERSION_NUM(18, 11, 0, 0) 173 if (!driver->cb_fn) { 174 /* Return a positive value to indicate that this device does 175 * not belong to this driver, but this isn't an error. 176 */ 177 return 1; 178 } 179 #endif 180 181 dev = calloc(1, sizeof(*dev)); 182 if (dev == NULL) { 183 return -1; 184 } 185 186 dev->dev_handle = _dev; 187 188 dev->addr.domain = _dev->addr.domain; 189 dev->addr.bus = _dev->addr.bus; 190 dev->addr.dev = _dev->addr.devid; 191 dev->addr.func = _dev->addr.function; 192 dev->id.vendor_id = _dev->id.vendor_id; 193 dev->id.device_id = _dev->id.device_id; 194 dev->id.subvendor_id = _dev->id.subsystem_vendor_id; 195 dev->id.subdevice_id = _dev->id.subsystem_device_id; 196 dev->socket_id = _dev->device.numa_node; 197 198 dev->map_bar = spdk_map_bar_rte; 199 dev->unmap_bar = spdk_unmap_bar_rte; 200 dev->cfg_read = spdk_cfg_read_rte; 201 dev->cfg_write = spdk_cfg_write_rte; 202 dev->detach = spdk_detach_rte; 203 204 dev->internal.driver = driver; 205 206 if (driver->cb_fn != NULL) { 207 rc = driver->cb_fn(driver->cb_arg, dev); 208 if (rc != 0) { 209 free(dev); 210 return rc; 211 } 212 dev->internal.attached = true; 213 } 214 215 TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq); 216 spdk_vtophys_pci_device_added(dev->dev_handle); 217 return 0; 218 } 219 220 int 221 spdk_pci_device_fini(struct rte_pci_device *_dev) 222 { 223 struct spdk_pci_device *dev; 224 225 TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { 226 if (dev->dev_handle == _dev) { 227 break; 228 } 229 } 230 231 if (dev == NULL || dev->internal.attached) { 232 /* The device might be still referenced somewhere in SPDK. */ 233 return -1; 234 } 235 236 spdk_vtophys_pci_device_removed(dev->dev_handle); 237 TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq); 238 free(dev); 239 return 0; 240 241 } 242 243 void 244 spdk_pci_device_detach(struct spdk_pci_device *dev) 245 { 246 assert(dev->internal.attached); 247 dev->internal.attached = false; 248 dev->detach(dev); 249 } 250 251 int 252 spdk_pci_device_attach(struct spdk_pci_driver *driver, 253 spdk_pci_enum_cb enum_cb, 254 void *enum_ctx, struct spdk_pci_addr *pci_address) 255 { 256 struct spdk_pci_device *dev; 257 int rc; 258 char bdf[32]; 259 260 spdk_pci_addr_fmt(bdf, sizeof(bdf), pci_address); 261 262 pthread_mutex_lock(&g_pci_mutex); 263 264 TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { 265 if (spdk_pci_addr_compare(&dev->addr, pci_address) == 0) { 266 break; 267 } 268 } 269 270 if (dev != NULL && dev->internal.driver == driver) { 271 if (dev->internal.attached) { 272 pthread_mutex_unlock(&g_pci_mutex); 273 return -1; 274 } 275 276 rc = enum_cb(enum_ctx, dev); 277 if (rc == 0) { 278 dev->internal.attached = true; 279 } 280 pthread_mutex_unlock(&g_pci_mutex); 281 return rc; 282 } 283 284 if (!driver->is_registered) { 285 driver->is_registered = true; 286 rte_pci_register(&driver->driver); 287 } 288 289 driver->cb_fn = enum_cb; 290 driver->cb_arg = enum_ctx; 291 292 #if RTE_VERSION >= RTE_VERSION_NUM(18, 11, 0, 0) 293 int i = 0; 294 295 do { 296 rc = rte_eal_hotplug_add("pci", bdf, ""); 297 } while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT); 298 299 if (i > 1 && rc == -EEXIST) { 300 /* Even though the previous request timed out, the device 301 * was attached successfully. 302 */ 303 rc = 0; 304 } 305 #else 306 rc = rte_eal_dev_attach(bdf, ""); 307 #endif 308 309 driver->cb_arg = NULL; 310 driver->cb_fn = NULL; 311 pthread_mutex_unlock(&g_pci_mutex); 312 313 return rc == 0 ? 0 : -1; 314 } 315 316 /* Note: You can call spdk_pci_enumerate from more than one thread 317 * simultaneously safely, but you cannot call spdk_pci_enumerate 318 * and rte_eal_pci_probe simultaneously. 319 */ 320 int 321 spdk_pci_enumerate(struct spdk_pci_driver *driver, 322 spdk_pci_enum_cb enum_cb, 323 void *enum_ctx) 324 { 325 struct spdk_pci_device *dev; 326 int rc; 327 328 pthread_mutex_lock(&g_pci_mutex); 329 330 TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { 331 if (dev->internal.attached || dev->internal.driver != driver) { 332 continue; 333 } 334 335 rc = enum_cb(enum_ctx, dev); 336 if (rc == 0) { 337 dev->internal.attached = true; 338 } else if (rc < 0) { 339 pthread_mutex_unlock(&g_pci_mutex); 340 return -1; 341 } 342 } 343 344 if (!driver->is_registered) { 345 driver->is_registered = true; 346 rte_pci_register(&driver->driver); 347 } 348 349 driver->cb_fn = enum_cb; 350 driver->cb_arg = enum_ctx; 351 352 if (rte_bus_scan() != 0 || rte_bus_probe() != 0) { 353 driver->cb_arg = NULL; 354 driver->cb_fn = NULL; 355 pthread_mutex_unlock(&g_pci_mutex); 356 return -1; 357 } 358 359 driver->cb_arg = NULL; 360 driver->cb_fn = NULL; 361 pthread_mutex_unlock(&g_pci_mutex); 362 363 return 0; 364 } 365 366 int 367 spdk_pci_device_map_bar(struct spdk_pci_device *dev, uint32_t bar, 368 void **mapped_addr, uint64_t *phys_addr, uint64_t *size) 369 { 370 return dev->map_bar(dev, bar, mapped_addr, phys_addr, size); 371 } 372 373 int 374 spdk_pci_device_unmap_bar(struct spdk_pci_device *dev, uint32_t bar, void *addr) 375 { 376 return dev->unmap_bar(dev, bar, addr); 377 } 378 379 uint32_t 380 spdk_pci_device_get_domain(struct spdk_pci_device *dev) 381 { 382 return dev->addr.domain; 383 } 384 385 uint8_t 386 spdk_pci_device_get_bus(struct spdk_pci_device *dev) 387 { 388 return dev->addr.bus; 389 } 390 391 uint8_t 392 spdk_pci_device_get_dev(struct spdk_pci_device *dev) 393 { 394 return dev->addr.dev; 395 } 396 397 uint8_t 398 spdk_pci_device_get_func(struct spdk_pci_device *dev) 399 { 400 return dev->addr.func; 401 } 402 403 uint16_t 404 spdk_pci_device_get_vendor_id(struct spdk_pci_device *dev) 405 { 406 return dev->id.vendor_id; 407 } 408 409 uint16_t 410 spdk_pci_device_get_device_id(struct spdk_pci_device *dev) 411 { 412 return dev->id.device_id; 413 } 414 415 uint16_t 416 spdk_pci_device_get_subvendor_id(struct spdk_pci_device *dev) 417 { 418 return dev->id.subvendor_id; 419 } 420 421 uint16_t 422 spdk_pci_device_get_subdevice_id(struct spdk_pci_device *dev) 423 { 424 return dev->id.subdevice_id; 425 } 426 427 struct spdk_pci_id 428 spdk_pci_device_get_id(struct spdk_pci_device *dev) 429 { 430 return dev->id; 431 } 432 433 int 434 spdk_pci_device_get_socket_id(struct spdk_pci_device *dev) 435 { 436 return dev->socket_id; 437 } 438 439 int 440 spdk_pci_device_cfg_read(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset) 441 { 442 return dev->cfg_read(dev, value, len, offset); 443 } 444 445 int 446 spdk_pci_device_cfg_write(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset) 447 { 448 return dev->cfg_write(dev, value, len, offset); 449 } 450 451 int 452 spdk_pci_device_cfg_read8(struct spdk_pci_device *dev, uint8_t *value, uint32_t offset) 453 { 454 return spdk_pci_device_cfg_read(dev, value, 1, offset); 455 } 456 457 int 458 spdk_pci_device_cfg_write8(struct spdk_pci_device *dev, uint8_t value, uint32_t offset) 459 { 460 return spdk_pci_device_cfg_write(dev, &value, 1, offset); 461 } 462 463 int 464 spdk_pci_device_cfg_read16(struct spdk_pci_device *dev, uint16_t *value, uint32_t offset) 465 { 466 return spdk_pci_device_cfg_read(dev, value, 2, offset); 467 } 468 469 int 470 spdk_pci_device_cfg_write16(struct spdk_pci_device *dev, uint16_t value, uint32_t offset) 471 { 472 return spdk_pci_device_cfg_write(dev, &value, 2, offset); 473 } 474 475 int 476 spdk_pci_device_cfg_read32(struct spdk_pci_device *dev, uint32_t *value, uint32_t offset) 477 { 478 return spdk_pci_device_cfg_read(dev, value, 4, offset); 479 } 480 481 int 482 spdk_pci_device_cfg_write32(struct spdk_pci_device *dev, uint32_t value, uint32_t offset) 483 { 484 return spdk_pci_device_cfg_write(dev, &value, 4, offset); 485 } 486 487 int 488 spdk_pci_device_get_serial_number(struct spdk_pci_device *dev, char *sn, size_t len) 489 { 490 int err; 491 uint32_t pos, header = 0; 492 uint32_t i, buf[2]; 493 494 if (len < 17) { 495 return -1; 496 } 497 498 err = spdk_pci_device_cfg_read32(dev, &header, PCI_CFG_SIZE); 499 if (err || !header) { 500 return -1; 501 } 502 503 pos = PCI_CFG_SIZE; 504 while (1) { 505 if ((header & 0x0000ffff) == PCI_EXT_CAP_ID_SN) { 506 if (pos) { 507 /* skip the header */ 508 pos += 4; 509 for (i = 0; i < 2; i++) { 510 err = spdk_pci_device_cfg_read32(dev, &buf[i], pos + 4 * i); 511 if (err) { 512 return -1; 513 } 514 } 515 snprintf(sn, len, "%08x%08x", buf[1], buf[0]); 516 return 0; 517 } 518 } 519 pos = (header >> 20) & 0xffc; 520 /* 0 if no other items exist */ 521 if (pos < PCI_CFG_SIZE) { 522 return -1; 523 } 524 err = spdk_pci_device_cfg_read32(dev, &header, pos); 525 if (err) { 526 return -1; 527 } 528 } 529 return -1; 530 } 531 532 struct spdk_pci_addr 533 spdk_pci_device_get_addr(struct spdk_pci_device *dev) 534 { 535 return dev->addr; 536 } 537 538 int 539 spdk_pci_addr_compare(const struct spdk_pci_addr *a1, const struct spdk_pci_addr *a2) 540 { 541 if (a1->domain > a2->domain) { 542 return 1; 543 } else if (a1->domain < a2->domain) { 544 return -1; 545 } else if (a1->bus > a2->bus) { 546 return 1; 547 } else if (a1->bus < a2->bus) { 548 return -1; 549 } else if (a1->dev > a2->dev) { 550 return 1; 551 } else if (a1->dev < a2->dev) { 552 return -1; 553 } else if (a1->func > a2->func) { 554 return 1; 555 } else if (a1->func < a2->func) { 556 return -1; 557 } 558 559 return 0; 560 } 561 562 #ifdef __linux__ 563 int 564 spdk_pci_device_claim(const struct spdk_pci_addr *pci_addr) 565 { 566 int dev_fd; 567 char dev_name[64]; 568 int pid; 569 void *dev_map; 570 struct flock pcidev_lock = { 571 .l_type = F_WRLCK, 572 .l_whence = SEEK_SET, 573 .l_start = 0, 574 .l_len = 0, 575 }; 576 577 snprintf(dev_name, sizeof(dev_name), "/tmp/spdk_pci_lock_%04x:%02x:%02x.%x", pci_addr->domain, 578 pci_addr->bus, 579 pci_addr->dev, pci_addr->func); 580 581 dev_fd = open(dev_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); 582 if (dev_fd == -1) { 583 fprintf(stderr, "could not open %s\n", dev_name); 584 return -1; 585 } 586 587 if (ftruncate(dev_fd, sizeof(int)) != 0) { 588 fprintf(stderr, "could not truncate %s\n", dev_name); 589 close(dev_fd); 590 return -1; 591 } 592 593 dev_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, 594 MAP_SHARED, dev_fd, 0); 595 if (dev_map == MAP_FAILED) { 596 fprintf(stderr, "could not mmap dev %s (%d)\n", dev_name, errno); 597 close(dev_fd); 598 return -1; 599 } 600 601 if (fcntl(dev_fd, F_SETLK, &pcidev_lock) != 0) { 602 pid = *(int *)dev_map; 603 fprintf(stderr, "Cannot create lock on device %s, probably" 604 " process %d has claimed it\n", dev_name, pid); 605 munmap(dev_map, sizeof(int)); 606 close(dev_fd); 607 return -1; 608 } 609 610 *(int *)dev_map = (int)getpid(); 611 munmap(dev_map, sizeof(int)); 612 /* Keep dev_fd open to maintain the lock. */ 613 return dev_fd; 614 } 615 #endif /* __linux__ */ 616 617 #ifdef __FreeBSD__ 618 int 619 spdk_pci_device_claim(const struct spdk_pci_addr *pci_addr) 620 { 621 /* TODO */ 622 return 0; 623 } 624 #endif /* __FreeBSD__ */ 625 626 int 627 spdk_pci_addr_parse(struct spdk_pci_addr *addr, const char *bdf) 628 { 629 unsigned domain, bus, dev, func; 630 631 if (addr == NULL || bdf == NULL) { 632 return -EINVAL; 633 } 634 635 if ((sscanf(bdf, "%x:%x:%x.%x", &domain, &bus, &dev, &func) == 4) || 636 (sscanf(bdf, "%x.%x.%x.%x", &domain, &bus, &dev, &func) == 4)) { 637 /* Matched a full address - all variables are initialized */ 638 } else if (sscanf(bdf, "%x:%x:%x", &domain, &bus, &dev) == 3) { 639 func = 0; 640 } else if ((sscanf(bdf, "%x:%x.%x", &bus, &dev, &func) == 3) || 641 (sscanf(bdf, "%x.%x.%x", &bus, &dev, &func) == 3)) { 642 domain = 0; 643 } else if ((sscanf(bdf, "%x:%x", &bus, &dev) == 2) || 644 (sscanf(bdf, "%x.%x", &bus, &dev) == 2)) { 645 domain = 0; 646 func = 0; 647 } else { 648 return -EINVAL; 649 } 650 651 if (bus > 0xFF || dev > 0x1F || func > 7) { 652 return -EINVAL; 653 } 654 655 addr->domain = domain; 656 addr->bus = bus; 657 addr->dev = dev; 658 addr->func = func; 659 660 return 0; 661 } 662 663 int 664 spdk_pci_addr_fmt(char *bdf, size_t sz, const struct spdk_pci_addr *addr) 665 { 666 int rc; 667 668 rc = snprintf(bdf, sz, "%04x:%02x:%02x.%x", 669 addr->domain, addr->bus, 670 addr->dev, addr->func); 671 672 if (rc > 0 && (size_t)rc < sz) { 673 return 0; 674 } 675 676 return -1; 677 } 678 679 void 680 spdk_pci_hook_device(struct spdk_pci_driver *drv, struct spdk_pci_device *dev) 681 { 682 assert(dev->map_bar != NULL); 683 assert(dev->unmap_bar != NULL); 684 assert(dev->cfg_read != NULL); 685 assert(dev->cfg_write != NULL); 686 assert(dev->detach != NULL); 687 dev->internal.driver = drv; 688 TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq); 689 } 690 691 void 692 spdk_pci_unhook_device(struct spdk_pci_device *dev) 693 { 694 assert(!dev->internal.attached); 695 TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq); 696 } 697