1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2014 Intel Corporation. 3 * Copyright 2013-2014 6WIND S.A. 4 */ 5 6 #include <string.h> 7 #include <inttypes.h> 8 #include <stdint.h> 9 #include <stdbool.h> 10 #include <stdlib.h> 11 #include <stdio.h> 12 #include <sys/queue.h> 13 #include <sys/mman.h> 14 15 #include <rte_errno.h> 16 #include <rte_interrupts.h> 17 #include <rte_log.h> 18 #include <rte_bus.h> 19 #include <rte_pci.h> 20 #include <rte_bus_pci.h> 21 #include <rte_per_lcore.h> 22 #include <rte_memory.h> 23 #include <rte_eal.h> 24 #include <rte_string_fns.h> 25 #include <rte_common.h> 26 #include <rte_devargs.h> 27 #include <rte_vfio.h> 28 29 #include "private.h" 30 31 32 #define SYSFS_PCI_DEVICES "/sys/bus/pci/devices" 33 34 const char *rte_pci_get_sysfs_path(void) 35 { 36 const char *path = NULL; 37 38 path = getenv("SYSFS_PCI_DEVICES"); 39 if (path == NULL) 40 return SYSFS_PCI_DEVICES; 41 42 return path; 43 } 44 45 static struct rte_devargs *pci_devargs_lookup(struct rte_pci_device *dev) 46 { 47 struct rte_devargs *devargs; 48 struct rte_pci_addr addr; 49 50 RTE_EAL_DEVARGS_FOREACH("pci", devargs) { 51 devargs->bus->parse(devargs->name, &addr); 52 if (!rte_pci_addr_cmp(&dev->addr, &addr)) 53 return devargs; 54 } 55 return NULL; 56 } 57 58 void 59 pci_name_set(struct rte_pci_device *dev) 60 { 61 struct rte_devargs *devargs; 62 63 /* Each device has its internal, canonical name set. */ 64 rte_pci_device_name(&dev->addr, 65 dev->name, sizeof(dev->name)); 66 devargs = pci_devargs_lookup(dev); 67 dev->device.devargs = devargs; 68 /* In blacklist mode, if the device is not blacklisted, no 69 * rte_devargs exists for it. 70 */ 71 if (devargs != NULL) 72 /* If an rte_devargs exists, the generic rte_device uses the 73 * given name as its name. 74 */ 75 dev->device.name = dev->device.devargs->name; 76 else 77 /* Otherwise, it uses the internal, canonical form. */ 78 dev->device.name = dev->name; 79 } 80 81 /* 82 * Match the PCI Driver and Device using the ID Table 83 */ 84 int 85 rte_pci_match(const struct rte_pci_driver *pci_drv, 86 const struct rte_pci_device *pci_dev) 87 { 88 const struct rte_pci_id *id_table; 89 90 for (id_table = pci_drv->id_table; id_table->vendor_id != 0; 91 id_table++) { 92 /* check if device's identifiers match the driver's ones */ 93 if (id_table->vendor_id != pci_dev->id.vendor_id && 94 id_table->vendor_id != PCI_ANY_ID) 95 continue; 96 if (id_table->device_id != pci_dev->id.device_id && 97 id_table->device_id != PCI_ANY_ID) 98 continue; 99 if (id_table->subsystem_vendor_id != 100 pci_dev->id.subsystem_vendor_id && 101 id_table->subsystem_vendor_id != PCI_ANY_ID) 102 continue; 103 if (id_table->subsystem_device_id != 104 pci_dev->id.subsystem_device_id && 105 id_table->subsystem_device_id != PCI_ANY_ID) 106 continue; 107 if (id_table->class_id != pci_dev->id.class_id && 108 id_table->class_id != RTE_CLASS_ANY_ID) 109 continue; 110 111 return 1; 112 } 113 114 return 0; 115 } 116 117 /* 118 * If vendor/device ID match, call the probe() function of the 119 * driver. 120 */ 121 static int 122 rte_pci_probe_one_driver(struct rte_pci_driver *dr, 123 struct rte_pci_device *dev) 124 { 125 int ret; 126 bool already_probed; 127 struct rte_pci_addr *loc; 128 129 if ((dr == NULL) || (dev == NULL)) 130 return -EINVAL; 131 132 loc = &dev->addr; 133 134 /* The device is not blacklisted; Check if driver supports it */ 135 if (!rte_pci_match(dr, dev)) 136 /* Match of device and driver failed */ 137 return 1; 138 139 RTE_LOG(INFO, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n", 140 loc->domain, loc->bus, loc->devid, loc->function, 141 dev->device.numa_node); 142 143 /* no initialization when blacklisted, return without error */ 144 if (dev->device.devargs != NULL && 145 dev->device.devargs->policy == 146 RTE_DEV_BLACKLISTED) { 147 RTE_LOG(INFO, EAL, " Device is blacklisted, not" 148 " initializing\n"); 149 return 1; 150 } 151 152 if (dev->device.numa_node < 0) { 153 RTE_LOG(WARNING, EAL, " Invalid NUMA socket, default to 0\n"); 154 dev->device.numa_node = 0; 155 } 156 157 already_probed = rte_dev_is_probed(&dev->device); 158 if (already_probed && !(dr->drv_flags & RTE_PCI_DRV_PROBE_AGAIN)) { 159 RTE_LOG(DEBUG, EAL, "Device %s is already probed\n", 160 dev->device.name); 161 return -EEXIST; 162 } 163 164 RTE_LOG(INFO, EAL, " probe driver: %x:%x %s\n", dev->id.vendor_id, 165 dev->id.device_id, dr->driver.name); 166 167 /* 168 * reference driver structure 169 * This needs to be before rte_pci_map_device(), as it enables to use 170 * driver flags for adjusting configuration. 171 */ 172 if (!already_probed) 173 dev->driver = dr; 174 175 if (!already_probed && (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING)) { 176 /* map resources for devices that use igb_uio */ 177 ret = rte_pci_map_device(dev); 178 if (ret != 0) { 179 dev->driver = NULL; 180 return ret; 181 } 182 } 183 184 /* call the driver probe() function */ 185 ret = dr->probe(dr, dev); 186 if (already_probed) 187 return ret; /* no rollback if already succeeded earlier */ 188 if (ret) { 189 dev->driver = NULL; 190 if ((dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) && 191 /* Don't unmap if device is unsupported and 192 * driver needs mapped resources. 193 */ 194 !(ret > 0 && 195 (dr->drv_flags & RTE_PCI_DRV_KEEP_MAPPED_RES))) 196 rte_pci_unmap_device(dev); 197 } else { 198 dev->device.driver = &dr->driver; 199 } 200 201 return ret; 202 } 203 204 /* 205 * If vendor/device ID match, call the remove() function of the 206 * driver. 207 */ 208 static int 209 rte_pci_detach_dev(struct rte_pci_device *dev) 210 { 211 struct rte_pci_addr *loc; 212 struct rte_pci_driver *dr; 213 int ret = 0; 214 215 if (dev == NULL) 216 return -EINVAL; 217 218 dr = dev->driver; 219 loc = &dev->addr; 220 221 RTE_LOG(DEBUG, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n", 222 loc->domain, loc->bus, loc->devid, 223 loc->function, dev->device.numa_node); 224 225 RTE_LOG(DEBUG, EAL, " remove driver: %x:%x %s\n", dev->id.vendor_id, 226 dev->id.device_id, dr->driver.name); 227 228 if (dr->remove) { 229 ret = dr->remove(dev); 230 if (ret < 0) 231 return ret; 232 } 233 234 /* clear driver structure */ 235 dev->driver = NULL; 236 237 if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) 238 /* unmap resources for devices that use igb_uio */ 239 rte_pci_unmap_device(dev); 240 241 return 0; 242 } 243 244 /* 245 * If vendor/device ID match, call the probe() function of all 246 * registered driver for the given device. Return < 0 if initialization 247 * failed, return 1 if no driver is found for this device. 248 */ 249 static int 250 pci_probe_all_drivers(struct rte_pci_device *dev) 251 { 252 struct rte_pci_driver *dr = NULL; 253 int rc = 0; 254 255 if (dev == NULL) 256 return -EINVAL; 257 258 FOREACH_DRIVER_ON_PCIBUS(dr) { 259 rc = rte_pci_probe_one_driver(dr, dev); 260 if (rc < 0) 261 /* negative value is an error */ 262 return rc; 263 if (rc > 0) 264 /* positive value means driver doesn't support it */ 265 continue; 266 return 0; 267 } 268 return 1; 269 } 270 271 /* 272 * Scan the content of the PCI bus, and call the probe() function for 273 * all registered drivers that have a matching entry in its id_table 274 * for discovered devices. 275 */ 276 int 277 rte_pci_probe(void) 278 { 279 struct rte_pci_device *dev = NULL; 280 size_t probed = 0, failed = 0; 281 struct rte_devargs *devargs; 282 int probe_all = 0; 283 int ret = 0; 284 285 if (rte_pci_bus.bus.conf.scan_mode != RTE_BUS_SCAN_WHITELIST) 286 probe_all = 1; 287 288 FOREACH_DEVICE_ON_PCIBUS(dev) { 289 probed++; 290 291 devargs = dev->device.devargs; 292 /* probe all or only whitelisted devices */ 293 if (probe_all) 294 ret = pci_probe_all_drivers(dev); 295 else if (devargs != NULL && 296 devargs->policy == RTE_DEV_WHITELISTED) 297 ret = pci_probe_all_drivers(dev); 298 if (ret < 0) { 299 if (ret != -EEXIST) { 300 RTE_LOG(ERR, EAL, "Requested device " 301 PCI_PRI_FMT " cannot be used\n", 302 dev->addr.domain, dev->addr.bus, 303 dev->addr.devid, dev->addr.function); 304 rte_errno = errno; 305 failed++; 306 } 307 ret = 0; 308 } 309 } 310 311 return (probed && probed == failed) ? -1 : 0; 312 } 313 314 /* dump one device */ 315 static int 316 pci_dump_one_device(FILE *f, struct rte_pci_device *dev) 317 { 318 int i; 319 320 fprintf(f, PCI_PRI_FMT, dev->addr.domain, dev->addr.bus, 321 dev->addr.devid, dev->addr.function); 322 fprintf(f, " - vendor:%x device:%x\n", dev->id.vendor_id, 323 dev->id.device_id); 324 325 for (i = 0; i != sizeof(dev->mem_resource) / 326 sizeof(dev->mem_resource[0]); i++) { 327 fprintf(f, " %16.16"PRIx64" %16.16"PRIx64"\n", 328 dev->mem_resource[i].phys_addr, 329 dev->mem_resource[i].len); 330 } 331 return 0; 332 } 333 334 /* dump devices on the bus */ 335 void 336 rte_pci_dump(FILE *f) 337 { 338 struct rte_pci_device *dev = NULL; 339 340 FOREACH_DEVICE_ON_PCIBUS(dev) { 341 pci_dump_one_device(f, dev); 342 } 343 } 344 345 static int 346 pci_parse(const char *name, void *addr) 347 { 348 struct rte_pci_addr *out = addr; 349 struct rte_pci_addr pci_addr; 350 bool parse; 351 352 parse = (rte_pci_addr_parse(name, &pci_addr) == 0); 353 if (parse && addr != NULL) 354 *out = pci_addr; 355 return parse == false; 356 } 357 358 /* register a driver */ 359 void 360 rte_pci_register(struct rte_pci_driver *driver) 361 { 362 TAILQ_INSERT_TAIL(&rte_pci_bus.driver_list, driver, next); 363 driver->bus = &rte_pci_bus; 364 } 365 366 /* unregister a driver */ 367 void 368 rte_pci_unregister(struct rte_pci_driver *driver) 369 { 370 TAILQ_REMOVE(&rte_pci_bus.driver_list, driver, next); 371 driver->bus = NULL; 372 } 373 374 /* Add a device to PCI bus */ 375 void 376 rte_pci_add_device(struct rte_pci_device *pci_dev) 377 { 378 TAILQ_INSERT_TAIL(&rte_pci_bus.device_list, pci_dev, next); 379 } 380 381 /* Insert a device into a predefined position in PCI bus */ 382 void 383 rte_pci_insert_device(struct rte_pci_device *exist_pci_dev, 384 struct rte_pci_device *new_pci_dev) 385 { 386 TAILQ_INSERT_BEFORE(exist_pci_dev, new_pci_dev, next); 387 } 388 389 /* Remove a device from PCI bus */ 390 static void 391 rte_pci_remove_device(struct rte_pci_device *pci_dev) 392 { 393 TAILQ_REMOVE(&rte_pci_bus.device_list, pci_dev, next); 394 } 395 396 static struct rte_device * 397 pci_find_device(const struct rte_device *start, rte_dev_cmp_t cmp, 398 const void *data) 399 { 400 const struct rte_pci_device *pstart; 401 struct rte_pci_device *pdev; 402 403 if (start != NULL) { 404 pstart = RTE_DEV_TO_PCI_CONST(start); 405 pdev = TAILQ_NEXT(pstart, next); 406 } else { 407 pdev = TAILQ_FIRST(&rte_pci_bus.device_list); 408 } 409 while (pdev != NULL) { 410 if (cmp(&pdev->device, data) == 0) 411 return &pdev->device; 412 pdev = TAILQ_NEXT(pdev, next); 413 } 414 return NULL; 415 } 416 417 /* 418 * find the device which encounter the failure, by iterate over all device on 419 * PCI bus to check if the memory failure address is located in the range 420 * of the BARs of the device. 421 */ 422 static struct rte_pci_device * 423 pci_find_device_by_addr(const void *failure_addr) 424 { 425 struct rte_pci_device *pdev = NULL; 426 uint64_t check_point, start, end, len; 427 int i; 428 429 check_point = (uint64_t)(uintptr_t)failure_addr; 430 431 FOREACH_DEVICE_ON_PCIBUS(pdev) { 432 for (i = 0; i != RTE_DIM(pdev->mem_resource); i++) { 433 start = (uint64_t)(uintptr_t)pdev->mem_resource[i].addr; 434 len = pdev->mem_resource[i].len; 435 end = start + len; 436 if (check_point >= start && check_point < end) { 437 RTE_LOG(DEBUG, EAL, "Failure address %16.16" 438 PRIx64" belongs to device %s!\n", 439 check_point, pdev->device.name); 440 return pdev; 441 } 442 } 443 } 444 return NULL; 445 } 446 447 static int 448 pci_hot_unplug_handler(struct rte_device *dev) 449 { 450 struct rte_pci_device *pdev = NULL; 451 int ret = 0; 452 453 pdev = RTE_DEV_TO_PCI(dev); 454 if (!pdev) 455 return -1; 456 457 switch (pdev->kdrv) { 458 #ifdef HAVE_VFIO_DEV_REQ_INTERFACE 459 case RTE_KDRV_VFIO: 460 /* 461 * vfio kernel module guaranty the pci device would not be 462 * deleted until the user space release the resource, so no 463 * need to remap BARs resource here, just directly notify 464 * the req event to the user space to handle it. 465 */ 466 rte_dev_event_callback_process(dev->name, 467 RTE_DEV_EVENT_REMOVE); 468 break; 469 #endif 470 case RTE_KDRV_IGB_UIO: 471 case RTE_KDRV_UIO_GENERIC: 472 case RTE_KDRV_NIC_UIO: 473 /* BARs resource is invalid, remap it to be safe. */ 474 ret = pci_uio_remap_resource(pdev); 475 break; 476 default: 477 RTE_LOG(DEBUG, EAL, 478 "Not managed by a supported kernel driver, skipped\n"); 479 ret = -1; 480 break; 481 } 482 483 return ret; 484 } 485 486 static int 487 pci_sigbus_handler(const void *failure_addr) 488 { 489 struct rte_pci_device *pdev = NULL; 490 int ret = 0; 491 492 pdev = pci_find_device_by_addr(failure_addr); 493 if (!pdev) { 494 /* It is a generic sigbus error, no bus would handle it. */ 495 ret = 1; 496 } else { 497 /* The sigbus error is caused of hot-unplug. */ 498 ret = pci_hot_unplug_handler(&pdev->device); 499 if (ret) { 500 RTE_LOG(ERR, EAL, 501 "Failed to handle hot-unplug for device %s", 502 pdev->name); 503 ret = -1; 504 } 505 } 506 return ret; 507 } 508 509 static int 510 pci_plug(struct rte_device *dev) 511 { 512 return pci_probe_all_drivers(RTE_DEV_TO_PCI(dev)); 513 } 514 515 static int 516 pci_unplug(struct rte_device *dev) 517 { 518 struct rte_pci_device *pdev; 519 int ret; 520 521 pdev = RTE_DEV_TO_PCI(dev); 522 ret = rte_pci_detach_dev(pdev); 523 if (ret == 0) { 524 rte_pci_remove_device(pdev); 525 rte_devargs_remove(dev->devargs); 526 free(pdev); 527 } 528 return ret; 529 } 530 531 static int 532 pci_dma_map(struct rte_device *dev, void *addr, uint64_t iova, size_t len) 533 { 534 struct rte_pci_device *pdev = RTE_DEV_TO_PCI(dev); 535 536 if (!pdev || !pdev->driver) { 537 rte_errno = EINVAL; 538 return -1; 539 } 540 if (pdev->driver->dma_map) 541 return pdev->driver->dma_map(pdev, addr, iova, len); 542 /** 543 * In case driver don't provides any specific mapping 544 * try fallback to VFIO. 545 */ 546 if (pdev->kdrv == RTE_KDRV_VFIO) 547 return rte_vfio_container_dma_map 548 (RTE_VFIO_DEFAULT_CONTAINER_FD, (uintptr_t)addr, 549 iova, len); 550 rte_errno = ENOTSUP; 551 return -1; 552 } 553 554 static int 555 pci_dma_unmap(struct rte_device *dev, void *addr, uint64_t iova, size_t len) 556 { 557 struct rte_pci_device *pdev = RTE_DEV_TO_PCI(dev); 558 559 if (!pdev || !pdev->driver) { 560 rte_errno = EINVAL; 561 return -1; 562 } 563 if (pdev->driver->dma_unmap) 564 return pdev->driver->dma_unmap(pdev, addr, iova, len); 565 /** 566 * In case driver don't provides any specific mapping 567 * try fallback to VFIO. 568 */ 569 if (pdev->kdrv == RTE_KDRV_VFIO) 570 return rte_vfio_container_dma_unmap 571 (RTE_VFIO_DEFAULT_CONTAINER_FD, (uintptr_t)addr, 572 iova, len); 573 rte_errno = ENOTSUP; 574 return -1; 575 } 576 577 struct rte_pci_bus rte_pci_bus = { 578 .bus = { 579 .scan = rte_pci_scan, 580 .probe = rte_pci_probe, 581 .find_device = pci_find_device, 582 .plug = pci_plug, 583 .unplug = pci_unplug, 584 .parse = pci_parse, 585 .dma_map = pci_dma_map, 586 .dma_unmap = pci_dma_unmap, 587 .get_iommu_class = rte_pci_get_iommu_class, 588 .dev_iterate = rte_pci_dev_iterate, 589 .hot_unplug_handler = pci_hot_unplug_handler, 590 .sigbus_handler = pci_sigbus_handler, 591 }, 592 .device_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.device_list), 593 .driver_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.driver_list), 594 }; 595 596 RTE_REGISTER_BUS(pci, rte_pci_bus.bus); 597