xref: /dpdk/drivers/bus/pci/pci_common.c (revision 2d0c29a37a9c080c1cccb1ad7941aba2ccf5437e)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation.
3  * Copyright 2013-2014 6WIND S.A.
4  */
5 
6 #include <string.h>
7 #include <inttypes.h>
8 #include <stdint.h>
9 #include <stdbool.h>
10 #include <stdlib.h>
11 #include <stdio.h>
12 #include <sys/queue.h>
13 #include <sys/mman.h>
14 
15 #include <rte_errno.h>
16 #include <rte_interrupts.h>
17 #include <rte_log.h>
18 #include <rte_bus.h>
19 #include <rte_pci.h>
20 #include <rte_bus_pci.h>
21 #include <rte_per_lcore.h>
22 #include <rte_memory.h>
23 #include <rte_eal.h>
24 #include <rte_string_fns.h>
25 #include <rte_common.h>
26 #include <rte_devargs.h>
27 #include <rte_vfio.h>
28 
29 #include "private.h"
30 
31 
#define SYSFS_PCI_DEVICES "/sys/bus/pci/devices"

/*
 * Return the sysfs directory that lists the PCI devices.
 *
 * The SYSFS_PCI_DEVICES environment variable, when set, takes precedence
 * over the built-in default path.
 */
const char *rte_pci_get_sysfs_path(void)
{
	const char *env_path = getenv("SYSFS_PCI_DEVICES");

	return (env_path != NULL) ? env_path : SYSFS_PCI_DEVICES;
}
44 
45 static struct rte_devargs *pci_devargs_lookup(struct rte_pci_device *dev)
46 {
47 	struct rte_devargs *devargs;
48 	struct rte_pci_addr addr;
49 
50 	RTE_EAL_DEVARGS_FOREACH("pci", devargs) {
51 		devargs->bus->parse(devargs->name, &addr);
52 		if (!rte_pci_addr_cmp(&dev->addr, &addr))
53 			return devargs;
54 	}
55 	return NULL;
56 }
57 
58 void
59 pci_name_set(struct rte_pci_device *dev)
60 {
61 	struct rte_devargs *devargs;
62 
63 	/* Each device has its internal, canonical name set. */
64 	rte_pci_device_name(&dev->addr,
65 			dev->name, sizeof(dev->name));
66 	devargs = pci_devargs_lookup(dev);
67 	dev->device.devargs = devargs;
68 	/* In blacklist mode, if the device is not blacklisted, no
69 	 * rte_devargs exists for it.
70 	 */
71 	if (devargs != NULL)
72 		/* If an rte_devargs exists, the generic rte_device uses the
73 		 * given name as its name.
74 		 */
75 		dev->device.name = dev->device.devargs->name;
76 	else
77 		/* Otherwise, it uses the internal, canonical form. */
78 		dev->device.name = dev->name;
79 }
80 
81 /*
82  * Match the PCI Driver and Device using the ID Table
83  */
84 int
85 rte_pci_match(const struct rte_pci_driver *pci_drv,
86 	      const struct rte_pci_device *pci_dev)
87 {
88 	const struct rte_pci_id *id_table;
89 
90 	for (id_table = pci_drv->id_table; id_table->vendor_id != 0;
91 	     id_table++) {
92 		/* check if device's identifiers match the driver's ones */
93 		if (id_table->vendor_id != pci_dev->id.vendor_id &&
94 				id_table->vendor_id != PCI_ANY_ID)
95 			continue;
96 		if (id_table->device_id != pci_dev->id.device_id &&
97 				id_table->device_id != PCI_ANY_ID)
98 			continue;
99 		if (id_table->subsystem_vendor_id !=
100 		    pci_dev->id.subsystem_vendor_id &&
101 		    id_table->subsystem_vendor_id != PCI_ANY_ID)
102 			continue;
103 		if (id_table->subsystem_device_id !=
104 		    pci_dev->id.subsystem_device_id &&
105 		    id_table->subsystem_device_id != PCI_ANY_ID)
106 			continue;
107 		if (id_table->class_id != pci_dev->id.class_id &&
108 				id_table->class_id != RTE_CLASS_ANY_ID)
109 			continue;
110 
111 		return 1;
112 	}
113 
114 	return 0;
115 }
116 
117 /*
118  * If vendor/device ID match, call the probe() function of the
119  * driver.
120  */
121 static int
122 rte_pci_probe_one_driver(struct rte_pci_driver *dr,
123 			 struct rte_pci_device *dev)
124 {
125 	int ret;
126 	bool already_probed;
127 	struct rte_pci_addr *loc;
128 
129 	if ((dr == NULL) || (dev == NULL))
130 		return -EINVAL;
131 
132 	loc = &dev->addr;
133 
134 	/* The device is not blacklisted; Check if driver supports it */
135 	if (!rte_pci_match(dr, dev))
136 		/* Match of device and driver failed */
137 		return 1;
138 
139 	RTE_LOG(INFO, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n",
140 			loc->domain, loc->bus, loc->devid, loc->function,
141 			dev->device.numa_node);
142 
143 	/* no initialization when blacklisted, return without error */
144 	if (dev->device.devargs != NULL &&
145 		dev->device.devargs->policy ==
146 			RTE_DEV_BLACKLISTED) {
147 		RTE_LOG(INFO, EAL, "  Device is blacklisted, not"
148 			" initializing\n");
149 		return 1;
150 	}
151 
152 	if (dev->device.numa_node < 0) {
153 		RTE_LOG(WARNING, EAL, "  Invalid NUMA socket, default to 0\n");
154 		dev->device.numa_node = 0;
155 	}
156 
157 	already_probed = rte_dev_is_probed(&dev->device);
158 	if (already_probed && !(dr->drv_flags & RTE_PCI_DRV_PROBE_AGAIN)) {
159 		RTE_LOG(DEBUG, EAL, "Device %s is already probed\n",
160 				dev->device.name);
161 		return -EEXIST;
162 	}
163 
164 	RTE_LOG(INFO, EAL, "  probe driver: %x:%x %s\n", dev->id.vendor_id,
165 		dev->id.device_id, dr->driver.name);
166 
167 	/*
168 	 * reference driver structure
169 	 * This needs to be before rte_pci_map_device(), as it enables to use
170 	 * driver flags for adjusting configuration.
171 	 */
172 	if (!already_probed)
173 		dev->driver = dr;
174 
175 	if (!already_probed && (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING)) {
176 		/* map resources for devices that use igb_uio */
177 		ret = rte_pci_map_device(dev);
178 		if (ret != 0) {
179 			dev->driver = NULL;
180 			return ret;
181 		}
182 	}
183 
184 	/* call the driver probe() function */
185 	ret = dr->probe(dr, dev);
186 	if (already_probed)
187 		return ret; /* no rollback if already succeeded earlier */
188 	if (ret) {
189 		dev->driver = NULL;
190 		if ((dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) &&
191 			/* Don't unmap if device is unsupported and
192 			 * driver needs mapped resources.
193 			 */
194 			!(ret > 0 &&
195 				(dr->drv_flags & RTE_PCI_DRV_KEEP_MAPPED_RES)))
196 			rte_pci_unmap_device(dev);
197 	} else {
198 		dev->device.driver = &dr->driver;
199 	}
200 
201 	return ret;
202 }
203 
204 /*
205  * If vendor/device ID match, call the remove() function of the
206  * driver.
207  */
208 static int
209 rte_pci_detach_dev(struct rte_pci_device *dev)
210 {
211 	struct rte_pci_addr *loc;
212 	struct rte_pci_driver *dr;
213 	int ret = 0;
214 
215 	if (dev == NULL)
216 		return -EINVAL;
217 
218 	dr = dev->driver;
219 	loc = &dev->addr;
220 
221 	RTE_LOG(DEBUG, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n",
222 			loc->domain, loc->bus, loc->devid,
223 			loc->function, dev->device.numa_node);
224 
225 	RTE_LOG(DEBUG, EAL, "  remove driver: %x:%x %s\n", dev->id.vendor_id,
226 			dev->id.device_id, dr->driver.name);
227 
228 	if (dr->remove) {
229 		ret = dr->remove(dev);
230 		if (ret < 0)
231 			return ret;
232 	}
233 
234 	/* clear driver structure */
235 	dev->driver = NULL;
236 
237 	if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING)
238 		/* unmap resources for devices that use igb_uio */
239 		rte_pci_unmap_device(dev);
240 
241 	return 0;
242 }
243 
244 /*
245  * If vendor/device ID match, call the probe() function of all
246  * registered driver for the given device. Return < 0 if initialization
247  * failed, return 1 if no driver is found for this device.
248  */
249 static int
250 pci_probe_all_drivers(struct rte_pci_device *dev)
251 {
252 	struct rte_pci_driver *dr = NULL;
253 	int rc = 0;
254 
255 	if (dev == NULL)
256 		return -EINVAL;
257 
258 	FOREACH_DRIVER_ON_PCIBUS(dr) {
259 		rc = rte_pci_probe_one_driver(dr, dev);
260 		if (rc < 0)
261 			/* negative value is an error */
262 			return rc;
263 		if (rc > 0)
264 			/* positive value means driver doesn't support it */
265 			continue;
266 		return 0;
267 	}
268 	return 1;
269 }
270 
271 /*
272  * Scan the content of the PCI bus, and call the probe() function for
273  * all registered drivers that have a matching entry in its id_table
274  * for discovered devices.
275  */
276 int
277 rte_pci_probe(void)
278 {
279 	struct rte_pci_device *dev = NULL;
280 	size_t probed = 0, failed = 0;
281 	struct rte_devargs *devargs;
282 	int probe_all = 0;
283 	int ret = 0;
284 
285 	if (rte_pci_bus.bus.conf.scan_mode != RTE_BUS_SCAN_WHITELIST)
286 		probe_all = 1;
287 
288 	FOREACH_DEVICE_ON_PCIBUS(dev) {
289 		probed++;
290 
291 		devargs = dev->device.devargs;
292 		/* probe all or only whitelisted devices */
293 		if (probe_all)
294 			ret = pci_probe_all_drivers(dev);
295 		else if (devargs != NULL &&
296 			devargs->policy == RTE_DEV_WHITELISTED)
297 			ret = pci_probe_all_drivers(dev);
298 		if (ret < 0) {
299 			if (ret != -EEXIST) {
300 				RTE_LOG(ERR, EAL, "Requested device "
301 					PCI_PRI_FMT " cannot be used\n",
302 					dev->addr.domain, dev->addr.bus,
303 					dev->addr.devid, dev->addr.function);
304 				rte_errno = errno;
305 				failed++;
306 			}
307 			ret = 0;
308 		}
309 	}
310 
311 	return (probed && probed == failed) ? -1 : 0;
312 }
313 
314 /* dump one device */
315 static int
316 pci_dump_one_device(FILE *f, struct rte_pci_device *dev)
317 {
318 	int i;
319 
320 	fprintf(f, PCI_PRI_FMT, dev->addr.domain, dev->addr.bus,
321 	       dev->addr.devid, dev->addr.function);
322 	fprintf(f, " - vendor:%x device:%x\n", dev->id.vendor_id,
323 	       dev->id.device_id);
324 
325 	for (i = 0; i != sizeof(dev->mem_resource) /
326 		sizeof(dev->mem_resource[0]); i++) {
327 		fprintf(f, "   %16.16"PRIx64" %16.16"PRIx64"\n",
328 			dev->mem_resource[i].phys_addr,
329 			dev->mem_resource[i].len);
330 	}
331 	return 0;
332 }
333 
334 /* dump devices on the bus */
335 void
336 rte_pci_dump(FILE *f)
337 {
338 	struct rte_pci_device *dev = NULL;
339 
340 	FOREACH_DEVICE_ON_PCIBUS(dev) {
341 		pci_dump_one_device(f, dev);
342 	}
343 }
344 
345 static int
346 pci_parse(const char *name, void *addr)
347 {
348 	struct rte_pci_addr *out = addr;
349 	struct rte_pci_addr pci_addr;
350 	bool parse;
351 
352 	parse = (rte_pci_addr_parse(name, &pci_addr) == 0);
353 	if (parse && addr != NULL)
354 		*out = pci_addr;
355 	return parse == false;
356 }
357 
358 /* register a driver */
359 void
360 rte_pci_register(struct rte_pci_driver *driver)
361 {
362 	TAILQ_INSERT_TAIL(&rte_pci_bus.driver_list, driver, next);
363 	driver->bus = &rte_pci_bus;
364 }
365 
366 /* unregister a driver */
367 void
368 rte_pci_unregister(struct rte_pci_driver *driver)
369 {
370 	TAILQ_REMOVE(&rte_pci_bus.driver_list, driver, next);
371 	driver->bus = NULL;
372 }
373 
374 /* Add a device to PCI bus */
375 void
376 rte_pci_add_device(struct rte_pci_device *pci_dev)
377 {
378 	TAILQ_INSERT_TAIL(&rte_pci_bus.device_list, pci_dev, next);
379 }
380 
381 /* Insert a device into a predefined position in PCI bus */
382 void
383 rte_pci_insert_device(struct rte_pci_device *exist_pci_dev,
384 		      struct rte_pci_device *new_pci_dev)
385 {
386 	TAILQ_INSERT_BEFORE(exist_pci_dev, new_pci_dev, next);
387 }
388 
389 /* Remove a device from PCI bus */
390 static void
391 rte_pci_remove_device(struct rte_pci_device *pci_dev)
392 {
393 	TAILQ_REMOVE(&rte_pci_bus.device_list, pci_dev, next);
394 }
395 
396 static struct rte_device *
397 pci_find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
398 		const void *data)
399 {
400 	const struct rte_pci_device *pstart;
401 	struct rte_pci_device *pdev;
402 
403 	if (start != NULL) {
404 		pstart = RTE_DEV_TO_PCI_CONST(start);
405 		pdev = TAILQ_NEXT(pstart, next);
406 	} else {
407 		pdev = TAILQ_FIRST(&rte_pci_bus.device_list);
408 	}
409 	while (pdev != NULL) {
410 		if (cmp(&pdev->device, data) == 0)
411 			return &pdev->device;
412 		pdev = TAILQ_NEXT(pdev, next);
413 	}
414 	return NULL;
415 }
416 
417 /*
418  * find the device which encounter the failure, by iterate over all device on
419  * PCI bus to check if the memory failure address is located in the range
420  * of the BARs of the device.
421  */
422 static struct rte_pci_device *
423 pci_find_device_by_addr(const void *failure_addr)
424 {
425 	struct rte_pci_device *pdev = NULL;
426 	uint64_t check_point, start, end, len;
427 	int i;
428 
429 	check_point = (uint64_t)(uintptr_t)failure_addr;
430 
431 	FOREACH_DEVICE_ON_PCIBUS(pdev) {
432 		for (i = 0; i != RTE_DIM(pdev->mem_resource); i++) {
433 			start = (uint64_t)(uintptr_t)pdev->mem_resource[i].addr;
434 			len = pdev->mem_resource[i].len;
435 			end = start + len;
436 			if (check_point >= start && check_point < end) {
437 				RTE_LOG(DEBUG, EAL, "Failure address %16.16"
438 					PRIx64" belongs to device %s!\n",
439 					check_point, pdev->device.name);
440 				return pdev;
441 			}
442 		}
443 	}
444 	return NULL;
445 }
446 
447 static int
448 pci_hot_unplug_handler(struct rte_device *dev)
449 {
450 	struct rte_pci_device *pdev = NULL;
451 	int ret = 0;
452 
453 	pdev = RTE_DEV_TO_PCI(dev);
454 	if (!pdev)
455 		return -1;
456 
457 	switch (pdev->kdrv) {
458 #ifdef HAVE_VFIO_DEV_REQ_INTERFACE
459 	case RTE_KDRV_VFIO:
460 		/*
461 		 * vfio kernel module guaranty the pci device would not be
462 		 * deleted until the user space release the resource, so no
463 		 * need to remap BARs resource here, just directly notify
464 		 * the req event to the user space to handle it.
465 		 */
466 		rte_dev_event_callback_process(dev->name,
467 					       RTE_DEV_EVENT_REMOVE);
468 		break;
469 #endif
470 	case RTE_KDRV_IGB_UIO:
471 	case RTE_KDRV_UIO_GENERIC:
472 	case RTE_KDRV_NIC_UIO:
473 		/* BARs resource is invalid, remap it to be safe. */
474 		ret = pci_uio_remap_resource(pdev);
475 		break;
476 	default:
477 		RTE_LOG(DEBUG, EAL,
478 			"Not managed by a supported kernel driver, skipped\n");
479 		ret = -1;
480 		break;
481 	}
482 
483 	return ret;
484 }
485 
486 static int
487 pci_sigbus_handler(const void *failure_addr)
488 {
489 	struct rte_pci_device *pdev = NULL;
490 	int ret = 0;
491 
492 	pdev = pci_find_device_by_addr(failure_addr);
493 	if (!pdev) {
494 		/* It is a generic sigbus error, no bus would handle it. */
495 		ret = 1;
496 	} else {
497 		/* The sigbus error is caused of hot-unplug. */
498 		ret = pci_hot_unplug_handler(&pdev->device);
499 		if (ret) {
500 			RTE_LOG(ERR, EAL,
501 				"Failed to handle hot-unplug for device %s",
502 				pdev->name);
503 			ret = -1;
504 		}
505 	}
506 	return ret;
507 }
508 
/*
 * Bus plug callback: probe the PCI device that embeds the given generic
 * device against all registered drivers, and return that probe result.
 */
static int
pci_plug(struct rte_device *dev)
{
	struct rte_pci_device *pdev = RTE_DEV_TO_PCI(dev);

	return pci_probe_all_drivers(pdev);
}
514 
515 static int
516 pci_unplug(struct rte_device *dev)
517 {
518 	struct rte_pci_device *pdev;
519 	int ret;
520 
521 	pdev = RTE_DEV_TO_PCI(dev);
522 	ret = rte_pci_detach_dev(pdev);
523 	if (ret == 0) {
524 		rte_pci_remove_device(pdev);
525 		rte_devargs_remove(dev->devargs);
526 		free(pdev);
527 	}
528 	return ret;
529 }
530 
531 static int
532 pci_dma_map(struct rte_device *dev, void *addr, uint64_t iova, size_t len)
533 {
534 	struct rte_pci_device *pdev = RTE_DEV_TO_PCI(dev);
535 
536 	if (!pdev || !pdev->driver) {
537 		rte_errno = EINVAL;
538 		return -1;
539 	}
540 	if (pdev->driver->dma_map)
541 		return pdev->driver->dma_map(pdev, addr, iova, len);
542 	/**
543 	 *  In case driver don't provides any specific mapping
544 	 *  try fallback to VFIO.
545 	 */
546 	if (pdev->kdrv == RTE_KDRV_VFIO)
547 		return rte_vfio_container_dma_map
548 				(RTE_VFIO_DEFAULT_CONTAINER_FD, (uintptr_t)addr,
549 				 iova, len);
550 	rte_errno = ENOTSUP;
551 	return -1;
552 }
553 
554 static int
555 pci_dma_unmap(struct rte_device *dev, void *addr, uint64_t iova, size_t len)
556 {
557 	struct rte_pci_device *pdev = RTE_DEV_TO_PCI(dev);
558 
559 	if (!pdev || !pdev->driver) {
560 		rte_errno = EINVAL;
561 		return -1;
562 	}
563 	if (pdev->driver->dma_unmap)
564 		return pdev->driver->dma_unmap(pdev, addr, iova, len);
565 	/**
566 	 *  In case driver don't provides any specific mapping
567 	 *  try fallback to VFIO.
568 	 */
569 	if (pdev->kdrv == RTE_KDRV_VFIO)
570 		return rte_vfio_container_dma_unmap
571 				(RTE_VFIO_DEFAULT_CONTAINER_FD, (uintptr_t)addr,
572 				 iova, len);
573 	rte_errno = ENOTSUP;
574 	return -1;
575 }
576 
577 struct rte_pci_bus rte_pci_bus = {
578 	.bus = {
579 		.scan = rte_pci_scan,
580 		.probe = rte_pci_probe,
581 		.find_device = pci_find_device,
582 		.plug = pci_plug,
583 		.unplug = pci_unplug,
584 		.parse = pci_parse,
585 		.dma_map = pci_dma_map,
586 		.dma_unmap = pci_dma_unmap,
587 		.get_iommu_class = rte_pci_get_iommu_class,
588 		.dev_iterate = rte_pci_dev_iterate,
589 		.hot_unplug_handler = pci_hot_unplug_handler,
590 		.sigbus_handler = pci_sigbus_handler,
591 	},
592 	.device_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.device_list),
593 	.driver_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.driver_list),
594 };
595 
596 RTE_REGISTER_BUS(pci, rte_pci_bus.bus);
597