xref: /dpdk/drivers/bus/pci/pci_common.c (revision 849f773b7645216954022a47e466043a23125af9)
1fd4ab1feSOlivier Matz /* SPDX-License-Identifier: BSD-3-Clause
2fd4ab1feSOlivier Matz  * Copyright(c) 2010-2014 Intel Corporation.
3c752998bSGaetan Rivet  * Copyright 2013-2014 6WIND S.A.
4c752998bSGaetan Rivet  */
5c752998bSGaetan Rivet 
6c752998bSGaetan Rivet #include <string.h>
7c752998bSGaetan Rivet #include <inttypes.h>
8c752998bSGaetan Rivet #include <stdint.h>
9e9d159c3SThomas Monjalon #include <stdbool.h>
10c752998bSGaetan Rivet #include <stdlib.h>
11c752998bSGaetan Rivet #include <stdio.h>
12c752998bSGaetan Rivet #include <sys/queue.h>
13c752998bSGaetan Rivet #include <rte_errno.h>
14c752998bSGaetan Rivet #include <rte_interrupts.h>
15c752998bSGaetan Rivet #include <rte_log.h>
16a04322f6SDavid Marchand #include <bus_driver.h>
17c752998bSGaetan Rivet #include <rte_pci.h>
18c752998bSGaetan Rivet #include <rte_bus_pci.h>
19e9b3d79bSDmitry Kozlyuk #include <rte_lcore.h>
20c752998bSGaetan Rivet #include <rte_per_lcore.h>
21c752998bSGaetan Rivet #include <rte_memory.h>
22c752998bSGaetan Rivet #include <rte_eal.h>
23e1ece609SDavid Marchand #include <rte_eal_paging.h>
24c752998bSGaetan Rivet #include <rte_string_fns.h>
25c752998bSGaetan Rivet #include <rte_common.h>
26c752998bSGaetan Rivet #include <rte_devargs.h>
2794d72659SJerin Jacob #include <rte_vfio.h>
281cab1a40SKevin Laatz #include <rte_tailq.h>
29c752998bSGaetan Rivet 
30c752998bSGaetan Rivet #include "private.h"
31c752998bSGaetan Rivet 
32607514e7SStephen Hemminger 
33c752998bSGaetan Rivet #define SYSFS_PCI_DEVICES "/sys/bus/pci/devices"
34c752998bSGaetan Rivet 
/*
 * Return the directory used to look up PCI devices in sysfs.
 *
 * When built with RTE_EXEC_ENV_LINUX, the SYSFS_PCI_DEVICES environment
 * variable takes precedence and the compiled-in default is used when it is
 * not set. Without RTE_EXEC_ENV_LINUX, NULL is returned.
 */
const char *rte_pci_get_sysfs_path(void)
{
#ifdef RTE_EXEC_ENV_LINUX
	const char *env_path = getenv("SYSFS_PCI_DEVICES");

	return env_path != NULL ? env_path : SYSFS_PCI_DEVICES;
#else
	return NULL;
#endif
}
47c752998bSGaetan Rivet 
488f4de2dbSDavid Marchand #ifdef RTE_EXEC_ENV_WINDOWS
498f4de2dbSDavid Marchand #define asprintf pci_asprintf
508f4de2dbSDavid Marchand 
/*
 * Local asprintf() replacement: format into a freshly malloc()ed buffer.
 *
 * @param buffer
 *   Output. On success it points to a NUL-terminated string the caller must
 *   free(). On any failure it is set to NULL, so the caller never sees an
 *   indeterminate or dangling pointer.
 * @param format
 *   printf-style format string, followed by its arguments.
 * @return
 *   Number of characters written (excluding the terminating NUL),
 *   or -1 on failure.
 */
static int
__rte_format_printf(2, 3)
pci_asprintf(char **buffer, const char *format, ...)
{
	int len, ret;
	size_t cap;
	va_list arg;

	*buffer = NULL;

	/* First pass: compute the length of the formatted string. */
	va_start(arg, format);
	len = vsnprintf(NULL, 0, format, arg);
	va_end(arg);
	if (len < 0)
		return -1;
	/* Widen before adding the NUL byte so that len + 1 cannot overflow. */
	cap = (size_t)len + 1;

	*buffer = malloc(cap);
	if (*buffer == NULL)
		return -1;

	/* Second pass: actually format into the buffer. */
	va_start(arg, format);
	ret = vsnprintf(*buffer, cap, format, arg);
	va_end(arg);
	if (ret != len) {
		free(*buffer);
		*buffer = NULL;
		return -1;
	}
	return ret;
}
788f4de2dbSDavid Marchand #endif /* RTE_EXEC_ENV_WINDOWS */
798f4de2dbSDavid Marchand 
80463a5245SSunil Kumar Kori static struct rte_devargs *
pci_devargs_lookup(const struct rte_pci_addr * pci_addr)81463a5245SSunil Kumar Kori pci_devargs_lookup(const struct rte_pci_addr *pci_addr)
82c752998bSGaetan Rivet {
83c752998bSGaetan Rivet 	struct rte_devargs *devargs;
84c752998bSGaetan Rivet 	struct rte_pci_addr addr;
85c752998bSGaetan Rivet 
867765f0f4SGaetan Rivet 	RTE_EAL_DEVARGS_FOREACH("pci", devargs) {
87c752998bSGaetan Rivet 		devargs->bus->parse(devargs->name, &addr);
88463a5245SSunil Kumar Kori 		if (!rte_pci_addr_cmp(pci_addr, &addr))
89c752998bSGaetan Rivet 			return devargs;
90c752998bSGaetan Rivet 	}
91c752998bSGaetan Rivet 	return NULL;
92c752998bSGaetan Rivet }
93c752998bSGaetan Rivet 
94c752998bSGaetan Rivet void
pci_common_set(struct rte_pci_device * dev)958f4de2dbSDavid Marchand pci_common_set(struct rte_pci_device *dev)
96c752998bSGaetan Rivet {
97c752998bSGaetan Rivet 	struct rte_devargs *devargs;
98c752998bSGaetan Rivet 
99c752998bSGaetan Rivet 	/* Each device has its internal, canonical name set. */
100c752998bSGaetan Rivet 	rte_pci_device_name(&dev->addr,
101c752998bSGaetan Rivet 			dev->name, sizeof(dev->name));
102463a5245SSunil Kumar Kori 	devargs = pci_devargs_lookup(&dev->addr);
103c752998bSGaetan Rivet 	dev->device.devargs = devargs;
104a65a34a8SStephen Hemminger 
105b23fd370SGaetan Rivet 	/* When using a blocklist, only blocked devices will have
106b23fd370SGaetan Rivet 	 * an rte_devargs. Allowed devices won't have one.
107b23fd370SGaetan Rivet 	 */
108c752998bSGaetan Rivet 	if (devargs != NULL)
109c752998bSGaetan Rivet 		/* If an rte_devargs exists, the generic rte_device uses the
11073d15621SRami Rosen 		 * given name as its name.
111c752998bSGaetan Rivet 		 */
112c752998bSGaetan Rivet 		dev->device.name = dev->device.devargs->name;
113c752998bSGaetan Rivet 	else
114c752998bSGaetan Rivet 		/* Otherwise, it uses the internal, canonical form. */
115c752998bSGaetan Rivet 		dev->device.name = dev->name;
1168f4de2dbSDavid Marchand 
117db409227SDavid Marchand 	if (dev->bus_info != NULL ||
118db409227SDavid Marchand 			asprintf(&dev->bus_info, "vendor_id=%"PRIx16", device_id=%"PRIx16,
1198f4de2dbSDavid Marchand 				dev->id.vendor_id, dev->id.device_id) != -1)
1208f4de2dbSDavid Marchand 		dev->device.bus_info = dev->bus_info;
1218f4de2dbSDavid Marchand }
1228f4de2dbSDavid Marchand 
1238f4de2dbSDavid Marchand void
pci_free(struct rte_pci_device_internal * pdev)12487a02023SChenbo Xia pci_free(struct rte_pci_device_internal *pdev)
1258f4de2dbSDavid Marchand {
12687a02023SChenbo Xia 	if (pdev == NULL)
1278f4de2dbSDavid Marchand 		return;
12887a02023SChenbo Xia 	free(pdev->device.bus_info);
12987a02023SChenbo Xia 	free(pdev);
130c752998bSGaetan Rivet }
131c752998bSGaetan Rivet 
132e1ece609SDavid Marchand /* map a particular resource from a file */
133e1ece609SDavid Marchand void *
pci_map_resource(void * requested_addr,int fd,off_t offset,size_t size,int additional_flags)134e1ece609SDavid Marchand pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size,
135e1ece609SDavid Marchand 		 int additional_flags)
136e1ece609SDavid Marchand {
137e1ece609SDavid Marchand 	void *mapaddr;
138e1ece609SDavid Marchand 
139e1ece609SDavid Marchand 	/* Map the PCI memory resource of device */
140e1ece609SDavid Marchand 	mapaddr = rte_mem_map(requested_addr, size,
141e1ece609SDavid Marchand 		RTE_PROT_READ | RTE_PROT_WRITE,
142e1ece609SDavid Marchand 		RTE_MAP_SHARED | additional_flags, fd, offset);
143e1ece609SDavid Marchand 	if (mapaddr == NULL) {
144*849f773bSDavid Marchand 		PCI_LOG(ERR, "%s(): cannot map resource(%d, %p, 0x%zx, 0x%llx): %s (%p)",
145e1ece609SDavid Marchand 			__func__, fd, requested_addr, size,
146e1ece609SDavid Marchand 			(unsigned long long)offset,
147e1ece609SDavid Marchand 			rte_strerror(rte_errno), mapaddr);
148e1ece609SDavid Marchand 	} else
149*849f773bSDavid Marchand 		PCI_LOG(DEBUG, "  PCI memory mapped at %p", mapaddr);
150e1ece609SDavid Marchand 
151e1ece609SDavid Marchand 	return mapaddr;
152e1ece609SDavid Marchand }
153e1ece609SDavid Marchand 
154e1ece609SDavid Marchand /* unmap a particular resource */
155e1ece609SDavid Marchand void
pci_unmap_resource(void * requested_addr,size_t size)156e1ece609SDavid Marchand pci_unmap_resource(void *requested_addr, size_t size)
157e1ece609SDavid Marchand {
158e1ece609SDavid Marchand 	if (requested_addr == NULL)
159e1ece609SDavid Marchand 		return;
160e1ece609SDavid Marchand 
161e1ece609SDavid Marchand 	/* Unmap the PCI memory resource of device */
162e1ece609SDavid Marchand 	if (rte_mem_unmap(requested_addr, size)) {
163*849f773bSDavid Marchand 		PCI_LOG(ERR, "%s(): cannot mem unmap(%p, %#zx): %s",
164e1ece609SDavid Marchand 			__func__, requested_addr, size,
165e1ece609SDavid Marchand 			rte_strerror(rte_errno));
166e1ece609SDavid Marchand 	} else
167*849f773bSDavid Marchand 		PCI_LOG(DEBUG, "  PCI memory unmapped at %p", requested_addr);
168e1ece609SDavid Marchand }
169c752998bSGaetan Rivet /*
170c752998bSGaetan Rivet  * Match the PCI Driver and Device using the ID Table
171c752998bSGaetan Rivet  */
172c752998bSGaetan Rivet int
rte_pci_match(const struct rte_pci_driver * pci_drv,const struct rte_pci_device * pci_dev)173c752998bSGaetan Rivet rte_pci_match(const struct rte_pci_driver *pci_drv,
174c752998bSGaetan Rivet 	      const struct rte_pci_device *pci_dev)
175c752998bSGaetan Rivet {
176c752998bSGaetan Rivet 	const struct rte_pci_id *id_table;
177c752998bSGaetan Rivet 
178c752998bSGaetan Rivet 	for (id_table = pci_drv->id_table; id_table->vendor_id != 0;
179c752998bSGaetan Rivet 	     id_table++) {
180c752998bSGaetan Rivet 		/* check if device's identifiers match the driver's ones */
181c752998bSGaetan Rivet 		if (id_table->vendor_id != pci_dev->id.vendor_id &&
1824d509afaSThomas Monjalon 				id_table->vendor_id != RTE_PCI_ANY_ID)
183c752998bSGaetan Rivet 			continue;
184c752998bSGaetan Rivet 		if (id_table->device_id != pci_dev->id.device_id &&
1854d509afaSThomas Monjalon 				id_table->device_id != RTE_PCI_ANY_ID)
186c752998bSGaetan Rivet 			continue;
187c752998bSGaetan Rivet 		if (id_table->subsystem_vendor_id !=
188c752998bSGaetan Rivet 		    pci_dev->id.subsystem_vendor_id &&
1894d509afaSThomas Monjalon 		    id_table->subsystem_vendor_id != RTE_PCI_ANY_ID)
190c752998bSGaetan Rivet 			continue;
191c752998bSGaetan Rivet 		if (id_table->subsystem_device_id !=
192c752998bSGaetan Rivet 		    pci_dev->id.subsystem_device_id &&
1934d509afaSThomas Monjalon 		    id_table->subsystem_device_id != RTE_PCI_ANY_ID)
194c752998bSGaetan Rivet 			continue;
195c752998bSGaetan Rivet 		if (id_table->class_id != pci_dev->id.class_id &&
196c752998bSGaetan Rivet 				id_table->class_id != RTE_CLASS_ANY_ID)
197c752998bSGaetan Rivet 			continue;
198c752998bSGaetan Rivet 
199c752998bSGaetan Rivet 		return 1;
200c752998bSGaetan Rivet 	}
201c752998bSGaetan Rivet 
202c752998bSGaetan Rivet 	return 0;
203c752998bSGaetan Rivet }
204c752998bSGaetan Rivet 
205c752998bSGaetan Rivet /*
206c752998bSGaetan Rivet  * If vendor/device ID match, call the probe() function of the
207c752998bSGaetan Rivet  * driver.
208c752998bSGaetan Rivet  */
209c752998bSGaetan Rivet static int
rte_pci_probe_one_driver(struct rte_pci_driver * dr,struct rte_pci_device * dev)210c752998bSGaetan Rivet rte_pci_probe_one_driver(struct rte_pci_driver *dr,
211c752998bSGaetan Rivet 			 struct rte_pci_device *dev)
212c752998bSGaetan Rivet {
213c752998bSGaetan Rivet 	int ret;
214e9d159c3SThomas Monjalon 	bool already_probed;
215c752998bSGaetan Rivet 	struct rte_pci_addr *loc;
216c752998bSGaetan Rivet 
217c752998bSGaetan Rivet 	if ((dr == NULL) || (dev == NULL))
218c752998bSGaetan Rivet 		return -EINVAL;
219c752998bSGaetan Rivet 
220c752998bSGaetan Rivet 	loc = &dev->addr;
221c752998bSGaetan Rivet 
222a65a34a8SStephen Hemminger 	/* The device is not blocked; Check if driver supports it */
223c752998bSGaetan Rivet 	if (!rte_pci_match(dr, dev))
224c752998bSGaetan Rivet 		/* Match of device and driver failed */
225c752998bSGaetan Rivet 		return 1;
226c752998bSGaetan Rivet 
227*849f773bSDavid Marchand 	PCI_LOG(DEBUG, "PCI device "PCI_PRI_FMT" on NUMA socket %i",
228c752998bSGaetan Rivet 		loc->domain, loc->bus, loc->devid, loc->function,
229c752998bSGaetan Rivet 		dev->device.numa_node);
230c752998bSGaetan Rivet 
231a65a34a8SStephen Hemminger 	/* no initialization when marked as blocked, return without error */
232c752998bSGaetan Rivet 	if (dev->device.devargs != NULL &&
233a65a34a8SStephen Hemminger 		dev->device.devargs->policy == RTE_DEV_BLOCKED) {
234*849f773bSDavid Marchand 		PCI_LOG(INFO, "  Device is blocked, not initializing");
235c752998bSGaetan Rivet 		return 1;
236c752998bSGaetan Rivet 	}
237c752998bSGaetan Rivet 
2387dcd73e3SOlivier Matz 	if (dev->device.numa_node < 0 && rte_socket_count() > 1)
239*849f773bSDavid Marchand 		PCI_LOG(INFO, "Device %s is not NUMA-aware", dev->name);
240c752998bSGaetan Rivet 
241e9d159c3SThomas Monjalon 	already_probed = rte_dev_is_probed(&dev->device);
242e9d159c3SThomas Monjalon 	if (already_probed && !(dr->drv_flags & RTE_PCI_DRV_PROBE_AGAIN)) {
243*849f773bSDavid Marchand 		PCI_LOG(DEBUG, "Device %s is already probed", dev->device.name);
244e9d159c3SThomas Monjalon 		return -EEXIST;
245e9d159c3SThomas Monjalon 	}
246e9d159c3SThomas Monjalon 
247*849f773bSDavid Marchand 	PCI_LOG(DEBUG, "  probe driver: %x:%x %s", dev->id.vendor_id,
248c752998bSGaetan Rivet 		dev->id.device_id, dr->driver.name);
249c752998bSGaetan Rivet 
250b76fafb1SDavid Marchand 	if (!already_probed) {
251b76fafb1SDavid Marchand 		enum rte_iova_mode dev_iova_mode;
252b76fafb1SDavid Marchand 		enum rte_iova_mode iova_mode;
253b76fafb1SDavid Marchand 
254b76fafb1SDavid Marchand 		dev_iova_mode = pci_device_iova_mode(dr, dev);
255b76fafb1SDavid Marchand 		iova_mode = rte_eal_iova_mode();
256b76fafb1SDavid Marchand 		if (dev_iova_mode != RTE_IOVA_DC &&
257b76fafb1SDavid Marchand 		    dev_iova_mode != iova_mode) {
258*849f773bSDavid Marchand 			PCI_LOG(ERR, "  Expecting '%s' IOVA mode but current mode is '%s', not initializing",
259b76fafb1SDavid Marchand 				dev_iova_mode == RTE_IOVA_PA ? "PA" : "VA",
260b76fafb1SDavid Marchand 				iova_mode == RTE_IOVA_PA ? "PA" : "VA");
261b76fafb1SDavid Marchand 			return -EINVAL;
262b76fafb1SDavid Marchand 		}
263b76fafb1SDavid Marchand 
264d61138d4SHarman Kalra 		/* Allocate interrupt instance for pci device */
265d61138d4SHarman Kalra 		dev->intr_handle =
266d61138d4SHarman Kalra 			rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_PRIVATE);
267d61138d4SHarman Kalra 		if (dev->intr_handle == NULL) {
268*849f773bSDavid Marchand 			PCI_LOG(ERR, "Failed to create interrupt instance for %s",
269d61138d4SHarman Kalra 				dev->device.name);
270d61138d4SHarman Kalra 			return -ENOMEM;
271b76fafb1SDavid Marchand 		}
2721d20a073SRafal Kozik 
273d61138d4SHarman Kalra 		dev->vfio_req_intr_handle =
274d61138d4SHarman Kalra 			rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_PRIVATE);
275d61138d4SHarman Kalra 		if (dev->vfio_req_intr_handle == NULL) {
276d61138d4SHarman Kalra 			rte_intr_instance_free(dev->intr_handle);
277d61138d4SHarman Kalra 			dev->intr_handle = NULL;
278*849f773bSDavid Marchand 			PCI_LOG(ERR, "Failed to create vfio req interrupt instance for %s",
279d61138d4SHarman Kalra 				dev->device.name);
280d61138d4SHarman Kalra 			return -ENOMEM;
281d61138d4SHarman Kalra 		}
282d61138d4SHarman Kalra 
283d1ef517dSDavid Marchand 		/*
284d1ef517dSDavid Marchand 		 * Reference driver structure.
285d1ef517dSDavid Marchand 		 * This needs to be before rte_pci_map_device(), as it enables
286d1ef517dSDavid Marchand 		 * to use driver flags for adjusting configuration.
287d1ef517dSDavid Marchand 		 */
2886b92f184SMichal Krawczyk 		dev->driver = dr;
289d1ef517dSDavid Marchand 		if (dev->driver->drv_flags & RTE_PCI_DRV_NEED_MAPPING) {
290c752998bSGaetan Rivet 			ret = rte_pci_map_device(dev);
2911d20a073SRafal Kozik 			if (ret != 0) {
2926b92f184SMichal Krawczyk 				dev->driver = NULL;
293d61138d4SHarman Kalra 				rte_intr_instance_free(dev->vfio_req_intr_handle);
294d61138d4SHarman Kalra 				dev->vfio_req_intr_handle = NULL;
295d61138d4SHarman Kalra 				rte_intr_instance_free(dev->intr_handle);
296d61138d4SHarman Kalra 				dev->intr_handle = NULL;
297c752998bSGaetan Rivet 				return ret;
298c752998bSGaetan Rivet 			}
2991d20a073SRafal Kozik 		}
300d61138d4SHarman Kalra 	}
301d61138d4SHarman Kalra 
302*849f773bSDavid Marchand 	PCI_LOG(INFO, "Probe PCI driver: %s (%x:%04x) device: "PCI_PRI_FMT" (socket %i)",
303e4f27af0SJerin Jacob 		dr->driver.name, dev->id.vendor_id, dev->id.device_id,
304e4f27af0SJerin Jacob 		loc->domain, loc->bus, loc->devid, loc->function,
305e4f27af0SJerin Jacob 		dev->device.numa_node);
306c752998bSGaetan Rivet 	/* call the driver probe() function */
307c752998bSGaetan Rivet 	ret = dr->probe(dr, dev);
308e9d159c3SThomas Monjalon 	if (already_probed)
309e9d159c3SThomas Monjalon 		return ret; /* no rollback if already succeeded earlier */
310c752998bSGaetan Rivet 	if (ret) {
311c752998bSGaetan Rivet 		dev->driver = NULL;
312c752998bSGaetan Rivet 		if ((dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) &&
313c752998bSGaetan Rivet 			/* Don't unmap if device is unsupported and
314c752998bSGaetan Rivet 			 * driver needs mapped resources.
315c752998bSGaetan Rivet 			 */
316c752998bSGaetan Rivet 			!(ret > 0 &&
317c752998bSGaetan Rivet 				(dr->drv_flags & RTE_PCI_DRV_KEEP_MAPPED_RES)))
318c752998bSGaetan Rivet 			rte_pci_unmap_device(dev);
319f2777b53SDavid Marchand 		rte_intr_instance_free(dev->vfio_req_intr_handle);
320f2777b53SDavid Marchand 		dev->vfio_req_intr_handle = NULL;
321f2777b53SDavid Marchand 		rte_intr_instance_free(dev->intr_handle);
322f2777b53SDavid Marchand 		dev->intr_handle = NULL;
323391797f0SThomas Monjalon 	} else {
324391797f0SThomas Monjalon 		dev->device.driver = &dr->driver;
325c752998bSGaetan Rivet 	}
326c752998bSGaetan Rivet 
327c752998bSGaetan Rivet 	return ret;
328c752998bSGaetan Rivet }
329c752998bSGaetan Rivet 
330c752998bSGaetan Rivet /*
331c752998bSGaetan Rivet  * If vendor/device ID match, call the remove() function of the
332c752998bSGaetan Rivet  * driver.
333c752998bSGaetan Rivet  */
334c752998bSGaetan Rivet static int
rte_pci_detach_dev(struct rte_pci_device * dev)335c752998bSGaetan Rivet rte_pci_detach_dev(struct rte_pci_device *dev)
336c752998bSGaetan Rivet {
337c752998bSGaetan Rivet 	struct rte_pci_addr *loc;
338c752998bSGaetan Rivet 	struct rte_pci_driver *dr;
339c752998bSGaetan Rivet 	int ret = 0;
340c752998bSGaetan Rivet 
341c752998bSGaetan Rivet 	if (dev == NULL)
342c752998bSGaetan Rivet 		return -EINVAL;
343c752998bSGaetan Rivet 
344c752998bSGaetan Rivet 	dr = dev->driver;
345c752998bSGaetan Rivet 	loc = &dev->addr;
346c752998bSGaetan Rivet 
347*849f773bSDavid Marchand 	PCI_LOG(DEBUG, "PCI device "PCI_PRI_FMT" on NUMA socket %i",
348c752998bSGaetan Rivet 		loc->domain, loc->bus, loc->devid,
349c752998bSGaetan Rivet 		loc->function, dev->device.numa_node);
350c752998bSGaetan Rivet 
351*849f773bSDavid Marchand 	PCI_LOG(DEBUG, "  remove driver: %x:%x %s", dev->id.vendor_id,
352c752998bSGaetan Rivet 		dev->id.device_id, dr->driver.name);
353c752998bSGaetan Rivet 
354c752998bSGaetan Rivet 	if (dr->remove) {
355c752998bSGaetan Rivet 		ret = dr->remove(dev);
356c752998bSGaetan Rivet 		if (ret < 0)
357c752998bSGaetan Rivet 			return ret;
358c752998bSGaetan Rivet 	}
359c752998bSGaetan Rivet 
360c752998bSGaetan Rivet 	/* clear driver structure */
361c752998bSGaetan Rivet 	dev->driver = NULL;
362fd03a747SMatan Azrad 	dev->device.driver = NULL;
363c752998bSGaetan Rivet 
364c752998bSGaetan Rivet 	if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING)
365c752998bSGaetan Rivet 		/* unmap resources for devices that use igb_uio */
366c752998bSGaetan Rivet 		rte_pci_unmap_device(dev);
367c752998bSGaetan Rivet 
368f2777b53SDavid Marchand 	rte_intr_instance_free(dev->intr_handle);
369f2777b53SDavid Marchand 	dev->intr_handle = NULL;
370f2777b53SDavid Marchand 	rte_intr_instance_free(dev->vfio_req_intr_handle);
371f2777b53SDavid Marchand 	dev->vfio_req_intr_handle = NULL;
372f2777b53SDavid Marchand 
373c752998bSGaetan Rivet 	return 0;
374c752998bSGaetan Rivet }
375c752998bSGaetan Rivet 
376c752998bSGaetan Rivet /*
377c752998bSGaetan Rivet  * If vendor/device ID match, call the probe() function of all
3783f2ef279SDarek Stojaczyk  * registered driver for the given device. Return < 0 if initialization
379c752998bSGaetan Rivet  * failed, return 1 if no driver is found for this device.
380c752998bSGaetan Rivet  */
381c752998bSGaetan Rivet static int
pci_probe_all_drivers(struct rte_pci_device * dev)382c752998bSGaetan Rivet pci_probe_all_drivers(struct rte_pci_device *dev)
383c752998bSGaetan Rivet {
384c752998bSGaetan Rivet 	struct rte_pci_driver *dr = NULL;
385c752998bSGaetan Rivet 	int rc = 0;
386c752998bSGaetan Rivet 
387c752998bSGaetan Rivet 	if (dev == NULL)
3883f2ef279SDarek Stojaczyk 		return -EINVAL;
389c752998bSGaetan Rivet 
390c752998bSGaetan Rivet 	FOREACH_DRIVER_ON_PCIBUS(dr) {
391c752998bSGaetan Rivet 		rc = rte_pci_probe_one_driver(dr, dev);
392c752998bSGaetan Rivet 		if (rc < 0)
393c752998bSGaetan Rivet 			/* negative value is an error */
3943f2ef279SDarek Stojaczyk 			return rc;
395c752998bSGaetan Rivet 		if (rc > 0)
396c752998bSGaetan Rivet 			/* positive value means driver doesn't support it */
397c752998bSGaetan Rivet 			continue;
398c752998bSGaetan Rivet 		return 0;
399c752998bSGaetan Rivet 	}
400c752998bSGaetan Rivet 	return 1;
401c752998bSGaetan Rivet }
402c752998bSGaetan Rivet 
403c752998bSGaetan Rivet /*
404c752998bSGaetan Rivet  * Scan the content of the PCI bus, and call the probe() function for
405c752998bSGaetan Rivet  * all registered drivers that have a matching entry in its id_table
406c752998bSGaetan Rivet  * for discovered devices.
407c752998bSGaetan Rivet  */
40887db93e0SDavid Marchand static int
pci_probe(void)40987db93e0SDavid Marchand pci_probe(void)
410c752998bSGaetan Rivet {
411c752998bSGaetan Rivet 	struct rte_pci_device *dev = NULL;
412c752998bSGaetan Rivet 	size_t probed = 0, failed = 0;
413c752998bSGaetan Rivet 	int ret = 0;
414c752998bSGaetan Rivet 
415c752998bSGaetan Rivet 	FOREACH_DEVICE_ON_PCIBUS(dev) {
416c752998bSGaetan Rivet 		probed++;
417c752998bSGaetan Rivet 
418c752998bSGaetan Rivet 		ret = pci_probe_all_drivers(dev);
419c752998bSGaetan Rivet 		if (ret < 0) {
420e9d159c3SThomas Monjalon 			if (ret != -EEXIST) {
421*849f773bSDavid Marchand 				PCI_LOG(ERR, "Requested device " PCI_PRI_FMT " cannot be used",
422e9d159c3SThomas Monjalon 					dev->addr.domain, dev->addr.bus,
423c752998bSGaetan Rivet 					dev->addr.devid, dev->addr.function);
424c752998bSGaetan Rivet 				rte_errno = errno;
425c752998bSGaetan Rivet 				failed++;
426e9d159c3SThomas Monjalon 			}
427c752998bSGaetan Rivet 			ret = 0;
428c752998bSGaetan Rivet 		}
429c752998bSGaetan Rivet 	}
430c752998bSGaetan Rivet 
431c752998bSGaetan Rivet 	return (probed && probed == failed) ? -1 : 0;
432c752998bSGaetan Rivet }
433c752998bSGaetan Rivet 
4341cab1a40SKevin Laatz static int
pci_cleanup(void)4351cab1a40SKevin Laatz pci_cleanup(void)
4361cab1a40SKevin Laatz {
4371cab1a40SKevin Laatz 	struct rte_pci_device *dev, *tmp_dev;
4381cab1a40SKevin Laatz 	int error = 0;
4391cab1a40SKevin Laatz 
4401cab1a40SKevin Laatz 	RTE_TAILQ_FOREACH_SAFE(dev, &rte_pci_bus.device_list, next, tmp_dev) {
4411cab1a40SKevin Laatz 		struct rte_pci_driver *drv = dev->driver;
4421cab1a40SKevin Laatz 		int ret = 0;
4431cab1a40SKevin Laatz 
4441cab1a40SKevin Laatz 		if (drv == NULL || drv->remove == NULL)
445deb44af7SVolodymyr Fialko 			goto free;
4461cab1a40SKevin Laatz 
4471cab1a40SKevin Laatz 		ret = drv->remove(dev);
4481cab1a40SKevin Laatz 		if (ret < 0) {
4491cab1a40SKevin Laatz 			rte_errno = errno;
4501cab1a40SKevin Laatz 			error = -1;
4511cab1a40SKevin Laatz 		}
4521cab1a40SKevin Laatz 		dev->driver = NULL;
4531cab1a40SKevin Laatz 		dev->device.driver = NULL;
454d5c39874SKevin Laatz 
455deb44af7SVolodymyr Fialko free:
456d5c39874SKevin Laatz 		/* free interrupt handles */
457d5c39874SKevin Laatz 		rte_intr_instance_free(dev->intr_handle);
458d5c39874SKevin Laatz 		dev->intr_handle = NULL;
459d5c39874SKevin Laatz 		rte_intr_instance_free(dev->vfio_req_intr_handle);
460d5c39874SKevin Laatz 		dev->vfio_req_intr_handle = NULL;
461d5c39874SKevin Laatz 
46287a02023SChenbo Xia 		pci_free(RTE_PCI_DEVICE_INTERNAL(dev));
4631cab1a40SKevin Laatz 	}
4641cab1a40SKevin Laatz 
4651cab1a40SKevin Laatz 	return error;
4661cab1a40SKevin Laatz }
4671cab1a40SKevin Laatz 
468c752998bSGaetan Rivet /* dump one device */
469c752998bSGaetan Rivet static int
pci_dump_one_device(FILE * f,struct rte_pci_device * dev)470c752998bSGaetan Rivet pci_dump_one_device(FILE *f, struct rte_pci_device *dev)
471c752998bSGaetan Rivet {
472c752998bSGaetan Rivet 	int i;
473c752998bSGaetan Rivet 
474c752998bSGaetan Rivet 	fprintf(f, PCI_PRI_FMT, dev->addr.domain, dev->addr.bus,
475c752998bSGaetan Rivet 	       dev->addr.devid, dev->addr.function);
476c752998bSGaetan Rivet 	fprintf(f, " - vendor:%x device:%x\n", dev->id.vendor_id,
477c752998bSGaetan Rivet 	       dev->id.device_id);
478c752998bSGaetan Rivet 
479c752998bSGaetan Rivet 	for (i = 0; i != sizeof(dev->mem_resource) /
480c752998bSGaetan Rivet 		sizeof(dev->mem_resource[0]); i++) {
481c752998bSGaetan Rivet 		fprintf(f, "   %16.16"PRIx64" %16.16"PRIx64"\n",
482c752998bSGaetan Rivet 			dev->mem_resource[i].phys_addr,
483c752998bSGaetan Rivet 			dev->mem_resource[i].len);
484c752998bSGaetan Rivet 	}
485c752998bSGaetan Rivet 	return 0;
486c752998bSGaetan Rivet }
487c752998bSGaetan Rivet 
488c752998bSGaetan Rivet /* dump devices on the bus */
489c752998bSGaetan Rivet void
rte_pci_dump(FILE * f)490c752998bSGaetan Rivet rte_pci_dump(FILE *f)
491c752998bSGaetan Rivet {
492c752998bSGaetan Rivet 	struct rte_pci_device *dev = NULL;
493c752998bSGaetan Rivet 
494c752998bSGaetan Rivet 	FOREACH_DEVICE_ON_PCIBUS(dev) {
495c752998bSGaetan Rivet 		pci_dump_one_device(f, dev);
496c752998bSGaetan Rivet 	}
497c752998bSGaetan Rivet }
498c752998bSGaetan Rivet 
499c752998bSGaetan Rivet static int
pci_parse(const char * name,void * addr)500c752998bSGaetan Rivet pci_parse(const char *name, void *addr)
501c752998bSGaetan Rivet {
502c752998bSGaetan Rivet 	struct rte_pci_addr *out = addr;
503c752998bSGaetan Rivet 	struct rte_pci_addr pci_addr;
504c752998bSGaetan Rivet 	bool parse;
505c752998bSGaetan Rivet 
5060e3ef055SGaetan Rivet 	parse = (rte_pci_addr_parse(name, &pci_addr) == 0);
507c752998bSGaetan Rivet 	if (parse && addr != NULL)
508c752998bSGaetan Rivet 		*out = pci_addr;
509c752998bSGaetan Rivet 	return parse == false;
510c752998bSGaetan Rivet }
511c752998bSGaetan Rivet 
512c752998bSGaetan Rivet /* register a driver */
513c752998bSGaetan Rivet void
rte_pci_register(struct rte_pci_driver * driver)514c752998bSGaetan Rivet rte_pci_register(struct rte_pci_driver *driver)
515c752998bSGaetan Rivet {
516c752998bSGaetan Rivet 	TAILQ_INSERT_TAIL(&rte_pci_bus.driver_list, driver, next);
517c752998bSGaetan Rivet }
518c752998bSGaetan Rivet 
519c752998bSGaetan Rivet /* unregister a driver */
520c752998bSGaetan Rivet void
rte_pci_unregister(struct rte_pci_driver * driver)521c752998bSGaetan Rivet rte_pci_unregister(struct rte_pci_driver *driver)
522c752998bSGaetan Rivet {
523c752998bSGaetan Rivet 	TAILQ_REMOVE(&rte_pci_bus.driver_list, driver, next);
524c752998bSGaetan Rivet }
525c752998bSGaetan Rivet 
526c752998bSGaetan Rivet /* Add a device to PCI bus */
527c752998bSGaetan Rivet void
rte_pci_add_device(struct rte_pci_device * pci_dev)528c752998bSGaetan Rivet rte_pci_add_device(struct rte_pci_device *pci_dev)
529c752998bSGaetan Rivet {
530c752998bSGaetan Rivet 	TAILQ_INSERT_TAIL(&rte_pci_bus.device_list, pci_dev, next);
531c752998bSGaetan Rivet }
532c752998bSGaetan Rivet 
533c752998bSGaetan Rivet /* Insert a device into a predefined position in PCI bus */
534c752998bSGaetan Rivet void
rte_pci_insert_device(struct rte_pci_device * exist_pci_dev,struct rte_pci_device * new_pci_dev)535c752998bSGaetan Rivet rte_pci_insert_device(struct rte_pci_device *exist_pci_dev,
536c752998bSGaetan Rivet 		      struct rte_pci_device *new_pci_dev)
537c752998bSGaetan Rivet {
538c752998bSGaetan Rivet 	TAILQ_INSERT_BEFORE(exist_pci_dev, new_pci_dev, next);
539c752998bSGaetan Rivet }
540c752998bSGaetan Rivet 
541c752998bSGaetan Rivet /* Remove a device from PCI bus */
542607514e7SStephen Hemminger static void
rte_pci_remove_device(struct rte_pci_device * pci_dev)543c752998bSGaetan Rivet rte_pci_remove_device(struct rte_pci_device *pci_dev)
544c752998bSGaetan Rivet {
545c752998bSGaetan Rivet 	TAILQ_REMOVE(&rte_pci_bus.device_list, pci_dev, next);
546c752998bSGaetan Rivet }
547c752998bSGaetan Rivet 
548c752998bSGaetan Rivet static struct rte_device *
pci_find_device(const struct rte_device * start,rte_dev_cmp_t cmp,const void * data)549c752998bSGaetan Rivet pci_find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
550c752998bSGaetan Rivet 		const void *data)
551c752998bSGaetan Rivet {
55264de7e40SGaetan Rivet 	const struct rte_pci_device *pstart;
55364de7e40SGaetan Rivet 	struct rte_pci_device *pdev;
554c752998bSGaetan Rivet 
55564de7e40SGaetan Rivet 	if (start != NULL) {
55664de7e40SGaetan Rivet 		pstart = RTE_DEV_TO_PCI_CONST(start);
55764de7e40SGaetan Rivet 		pdev = TAILQ_NEXT(pstart, next);
55864de7e40SGaetan Rivet 	} else {
55964de7e40SGaetan Rivet 		pdev = TAILQ_FIRST(&rte_pci_bus.device_list);
560c752998bSGaetan Rivet 	}
56164de7e40SGaetan Rivet 	while (pdev != NULL) {
56264de7e40SGaetan Rivet 		if (cmp(&pdev->device, data) == 0)
56364de7e40SGaetan Rivet 			return &pdev->device;
56464de7e40SGaetan Rivet 		pdev = TAILQ_NEXT(pdev, next);
565c752998bSGaetan Rivet 	}
566c752998bSGaetan Rivet 	return NULL;
567c752998bSGaetan Rivet }
568c752998bSGaetan Rivet 
5695c96a299SJeff Guo /*
5705c96a299SJeff Guo  * find the device which encounter the failure, by iterate over all device on
5715c96a299SJeff Guo  * PCI bus to check if the memory failure address is located in the range
5725c96a299SJeff Guo  * of the BARs of the device.
5735c96a299SJeff Guo  */
5745c96a299SJeff Guo static struct rte_pci_device *
pci_find_device_by_addr(const void * failure_addr)5755c96a299SJeff Guo pci_find_device_by_addr(const void *failure_addr)
5765c96a299SJeff Guo {
5775c96a299SJeff Guo 	struct rte_pci_device *pdev = NULL;
5785c96a299SJeff Guo 	uint64_t check_point, start, end, len;
5795c96a299SJeff Guo 	int i;
5805c96a299SJeff Guo 
5815c96a299SJeff Guo 	check_point = (uint64_t)(uintptr_t)failure_addr;
5825c96a299SJeff Guo 
5835c96a299SJeff Guo 	FOREACH_DEVICE_ON_PCIBUS(pdev) {
5845c96a299SJeff Guo 		for (i = 0; i != RTE_DIM(pdev->mem_resource); i++) {
5855c96a299SJeff Guo 			start = (uint64_t)(uintptr_t)pdev->mem_resource[i].addr;
5865c96a299SJeff Guo 			len = pdev->mem_resource[i].len;
5875c96a299SJeff Guo 			end = start + len;
5885c96a299SJeff Guo 			if (check_point >= start && check_point < end) {
589*849f773bSDavid Marchand 				PCI_LOG(DEBUG, "Failure address %16.16"
590*849f773bSDavid Marchand 					PRIx64" belongs to device %s!",
5915c96a299SJeff Guo 					check_point, pdev->device.name);
5925c96a299SJeff Guo 				return pdev;
5935c96a299SJeff Guo 			}
5945c96a299SJeff Guo 		}
5955c96a299SJeff Guo 	}
5965c96a299SJeff Guo 	return NULL;
5975c96a299SJeff Guo }
5985c96a299SJeff Guo 
599c752998bSGaetan Rivet static int
pci_hot_unplug_handler(struct rte_device * dev)600b01dc3daSJeff Guo pci_hot_unplug_handler(struct rte_device *dev)
601b01dc3daSJeff Guo {
602b01dc3daSJeff Guo 	struct rte_pci_device *pdev = NULL;
603b01dc3daSJeff Guo 	int ret = 0;
604b01dc3daSJeff Guo 
605b01dc3daSJeff Guo 	pdev = RTE_DEV_TO_PCI(dev);
606b01dc3daSJeff Guo 	if (!pdev)
607b01dc3daSJeff Guo 		return -1;
608b01dc3daSJeff Guo 
609b01dc3daSJeff Guo 	switch (pdev->kdrv) {
610cda94419SJeff Guo #ifdef HAVE_VFIO_DEV_REQ_INTERFACE
6117c0d798aSDavid Marchand 	case RTE_PCI_KDRV_VFIO:
612c115fd00SJeff Guo 		/*
613c115fd00SJeff Guo 		 * vfio kernel module guaranty the pci device would not be
614c115fd00SJeff Guo 		 * deleted until the user space release the resource, so no
615c115fd00SJeff Guo 		 * need to remap BARs resource here, just directly notify
616c115fd00SJeff Guo 		 * the req event to the user space to handle it.
617c115fd00SJeff Guo 		 */
618c115fd00SJeff Guo 		rte_dev_event_callback_process(dev->name,
619c115fd00SJeff Guo 					       RTE_DEV_EVENT_REMOVE);
620c115fd00SJeff Guo 		break;
621cda94419SJeff Guo #endif
6227c0d798aSDavid Marchand 	case RTE_PCI_KDRV_IGB_UIO:
6237c0d798aSDavid Marchand 	case RTE_PCI_KDRV_UIO_GENERIC:
6247c0d798aSDavid Marchand 	case RTE_PCI_KDRV_NIC_UIO:
625b01dc3daSJeff Guo 		/* BARs resource is invalid, remap it to be safe. */
626b01dc3daSJeff Guo 		ret = pci_uio_remap_resource(pdev);
627b01dc3daSJeff Guo 		break;
628b01dc3daSJeff Guo 	default:
629*849f773bSDavid Marchand 		PCI_LOG(DEBUG, "Not managed by a supported kernel driver, skipped");
630b01dc3daSJeff Guo 		ret = -1;
631b01dc3daSJeff Guo 		break;
632b01dc3daSJeff Guo 	}
633b01dc3daSJeff Guo 
634b01dc3daSJeff Guo 	return ret;
635b01dc3daSJeff Guo }
636b01dc3daSJeff Guo 
637b01dc3daSJeff Guo static int
pci_sigbus_handler(const void * failure_addr)6385c96a299SJeff Guo pci_sigbus_handler(const void *failure_addr)
6395c96a299SJeff Guo {
6405c96a299SJeff Guo 	struct rte_pci_device *pdev = NULL;
6415c96a299SJeff Guo 	int ret = 0;
6425c96a299SJeff Guo 
6435c96a299SJeff Guo 	pdev = pci_find_device_by_addr(failure_addr);
6445c96a299SJeff Guo 	if (!pdev) {
6455c96a299SJeff Guo 		/* It is a generic sigbus error, no bus would handle it. */
6465c96a299SJeff Guo 		ret = 1;
6475c96a299SJeff Guo 	} else {
6485c96a299SJeff Guo 		/* The sigbus error is caused of hot-unplug. */
6495c96a299SJeff Guo 		ret = pci_hot_unplug_handler(&pdev->device);
6505c96a299SJeff Guo 		if (ret) {
651*849f773bSDavid Marchand 			PCI_LOG(ERR, "Failed to handle hot-unplug for device %s",
6525c96a299SJeff Guo 				pdev->name);
6535c96a299SJeff Guo 			ret = -1;
6545c96a299SJeff Guo 		}
6555c96a299SJeff Guo 	}
6565c96a299SJeff Guo 	return ret;
6575c96a299SJeff Guo }
6585c96a299SJeff Guo 
/*
 * Bus "plug" callback: converts the generic device to its PCI device and
 * returns the result of pci_probe_all_drivers() on it.
 */
static int
pci_plug(struct rte_device *dev)
{
	struct rte_pci_device *pdev = RTE_DEV_TO_PCI(dev);

	return pci_probe_all_drivers(pdev);
}
664c752998bSGaetan Rivet 
665c752998bSGaetan Rivet static int
pci_unplug(struct rte_device * dev)666c752998bSGaetan Rivet pci_unplug(struct rte_device *dev)
667c752998bSGaetan Rivet {
668c752998bSGaetan Rivet 	struct rte_pci_device *pdev;
669c752998bSGaetan Rivet 	int ret;
670c752998bSGaetan Rivet 
671c752998bSGaetan Rivet 	pdev = RTE_DEV_TO_PCI(dev);
672c752998bSGaetan Rivet 	ret = rte_pci_detach_dev(pdev);
673c752998bSGaetan Rivet 	if (ret == 0) {
674c752998bSGaetan Rivet 		rte_pci_remove_device(pdev);
675739e13bcSThomas Monjalon 		rte_devargs_remove(dev->devargs);
67687a02023SChenbo Xia 		pci_free(RTE_PCI_DEVICE_INTERNAL(pdev));
677c752998bSGaetan Rivet 	}
678c752998bSGaetan Rivet 	return ret;
679c752998bSGaetan Rivet }
680c752998bSGaetan Rivet 
681c33a675bSShahaf Shuler static int
pci_dma_map(struct rte_device * dev,void * addr,uint64_t iova,size_t len)682c33a675bSShahaf Shuler pci_dma_map(struct rte_device *dev, void *addr, uint64_t iova, size_t len)
683c33a675bSShahaf Shuler {
684c33a675bSShahaf Shuler 	struct rte_pci_device *pdev = RTE_DEV_TO_PCI(dev);
685c33a675bSShahaf Shuler 
686c33a675bSShahaf Shuler 	if (!pdev || !pdev->driver) {
687c33a675bSShahaf Shuler 		rte_errno = EINVAL;
688c33a675bSShahaf Shuler 		return -1;
689c33a675bSShahaf Shuler 	}
690c33a675bSShahaf Shuler 	if (pdev->driver->dma_map)
691c33a675bSShahaf Shuler 		return pdev->driver->dma_map(pdev, addr, iova, len);
692c33a675bSShahaf Shuler 	/**
693c33a675bSShahaf Shuler 	 *  In case driver don't provides any specific mapping
694c33a675bSShahaf Shuler 	 *  try fallback to VFIO.
695c33a675bSShahaf Shuler 	 */
6967c0d798aSDavid Marchand 	if (pdev->kdrv == RTE_PCI_KDRV_VFIO)
697c33a675bSShahaf Shuler 		return rte_vfio_container_dma_map
698c33a675bSShahaf Shuler 				(RTE_VFIO_DEFAULT_CONTAINER_FD, (uintptr_t)addr,
699c33a675bSShahaf Shuler 				 iova, len);
700c33a675bSShahaf Shuler 	rte_errno = ENOTSUP;
701c33a675bSShahaf Shuler 	return -1;
702c33a675bSShahaf Shuler }
703c33a675bSShahaf Shuler 
704c33a675bSShahaf Shuler static int
pci_dma_unmap(struct rte_device * dev,void * addr,uint64_t iova,size_t len)705c33a675bSShahaf Shuler pci_dma_unmap(struct rte_device *dev, void *addr, uint64_t iova, size_t len)
706c33a675bSShahaf Shuler {
707c33a675bSShahaf Shuler 	struct rte_pci_device *pdev = RTE_DEV_TO_PCI(dev);
708c33a675bSShahaf Shuler 
709c33a675bSShahaf Shuler 	if (!pdev || !pdev->driver) {
710c33a675bSShahaf Shuler 		rte_errno = EINVAL;
711c33a675bSShahaf Shuler 		return -1;
712c33a675bSShahaf Shuler 	}
713c33a675bSShahaf Shuler 	if (pdev->driver->dma_unmap)
714c33a675bSShahaf Shuler 		return pdev->driver->dma_unmap(pdev, addr, iova, len);
715c33a675bSShahaf Shuler 	/**
716c33a675bSShahaf Shuler 	 *  In case driver don't provides any specific mapping
717c33a675bSShahaf Shuler 	 *  try fallback to VFIO.
718c33a675bSShahaf Shuler 	 */
7197c0d798aSDavid Marchand 	if (pdev->kdrv == RTE_PCI_KDRV_VFIO)
720c33a675bSShahaf Shuler 		return rte_vfio_container_dma_unmap
721c33a675bSShahaf Shuler 				(RTE_VFIO_DEFAULT_CONTAINER_FD, (uintptr_t)addr,
722c33a675bSShahaf Shuler 				 iova, len);
723c33a675bSShahaf Shuler 	rte_errno = ENOTSUP;
724c33a675bSShahaf Shuler 	return -1;
725c33a675bSShahaf Shuler }
726c33a675bSShahaf Shuler 
727463a5245SSunil Kumar Kori bool
rte_pci_ignore_device(const struct rte_pci_addr * pci_addr)728463a5245SSunil Kumar Kori rte_pci_ignore_device(const struct rte_pci_addr *pci_addr)
729703458e1SBen Walker {
730463a5245SSunil Kumar Kori 	struct rte_devargs *devargs = pci_devargs_lookup(pci_addr);
731703458e1SBen Walker 
732703458e1SBen Walker 	switch (rte_pci_bus.bus.conf.scan_mode) {
733a65a34a8SStephen Hemminger 	case RTE_BUS_SCAN_ALLOWLIST:
734a65a34a8SStephen Hemminger 		if (devargs && devargs->policy == RTE_DEV_ALLOWED)
735703458e1SBen Walker 			return false;
736703458e1SBen Walker 		break;
737703458e1SBen Walker 	case RTE_BUS_SCAN_UNDEFINED:
738a65a34a8SStephen Hemminger 	case RTE_BUS_SCAN_BLOCKLIST:
739a65a34a8SStephen Hemminger 		if (devargs == NULL || devargs->policy != RTE_DEV_BLOCKED)
740703458e1SBen Walker 			return false;
741703458e1SBen Walker 		break;
742703458e1SBen Walker 	}
743703458e1SBen Walker 	return true;
744703458e1SBen Walker }
745703458e1SBen Walker 
746703458e1SBen Walker enum rte_iova_mode
rte_pci_get_iommu_class(void)747703458e1SBen Walker rte_pci_get_iommu_class(void)
748703458e1SBen Walker {
749703458e1SBen Walker 	enum rte_iova_mode iova_mode = RTE_IOVA_DC;
750703458e1SBen Walker 	const struct rte_pci_device *dev;
751703458e1SBen Walker 	const struct rte_pci_driver *drv;
752703458e1SBen Walker 	bool devices_want_va = false;
753703458e1SBen Walker 	bool devices_want_pa = false;
75466d3724bSDavid Marchand 	int iommu_no_va = -1;
755703458e1SBen Walker 
756703458e1SBen Walker 	FOREACH_DEVICE_ON_PCIBUS(dev) {
75766d3724bSDavid Marchand 		/*
75866d3724bSDavid Marchand 		 * We can check this only once, because the IOMMU hardware is
75966d3724bSDavid Marchand 		 * the same for all of them.
76066d3724bSDavid Marchand 		 */
76166d3724bSDavid Marchand 		if (iommu_no_va == -1)
76266d3724bSDavid Marchand 			iommu_no_va = pci_device_iommu_support_va(dev)
76366d3724bSDavid Marchand 					? 0 : 1;
764463a5245SSunil Kumar Kori 
7657c0d798aSDavid Marchand 		if (dev->kdrv == RTE_PCI_KDRV_UNKNOWN ||
7667c0d798aSDavid Marchand 		    dev->kdrv == RTE_PCI_KDRV_NONE)
767703458e1SBen Walker 			continue;
768703458e1SBen Walker 		FOREACH_DRIVER_ON_PCIBUS(drv) {
769703458e1SBen Walker 			enum rte_iova_mode dev_iova_mode;
770703458e1SBen Walker 
771703458e1SBen Walker 			if (!rte_pci_match(drv, dev))
772703458e1SBen Walker 				continue;
773703458e1SBen Walker 
774703458e1SBen Walker 			dev_iova_mode = pci_device_iova_mode(drv, dev);
775*849f773bSDavid Marchand 			PCI_LOG(DEBUG, "PCI driver %s for device "PCI_PRI_FMT" wants IOVA as '%s'",
776703458e1SBen Walker 				drv->driver.name,
777703458e1SBen Walker 				dev->addr.domain, dev->addr.bus,
778703458e1SBen Walker 				dev->addr.devid, dev->addr.function,
779703458e1SBen Walker 				dev_iova_mode == RTE_IOVA_DC ? "DC" :
780703458e1SBen Walker 				(dev_iova_mode == RTE_IOVA_PA ? "PA" : "VA"));
781703458e1SBen Walker 			if (dev_iova_mode == RTE_IOVA_PA)
782703458e1SBen Walker 				devices_want_pa = true;
783703458e1SBen Walker 			else if (dev_iova_mode == RTE_IOVA_VA)
784703458e1SBen Walker 				devices_want_va = true;
785703458e1SBen Walker 		}
786703458e1SBen Walker 	}
78766d3724bSDavid Marchand 	if (iommu_no_va == 1) {
78866d3724bSDavid Marchand 		iova_mode = RTE_IOVA_PA;
78966d3724bSDavid Marchand 		if (devices_want_va) {
790*849f773bSDavid Marchand 			PCI_LOG(WARNING, "Some devices want 'VA' but IOMMU does not support 'VA'.");
791*849f773bSDavid Marchand 			PCI_LOG(WARNING, "The devices that want 'VA' won't initialize.");
79266d3724bSDavid Marchand 		}
79366d3724bSDavid Marchand 	} else if (devices_want_va && !devices_want_pa) {
794703458e1SBen Walker 		iova_mode = RTE_IOVA_VA;
795b76fafb1SDavid Marchand 	} else if (devices_want_pa && !devices_want_va) {
796b76fafb1SDavid Marchand 		iova_mode = RTE_IOVA_PA;
797b76fafb1SDavid Marchand 	} else {
798b76fafb1SDavid Marchand 		iova_mode = RTE_IOVA_DC;
799b76fafb1SDavid Marchand 		if (devices_want_va) {
800*849f773bSDavid Marchand 			PCI_LOG(WARNING, "Some devices want 'VA' but forcing 'DC' because other devices want 'PA'.");
801*849f773bSDavid Marchand 			PCI_LOG(WARNING, "Depending on the final decision by the EAL, not all devices may be able to initialize.");
802b76fafb1SDavid Marchand 		}
803703458e1SBen Walker 	}
804703458e1SBen Walker 	return iova_mode;
805703458e1SBen Walker }
806703458e1SBen Walker 
807a10b6e53SDavid Marchand bool
rte_pci_has_capability_list(const struct rte_pci_device * dev)808a10b6e53SDavid Marchand rte_pci_has_capability_list(const struct rte_pci_device *dev)
809a10b6e53SDavid Marchand {
810a10b6e53SDavid Marchand 	uint16_t status;
811a10b6e53SDavid Marchand 
812a10b6e53SDavid Marchand 	if (rte_pci_read_config(dev, &status, sizeof(status), RTE_PCI_STATUS) != sizeof(status))
813a10b6e53SDavid Marchand 		return false;
814a10b6e53SDavid Marchand 
815a10b6e53SDavid Marchand 	return (status & RTE_PCI_STATUS_CAP_LIST) != 0;
816a10b6e53SDavid Marchand }
817a10b6e53SDavid Marchand 
/*
 * Look for the first capability with identifier cap in the capability list
 * of the device.
 *
 * Returns whatever rte_pci_find_next_capability() returns for a search
 * started at offset 0.
 */
off_t
rte_pci_find_capability(const struct rte_pci_device *dev, uint8_t cap)
{
	/* Offset 0 makes the search start from the head of the list. */
	const off_t list_head = 0;

	return rte_pci_find_next_capability(dev, cap, list_head);
}
823a10b6e53SDavid Marchand 
824a10b6e53SDavid Marchand off_t
rte_pci_find_next_capability(const struct rte_pci_device * dev,uint8_t cap,off_t offset)825a10b6e53SDavid Marchand rte_pci_find_next_capability(const struct rte_pci_device *dev, uint8_t cap,
826a10b6e53SDavid Marchand 	off_t offset)
827a10b6e53SDavid Marchand {
828a10b6e53SDavid Marchand 	uint8_t pos;
829a10b6e53SDavid Marchand 	int ttl;
830a10b6e53SDavid Marchand 
831a10b6e53SDavid Marchand 	if (offset == 0)
832a10b6e53SDavid Marchand 		offset = RTE_PCI_CAPABILITY_LIST;
833a10b6e53SDavid Marchand 	else
834a10b6e53SDavid Marchand 		offset += RTE_PCI_CAP_NEXT;
835a10b6e53SDavid Marchand 	ttl = (RTE_PCI_CFG_SPACE_SIZE - RTE_PCI_STD_HEADER_SIZEOF) / RTE_PCI_CAP_SIZEOF;
836a10b6e53SDavid Marchand 
837a10b6e53SDavid Marchand 	if (rte_pci_read_config(dev, &pos, sizeof(pos), offset) < 0)
838a10b6e53SDavid Marchand 		return -1;
839a10b6e53SDavid Marchand 
840a10b6e53SDavid Marchand 	while (pos && ttl--) {
841a10b6e53SDavid Marchand 		uint16_t ent;
842a10b6e53SDavid Marchand 		uint8_t id;
843a10b6e53SDavid Marchand 
844a10b6e53SDavid Marchand 		offset = pos;
845a10b6e53SDavid Marchand 		if (rte_pci_read_config(dev, &ent, sizeof(ent), offset) < 0)
846a10b6e53SDavid Marchand 			return -1;
847a10b6e53SDavid Marchand 
848a10b6e53SDavid Marchand 		id = ent & 0xff;
849a10b6e53SDavid Marchand 		if (id == 0xff)
850a10b6e53SDavid Marchand 			break;
851a10b6e53SDavid Marchand 
852a10b6e53SDavid Marchand 		if (id == cap)
853a10b6e53SDavid Marchand 			return offset;
854a10b6e53SDavid Marchand 
855a10b6e53SDavid Marchand 		pos = (ent >> 8);
856a10b6e53SDavid Marchand 	}
857a10b6e53SDavid Marchand 
858a10b6e53SDavid Marchand 	return 0;
859a10b6e53SDavid Marchand }
860a10b6e53SDavid Marchand 
861e00d2b4cSManish Chopra off_t
rte_pci_find_ext_capability(const struct rte_pci_device * dev,uint32_t cap)862bd75b496SDavid Marchand rte_pci_find_ext_capability(const struct rte_pci_device *dev, uint32_t cap)
863e00d2b4cSManish Chopra {
864e00d2b4cSManish Chopra 	off_t offset = RTE_PCI_CFG_SPACE_SIZE;
865e00d2b4cSManish Chopra 	uint32_t header;
866e00d2b4cSManish Chopra 	int ttl;
867e00d2b4cSManish Chopra 
868e00d2b4cSManish Chopra 	/* minimum 8 bytes per capability */
869e00d2b4cSManish Chopra 	ttl = (RTE_PCI_CFG_SPACE_EXP_SIZE - RTE_PCI_CFG_SPACE_SIZE) / 8;
870e00d2b4cSManish Chopra 
871e00d2b4cSManish Chopra 	if (rte_pci_read_config(dev, &header, 4, offset) < 0) {
872*849f773bSDavid Marchand 		PCI_LOG(ERR, "error in reading extended capabilities");
873e00d2b4cSManish Chopra 		return -1;
874e00d2b4cSManish Chopra 	}
875e00d2b4cSManish Chopra 
876e00d2b4cSManish Chopra 	/*
877e00d2b4cSManish Chopra 	 * If we have no capabilities, this is indicated by cap ID,
878e00d2b4cSManish Chopra 	 * cap version and next pointer all being 0.
879e00d2b4cSManish Chopra 	 */
880e00d2b4cSManish Chopra 	if (header == 0)
881e00d2b4cSManish Chopra 		return 0;
882e00d2b4cSManish Chopra 
883e00d2b4cSManish Chopra 	while (ttl != 0) {
884e00d2b4cSManish Chopra 		if (RTE_PCI_EXT_CAP_ID(header) == cap)
885e00d2b4cSManish Chopra 			return offset;
886e00d2b4cSManish Chopra 
887e00d2b4cSManish Chopra 		offset = RTE_PCI_EXT_CAP_NEXT(header);
888e00d2b4cSManish Chopra 
889e00d2b4cSManish Chopra 		if (offset < RTE_PCI_CFG_SPACE_SIZE)
890e00d2b4cSManish Chopra 			break;
891e00d2b4cSManish Chopra 
892e00d2b4cSManish Chopra 		if (rte_pci_read_config(dev, &header, 4, offset) < 0) {
893*849f773bSDavid Marchand 			PCI_LOG(ERR, "error in reading extended capabilities");
894e00d2b4cSManish Chopra 			return -1;
895e00d2b4cSManish Chopra 		}
896e00d2b4cSManish Chopra 
897e00d2b4cSManish Chopra 		ttl--;
898e00d2b4cSManish Chopra 	}
899e00d2b4cSManish Chopra 
900e00d2b4cSManish Chopra 	return 0;
901e00d2b4cSManish Chopra }
902e00d2b4cSManish Chopra 
90321f6adecSHaiyue Wang int
rte_pci_set_bus_master(const struct rte_pci_device * dev,bool enable)904bd75b496SDavid Marchand rte_pci_set_bus_master(const struct rte_pci_device *dev, bool enable)
90521f6adecSHaiyue Wang {
90621f6adecSHaiyue Wang 	uint16_t old_cmd, cmd;
90721f6adecSHaiyue Wang 
90821f6adecSHaiyue Wang 	if (rte_pci_read_config(dev, &old_cmd, sizeof(old_cmd),
90921f6adecSHaiyue Wang 				RTE_PCI_COMMAND) < 0) {
910*849f773bSDavid Marchand 		PCI_LOG(ERR, "error in reading PCI command register");
91121f6adecSHaiyue Wang 		return -1;
91221f6adecSHaiyue Wang 	}
91321f6adecSHaiyue Wang 
91421f6adecSHaiyue Wang 	if (enable)
91521f6adecSHaiyue Wang 		cmd = old_cmd | RTE_PCI_COMMAND_MASTER;
91621f6adecSHaiyue Wang 	else
91721f6adecSHaiyue Wang 		cmd = old_cmd & ~RTE_PCI_COMMAND_MASTER;
91821f6adecSHaiyue Wang 
91921f6adecSHaiyue Wang 	if (cmd == old_cmd)
92021f6adecSHaiyue Wang 		return 0;
92121f6adecSHaiyue Wang 
92221f6adecSHaiyue Wang 	if (rte_pci_write_config(dev, &cmd, sizeof(cmd),
92321f6adecSHaiyue Wang 				 RTE_PCI_COMMAND) < 0) {
924*849f773bSDavid Marchand 		PCI_LOG(ERR, "error in writing PCI command register");
92521f6adecSHaiyue Wang 		return -1;
92621f6adecSHaiyue Wang 	}
92721f6adecSHaiyue Wang 
92821f6adecSHaiyue Wang 	return 0;
92921f6adecSHaiyue Wang }
93021f6adecSHaiyue Wang 
93160ea1960SAbdullah Sevincer int
rte_pci_pasid_set_state(const struct rte_pci_device * dev,off_t offset,bool enable)93260ea1960SAbdullah Sevincer rte_pci_pasid_set_state(const struct rte_pci_device *dev,
93360ea1960SAbdullah Sevincer 		off_t offset, bool enable)
93460ea1960SAbdullah Sevincer {
93560ea1960SAbdullah Sevincer 	uint16_t pasid = enable;
936baaa446dSAbdullah Sevincer 	return rte_pci_write_config(dev, &pasid, sizeof(pasid),
937baaa446dSAbdullah Sevincer 			offset + RTE_PCI_PASID_CTRL) != sizeof(pasid) ? -1 : 0;
93860ea1960SAbdullah Sevincer }
93960ea1960SAbdullah Sevincer 
940c752998bSGaetan Rivet struct rte_pci_bus rte_pci_bus = {
941c752998bSGaetan Rivet 	.bus = {
942c752998bSGaetan Rivet 		.scan = rte_pci_scan,
94387db93e0SDavid Marchand 		.probe = pci_probe,
9441cab1a40SKevin Laatz 		.cleanup = pci_cleanup,
945c752998bSGaetan Rivet 		.find_device = pci_find_device,
946c752998bSGaetan Rivet 		.plug = pci_plug,
947c752998bSGaetan Rivet 		.unplug = pci_unplug,
948c752998bSGaetan Rivet 		.parse = pci_parse,
949d2a66ad7SXueming Li 		.devargs_parse = rte_pci_devargs_parse,
950c33a675bSShahaf Shuler 		.dma_map = pci_dma_map,
951c33a675bSShahaf Shuler 		.dma_unmap = pci_dma_unmap,
952c752998bSGaetan Rivet 		.get_iommu_class = rte_pci_get_iommu_class,
95346521ca2SGaetan Rivet 		.dev_iterate = rte_pci_dev_iterate,
954b01dc3daSJeff Guo 		.hot_unplug_handler = pci_hot_unplug_handler,
9555c96a299SJeff Guo 		.sigbus_handler = pci_sigbus_handler,
956c752998bSGaetan Rivet 	},
957c752998bSGaetan Rivet 	.device_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.device_list),
958c752998bSGaetan Rivet 	.driver_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.driver_list),
959c752998bSGaetan Rivet };
960c752998bSGaetan Rivet 
961c752998bSGaetan Rivet RTE_REGISTER_BUS(pci, rte_pci_bus.bus);
962*849f773bSDavid Marchand RTE_LOG_REGISTER_DEFAULT(pci_bus_logtype, NOTICE);
963