xref: /spdk/lib/env_dpdk/pci.c (revision ceea3088870a3919d6bdfe61d7adba11b9733fb7)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "env_internal.h"
35 
36 #include <rte_alarm.h>
37 #include <rte_devargs.h>
38 #include "spdk/env.h"
39 #include "spdk/log.h"
40 
/* sysfs directory that lists the PCI drivers known to the kernel. */
#define SYSFS_PCI_DRIVERS	"/sys/bus/pci/drivers"

/* Offset at which spdk_pci_device_get_serial_number() starts walking the
 * extended capability list (i.e. the first byte past the 256-byte
 * configuration header).
 */
#define PCI_CFG_SIZE		256
/* Extended capability ID that spdk_pci_device_get_serial_number() looks for. */
#define PCI_EXT_CAP_ID_SN	0x03

/* DPDK 18.11+ hotplug isn't robust. Multiple apps starting at the same time
 * might cause the internal IPC to misbehave. Just retry in such case.
 *
 * This is the number of *additional* attempts made after a hotplug request
 * fails with -ENOMSG.
 */
#define DPDK_HOTPLUG_RETRY_COUNT 4
50 
/* Protects the device lists below and the per-device internal state flags
 * (attached / pending_removal / removed). It is taken both by the public
 * API paths and by the callbacks handed to DPDK (probe/remove and the
 * device event callback) - presumably those run on the DPDK
 * alarm/interrupt thread; confirm against the DPDK documentation.
 */
static pthread_mutex_t g_pci_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Devices known to SPDK. Entries are merged in from
 * g_pci_hotplugged_devices and freed by cleanup_pci_devices().
 */
static TAILQ_HEAD(, spdk_pci_device) g_pci_devices = TAILQ_HEAD_INITIALIZER(g_pci_devices);
/* devices hotplugged on a dpdk thread; queued by pci_device_init() until
 * the next cleanup_pci_devices() call moves them to g_pci_devices.
 */
static TAILQ_HEAD(, spdk_pci_device) g_pci_hotplugged_devices =
	TAILQ_HEAD_INITIALIZER(g_pci_hotplugged_devices);
/* Drivers added through spdk_pci_driver_register(). */
static TAILQ_HEAD(, spdk_pci_driver) g_pci_drivers = TAILQ_HEAD_INITIALIZER(g_pci_drivers);
58 
59 static int
60 map_bar_rte(struct spdk_pci_device *device, uint32_t bar,
61 	    void **mapped_addr, uint64_t *phys_addr, uint64_t *size)
62 {
63 	struct rte_pci_device *dev = device->dev_handle;
64 
65 	*mapped_addr = dev->mem_resource[bar].addr;
66 	*phys_addr = (uint64_t)dev->mem_resource[bar].phys_addr;
67 	*size = (uint64_t)dev->mem_resource[bar].len;
68 
69 	return 0;
70 }
71 
/*
 * unmap_bar callback for DPDK-backed devices.
 *
 * Nothing is undone here; the function ignores its arguments and reports
 * success.
 */
static int
unmap_bar_rte(struct spdk_pci_device *device, uint32_t bar, void *addr)
{
	const int status = 0;

	return status;
}
77 
78 static int
79 cfg_read_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
80 {
81 	int rc;
82 
83 	rc = rte_pci_read_config(dev->dev_handle, value, len, offset);
84 
85 	return (rc > 0 && (uint32_t) rc == len) ? 0 : -1;
86 }
87 
88 static int
89 cfg_write_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
90 {
91 	int rc;
92 
93 	rc = rte_pci_write_config(dev->dev_handle, value, len, offset);
94 
95 #ifdef __FreeBSD__
96 	/* DPDK returns 0 on success and -1 on failure */
97 	return rc;
98 #endif
99 	return (rc > 0 && (uint32_t) rc == len) ? 0 : -1;
100 }
101 
102 static void
103 remove_rte_dev(struct rte_pci_device *rte_dev)
104 {
105 	char bdf[32];
106 	int i = 0, rc;
107 
108 	snprintf(bdf, sizeof(bdf), "%s", rte_dev->device.name);
109 	do {
110 		rc = rte_eal_hotplug_remove("pci", bdf);
111 	} while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT);
112 }
113 
/*
 * Alarm callback scheduled by detach_rte(); `_dev` is the rte_pci_device
 * to remove.
 */
static void
detach_rte_cb(void *_dev)
{
	struct rte_pci_device *rte_dev = _dev;

	remove_rte_dev(rte_dev);
}
119 
120 static void
121 detach_rte(struct spdk_pci_device *dev)
122 {
123 	struct rte_pci_device *rte_dev = dev->dev_handle;
124 	int i;
125 	bool removed;
126 
127 	if (!spdk_process_is_primary()) {
128 		remove_rte_dev(rte_dev);
129 		return;
130 	}
131 
132 	pthread_mutex_lock(&g_pci_mutex);
133 	dev->internal.attached = false;
134 	/* prevent the hotremove notification from removing this device */
135 	dev->internal.pending_removal = true;
136 	pthread_mutex_unlock(&g_pci_mutex);
137 
138 	rte_eal_alarm_set(1, detach_rte_cb, rte_dev);
139 
140 	/* wait up to 2s for the cb to execute */
141 	for (i = 2000; i > 0; i--) {
142 
143 		spdk_delay_us(1000);
144 		pthread_mutex_lock(&g_pci_mutex);
145 		removed = dev->internal.removed;
146 		pthread_mutex_unlock(&g_pci_mutex);
147 
148 		if (removed) {
149 			break;
150 		}
151 	}
152 
153 	/* besides checking the removed flag, we also need to wait
154 	 * for the dpdk detach function to unwind, as it's doing some
155 	 * operations even after calling our detach callback. Simply
156 	 * cancel the alarm - if it started executing already, this
157 	 * call will block and wait for it to finish.
158 	 */
159 	rte_eal_alarm_cancel(detach_rte_cb, rte_dev);
160 
161 	/* the device could have been finally removed, so just check
162 	 * it again.
163 	 */
164 	pthread_mutex_lock(&g_pci_mutex);
165 	removed = dev->internal.removed;
166 	pthread_mutex_unlock(&g_pci_mutex);
167 	if (!removed) {
168 		SPDK_ERRLOG("Timeout waiting for DPDK to remove PCI device %s.\n",
169 			    rte_dev->name);
170 		/* If we reach this state, then the device couldn't be removed and most likely
171 		   a subsequent hot add of a device in the same BDF will fail */
172 	}
173 }
174 
175 void
176 spdk_pci_driver_register(const char *name, struct spdk_pci_id *id_table, uint32_t flags)
177 {
178 	struct spdk_pci_driver *driver;
179 
180 	driver = calloc(1, sizeof(*driver));
181 	if (!driver) {
182 		/* we can't do any better than bailing atm */
183 		return;
184 	}
185 
186 	driver->name = name;
187 	driver->id_table = id_table;
188 	driver->drv_flags = flags;
189 	TAILQ_INSERT_TAIL(&g_pci_drivers, driver, tailq);
190 }
191 
/* Look up the registered driver named "nvme"; NULL if there is none. */
struct spdk_pci_driver *
spdk_pci_nvme_get_driver(void)
{
	struct spdk_pci_driver *nvme_driver = spdk_pci_get_driver("nvme");

	return nvme_driver;
}
197 
198 struct spdk_pci_driver *
199 spdk_pci_get_driver(const char *name)
200 {
201 	struct spdk_pci_driver *driver;
202 
203 	TAILQ_FOREACH(driver, &g_pci_drivers, tailq) {
204 		if (strcmp(driver->name, name) == 0) {
205 			return driver;
206 		}
207 	}
208 
209 	return NULL;
210 }
211 
212 static void
213 pci_device_rte_dev_event(const char *device_name,
214 			 enum rte_dev_event_type event,
215 			 void *cb_arg)
216 {
217 	struct spdk_pci_device *dev;
218 	bool can_detach = false;
219 
220 	switch (event) {
221 	default:
222 	case RTE_DEV_EVENT_ADD:
223 		/* Nothing to do here yet. */
224 		break;
225 	case RTE_DEV_EVENT_REMOVE:
226 		pthread_mutex_lock(&g_pci_mutex);
227 		TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
228 			struct rte_pci_device *rte_dev = dev->dev_handle;
229 
230 			if (strcmp(rte_dev->name, device_name) == 0 &&
231 			    !dev->internal.pending_removal) {
232 				can_detach = !dev->internal.attached;
233 				/* prevent any further attaches */
234 				dev->internal.pending_removal = true;
235 				break;
236 			}
237 		}
238 		pthread_mutex_unlock(&g_pci_mutex);
239 
240 		if (dev != NULL && can_detach) {
241 			/* if device is not attached we can remove it right away.
242 			* Otherwise it will be removed at detach. */
243 			remove_rte_dev(dev->dev_handle);
244 		}
245 		break;
246 	}
247 }
248 
249 static void
250 cleanup_pci_devices(void)
251 {
252 	struct spdk_pci_device *dev, *tmp;
253 
254 	pthread_mutex_lock(&g_pci_mutex);
255 	/* cleanup removed devices */
256 	TAILQ_FOREACH_SAFE(dev, &g_pci_devices, internal.tailq, tmp) {
257 		if (!dev->internal.removed) {
258 			continue;
259 		}
260 
261 		vtophys_pci_device_removed(dev->dev_handle);
262 		TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq);
263 		free(dev);
264 	}
265 
266 	/* add newly-attached devices */
267 	TAILQ_FOREACH_SAFE(dev, &g_pci_hotplugged_devices, internal.tailq, tmp) {
268 		TAILQ_REMOVE(&g_pci_hotplugged_devices, dev, internal.tailq);
269 		TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq);
270 		vtophys_pci_device_added(dev->dev_handle);
271 	}
272 	pthread_mutex_unlock(&g_pci_mutex);
273 }
274 
/* Forward declaration: scan_pci_bus() is defined further down in this file
 * but is already needed by _pci_env_init().
 */
static int scan_pci_bus(bool delay_init);
276 
277 /* translate spdk_pci_driver to an rte_pci_driver and register it to dpdk */
278 static int
279 register_rte_driver(struct spdk_pci_driver *driver)
280 {
281 	unsigned pci_id_count = 0;
282 	struct rte_pci_id *rte_id_table;
283 	char *rte_name;
284 	size_t rte_name_len;
285 	uint32_t rte_flags;
286 
287 	assert(driver->id_table);
288 	while (driver->id_table[pci_id_count].vendor_id) {
289 		pci_id_count++;
290 	}
291 	assert(pci_id_count > 0);
292 
293 	rte_id_table = calloc(pci_id_count + 1, sizeof(*rte_id_table));
294 	if (!rte_id_table) {
295 		return -ENOMEM;
296 	}
297 
298 	while (pci_id_count > 0) {
299 		struct rte_pci_id *rte_id = &rte_id_table[pci_id_count - 1];
300 		const struct spdk_pci_id *spdk_id = &driver->id_table[pci_id_count - 1];
301 
302 		rte_id->class_id = spdk_id->class_id;
303 		rte_id->vendor_id = spdk_id->vendor_id;
304 		rte_id->device_id = spdk_id->device_id;
305 		rte_id->subsystem_vendor_id = spdk_id->subvendor_id;
306 		rte_id->subsystem_device_id = spdk_id->subdevice_id;
307 		pci_id_count--;
308 	}
309 
310 	assert(driver->name);
311 	rte_name_len = strlen(driver->name) + strlen("spdk_") + 1;
312 	rte_name = calloc(rte_name_len, 1);
313 	if (!rte_name) {
314 		free(rte_id_table);
315 		return -ENOMEM;
316 	}
317 
318 	snprintf(rte_name, rte_name_len, "spdk_%s", driver->name);
319 	driver->driver.driver.name = rte_name;
320 	driver->driver.id_table = rte_id_table;
321 
322 	rte_flags = 0;
323 	if (driver->drv_flags & SPDK_PCI_DRIVER_NEED_MAPPING) {
324 		rte_flags |= RTE_PCI_DRV_NEED_MAPPING;
325 	}
326 	if (driver->drv_flags & SPDK_PCI_DRIVER_WC_ACTIVATE) {
327 		rte_flags |= RTE_PCI_DRV_WC_ACTIVATE;
328 	}
329 	driver->driver.drv_flags = rte_flags;
330 
331 	driver->driver.probe = pci_device_init;
332 	driver->driver.remove = pci_device_fini;
333 
334 	rte_pci_register(&driver->driver);
335 	return 0;
336 }
337 
338 static inline void
339 _pci_env_init(void)
340 {
341 	/* We assume devices were present on the bus for more than 2 seconds
342 	 * before initializing SPDK and there's no need to wait more. We scan
343 	 * the bus, but we don't blacklist any devices.
344 	 */
345 	scan_pci_bus(false);
346 
347 	/* Register a single hotremove callback for all devices. */
348 	if (spdk_process_is_primary()) {
349 		rte_dev_event_callback_register(NULL, pci_device_rte_dev_event, NULL);
350 	}
351 }
352 
353 void
354 pci_env_init(void)
355 {
356 	struct spdk_pci_driver *driver;
357 
358 	TAILQ_FOREACH(driver, &g_pci_drivers, tailq) {
359 		register_rte_driver(driver);
360 	}
361 
362 	_pci_env_init();
363 }
364 
/*
 * Re-run the common PCI environment setup.
 *
 * Drivers are not registered again here: that already happened in
 * pci_env_init().
 */
void
pci_env_reinit(void)
{
	_pci_env_init();
}
374 
375 void
376 pci_env_fini(void)
377 {
378 	struct spdk_pci_device *dev;
379 	char bdf[32];
380 
381 	cleanup_pci_devices();
382 	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
383 		if (dev->internal.attached) {
384 			spdk_pci_addr_fmt(bdf, sizeof(bdf), &dev->addr);
385 			SPDK_ERRLOG("Device %s is still attached at shutdown!\n", bdf);
386 		}
387 	}
388 
389 	if (spdk_process_is_primary()) {
390 		rte_dev_event_callback_unregister(NULL, pci_device_rte_dev_event, NULL);
391 	}
392 }
393 
394 int
395 pci_device_init(struct rte_pci_driver *_drv,
396 		struct rte_pci_device *_dev)
397 {
398 	struct spdk_pci_driver *driver = (struct spdk_pci_driver *)_drv;
399 	struct spdk_pci_device *dev;
400 	int rc;
401 
402 	dev = calloc(1, sizeof(*dev));
403 	if (dev == NULL) {
404 		return -1;
405 	}
406 
407 	dev->dev_handle = _dev;
408 
409 	dev->addr.domain = _dev->addr.domain;
410 	dev->addr.bus = _dev->addr.bus;
411 	dev->addr.dev = _dev->addr.devid;
412 	dev->addr.func = _dev->addr.function;
413 	dev->id.class_id = _dev->id.class_id;
414 	dev->id.vendor_id = _dev->id.vendor_id;
415 	dev->id.device_id = _dev->id.device_id;
416 	dev->id.subvendor_id = _dev->id.subsystem_vendor_id;
417 	dev->id.subdevice_id = _dev->id.subsystem_device_id;
418 	dev->socket_id = _dev->device.numa_node;
419 	dev->type = "pci";
420 
421 	dev->map_bar = map_bar_rte;
422 	dev->unmap_bar = unmap_bar_rte;
423 	dev->cfg_read = cfg_read_rte;
424 	dev->cfg_write = cfg_write_rte;
425 
426 	dev->internal.driver = driver;
427 	dev->internal.claim_fd = -1;
428 
429 	if (driver->cb_fn != NULL) {
430 		rc = driver->cb_fn(driver->cb_arg, dev);
431 		if (rc != 0) {
432 			free(dev);
433 			return rc;
434 		}
435 		dev->internal.attached = true;
436 	}
437 
438 	pthread_mutex_lock(&g_pci_mutex);
439 	TAILQ_INSERT_TAIL(&g_pci_hotplugged_devices, dev, internal.tailq);
440 	pthread_mutex_unlock(&g_pci_mutex);
441 	return 0;
442 }
443 
444 int
445 pci_device_fini(struct rte_pci_device *_dev)
446 {
447 	struct spdk_pci_device *dev;
448 
449 	pthread_mutex_lock(&g_pci_mutex);
450 	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
451 		if (dev->dev_handle == _dev) {
452 			break;
453 		}
454 	}
455 
456 	if (dev == NULL || dev->internal.attached) {
457 		/* The device might be still referenced somewhere in SPDK. */
458 		pthread_mutex_unlock(&g_pci_mutex);
459 		return -1;
460 	}
461 
462 	/* remove our whitelist_at option */
463 	if (_dev->device.devargs) {
464 		_dev->device.devargs->data = NULL;
465 	}
466 
467 	assert(!dev->internal.removed);
468 	dev->internal.removed = true;
469 	pthread_mutex_unlock(&g_pci_mutex);
470 	return 0;
471 
472 }
473 
474 void
475 spdk_pci_device_detach(struct spdk_pci_device *dev)
476 {
477 	assert(dev->internal.attached);
478 
479 	if (dev->internal.claim_fd >= 0) {
480 		spdk_pci_device_unclaim(dev);
481 	}
482 
483 	if (strcmp(dev->type, "pci") == 0) {
484 		/* if it's a physical device we need to deal with DPDK on
485 		 * a different process and we can't just unset one flag
486 		 * here. We also want to stop using any device resources
487 		 * so that the device isn't "in use" by the userspace driver
488 		 * once we detach it. This would allow attaching the device
489 		 * to a different process, or to a kernel driver like nvme.
490 		 */
491 		detach_rte(dev);
492 	} else {
493 		dev->internal.attached = false;
494 	}
495 
496 	cleanup_pci_devices();
497 }
498 
499 static int
500 scan_pci_bus(bool delay_init)
501 {
502 	struct spdk_pci_driver *driver;
503 	struct rte_pci_device *rte_dev;
504 	uint64_t now;
505 
506 	rte_bus_scan();
507 	now = spdk_get_ticks();
508 
509 	driver = TAILQ_FIRST(&g_pci_drivers);
510 	if (!driver) {
511 		return 0;
512 	}
513 
514 	TAILQ_FOREACH(rte_dev, &driver->driver.bus->device_list, next) {
515 		struct rte_devargs *da;
516 
517 		da = rte_dev->device.devargs;
518 		if (!da) {
519 			char devargs_str[128];
520 
521 			/* the device was never blacklisted or whitelisted */
522 			da = calloc(1, sizeof(*da));
523 			if (!da) {
524 				return -1;
525 			}
526 
527 			snprintf(devargs_str, sizeof(devargs_str), "pci:%s", rte_dev->device.name);
528 			if (rte_devargs_parse(da, devargs_str) != 0) {
529 				free(da);
530 				return -1;
531 			}
532 
533 			rte_devargs_insert(&da);
534 			rte_dev->device.devargs = da;
535 		}
536 
537 		if (da->data) {
538 			uint64_t whitelist_at = (uint64_t)(uintptr_t)da->data;
539 
540 			/* this device was seen by spdk before... */
541 			if (da->policy == RTE_DEV_BLACKLISTED && whitelist_at <= now) {
542 				da->policy = RTE_DEV_WHITELISTED;
543 			}
544 		} else if ((driver->driver.bus->bus.conf.scan_mode == RTE_BUS_SCAN_WHITELIST &&
545 			    da->policy == RTE_DEV_WHITELISTED) || da->policy != RTE_DEV_BLACKLISTED) {
546 			/* override the policy only if not permanently blacklisted */
547 
548 			if (delay_init) {
549 				da->policy = RTE_DEV_BLACKLISTED;
550 				da->data = (void *)(now + 2 * spdk_get_ticks_hz());
551 			} else {
552 				da->policy = RTE_DEV_WHITELISTED;
553 				da->data = (void *)(uintptr_t)now;
554 			}
555 		}
556 	}
557 
558 	return 0;
559 }
560 
561 int
562 spdk_pci_device_attach(struct spdk_pci_driver *driver,
563 		       spdk_pci_enum_cb enum_cb,
564 		       void *enum_ctx, struct spdk_pci_addr *pci_address)
565 {
566 	struct spdk_pci_device *dev;
567 	struct rte_pci_device *rte_dev;
568 	struct rte_devargs *da;
569 	int rc;
570 	char bdf[32];
571 
572 	spdk_pci_addr_fmt(bdf, sizeof(bdf), pci_address);
573 
574 	cleanup_pci_devices();
575 
576 	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
577 		if (spdk_pci_addr_compare(&dev->addr, pci_address) == 0) {
578 			break;
579 		}
580 	}
581 
582 	if (dev != NULL && dev->internal.driver == driver) {
583 		pthread_mutex_lock(&g_pci_mutex);
584 		if (dev->internal.attached || dev->internal.pending_removal) {
585 			pthread_mutex_unlock(&g_pci_mutex);
586 			return -1;
587 		}
588 
589 		rc = enum_cb(enum_ctx, dev);
590 		if (rc == 0) {
591 			dev->internal.attached = true;
592 		}
593 		pthread_mutex_unlock(&g_pci_mutex);
594 		return rc;
595 	}
596 
597 	driver->cb_fn = enum_cb;
598 	driver->cb_arg = enum_ctx;
599 
600 	int i = 0;
601 
602 	do {
603 		rc = rte_eal_hotplug_add("pci", bdf, "");
604 	} while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT);
605 
606 	if (i > 1 && rc == -EEXIST) {
607 		/* Even though the previous request timed out, the device
608 		 * was attached successfully.
609 		 */
610 		rc = 0;
611 	}
612 
613 	driver->cb_arg = NULL;
614 	driver->cb_fn = NULL;
615 
616 	cleanup_pci_devices();
617 
618 	if (rc != 0) {
619 		return -1;
620 	}
621 
622 	/* explicit attach ignores the whitelist, so if we blacklisted this
623 	 * device before let's enable it now - just for clarity.
624 	 */
625 	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
626 		if (spdk_pci_addr_compare(&dev->addr, pci_address) == 0) {
627 			break;
628 		}
629 	}
630 	assert(dev != NULL);
631 
632 	rte_dev = dev->dev_handle;
633 	da = rte_dev->device.devargs;
634 	if (da && da->data) {
635 		da->data = (void *)(uintptr_t)spdk_get_ticks();
636 		da->policy = RTE_DEV_WHITELISTED;
637 	}
638 
639 	return 0;
640 }
641 
642 /* Note: You can call spdk_pci_enumerate from more than one thread
643  *       simultaneously safely, but you cannot call spdk_pci_enumerate
644  *       and rte_eal_pci_probe simultaneously.
645  */
646 int
647 spdk_pci_enumerate(struct spdk_pci_driver *driver,
648 		   spdk_pci_enum_cb enum_cb,
649 		   void *enum_ctx)
650 {
651 	struct spdk_pci_device *dev;
652 	int rc;
653 
654 	cleanup_pci_devices();
655 
656 	pthread_mutex_lock(&g_pci_mutex);
657 	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
658 		if (dev->internal.attached ||
659 		    dev->internal.driver != driver ||
660 		    dev->internal.pending_removal) {
661 			continue;
662 		}
663 
664 		rc = enum_cb(enum_ctx, dev);
665 		if (rc == 0) {
666 			dev->internal.attached = true;
667 		} else if (rc < 0) {
668 			pthread_mutex_unlock(&g_pci_mutex);
669 			return -1;
670 		}
671 	}
672 	pthread_mutex_unlock(&g_pci_mutex);
673 
674 	if (scan_pci_bus(true) != 0) {
675 		return -1;
676 	}
677 
678 	driver->cb_fn = enum_cb;
679 	driver->cb_arg = enum_ctx;
680 
681 	if (rte_bus_probe() != 0) {
682 		driver->cb_arg = NULL;
683 		driver->cb_fn = NULL;
684 		return -1;
685 	}
686 
687 	driver->cb_arg = NULL;
688 	driver->cb_fn = NULL;
689 
690 	cleanup_pci_devices();
691 	return 0;
692 }
693 
694 struct spdk_pci_device *
695 spdk_pci_get_first_device(void)
696 {
697 	return TAILQ_FIRST(&g_pci_devices);
698 }
699 
700 struct spdk_pci_device *
701 spdk_pci_get_next_device(struct spdk_pci_device *prev)
702 {
703 	return TAILQ_NEXT(prev, internal.tailq);
704 }
705 
706 int
707 spdk_pci_device_map_bar(struct spdk_pci_device *dev, uint32_t bar,
708 			void **mapped_addr, uint64_t *phys_addr, uint64_t *size)
709 {
710 	return dev->map_bar(dev, bar, mapped_addr, phys_addr, size);
711 }
712 
713 int
714 spdk_pci_device_unmap_bar(struct spdk_pci_device *dev, uint32_t bar, void *addr)
715 {
716 	return dev->unmap_bar(dev, bar, addr);
717 }
718 
719 uint32_t
720 spdk_pci_device_get_domain(struct spdk_pci_device *dev)
721 {
722 	return dev->addr.domain;
723 }
724 
725 uint8_t
726 spdk_pci_device_get_bus(struct spdk_pci_device *dev)
727 {
728 	return dev->addr.bus;
729 }
730 
731 uint8_t
732 spdk_pci_device_get_dev(struct spdk_pci_device *dev)
733 {
734 	return dev->addr.dev;
735 }
736 
737 uint8_t
738 spdk_pci_device_get_func(struct spdk_pci_device *dev)
739 {
740 	return dev->addr.func;
741 }
742 
743 uint16_t
744 spdk_pci_device_get_vendor_id(struct spdk_pci_device *dev)
745 {
746 	return dev->id.vendor_id;
747 }
748 
749 uint16_t
750 spdk_pci_device_get_device_id(struct spdk_pci_device *dev)
751 {
752 	return dev->id.device_id;
753 }
754 
755 uint16_t
756 spdk_pci_device_get_subvendor_id(struct spdk_pci_device *dev)
757 {
758 	return dev->id.subvendor_id;
759 }
760 
761 uint16_t
762 spdk_pci_device_get_subdevice_id(struct spdk_pci_device *dev)
763 {
764 	return dev->id.subdevice_id;
765 }
766 
767 struct spdk_pci_id
768 spdk_pci_device_get_id(struct spdk_pci_device *dev)
769 {
770 	return dev->id;
771 }
772 
773 int
774 spdk_pci_device_get_socket_id(struct spdk_pci_device *dev)
775 {
776 	return dev->socket_id;
777 }
778 
779 int
780 spdk_pci_device_cfg_read(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
781 {
782 	return dev->cfg_read(dev, value, len, offset);
783 }
784 
785 int
786 spdk_pci_device_cfg_write(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
787 {
788 	return dev->cfg_write(dev, value, len, offset);
789 }
790 
/* Read one byte of config space at `offset` into `*value`. */
int
spdk_pci_device_cfg_read8(struct spdk_pci_device *dev, uint8_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, sizeof(*value), offset);
}
796 
/* Write the single byte `value` to config space at `offset`. */
int
spdk_pci_device_cfg_write8(struct spdk_pci_device *dev, uint8_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, sizeof(value), offset);
}
802 
/* Read two bytes of config space at `offset` into `*value`. */
int
spdk_pci_device_cfg_read16(struct spdk_pci_device *dev, uint16_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, sizeof(*value), offset);
}
808 
/* Write the two-byte `value` to config space at `offset`. */
int
spdk_pci_device_cfg_write16(struct spdk_pci_device *dev, uint16_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, sizeof(value), offset);
}
814 
/* Read four bytes of config space at `offset` into `*value`. */
int
spdk_pci_device_cfg_read32(struct spdk_pci_device *dev, uint32_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, sizeof(*value), offset);
}
820 
/* Write the four-byte `value` to config space at `offset`. */
int
spdk_pci_device_cfg_write32(struct spdk_pci_device *dev, uint32_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, sizeof(value), offset);
}
826 
827 int
828 spdk_pci_device_get_serial_number(struct spdk_pci_device *dev, char *sn, size_t len)
829 {
830 	int err;
831 	uint32_t pos, header = 0;
832 	uint32_t i, buf[2];
833 
834 	if (len < 17) {
835 		return -1;
836 	}
837 
838 	err = spdk_pci_device_cfg_read32(dev, &header, PCI_CFG_SIZE);
839 	if (err || !header) {
840 		return -1;
841 	}
842 
843 	pos = PCI_CFG_SIZE;
844 	while (1) {
845 		if ((header & 0x0000ffff) == PCI_EXT_CAP_ID_SN) {
846 			if (pos) {
847 				/* skip the header */
848 				pos += 4;
849 				for (i = 0; i < 2; i++) {
850 					err = spdk_pci_device_cfg_read32(dev, &buf[i], pos + 4 * i);
851 					if (err) {
852 						return -1;
853 					}
854 				}
855 				snprintf(sn, len, "%08x%08x", buf[1], buf[0]);
856 				return 0;
857 			}
858 		}
859 		pos = (header >> 20) & 0xffc;
860 		/* 0 if no other items exist */
861 		if (pos < PCI_CFG_SIZE) {
862 			return -1;
863 		}
864 		err = spdk_pci_device_cfg_read32(dev, &header, pos);
865 		if (err) {
866 			return -1;
867 		}
868 	}
869 	return -1;
870 }
871 
872 struct spdk_pci_addr
873 spdk_pci_device_get_addr(struct spdk_pci_device *dev)
874 {
875 	return dev->addr;
876 }
877 
878 bool
879 spdk_pci_device_is_removed(struct spdk_pci_device *dev)
880 {
881 	return dev->internal.pending_removal;
882 }
883 
884 int
885 spdk_pci_addr_compare(const struct spdk_pci_addr *a1, const struct spdk_pci_addr *a2)
886 {
887 	if (a1->domain > a2->domain) {
888 		return 1;
889 	} else if (a1->domain < a2->domain) {
890 		return -1;
891 	} else if (a1->bus > a2->bus) {
892 		return 1;
893 	} else if (a1->bus < a2->bus) {
894 		return -1;
895 	} else if (a1->dev > a2->dev) {
896 		return 1;
897 	} else if (a1->dev < a2->dev) {
898 		return -1;
899 	} else if (a1->func > a2->func) {
900 		return 1;
901 	} else if (a1->func < a2->func) {
902 		return -1;
903 	}
904 
905 	return 0;
906 }
907 
908 #ifdef __linux__
909 int
910 spdk_pci_device_claim(struct spdk_pci_device *dev)
911 {
912 	int dev_fd;
913 	char dev_name[64];
914 	int pid;
915 	void *dev_map;
916 	struct flock pcidev_lock = {
917 		.l_type = F_WRLCK,
918 		.l_whence = SEEK_SET,
919 		.l_start = 0,
920 		.l_len = 0,
921 	};
922 
923 	snprintf(dev_name, sizeof(dev_name), "/tmp/spdk_pci_lock_%04x:%02x:%02x.%x",
924 		 dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func);
925 
926 	dev_fd = open(dev_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
927 	if (dev_fd == -1) {
928 		SPDK_ERRLOG("could not open %s\n", dev_name);
929 		return -errno;
930 	}
931 
932 	if (ftruncate(dev_fd, sizeof(int)) != 0) {
933 		SPDK_ERRLOG("could not truncate %s\n", dev_name);
934 		close(dev_fd);
935 		return -errno;
936 	}
937 
938 	dev_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
939 		       MAP_SHARED, dev_fd, 0);
940 	if (dev_map == MAP_FAILED) {
941 		SPDK_ERRLOG("could not mmap dev %s (%d)\n", dev_name, errno);
942 		close(dev_fd);
943 		return -errno;
944 	}
945 
946 	if (fcntl(dev_fd, F_SETLK, &pcidev_lock) != 0) {
947 		pid = *(int *)dev_map;
948 		SPDK_ERRLOG("Cannot create lock on device %s, probably"
949 			    " process %d has claimed it\n", dev_name, pid);
950 		munmap(dev_map, sizeof(int));
951 		close(dev_fd);
952 		/* F_SETLK returns unspecified errnos, normalize them */
953 		return -EACCES;
954 	}
955 
956 	*(int *)dev_map = (int)getpid();
957 	munmap(dev_map, sizeof(int));
958 	dev->internal.claim_fd = dev_fd;
959 	/* Keep dev_fd open to maintain the lock. */
960 	return 0;
961 }
962 
963 void
964 spdk_pci_device_unclaim(struct spdk_pci_device *dev)
965 {
966 	char dev_name[64];
967 
968 	snprintf(dev_name, sizeof(dev_name), "/tmp/spdk_pci_lock_%04x:%02x:%02x.%x",
969 		 dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func);
970 
971 	close(dev->internal.claim_fd);
972 	dev->internal.claim_fd = -1;
973 	unlink(dev_name);
974 }
975 #endif /* __linux__ */
976 
977 #ifdef __FreeBSD__
/* FreeBSD: claiming is not implemented; every call reports success. */
int
spdk_pci_device_claim(struct spdk_pci_device *dev)
{
	/* TODO */
	(void)dev;

	return 0;
}
984 
/* FreeBSD: nothing to release, since claiming is not implemented. */
void
spdk_pci_device_unclaim(struct spdk_pci_device *dev)
{
	/* TODO */
	(void)dev;
}
990 #endif /* __FreeBSD__ */
991 
992 int
993 spdk_pci_addr_parse(struct spdk_pci_addr *addr, const char *bdf)
994 {
995 	unsigned domain, bus, dev, func;
996 
997 	if (addr == NULL || bdf == NULL) {
998 		return -EINVAL;
999 	}
1000 
1001 	if ((sscanf(bdf, "%x:%x:%x.%x", &domain, &bus, &dev, &func) == 4) ||
1002 	    (sscanf(bdf, "%x.%x.%x.%x", &domain, &bus, &dev, &func) == 4)) {
1003 		/* Matched a full address - all variables are initialized */
1004 	} else if (sscanf(bdf, "%x:%x:%x", &domain, &bus, &dev) == 3) {
1005 		func = 0;
1006 	} else if ((sscanf(bdf, "%x:%x.%x", &bus, &dev, &func) == 3) ||
1007 		   (sscanf(bdf, "%x.%x.%x", &bus, &dev, &func) == 3)) {
1008 		domain = 0;
1009 	} else if ((sscanf(bdf, "%x:%x", &bus, &dev) == 2) ||
1010 		   (sscanf(bdf, "%x.%x", &bus, &dev) == 2)) {
1011 		domain = 0;
1012 		func = 0;
1013 	} else {
1014 		return -EINVAL;
1015 	}
1016 
1017 	if (bus > 0xFF || dev > 0x1F || func > 7) {
1018 		return -EINVAL;
1019 	}
1020 
1021 	addr->domain = domain;
1022 	addr->bus = bus;
1023 	addr->dev = dev;
1024 	addr->func = func;
1025 
1026 	return 0;
1027 }
1028 
1029 int
1030 spdk_pci_addr_fmt(char *bdf, size_t sz, const struct spdk_pci_addr *addr)
1031 {
1032 	int rc;
1033 
1034 	rc = snprintf(bdf, sz, "%04x:%02x:%02x.%x",
1035 		      addr->domain, addr->bus,
1036 		      addr->dev, addr->func);
1037 
1038 	if (rc > 0 && (size_t)rc < sz) {
1039 		return 0;
1040 	}
1041 
1042 	return -1;
1043 }
1044 
1045 void
1046 spdk_pci_hook_device(struct spdk_pci_driver *drv, struct spdk_pci_device *dev)
1047 {
1048 	assert(dev->map_bar != NULL);
1049 	assert(dev->unmap_bar != NULL);
1050 	assert(dev->cfg_read != NULL);
1051 	assert(dev->cfg_write != NULL);
1052 	dev->internal.driver = drv;
1053 	TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq);
1054 }
1055 
1056 void
1057 spdk_pci_unhook_device(struct spdk_pci_device *dev)
1058 {
1059 	assert(!dev->internal.attached);
1060 	TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq);
1061 }
1062 
1063 const char *
1064 spdk_pci_device_get_type(const struct spdk_pci_device *dev)
1065 {
1066 	return dev->type;
1067 }
1068