xref: /spdk/lib/env_dpdk/pci.c (revision 6a9e923da298dda69cbdaf73eed6fb0e85ec2ad2)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "env_internal.h"
35 
36 #include <rte_alarm.h>
37 #include <rte_devargs.h>
38 #include "spdk/env.h"
39 
40 #define SYSFS_PCI_DRIVERS	"/sys/bus/pci/drivers"
41 
42 #define PCI_CFG_SIZE		256
43 #define PCI_EXT_CAP_ID_SN	0x03
44 
45 /* DPDK 18.11+ hotplug isn't robust. Multiple apps starting at the same time
46  * might cause the internal IPC to misbehave. Just retry in such case.
47  */
48 #define DPDK_HOTPLUG_RETRY_COUNT 4
49 
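/* Guards the device lists below and the per-device internal state flags,
 * which are also accessed from callbacks running on the DPDK
 * alarm/interrupt thread.
 */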
51 static pthread_mutex_t g_pci_mutex = PTHREAD_MUTEX_INITIALIZER;
52 static TAILQ_HEAD(, spdk_pci_device) g_pci_devices = TAILQ_HEAD_INITIALIZER(g_pci_devices);
53 /* devices hotplugged on a dpdk thread */
54 static TAILQ_HEAD(, spdk_pci_device) g_pci_hotplugged_devices =
55 	TAILQ_HEAD_INITIALIZER(g_pci_hotplugged_devices);
56 static TAILQ_HEAD(, spdk_pci_driver) g_pci_drivers = TAILQ_HEAD_INITIALIZER(g_pci_drivers);
57 
58 static int
59 map_bar_rte(struct spdk_pci_device *device, uint32_t bar,
60 	    void **mapped_addr, uint64_t *phys_addr, uint64_t *size)
61 {
62 	struct rte_pci_device *dev = device->dev_handle;
63 
64 	*mapped_addr = dev->mem_resource[bar].addr;
65 	*phys_addr = (uint64_t)dev->mem_resource[bar].phys_addr;
66 	*size = (uint64_t)dev->mem_resource[bar].len;
67 
68 	return 0;
69 }
70 
/*
 * unmap_bar callback for DPDK-backed PCI devices.
 *
 * Intentionally a no-op: map_bar_rte() does not create a mapping of its
 * own, so there is nothing to release. Always returns 0.
 */
static int
unmap_bar_rte(struct spdk_pci_device *device, uint32_t bar, void *addr)
{
	(void)device;
	(void)bar;
	(void)addr;

	return 0;
}
76 
77 static int
78 cfg_read_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
79 {
80 	int rc;
81 
82 	rc = rte_pci_read_config(dev->dev_handle, value, len, offset);
83 
84 	return (rc > 0 && (uint32_t) rc == len) ? 0 : -1;
85 }
86 
87 static int
88 cfg_write_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
89 {
90 	int rc;
91 
92 	rc = rte_pci_write_config(dev->dev_handle, value, len, offset);
93 
94 #ifdef __FreeBSD__
95 	/* DPDK returns 0 on success and -1 on failure */
96 	return rc;
97 #endif
98 	return (rc > 0 && (uint32_t) rc == len) ? 0 : -1;
99 }
100 
101 static void
102 remove_rte_dev(struct rte_pci_device *rte_dev)
103 {
104 	char bdf[32];
105 	int i = 0, rc;
106 
107 	snprintf(bdf, sizeof(bdf), "%s", rte_dev->device.name);
108 	do {
109 		rc = rte_eal_hotplug_remove("pci", bdf);
110 	} while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT);
111 }
112 
/*
 * Alarm callback scheduled by detach_rte(); _dev is the rte_pci_device to
 * hot-remove.
 */
static void
detach_rte_cb(void *_dev)
{
	struct rte_pci_device *rte_dev = _dev;

	remove_rte_dev(rte_dev);
}
118 
119 static void
120 detach_rte(struct spdk_pci_device *dev)
121 {
122 	struct rte_pci_device *rte_dev = dev->dev_handle;
123 	int i;
124 	bool removed;
125 
126 	if (!spdk_process_is_primary()) {
127 		remove_rte_dev(rte_dev);
128 		return;
129 	}
130 
131 	pthread_mutex_lock(&g_pci_mutex);
132 	dev->internal.attached = false;
133 	/* prevent the hotremove notification from removing this device */
134 	dev->internal.pending_removal = true;
135 	pthread_mutex_unlock(&g_pci_mutex);
136 
137 	rte_eal_alarm_set(1, detach_rte_cb, rte_dev);
138 
139 	/* wait up to 2s for the cb to execute */
140 	for (i = 2000; i > 0; i--) {
141 
142 		spdk_delay_us(1000);
143 		pthread_mutex_lock(&g_pci_mutex);
144 		removed = dev->internal.removed;
145 		pthread_mutex_unlock(&g_pci_mutex);
146 
147 		if (removed) {
148 			break;
149 		}
150 	}
151 
152 	/* besides checking the removed flag, we also need to wait
153 	 * for the dpdk detach function to unwind, as it's doing some
154 	 * operations even after calling our detach callback. Simply
155 	 * cancel the alarm - if it started executing already, this
156 	 * call will block and wait for it to finish.
157 	 */
158 	rte_eal_alarm_cancel(detach_rte_cb, rte_dev);
159 
160 	/* the device could have been finally removed, so just check
161 	 * it again.
162 	 */
163 	pthread_mutex_lock(&g_pci_mutex);
164 	removed = dev->internal.removed;
165 	pthread_mutex_unlock(&g_pci_mutex);
166 	if (!removed) {
167 		fprintf(stderr, "Timeout waiting for DPDK to remove PCI device %s.\n",
168 			rte_dev->name);
169 		/* If we reach this state, then the device couldn't be removed and most likely
170 		   a subsequent hot add of a device in the same BDF will fail */
171 	}
172 }
173 
174 void
175 pci_driver_register(struct spdk_pci_driver *driver)
176 {
177 	TAILQ_INSERT_TAIL(&g_pci_drivers, driver, tailq);
178 }
179 
180 static void
181 pci_device_rte_hotremove(const char *device_name,
182 			 enum rte_dev_event_type event,
183 			 void *cb_arg)
184 {
185 	struct spdk_pci_device *dev;
186 	bool can_detach = false;
187 
188 	if (event != RTE_DEV_EVENT_REMOVE) {
189 		return;
190 	}
191 
192 	pthread_mutex_lock(&g_pci_mutex);
193 	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
194 		struct rte_pci_device *rte_dev = dev->dev_handle;
195 
196 		if (strcmp(rte_dev->name, device_name) == 0 &&
197 		    !dev->internal.pending_removal) {
198 			can_detach = !dev->internal.attached;
199 			/* prevent any further attaches */
200 			dev->internal.pending_removal = true;
201 			break;
202 		}
203 	}
204 	pthread_mutex_unlock(&g_pci_mutex);
205 
206 	if (dev != NULL && can_detach) {
207 		/* if device is not attached we can remove it right away.
208 		 * Otherwise it will be removed at detach.
209 		 */
210 		remove_rte_dev(dev->dev_handle);
211 	}
212 }
213 
214 static void
215 cleanup_pci_devices(void)
216 {
217 	struct spdk_pci_device *dev, *tmp;
218 
219 	pthread_mutex_lock(&g_pci_mutex);
220 	/* cleanup removed devices */
221 	TAILQ_FOREACH_SAFE(dev, &g_pci_devices, internal.tailq, tmp) {
222 		if (!dev->internal.removed) {
223 			continue;
224 		}
225 
226 		vtophys_pci_device_removed(dev->dev_handle);
227 		TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq);
228 		free(dev);
229 	}
230 
231 	/* add newly-attached devices */
232 	TAILQ_FOREACH_SAFE(dev, &g_pci_hotplugged_devices, internal.tailq, tmp) {
233 		TAILQ_REMOVE(&g_pci_hotplugged_devices, dev, internal.tailq);
234 		TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq);
235 		vtophys_pci_device_added(dev->dev_handle);
236 	}
237 	pthread_mutex_unlock(&g_pci_mutex);
238 }
239 
240 static int scan_pci_bus(bool delay_init);
241 
242 void
243 pci_env_init(void)
244 {
245 	struct spdk_pci_driver *driver;
246 
247 	TAILQ_FOREACH(driver, &g_pci_drivers, tailq) {
248 		rte_pci_register(&driver->driver);
249 	}
250 
251 	/* We assume devices were present on the bus for more than 2 seconds
252 	 * before initializing SPDK and there's no need to wait more. We scan
253 	 * the bus, but we don't blacklist any devices.
254 	 */
255 	scan_pci_bus(false);
256 
257 	/* Register a single hotremove callback for all devices. */
258 	if (spdk_process_is_primary()) {
259 		rte_dev_event_callback_register(NULL, pci_device_rte_hotremove, NULL);
260 	}
261 }
262 
263 void
264 pci_env_fini(void)
265 {
266 	struct spdk_pci_device *dev;
267 	char bdf[32];
268 
269 	cleanup_pci_devices();
270 	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
271 		if (dev->internal.attached) {
272 			spdk_pci_addr_fmt(bdf, sizeof(bdf), &dev->addr);
273 			fprintf(stderr, "Device %s is still attached at shutdown!\n", bdf);
274 		}
275 	}
276 
277 	if (spdk_process_is_primary()) {
278 		rte_dev_event_callback_unregister(NULL, pci_device_rte_hotremove, NULL);
279 	}
280 }
281 
282 int
283 pci_device_init(struct rte_pci_driver *_drv,
284 		struct rte_pci_device *_dev)
285 {
286 	struct spdk_pci_driver *driver = (struct spdk_pci_driver *)_drv;
287 	struct spdk_pci_device *dev;
288 	int rc;
289 
290 	dev = calloc(1, sizeof(*dev));
291 	if (dev == NULL) {
292 		return -1;
293 	}
294 
295 	dev->dev_handle = _dev;
296 
297 	dev->addr.domain = _dev->addr.domain;
298 	dev->addr.bus = _dev->addr.bus;
299 	dev->addr.dev = _dev->addr.devid;
300 	dev->addr.func = _dev->addr.function;
301 	dev->id.class_id = _dev->id.class_id;
302 	dev->id.vendor_id = _dev->id.vendor_id;
303 	dev->id.device_id = _dev->id.device_id;
304 	dev->id.subvendor_id = _dev->id.subsystem_vendor_id;
305 	dev->id.subdevice_id = _dev->id.subsystem_device_id;
306 	dev->socket_id = _dev->device.numa_node;
307 	dev->type = "pci";
308 
309 	dev->map_bar = map_bar_rte;
310 	dev->unmap_bar = unmap_bar_rte;
311 	dev->cfg_read = cfg_read_rte;
312 	dev->cfg_write = cfg_write_rte;
313 
314 	dev->internal.driver = driver;
315 	dev->internal.claim_fd = -1;
316 
317 	if (driver->cb_fn != NULL) {
318 		rc = driver->cb_fn(driver->cb_arg, dev);
319 		if (rc != 0) {
320 			free(dev);
321 			return rc;
322 		}
323 		dev->internal.attached = true;
324 	}
325 
326 	pthread_mutex_lock(&g_pci_mutex);
327 	TAILQ_INSERT_TAIL(&g_pci_hotplugged_devices, dev, internal.tailq);
328 	pthread_mutex_unlock(&g_pci_mutex);
329 	return 0;
330 }
331 
332 int
333 pci_device_fini(struct rte_pci_device *_dev)
334 {
335 	struct spdk_pci_device *dev;
336 
337 	pthread_mutex_lock(&g_pci_mutex);
338 	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
339 		if (dev->dev_handle == _dev) {
340 			break;
341 		}
342 	}
343 
344 	if (dev == NULL || dev->internal.attached) {
345 		/* The device might be still referenced somewhere in SPDK. */
346 		pthread_mutex_unlock(&g_pci_mutex);
347 		return -1;
348 	}
349 
350 	/* remove our whitelist_at option */
351 	if (_dev->device.devargs) {
352 		_dev->device.devargs->data = NULL;
353 	}
354 
355 	assert(!dev->internal.removed);
356 	dev->internal.removed = true;
357 	pthread_mutex_unlock(&g_pci_mutex);
358 	return 0;
359 
360 }
361 
362 void
363 spdk_pci_device_detach(struct spdk_pci_device *dev)
364 {
365 	assert(dev->internal.attached);
366 
367 	if (dev->internal.claim_fd >= 0) {
368 		spdk_pci_device_unclaim(dev);
369 	}
370 
371 	if (strcmp(dev->type, "pci") == 0) {
372 		/* if it's a physical device we need to deal with DPDK on
373 		 * a different process and we can't just unset one flag
374 		 * here. We also want to stop using any device resources
375 		 * so that the device isn't "in use" by the userspace driver
376 		 * once we detach it. This would allow attaching the device
377 		 * to a different process, or to a kernel driver like nvme.
378 		 */
379 		detach_rte(dev);
380 	} else {
381 		dev->internal.attached = false;
382 	}
383 
384 	cleanup_pci_devices();
385 }
386 
387 static int
388 scan_pci_bus(bool delay_init)
389 {
390 	struct spdk_pci_driver *driver;
391 	struct rte_pci_device *rte_dev;
392 	uint64_t now;
393 
394 	rte_bus_scan();
395 	now = spdk_get_ticks();
396 
397 	driver = TAILQ_FIRST(&g_pci_drivers);
398 	if (!driver) {
399 		return 0;
400 	}
401 
402 	TAILQ_FOREACH(rte_dev, &driver->driver.bus->device_list, next) {
403 		struct rte_devargs *da;
404 
405 		da = rte_dev->device.devargs;
406 		if (!da) {
407 			char devargs_str[128];
408 
409 			/* the device was never blacklisted or whitelisted */
410 			da = calloc(1, sizeof(*da));
411 			if (!da) {
412 				return -1;
413 			}
414 
415 			snprintf(devargs_str, sizeof(devargs_str), "pci:%s", rte_dev->device.name);
416 			if (rte_devargs_parse(da, devargs_str) != 0) {
417 				free(da);
418 				return -1;
419 			}
420 
421 			rte_devargs_insert(&da);
422 			rte_dev->device.devargs = da;
423 		}
424 
425 		if (da->data) {
426 			uint64_t whitelist_at = (uint64_t)(uintptr_t)da->data;
427 
428 			/* this device was seen by spdk before... */
429 			if (da->policy == RTE_DEV_BLACKLISTED && whitelist_at <= now) {
430 				da->policy = RTE_DEV_WHITELISTED;
431 			}
432 		} else if ((driver->driver.bus->bus.conf.scan_mode == RTE_BUS_SCAN_WHITELIST &&
433 			    da->policy == RTE_DEV_WHITELISTED) || da->policy != RTE_DEV_BLACKLISTED) {
434 			/* override the policy only if not permanently blacklisted */
435 
436 			if (delay_init) {
437 				da->policy = RTE_DEV_BLACKLISTED;
438 				da->data = (void *)(now + 2 * spdk_get_ticks_hz());
439 			} else {
440 				da->policy = RTE_DEV_WHITELISTED;
441 				da->data = (void *)(uintptr_t)now;
442 			}
443 		}
444 	}
445 
446 	return 0;
447 }
448 
449 int
450 spdk_pci_device_attach(struct spdk_pci_driver *driver,
451 		       spdk_pci_enum_cb enum_cb,
452 		       void *enum_ctx, struct spdk_pci_addr *pci_address)
453 {
454 	struct spdk_pci_device *dev;
455 	struct rte_pci_device *rte_dev;
456 	struct rte_devargs *da;
457 	int rc;
458 	char bdf[32];
459 
460 	spdk_pci_addr_fmt(bdf, sizeof(bdf), pci_address);
461 
462 	cleanup_pci_devices();
463 
464 	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
465 		if (spdk_pci_addr_compare(&dev->addr, pci_address) == 0) {
466 			break;
467 		}
468 	}
469 
470 	if (dev != NULL && dev->internal.driver == driver) {
471 		pthread_mutex_lock(&g_pci_mutex);
472 		if (dev->internal.attached || dev->internal.pending_removal) {
473 			pthread_mutex_unlock(&g_pci_mutex);
474 			return -1;
475 		}
476 
477 		rc = enum_cb(enum_ctx, dev);
478 		if (rc == 0) {
479 			dev->internal.attached = true;
480 		}
481 		pthread_mutex_unlock(&g_pci_mutex);
482 		return rc;
483 	}
484 
485 	driver->cb_fn = enum_cb;
486 	driver->cb_arg = enum_ctx;
487 
488 	int i = 0;
489 
490 	do {
491 		rc = rte_eal_hotplug_add("pci", bdf, "");
492 	} while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT);
493 
494 	if (i > 1 && rc == -EEXIST) {
495 		/* Even though the previous request timed out, the device
496 		 * was attached successfully.
497 		 */
498 		rc = 0;
499 	}
500 
501 	driver->cb_arg = NULL;
502 	driver->cb_fn = NULL;
503 
504 	cleanup_pci_devices();
505 
506 	if (rc != 0) {
507 		return -1;
508 	}
509 
510 	/* explicit attach ignores the whitelist, so if we blacklisted this
511 	 * device before let's enable it now - just for clarity.
512 	 */
513 	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
514 		if (spdk_pci_addr_compare(&dev->addr, pci_address) == 0) {
515 			break;
516 		}
517 	}
518 	assert(dev != NULL);
519 
520 	rte_dev = dev->dev_handle;
521 	da = rte_dev->device.devargs;
522 	if (da && da->data) {
523 		da->data = (void *)(uintptr_t)spdk_get_ticks();
524 		da->policy = RTE_DEV_WHITELISTED;
525 	}
526 
527 	return 0;
528 }
529 
530 /* Note: You can call spdk_pci_enumerate from more than one thread
531  *       simultaneously safely, but you cannot call spdk_pci_enumerate
532  *       and rte_eal_pci_probe simultaneously.
533  */
534 int
535 spdk_pci_enumerate(struct spdk_pci_driver *driver,
536 		   spdk_pci_enum_cb enum_cb,
537 		   void *enum_ctx)
538 {
539 	struct spdk_pci_device *dev;
540 	int rc;
541 
542 	cleanup_pci_devices();
543 
544 	pthread_mutex_lock(&g_pci_mutex);
545 	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
546 		if (dev->internal.attached ||
547 		    dev->internal.driver != driver ||
548 		    dev->internal.pending_removal) {
549 			continue;
550 		}
551 
552 		rc = enum_cb(enum_ctx, dev);
553 		if (rc == 0) {
554 			dev->internal.attached = true;
555 		} else if (rc < 0) {
556 			pthread_mutex_unlock(&g_pci_mutex);
557 			return -1;
558 		}
559 	}
560 	pthread_mutex_unlock(&g_pci_mutex);
561 
562 	if (scan_pci_bus(true) != 0) {
563 		return -1;
564 	}
565 
566 	driver->cb_fn = enum_cb;
567 	driver->cb_arg = enum_ctx;
568 
569 	if (rte_bus_probe() != 0) {
570 		driver->cb_arg = NULL;
571 		driver->cb_fn = NULL;
572 		return -1;
573 	}
574 
575 	driver->cb_arg = NULL;
576 	driver->cb_fn = NULL;
577 
578 	cleanup_pci_devices();
579 	return 0;
580 }
581 
582 struct spdk_pci_device *
583 spdk_pci_get_first_device(void)
584 {
585 	return TAILQ_FIRST(&g_pci_devices);
586 }
587 
588 struct spdk_pci_device *
589 spdk_pci_get_next_device(struct spdk_pci_device *prev)
590 {
591 	return TAILQ_NEXT(prev, internal.tailq);
592 }
593 
594 int
595 spdk_pci_device_map_bar(struct spdk_pci_device *dev, uint32_t bar,
596 			void **mapped_addr, uint64_t *phys_addr, uint64_t *size)
597 {
598 	return dev->map_bar(dev, bar, mapped_addr, phys_addr, size);
599 }
600 
601 int
602 spdk_pci_device_unmap_bar(struct spdk_pci_device *dev, uint32_t bar, void *addr)
603 {
604 	return dev->unmap_bar(dev, bar, addr);
605 }
606 
607 uint32_t
608 spdk_pci_device_get_domain(struct spdk_pci_device *dev)
609 {
610 	return dev->addr.domain;
611 }
612 
613 uint8_t
614 spdk_pci_device_get_bus(struct spdk_pci_device *dev)
615 {
616 	return dev->addr.bus;
617 }
618 
619 uint8_t
620 spdk_pci_device_get_dev(struct spdk_pci_device *dev)
621 {
622 	return dev->addr.dev;
623 }
624 
625 uint8_t
626 spdk_pci_device_get_func(struct spdk_pci_device *dev)
627 {
628 	return dev->addr.func;
629 }
630 
631 uint16_t
632 spdk_pci_device_get_vendor_id(struct spdk_pci_device *dev)
633 {
634 	return dev->id.vendor_id;
635 }
636 
637 uint16_t
638 spdk_pci_device_get_device_id(struct spdk_pci_device *dev)
639 {
640 	return dev->id.device_id;
641 }
642 
643 uint16_t
644 spdk_pci_device_get_subvendor_id(struct spdk_pci_device *dev)
645 {
646 	return dev->id.subvendor_id;
647 }
648 
649 uint16_t
650 spdk_pci_device_get_subdevice_id(struct spdk_pci_device *dev)
651 {
652 	return dev->id.subdevice_id;
653 }
654 
655 struct spdk_pci_id
656 spdk_pci_device_get_id(struct spdk_pci_device *dev)
657 {
658 	return dev->id;
659 }
660 
661 int
662 spdk_pci_device_get_socket_id(struct spdk_pci_device *dev)
663 {
664 	return dev->socket_id;
665 }
666 
667 int
668 spdk_pci_device_cfg_read(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
669 {
670 	return dev->cfg_read(dev, value, len, offset);
671 }
672 
673 int
674 spdk_pci_device_cfg_write(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
675 {
676 	return dev->cfg_write(dev, value, len, offset);
677 }
678 
/* Read one byte of config space at offset into *value. */
int
spdk_pci_device_cfg_read8(struct spdk_pci_device *dev, uint8_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, sizeof(uint8_t), offset);
}
684 
/* Write the one-byte value to config space at offset. */
int
spdk_pci_device_cfg_write8(struct spdk_pci_device *dev, uint8_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, sizeof(uint8_t), offset);
}
690 
/* Read two bytes of config space at offset into *value. */
int
spdk_pci_device_cfg_read16(struct spdk_pci_device *dev, uint16_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, sizeof(uint16_t), offset);
}
696 
/* Write the two-byte value to config space at offset. */
int
spdk_pci_device_cfg_write16(struct spdk_pci_device *dev, uint16_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, sizeof(uint16_t), offset);
}
702 
/* Read four bytes of config space at offset into *value. */
int
spdk_pci_device_cfg_read32(struct spdk_pci_device *dev, uint32_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, sizeof(uint32_t), offset);
}
708 
/* Write the four-byte value to config space at offset. */
int
spdk_pci_device_cfg_write32(struct spdk_pci_device *dev, uint32_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, sizeof(uint32_t), offset);
}
714 
715 int
716 spdk_pci_device_get_serial_number(struct spdk_pci_device *dev, char *sn, size_t len)
717 {
718 	int err;
719 	uint32_t pos, header = 0;
720 	uint32_t i, buf[2];
721 
722 	if (len < 17) {
723 		return -1;
724 	}
725 
726 	err = spdk_pci_device_cfg_read32(dev, &header, PCI_CFG_SIZE);
727 	if (err || !header) {
728 		return -1;
729 	}
730 
731 	pos = PCI_CFG_SIZE;
732 	while (1) {
733 		if ((header & 0x0000ffff) == PCI_EXT_CAP_ID_SN) {
734 			if (pos) {
735 				/* skip the header */
736 				pos += 4;
737 				for (i = 0; i < 2; i++) {
738 					err = spdk_pci_device_cfg_read32(dev, &buf[i], pos + 4 * i);
739 					if (err) {
740 						return -1;
741 					}
742 				}
743 				snprintf(sn, len, "%08x%08x", buf[1], buf[0]);
744 				return 0;
745 			}
746 		}
747 		pos = (header >> 20) & 0xffc;
748 		/* 0 if no other items exist */
749 		if (pos < PCI_CFG_SIZE) {
750 			return -1;
751 		}
752 		err = spdk_pci_device_cfg_read32(dev, &header, pos);
753 		if (err) {
754 			return -1;
755 		}
756 	}
757 	return -1;
758 }
759 
760 struct spdk_pci_addr
761 spdk_pci_device_get_addr(struct spdk_pci_device *dev)
762 {
763 	return dev->addr;
764 }
765 
766 bool
767 spdk_pci_device_is_removed(struct spdk_pci_device *dev)
768 {
769 	return dev->internal.pending_removal;
770 }
771 
772 int
773 spdk_pci_addr_compare(const struct spdk_pci_addr *a1, const struct spdk_pci_addr *a2)
774 {
775 	if (a1->domain > a2->domain) {
776 		return 1;
777 	} else if (a1->domain < a2->domain) {
778 		return -1;
779 	} else if (a1->bus > a2->bus) {
780 		return 1;
781 	} else if (a1->bus < a2->bus) {
782 		return -1;
783 	} else if (a1->dev > a2->dev) {
784 		return 1;
785 	} else if (a1->dev < a2->dev) {
786 		return -1;
787 	} else if (a1->func > a2->func) {
788 		return 1;
789 	} else if (a1->func < a2->func) {
790 		return -1;
791 	}
792 
793 	return 0;
794 }
795 
796 #ifdef __linux__
797 int
798 spdk_pci_device_claim(struct spdk_pci_device *dev)
799 {
800 	int dev_fd;
801 	char dev_name[64];
802 	int pid;
803 	void *dev_map;
804 	struct flock pcidev_lock = {
805 		.l_type = F_WRLCK,
806 		.l_whence = SEEK_SET,
807 		.l_start = 0,
808 		.l_len = 0,
809 	};
810 
811 	snprintf(dev_name, sizeof(dev_name), "/tmp/spdk_pci_lock_%04x:%02x:%02x.%x",
812 		 dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func);
813 
814 	dev_fd = open(dev_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
815 	if (dev_fd == -1) {
816 		fprintf(stderr, "could not open %s\n", dev_name);
817 		return -errno;
818 	}
819 
820 	if (ftruncate(dev_fd, sizeof(int)) != 0) {
821 		fprintf(stderr, "could not truncate %s\n", dev_name);
822 		close(dev_fd);
823 		return -errno;
824 	}
825 
826 	dev_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
827 		       MAP_SHARED, dev_fd, 0);
828 	if (dev_map == MAP_FAILED) {
829 		fprintf(stderr, "could not mmap dev %s (%d)\n", dev_name, errno);
830 		close(dev_fd);
831 		return -errno;
832 	}
833 
834 	if (fcntl(dev_fd, F_SETLK, &pcidev_lock) != 0) {
835 		pid = *(int *)dev_map;
836 		fprintf(stderr, "Cannot create lock on device %s, probably"
837 			" process %d has claimed it\n", dev_name, pid);
838 		munmap(dev_map, sizeof(int));
839 		close(dev_fd);
840 		/* F_SETLK returns unspecified errnos, normalize them */
841 		return -EACCES;
842 	}
843 
844 	*(int *)dev_map = (int)getpid();
845 	munmap(dev_map, sizeof(int));
846 	dev->internal.claim_fd = dev_fd;
847 	/* Keep dev_fd open to maintain the lock. */
848 	return 0;
849 }
850 
851 void
852 spdk_pci_device_unclaim(struct spdk_pci_device *dev)
853 {
854 	char dev_name[64];
855 
856 	snprintf(dev_name, sizeof(dev_name), "/tmp/spdk_pci_lock_%04x:%02x:%02x.%x",
857 		 dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func);
858 
859 	close(dev->internal.claim_fd);
860 	dev->internal.claim_fd = -1;
861 	unlink(dev_name);
862 }
863 #endif /* __linux__ */
864 
865 #ifdef __FreeBSD__
/* FreeBSD: claiming is not implemented yet; always reports success. */
int
spdk_pci_device_claim(struct spdk_pci_device *dev)
{
	/* TODO */
	(void)dev;

	return 0;
}
872 
/* FreeBSD: claiming is not implemented yet, so there is nothing to release. */
void
spdk_pci_device_unclaim(struct spdk_pci_device *dev)
{
	/* TODO */
	(void)dev;
}
878 #endif /* __FreeBSD__ */
879 
880 int
881 spdk_pci_addr_parse(struct spdk_pci_addr *addr, const char *bdf)
882 {
883 	unsigned domain, bus, dev, func;
884 
885 	if (addr == NULL || bdf == NULL) {
886 		return -EINVAL;
887 	}
888 
889 	if ((sscanf(bdf, "%x:%x:%x.%x", &domain, &bus, &dev, &func) == 4) ||
890 	    (sscanf(bdf, "%x.%x.%x.%x", &domain, &bus, &dev, &func) == 4)) {
891 		/* Matched a full address - all variables are initialized */
892 	} else if (sscanf(bdf, "%x:%x:%x", &domain, &bus, &dev) == 3) {
893 		func = 0;
894 	} else if ((sscanf(bdf, "%x:%x.%x", &bus, &dev, &func) == 3) ||
895 		   (sscanf(bdf, "%x.%x.%x", &bus, &dev, &func) == 3)) {
896 		domain = 0;
897 	} else if ((sscanf(bdf, "%x:%x", &bus, &dev) == 2) ||
898 		   (sscanf(bdf, "%x.%x", &bus, &dev) == 2)) {
899 		domain = 0;
900 		func = 0;
901 	} else {
902 		return -EINVAL;
903 	}
904 
905 	if (bus > 0xFF || dev > 0x1F || func > 7) {
906 		return -EINVAL;
907 	}
908 
909 	addr->domain = domain;
910 	addr->bus = bus;
911 	addr->dev = dev;
912 	addr->func = func;
913 
914 	return 0;
915 }
916 
917 int
918 spdk_pci_addr_fmt(char *bdf, size_t sz, const struct spdk_pci_addr *addr)
919 {
920 	int rc;
921 
922 	rc = snprintf(bdf, sz, "%04x:%02x:%02x.%x",
923 		      addr->domain, addr->bus,
924 		      addr->dev, addr->func);
925 
926 	if (rc > 0 && (size_t)rc < sz) {
927 		return 0;
928 	}
929 
930 	return -1;
931 }
932 
933 void
934 spdk_pci_hook_device(struct spdk_pci_driver *drv, struct spdk_pci_device *dev)
935 {
936 	assert(dev->map_bar != NULL);
937 	assert(dev->unmap_bar != NULL);
938 	assert(dev->cfg_read != NULL);
939 	assert(dev->cfg_write != NULL);
940 	dev->internal.driver = drv;
941 	TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq);
942 }
943 
944 void
945 spdk_pci_unhook_device(struct spdk_pci_device *dev)
946 {
947 	assert(!dev->internal.attached);
948 	TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq);
949 }
950 
951 const char *
952 spdk_pci_device_get_type(const struct spdk_pci_device *dev)
953 {
954 	return dev->type;
955 }
956