xref: /spdk/lib/env_dpdk/pci.c (revision 03e3fc4f5835983a4e6602b4e770922e798ce263)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "env_internal.h"
35 
36 #include <rte_alarm.h>
37 #include "spdk/env.h"
38 
/* sysfs directory path for PCI drivers. */
#define SYSFS_PCI_DRIVERS	"/sys/bus/pci/drivers"

/* Config-space offset at which the serial number lookup starts walking
 * the capability headers; smaller "next" offsets terminate the walk.
 */
#define PCI_CFG_SIZE		256
/* Capability ID matched by the serial number lookup. */
#define PCI_EXT_CAP_ID_SN	0x03

/* DPDK 18.11+ hotplug isn't robust. Multiple apps starting at the same time
 * might cause the internal IPC to misbehave. Just retry in such case.
 */
#define DPDK_HOTPLUG_RETRY_COUNT 4

/* DPDK alarm/interrupt thread; recorded by _get_alarm_thread_cb() */
static pthread_t g_dpdk_tid;
/* Taken around most accesses to the device lists below and to the
 * per-device attached/removed/pending_removal flags.
 */
static pthread_mutex_t g_pci_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Devices currently known to this file. */
static TAILQ_HEAD(, spdk_pci_device) g_pci_devices = TAILQ_HEAD_INITIALIZER(g_pci_devices);
/* devices hotplugged on a dpdk thread; moved to g_pci_devices by
 * cleanup_pci_devices()
 */
static TAILQ_HEAD(, spdk_pci_device) g_pci_hotplugged_devices =
	TAILQ_HEAD_INITIALIZER(g_pci_hotplugged_devices);
/* Drivers added through pci_driver_register(). */
static TAILQ_HEAD(, spdk_pci_driver) g_pci_drivers = TAILQ_HEAD_INITIALIZER(g_pci_drivers);
57 
58 static int
59 map_bar_rte(struct spdk_pci_device *device, uint32_t bar,
60 	    void **mapped_addr, uint64_t *phys_addr, uint64_t *size)
61 {
62 	struct rte_pci_device *dev = device->dev_handle;
63 
64 	*mapped_addr = dev->mem_resource[bar].addr;
65 	*phys_addr = (uint64_t)dev->mem_resource[bar].phys_addr;
66 	*size = (uint64_t)dev->mem_resource[bar].len;
67 
68 	return 0;
69 }
70 
/* unmap_bar callback for DPDK-backed devices.
 *
 * Nothing is done here; the arguments are ignored and 0 is returned.
 */
static int
unmap_bar_rte(struct spdk_pci_device *device, uint32_t bar, void *addr)
{
	(void)device;
	(void)bar;
	(void)addr;

	return 0;
}
76 
77 static int
78 cfg_read_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
79 {
80 	int rc;
81 
82 	rc = rte_pci_read_config(dev->dev_handle, value, len, offset);
83 
84 #if defined(__FreeBSD__) && RTE_VERSION < RTE_VERSION_NUM(18, 11, 0, 0)
85 	/* Older DPDKs return 0 on success and -1 on failure */
86 	return rc;
87 #endif
88 	return (rc > 0 && (uint32_t) rc == len) ? 0 : -1;
89 }
90 
91 static int
92 cfg_write_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
93 {
94 	int rc;
95 
96 	rc = rte_pci_write_config(dev->dev_handle, value, len, offset);
97 
98 #ifdef __FreeBSD__
99 	/* DPDK returns 0 on success and -1 on failure */
100 	return rc;
101 #endif
102 	return (rc > 0 && (uint32_t) rc == len) ? 0 : -1;
103 }
104 
105 static void
106 detach_rte_cb(void *_dev)
107 {
108 	struct rte_pci_device *rte_dev = _dev;
109 
110 #if RTE_VERSION >= RTE_VERSION_NUM(18, 11, 0, 0)
111 	char bdf[32];
112 	int i = 0, rc;
113 
114 	snprintf(bdf, sizeof(bdf), "%s", rte_dev->device.name);
115 	do {
116 		rc = rte_eal_hotplug_remove("pci", bdf);
117 	} while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT);
118 #else
119 	rte_eal_dev_detach(&rte_dev->device);
120 #endif
121 }
122 
123 static void
124 detach_rte(struct spdk_pci_device *dev)
125 {
126 	struct rte_pci_device *rte_dev = dev->dev_handle;
127 	int i;
128 	bool removed;
129 
130 	/* The device was already marked as available and could be attached
131 	 * again while we go asynchronous, so we explicitly forbid that.
132 	 */
133 	dev->internal.pending_removal = true;
134 	if (!spdk_process_is_primary() || pthread_equal(g_dpdk_tid, pthread_self())) {
135 		detach_rte_cb(rte_dev);
136 		return;
137 	}
138 
139 	rte_eal_alarm_set(1, detach_rte_cb, rte_dev);
140 	/* wait up to 2s for the cb to finish executing */
141 	for (i = 2000; i > 0; i--) {
142 
143 		spdk_delay_us(1000);
144 		pthread_mutex_lock(&g_pci_mutex);
145 		removed = dev->internal.removed;
146 		pthread_mutex_unlock(&g_pci_mutex);
147 
148 		if (removed) {
149 			break;
150 		}
151 	}
152 
153 	/* besides checking the removed flag, we also need to wait
154 	 * for the dpdk detach function to unwind, as it's doing some
155 	 * operations even after calling our detach callback. Simply
156 	 * cancel the alarm - if it started executing already, this
157 	 * call will block and wait for it to finish.
158 	 */
159 	rte_eal_alarm_cancel(detach_rte_cb, rte_dev);
160 
161 	/* the device could have been finally removed, so just check
162 	 * it again.
163 	 */
164 	pthread_mutex_lock(&g_pci_mutex);
165 	removed = dev->internal.removed;
166 	pthread_mutex_unlock(&g_pci_mutex);
167 	if (!removed) {
168 		fprintf(stderr, "Timeout waiting for DPDK to remove PCI device %s.\n",
169 			rte_dev->name);
170 		/* If we reach this state, then the device couldn't be removed and most likely
171 		   a subsequent hot add of a device in the same BDF will fail */
172 	}
173 }
174 
175 void
176 pci_driver_register(struct spdk_pci_driver *driver)
177 {
178 	TAILQ_INSERT_TAIL(&g_pci_drivers, driver, tailq);
179 }
180 
181 #if RTE_VERSION >= RTE_VERSION_NUM(18, 5, 0, 0)
/* Alarm callback used for deferred hot-removal: `dev` is the
 * struct spdk_pci_device to hand over to detach_rte().
 */
static void
pci_device_rte_hotremove_cb(void *dev)
{
	struct spdk_pci_device *pci_dev = dev;

	detach_rte(pci_dev);
}
187 
188 static void
189 pci_device_rte_hotremove(const char *device_name,
190 			 enum rte_dev_event_type event,
191 			 void *cb_arg)
192 {
193 	struct spdk_pci_device *dev;
194 	bool can_detach = false;
195 
196 	if (event != RTE_DEV_EVENT_REMOVE) {
197 		return;
198 	}
199 
200 	pthread_mutex_lock(&g_pci_mutex);
201 	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
202 		struct rte_pci_device *rte_dev = dev->dev_handle;
203 		if (strcmp(rte_dev->name, device_name) == 0 &&
204 		    !dev->internal.pending_removal) {
205 			can_detach = !dev->internal.attached;
206 			/* prevent any further attaches */
207 			dev->internal.pending_removal = true;
208 			break;
209 		}
210 	}
211 	pthread_mutex_unlock(&g_pci_mutex);
212 
213 	if (dev != NULL && can_detach) {
214 		/* If device is not attached, we can remove it right away.
215 		 *
216 		 * Because the user's callback is invoked in eal interrupt
217 		 * callback, the interrupt callback need to be finished before
218 		 * it can be unregistered when detaching device. So finish
219 		 * callback soon and use a deferred removal to detach device
220 		 * is need. It is a workaround, once the device detaching be
221 		 * moved into the eal in the future, the deferred removal could
222 		 * be deleted.
223 		 */
224 		rte_eal_alarm_set(1, pci_device_rte_hotremove_cb, dev);
225 	}
226 }
227 #endif
228 
229 static void
230 cleanup_pci_devices(void)
231 {
232 	struct spdk_pci_device *dev, *tmp;
233 
234 	pthread_mutex_lock(&g_pci_mutex);
235 	/* cleanup removed devices */
236 	TAILQ_FOREACH_SAFE(dev, &g_pci_devices, internal.tailq, tmp) {
237 		if (!dev->internal.removed) {
238 			continue;
239 		}
240 
241 		vtophys_pci_device_removed(dev->dev_handle);
242 		TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq);
243 		free(dev);
244 	}
245 
246 	/* add newly-attached devices */
247 	TAILQ_FOREACH_SAFE(dev, &g_pci_hotplugged_devices, internal.tailq, tmp) {
248 		TAILQ_REMOVE(&g_pci_hotplugged_devices, dev, internal.tailq);
249 		TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq);
250 		vtophys_pci_device_added(dev->dev_handle);
251 	}
252 	pthread_mutex_unlock(&g_pci_mutex);
253 }
254 
255 static void
256 _get_alarm_thread_cb(void *unused)
257 {
258 	g_dpdk_tid = pthread_self();
259 }
260 
261 void
262 pci_env_init(void)
263 {
264 #if RTE_VERSION >= RTE_VERSION_NUM(18, 11, 0, 0)
265 	struct spdk_pci_driver *driver;
266 
267 	/* We need to pre-register pci drivers for the pci devices to be
268 	 * attachable in multi-process with DPDK 18.11+.
269 	 *
270 	 * DPDK 18.11+ does its best to ensure all devices are equally
271 	 * attached or detached in all processes within a shared memory group.
272 	 * For SPDK it means that if a device is hotplugged in the primary,
273 	 * then DPDK will automatically send an IPC hotplug request to all other
274 	 * processes. Those other processes may not have the same SPDK PCI
275 	 * driver registered and may fail to attach the device. DPDK will send
276 	 * back the failure status, and the the primary process will also fail
277 	 * to hotplug the device. To prevent that, we need to pre-register the
278 	 * pci drivers here.
279 	 */
280 	TAILQ_FOREACH(driver, &g_pci_drivers, tailq) {
281 		assert(!driver->is_registered);
282 		driver->is_registered = true;
283 		rte_pci_register(&driver->driver);
284 	}
285 #endif
286 
287 #if RTE_VERSION >= RTE_VERSION_NUM(18, 5, 0, 0)
288 	/* Register a single hotremove callback for all devices. */
289 	if (spdk_process_is_primary()) {
290 		rte_dev_event_callback_register(NULL, pci_device_rte_hotremove, NULL);
291 	}
292 #endif
293 
294 	rte_eal_alarm_set(1, _get_alarm_thread_cb, NULL);
295 	/* alarms are executed in order, so this one will be always executed
296 	 * before any real hotremove alarms and we don't need to wait for it.
297 	 */
298 }
299 
300 void
301 pci_env_fini(void)
302 {
303 	struct spdk_pci_device *dev;
304 	char bdf[32];
305 
306 	cleanup_pci_devices();
307 	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
308 		if (dev->internal.attached) {
309 			spdk_pci_addr_fmt(bdf, sizeof(bdf), &dev->addr);
310 			fprintf(stderr, "Device %s is still attached at shutdown!\n", bdf);
311 		}
312 	}
313 
314 #if RTE_VERSION >= RTE_VERSION_NUM(18, 5, 0, 0)
315 	if (spdk_process_is_primary()) {
316 		rte_dev_event_callback_unregister(NULL, pci_device_rte_hotremove, NULL);
317 	}
318 #endif
319 }
320 
321 int
322 pci_device_init(struct rte_pci_driver *_drv,
323 		struct rte_pci_device *_dev)
324 {
325 	struct spdk_pci_driver *driver = (struct spdk_pci_driver *)_drv;
326 	struct spdk_pci_device *dev;
327 	int rc;
328 
329 #if RTE_VERSION < RTE_VERSION_NUM(18, 11, 0, 0)
330 	if (!driver->cb_fn) {
331 		/* Return a positive value to indicate that this device does
332 		 * not belong to this driver, but this isn't an error.
333 		 */
334 		return 1;
335 	}
336 #endif
337 
338 	dev = calloc(1, sizeof(*dev));
339 	if (dev == NULL) {
340 		return -1;
341 	}
342 
343 	dev->dev_handle = _dev;
344 
345 	dev->addr.domain = _dev->addr.domain;
346 	dev->addr.bus = _dev->addr.bus;
347 	dev->addr.dev = _dev->addr.devid;
348 	dev->addr.func = _dev->addr.function;
349 	dev->id.vendor_id = _dev->id.vendor_id;
350 	dev->id.device_id = _dev->id.device_id;
351 	dev->id.subvendor_id = _dev->id.subsystem_vendor_id;
352 	dev->id.subdevice_id = _dev->id.subsystem_device_id;
353 	dev->socket_id = _dev->device.numa_node;
354 	dev->type = "pci";
355 
356 	dev->map_bar = map_bar_rte;
357 	dev->unmap_bar = unmap_bar_rte;
358 	dev->cfg_read = cfg_read_rte;
359 	dev->cfg_write = cfg_write_rte;
360 	dev->detach = detach_rte;
361 
362 	dev->internal.driver = driver;
363 	dev->internal.claim_fd = -1;
364 
365 	if (driver->cb_fn != NULL) {
366 		rc = driver->cb_fn(driver->cb_arg, dev);
367 		if (rc != 0) {
368 			free(dev);
369 			return rc;
370 		}
371 		dev->internal.attached = true;
372 	}
373 
374 	pthread_mutex_lock(&g_pci_mutex);
375 	TAILQ_INSERT_TAIL(&g_pci_hotplugged_devices, dev, internal.tailq);
376 	pthread_mutex_unlock(&g_pci_mutex);
377 	return 0;
378 }
379 
380 int
381 pci_device_fini(struct rte_pci_device *_dev)
382 {
383 	struct spdk_pci_device *dev;
384 
385 	pthread_mutex_lock(&g_pci_mutex);
386 	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
387 		if (dev->dev_handle == _dev) {
388 			break;
389 		}
390 	}
391 
392 	if (dev == NULL || dev->internal.attached) {
393 		/* The device might be still referenced somewhere in SPDK. */
394 		pthread_mutex_unlock(&g_pci_mutex);
395 		return -1;
396 	}
397 
398 	assert(!dev->internal.removed);
399 	dev->internal.removed = true;
400 	pthread_mutex_unlock(&g_pci_mutex);
401 	return 0;
402 
403 }
404 
405 void
406 spdk_pci_device_detach(struct spdk_pci_device *dev)
407 {
408 	assert(dev->internal.attached);
409 
410 	if (dev->internal.claim_fd >= 0) {
411 		spdk_pci_device_unclaim(dev);
412 	}
413 
414 	dev->internal.attached = false;
415 	dev->detach(dev);
416 
417 	cleanup_pci_devices();
418 }
419 
420 int
421 spdk_pci_device_attach(struct spdk_pci_driver *driver,
422 		       spdk_pci_enum_cb enum_cb,
423 		       void *enum_ctx, struct spdk_pci_addr *pci_address)
424 {
425 	struct spdk_pci_device *dev;
426 	int rc;
427 	char bdf[32];
428 
429 	spdk_pci_addr_fmt(bdf, sizeof(bdf), pci_address);
430 
431 	cleanup_pci_devices();
432 
433 	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
434 		if (spdk_pci_addr_compare(&dev->addr, pci_address) == 0) {
435 			break;
436 		}
437 	}
438 
439 	if (dev != NULL && dev->internal.driver == driver) {
440 		pthread_mutex_lock(&g_pci_mutex);
441 		if (dev->internal.attached || dev->internal.pending_removal) {
442 			pthread_mutex_unlock(&g_pci_mutex);
443 			return -1;
444 		}
445 
446 		rc = enum_cb(enum_ctx, dev);
447 		if (rc == 0) {
448 			dev->internal.attached = true;
449 		}
450 		pthread_mutex_unlock(&g_pci_mutex);
451 		return rc;
452 	}
453 
454 	if (!driver->is_registered) {
455 		driver->is_registered = true;
456 		rte_pci_register(&driver->driver);
457 	}
458 
459 	driver->cb_fn = enum_cb;
460 	driver->cb_arg = enum_ctx;
461 
462 #if RTE_VERSION >= RTE_VERSION_NUM(18, 11, 0, 0)
463 	int i = 0;
464 
465 	do {
466 		rc = rte_eal_hotplug_add("pci", bdf, "");
467 	} while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT);
468 
469 	if (i > 1 && rc == -EEXIST) {
470 		/* Even though the previous request timed out, the device
471 		 * was attached successfully.
472 		 */
473 		rc = 0;
474 	}
475 #else
476 	rc = rte_eal_dev_attach(bdf, "");
477 #endif
478 
479 	driver->cb_arg = NULL;
480 	driver->cb_fn = NULL;
481 
482 	cleanup_pci_devices();
483 	return rc == 0 ? 0 : -1;
484 }
485 
486 /* Note: You can call spdk_pci_enumerate from more than one thread
487  *       simultaneously safely, but you cannot call spdk_pci_enumerate
488  *       and rte_eal_pci_probe simultaneously.
489  */
490 int
491 spdk_pci_enumerate(struct spdk_pci_driver *driver,
492 		   spdk_pci_enum_cb enum_cb,
493 		   void *enum_ctx)
494 {
495 	struct spdk_pci_device *dev;
496 	int rc;
497 
498 	cleanup_pci_devices();
499 
500 	pthread_mutex_lock(&g_pci_mutex);
501 	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
502 		if (dev->internal.attached ||
503 		    dev->internal.driver != driver ||
504 		    dev->internal.pending_removal) {
505 			continue;
506 		}
507 
508 		rc = enum_cb(enum_ctx, dev);
509 		if (rc == 0) {
510 			dev->internal.attached = true;
511 		} else if (rc < 0) {
512 			pthread_mutex_unlock(&g_pci_mutex);
513 			return -1;
514 		}
515 	}
516 	pthread_mutex_unlock(&g_pci_mutex);
517 
518 	if (!driver->is_registered) {
519 		driver->is_registered = true;
520 		rte_pci_register(&driver->driver);
521 	}
522 
523 	driver->cb_fn = enum_cb;
524 	driver->cb_arg = enum_ctx;
525 
526 	if (rte_bus_scan() != 0 || rte_bus_probe() != 0) {
527 		driver->cb_arg = NULL;
528 		driver->cb_fn = NULL;
529 		return -1;
530 	}
531 
532 	driver->cb_arg = NULL;
533 	driver->cb_fn = NULL;
534 
535 	cleanup_pci_devices();
536 	return 0;
537 }
538 
539 struct spdk_pci_device *
540 spdk_pci_get_first_device(void)
541 {
542 	return TAILQ_FIRST(&g_pci_devices);
543 }
544 
545 struct spdk_pci_device *
546 spdk_pci_get_next_device(struct spdk_pci_device *prev)
547 {
548 	return TAILQ_NEXT(prev, internal.tailq);
549 }
550 
551 int
552 spdk_pci_device_map_bar(struct spdk_pci_device *dev, uint32_t bar,
553 			void **mapped_addr, uint64_t *phys_addr, uint64_t *size)
554 {
555 	return dev->map_bar(dev, bar, mapped_addr, phys_addr, size);
556 }
557 
558 int
559 spdk_pci_device_unmap_bar(struct spdk_pci_device *dev, uint32_t bar, void *addr)
560 {
561 	return dev->unmap_bar(dev, bar, addr);
562 }
563 
564 uint32_t
565 spdk_pci_device_get_domain(struct spdk_pci_device *dev)
566 {
567 	return dev->addr.domain;
568 }
569 
570 uint8_t
571 spdk_pci_device_get_bus(struct spdk_pci_device *dev)
572 {
573 	return dev->addr.bus;
574 }
575 
576 uint8_t
577 spdk_pci_device_get_dev(struct spdk_pci_device *dev)
578 {
579 	return dev->addr.dev;
580 }
581 
582 uint8_t
583 spdk_pci_device_get_func(struct spdk_pci_device *dev)
584 {
585 	return dev->addr.func;
586 }
587 
588 uint16_t
589 spdk_pci_device_get_vendor_id(struct spdk_pci_device *dev)
590 {
591 	return dev->id.vendor_id;
592 }
593 
594 uint16_t
595 spdk_pci_device_get_device_id(struct spdk_pci_device *dev)
596 {
597 	return dev->id.device_id;
598 }
599 
600 uint16_t
601 spdk_pci_device_get_subvendor_id(struct spdk_pci_device *dev)
602 {
603 	return dev->id.subvendor_id;
604 }
605 
606 uint16_t
607 spdk_pci_device_get_subdevice_id(struct spdk_pci_device *dev)
608 {
609 	return dev->id.subdevice_id;
610 }
611 
612 struct spdk_pci_id
613 spdk_pci_device_get_id(struct spdk_pci_device *dev)
614 {
615 	return dev->id;
616 }
617 
618 int
619 spdk_pci_device_get_socket_id(struct spdk_pci_device *dev)
620 {
621 	return dev->socket_id;
622 }
623 
624 int
625 spdk_pci_device_cfg_read(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
626 {
627 	return dev->cfg_read(dev, value, len, offset);
628 }
629 
630 int
631 spdk_pci_device_cfg_write(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
632 {
633 	return dev->cfg_write(dev, value, len, offset);
634 }
635 
/* Read one byte of config space at `offset` into `*value`. */
int
spdk_pci_device_cfg_read8(struct spdk_pci_device *dev, uint8_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, sizeof(*value), offset);
}
641 
/* Write the single byte `value` to config space at `offset`. */
int
spdk_pci_device_cfg_write8(struct spdk_pci_device *dev, uint8_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, sizeof(value), offset);
}
647 
/* Read two bytes of config space at `offset` into `*value`. */
int
spdk_pci_device_cfg_read16(struct spdk_pci_device *dev, uint16_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, sizeof(*value), offset);
}
653 
/* Write the two-byte `value` to config space at `offset`. */
int
spdk_pci_device_cfg_write16(struct spdk_pci_device *dev, uint16_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, sizeof(value), offset);
}
659 
/* Read four bytes of config space at `offset` into `*value`. */
int
spdk_pci_device_cfg_read32(struct spdk_pci_device *dev, uint32_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, sizeof(*value), offset);
}
665 
/* Write the four-byte `value` to config space at `offset`. */
int
spdk_pci_device_cfg_write32(struct spdk_pci_device *dev, uint32_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, sizeof(value), offset);
}
671 
672 int
673 spdk_pci_device_get_serial_number(struct spdk_pci_device *dev, char *sn, size_t len)
674 {
675 	int err;
676 	uint32_t pos, header = 0;
677 	uint32_t i, buf[2];
678 
679 	if (len < 17) {
680 		return -1;
681 	}
682 
683 	err = spdk_pci_device_cfg_read32(dev, &header, PCI_CFG_SIZE);
684 	if (err || !header) {
685 		return -1;
686 	}
687 
688 	pos = PCI_CFG_SIZE;
689 	while (1) {
690 		if ((header & 0x0000ffff) == PCI_EXT_CAP_ID_SN) {
691 			if (pos) {
692 				/* skip the header */
693 				pos += 4;
694 				for (i = 0; i < 2; i++) {
695 					err = spdk_pci_device_cfg_read32(dev, &buf[i], pos + 4 * i);
696 					if (err) {
697 						return -1;
698 					}
699 				}
700 				snprintf(sn, len, "%08x%08x", buf[1], buf[0]);
701 				return 0;
702 			}
703 		}
704 		pos = (header >> 20) & 0xffc;
705 		/* 0 if no other items exist */
706 		if (pos < PCI_CFG_SIZE) {
707 			return -1;
708 		}
709 		err = spdk_pci_device_cfg_read32(dev, &header, pos);
710 		if (err) {
711 			return -1;
712 		}
713 	}
714 	return -1;
715 }
716 
717 struct spdk_pci_addr
718 spdk_pci_device_get_addr(struct spdk_pci_device *dev)
719 {
720 	return dev->addr;
721 }
722 
723 bool
724 spdk_pci_device_is_removed(struct spdk_pci_device *dev)
725 {
726 	return dev->internal.pending_removal;
727 }
728 
729 int
730 spdk_pci_addr_compare(const struct spdk_pci_addr *a1, const struct spdk_pci_addr *a2)
731 {
732 	if (a1->domain > a2->domain) {
733 		return 1;
734 	} else if (a1->domain < a2->domain) {
735 		return -1;
736 	} else if (a1->bus > a2->bus) {
737 		return 1;
738 	} else if (a1->bus < a2->bus) {
739 		return -1;
740 	} else if (a1->dev > a2->dev) {
741 		return 1;
742 	} else if (a1->dev < a2->dev) {
743 		return -1;
744 	} else if (a1->func > a2->func) {
745 		return 1;
746 	} else if (a1->func < a2->func) {
747 		return -1;
748 	}
749 
750 	return 0;
751 }
752 
753 #ifdef __linux__
754 int
755 spdk_pci_device_claim(struct spdk_pci_device *dev)
756 {
757 	int dev_fd;
758 	char dev_name[64];
759 	int pid;
760 	void *dev_map;
761 	struct flock pcidev_lock = {
762 		.l_type = F_WRLCK,
763 		.l_whence = SEEK_SET,
764 		.l_start = 0,
765 		.l_len = 0,
766 	};
767 
768 	snprintf(dev_name, sizeof(dev_name), "/tmp/spdk_pci_lock_%04x:%02x:%02x.%x",
769 		 dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func);
770 
771 	dev_fd = open(dev_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
772 	if (dev_fd == -1) {
773 		fprintf(stderr, "could not open %s\n", dev_name);
774 		return -errno;
775 	}
776 
777 	if (ftruncate(dev_fd, sizeof(int)) != 0) {
778 		fprintf(stderr, "could not truncate %s\n", dev_name);
779 		close(dev_fd);
780 		return -errno;
781 	}
782 
783 	dev_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
784 		       MAP_SHARED, dev_fd, 0);
785 	if (dev_map == MAP_FAILED) {
786 		fprintf(stderr, "could not mmap dev %s (%d)\n", dev_name, errno);
787 		close(dev_fd);
788 		return -errno;
789 	}
790 
791 	if (fcntl(dev_fd, F_SETLK, &pcidev_lock) != 0) {
792 		pid = *(int *)dev_map;
793 		fprintf(stderr, "Cannot create lock on device %s, probably"
794 			" process %d has claimed it\n", dev_name, pid);
795 		munmap(dev_map, sizeof(int));
796 		close(dev_fd);
797 		/* F_SETLK returns unspecified errnos, normalize them */
798 		return -EACCES;
799 	}
800 
801 	*(int *)dev_map = (int)getpid();
802 	munmap(dev_map, sizeof(int));
803 	dev->internal.claim_fd = dev_fd;
804 	/* Keep dev_fd open to maintain the lock. */
805 	return 0;
806 }
807 
808 void
809 spdk_pci_device_unclaim(struct spdk_pci_device *dev)
810 {
811 	char dev_name[64];
812 
813 	snprintf(dev_name, sizeof(dev_name), "/tmp/spdk_pci_lock_%04x:%02x:%02x.%x",
814 		 dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func);
815 
816 	close(dev->internal.claim_fd);
817 	dev->internal.claim_fd = -1;
818 	unlink(dev_name);
819 }
820 #endif /* __linux__ */
821 
822 #ifdef __FreeBSD__
/* FreeBSD placeholder: claiming is not implemented, so this reports
 * success without doing anything.
 */
int
spdk_pci_device_claim(struct spdk_pci_device *dev)
{
	(void)dev;

	/* TODO */
	return 0;
}
829 
/* FreeBSD placeholder: unclaiming is not implemented and does nothing. */
void
spdk_pci_device_unclaim(struct spdk_pci_device *dev)
{
	(void)dev;

	/* TODO */
}
835 #endif /* __FreeBSD__ */
836 
837 int
838 spdk_pci_addr_parse(struct spdk_pci_addr *addr, const char *bdf)
839 {
840 	unsigned domain, bus, dev, func;
841 
842 	if (addr == NULL || bdf == NULL) {
843 		return -EINVAL;
844 	}
845 
846 	if ((sscanf(bdf, "%x:%x:%x.%x", &domain, &bus, &dev, &func) == 4) ||
847 	    (sscanf(bdf, "%x.%x.%x.%x", &domain, &bus, &dev, &func) == 4)) {
848 		/* Matched a full address - all variables are initialized */
849 	} else if (sscanf(bdf, "%x:%x:%x", &domain, &bus, &dev) == 3) {
850 		func = 0;
851 	} else if ((sscanf(bdf, "%x:%x.%x", &bus, &dev, &func) == 3) ||
852 		   (sscanf(bdf, "%x.%x.%x", &bus, &dev, &func) == 3)) {
853 		domain = 0;
854 	} else if ((sscanf(bdf, "%x:%x", &bus, &dev) == 2) ||
855 		   (sscanf(bdf, "%x.%x", &bus, &dev) == 2)) {
856 		domain = 0;
857 		func = 0;
858 	} else {
859 		return -EINVAL;
860 	}
861 
862 	if (bus > 0xFF || dev > 0x1F || func > 7) {
863 		return -EINVAL;
864 	}
865 
866 	addr->domain = domain;
867 	addr->bus = bus;
868 	addr->dev = dev;
869 	addr->func = func;
870 
871 	return 0;
872 }
873 
874 int
875 spdk_pci_addr_fmt(char *bdf, size_t sz, const struct spdk_pci_addr *addr)
876 {
877 	int rc;
878 
879 	rc = snprintf(bdf, sz, "%04x:%02x:%02x.%x",
880 		      addr->domain, addr->bus,
881 		      addr->dev, addr->func);
882 
883 	if (rc > 0 && (size_t)rc < sz) {
884 		return 0;
885 	}
886 
887 	return -1;
888 }
889 
890 void
891 spdk_pci_hook_device(struct spdk_pci_driver *drv, struct spdk_pci_device *dev)
892 {
893 	assert(dev->map_bar != NULL);
894 	assert(dev->unmap_bar != NULL);
895 	assert(dev->cfg_read != NULL);
896 	assert(dev->cfg_write != NULL);
897 	assert(dev->detach != NULL);
898 	dev->internal.driver = drv;
899 	TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq);
900 }
901 
902 void
903 spdk_pci_unhook_device(struct spdk_pci_device *dev)
904 {
905 	assert(!dev->internal.attached);
906 	TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq);
907 }
908 
909 const char *
910 spdk_pci_device_get_type(const struct spdk_pci_device *dev)
911 {
912 	return dev->type;
913 }
914