xref: /spdk/lib/env_dpdk/pci.c (revision 9889ab2dc80e40dae92dcef361d53dcba722043d)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "env_internal.h"
35 
36 #include <rte_alarm.h>
37 #include "spdk/env.h"
38 
39 #define SYSFS_PCI_DRIVERS	"/sys/bus/pci/drivers"
40 
41 #define PCI_CFG_SIZE		256
42 #define PCI_EXT_CAP_ID_SN	0x03
43 
44 /* DPDK 18.11+ hotplug isn't robust. Multiple apps starting at the same time
45  * might cause the internal IPC to misbehave. Just retry in such case.
46  */
47 #define DPDK_HOTPLUG_RETRY_COUNT 4
48 
49 /* DPDK alarm/interrupt thread; its id is recorded by _get_alarm_thread_cb() */
50 static pthread_t g_dpdk_tid;
51 static pthread_mutex_t g_pci_mutex = PTHREAD_MUTEX_INITIALIZER; /* held by cleanup_pci_devices() and other paths touching the lists below */
52 static TAILQ_HEAD(, spdk_pci_device) g_pci_devices = TAILQ_HEAD_INITIALIZER(g_pci_devices); /* devices visible to enumerate/attach */
53 /* devices hotplugged on a dpdk thread; cleanup_pci_devices() moves them into g_pci_devices */
54 static TAILQ_HEAD(, spdk_pci_device) g_pci_hotplugged_devices =
55 	TAILQ_HEAD_INITIALIZER(g_pci_hotplugged_devices);
56 static TAILQ_HEAD(, spdk_pci_driver) g_pci_drivers = TAILQ_HEAD_INITIALIZER(g_pci_drivers); /* appended to by spdk_pci_driver_register() */
57 
58 static int
59 spdk_map_bar_rte(struct spdk_pci_device *device, uint32_t bar,
60 		 void **mapped_addr, uint64_t *phys_addr, uint64_t *size)
61 {
62 	struct rte_pci_device *dev = device->dev_handle;
63 
64 	*mapped_addr = dev->mem_resource[bar].addr;
65 	*phys_addr = (uint64_t)dev->mem_resource[bar].phys_addr;
66 	*size = (uint64_t)dev->mem_resource[bar].len;
67 
68 	return 0;
69 }
70 
/*
 * Counterpart of spdk_map_bar_rte(). That function only reports an existing
 * mapping, so there is nothing to undo and success is always returned.
 */
static int
spdk_unmap_bar_rte(struct spdk_pci_device *device, uint32_t bar, void *addr)
{
	const int status = 0;

	return status;
}
76 
77 static int
78 spdk_cfg_read_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
79 {
80 	int rc;
81 
82 	rc = rte_pci_read_config(dev->dev_handle, value, len, offset);
83 
84 #if defined(__FreeBSD__) && RTE_VERSION < RTE_VERSION_NUM(18, 11, 0, 0)
85 	/* Older DPDKs return 0 on success and -1 on failure */
86 	return rc;
87 #endif
88 	return (rc > 0 && (uint32_t) rc == len) ? 0 : -1;
89 }
90 
91 static int
92 spdk_cfg_write_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
93 {
94 	int rc;
95 
96 	rc = rte_pci_write_config(dev->dev_handle, value, len, offset);
97 
98 #ifdef __FreeBSD__
99 	/* DPDK returns 0 on success and -1 on failure */
100 	return rc;
101 #endif
102 	return (rc > 0 && (uint32_t) rc == len) ? 0 : -1;
103 }
104 
105 static void
106 spdk_detach_rte_cb(void *_dev)
107 {
108 	struct rte_pci_device *rte_dev = _dev;
109 
110 #if RTE_VERSION >= RTE_VERSION_NUM(18, 11, 0, 0)
111 	char bdf[32];
112 	int i = 0, rc;
113 
114 	snprintf(bdf, sizeof(bdf), "%s", rte_dev->device.name);
115 	do {
116 		rc = rte_eal_hotplug_remove("pci", bdf);
117 	} while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT);
118 #else
119 	rte_eal_dev_detach(&rte_dev->device);
120 #endif
121 }
122 
123 static void
124 spdk_detach_rte(struct spdk_pci_device *dev)
125 {
126 	struct rte_pci_device *rte_dev = dev->dev_handle;
127 	int i;
128 	bool removed;
129 
130 	/* The device was already marked as available and could be attached
131 	 * again while we go asynchronous, so we explicitly forbid that.
132 	 */
133 	dev->internal.pending_removal = true;
134 	if (spdk_process_is_primary() && !pthread_equal(g_dpdk_tid, pthread_self())) {
135 		rte_eal_alarm_set(1, spdk_detach_rte_cb, rte_dev);
136 		/* wait up to 20ms for the cb to start executing */
137 		for (i = 20; i > 0; i--) {
138 
139 			spdk_delay_us(1000);
140 			pthread_mutex_lock(&g_pci_mutex);
141 			removed = dev->internal.removed;
142 			pthread_mutex_unlock(&g_pci_mutex);
143 
144 			if (removed) {
145 				break;
146 			}
147 		}
148 
149 		/* besides checking the removed flag, we also need to wait
150 		 * for the dpdk detach function to unwind, as it's doing some
151 		 * operations even after calling our detach callback. Simply
152 		 * cancell the alarm - if it started executing already, this
153 		 * call will block and wait for it to finish.
154 		 */
155 		rte_eal_alarm_cancel(spdk_detach_rte_cb, rte_dev);
156 
157 		/* the device could have been finally removed, so just check
158 		 * it again.
159 		 */
160 		pthread_mutex_lock(&g_pci_mutex);
161 		removed = dev->internal.removed;
162 		pthread_mutex_unlock(&g_pci_mutex);
163 		if (!removed) {
164 			fprintf(stderr, "Timeout waiting for DPDK to remove PCI device %s.\n",
165 				rte_dev->name);
166 		}
167 	} else {
168 		spdk_detach_rte_cb(rte_dev);
169 	}
170 }
171 
172 void
173 spdk_pci_driver_register(struct spdk_pci_driver *driver)
174 {
175 	TAILQ_INSERT_TAIL(&g_pci_drivers, driver, tailq);
176 }
177 
178 #if RTE_VERSION >= RTE_VERSION_NUM(18, 5, 0, 0)
179 static void
180 spdk_pci_device_rte_hotremove(const char *device_name,
181 			      enum rte_dev_event_type event,
182 			      void *cb_arg)
183 {
184 	struct spdk_pci_device *dev;
185 	bool can_detach = false;
186 
187 	if (event != RTE_DEV_EVENT_REMOVE) {
188 		return;
189 	}
190 
191 	pthread_mutex_lock(&g_pci_mutex);
192 	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
193 		struct rte_pci_device *rte_dev = dev->dev_handle;
194 
195 		if (strcmp(rte_dev->name, device_name) == 0 &&
196 		    !dev->internal.pending_removal) {
197 			can_detach = !dev->internal.attached;
198 			/* prevent any further attaches */
199 			dev->internal.pending_removal = true;
200 			break;
201 		}
202 	}
203 	pthread_mutex_unlock(&g_pci_mutex);
204 
205 	if (dev != NULL && can_detach) {
206 		/* if device is not attached, we can remove it right away. */
207 		spdk_detach_rte(dev);
208 	}
209 }
210 #endif
211 
212 static void
213 cleanup_pci_devices(void)
214 {
215 	struct spdk_pci_device *dev, *tmp;
216 
217 	pthread_mutex_lock(&g_pci_mutex);
218 	/* cleanup removed devices */
219 	TAILQ_FOREACH_SAFE(dev, &g_pci_devices, internal.tailq, tmp) {
220 		if (!dev->internal.removed) {
221 			continue;
222 		}
223 
224 		spdk_vtophys_pci_device_removed(dev->dev_handle);
225 		TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq);
226 		free(dev);
227 	}
228 
229 	/* add newly-attached devices */
230 	TAILQ_FOREACH_SAFE(dev, &g_pci_hotplugged_devices, internal.tailq, tmp) {
231 		TAILQ_REMOVE(&g_pci_hotplugged_devices, dev, internal.tailq);
232 		TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq);
233 		spdk_vtophys_pci_device_added(dev->dev_handle);
234 	}
235 	pthread_mutex_unlock(&g_pci_mutex);
236 }
237 
238 static void
239 _get_alarm_thread_cb(void *unused)
240 {
241 	g_dpdk_tid = pthread_self();
242 }
243 
244 void
245 spdk_pci_init(void)
246 {
247 #if RTE_VERSION >= RTE_VERSION_NUM(18, 11, 0, 0)
248 	struct spdk_pci_driver *driver;
249 
250 	/* We need to pre-register pci drivers for the pci devices to be
251 	 * attachable in multi-process with DPDK 18.11+.
252 	 *
253 	 * DPDK 18.11+ does its best to ensure all devices are equally
254 	 * attached or detached in all processes within a shared memory group.
255 	 * For SPDK it means that if a device is hotplugged in the primary,
256 	 * then DPDK will automatically send an IPC hotplug request to all other
257 	 * processes. Those other processes may not have the same SPDK PCI
258 	 * driver registered and may fail to attach the device. DPDK will send
259 	 * back the failure status, and the the primary process will also fail
260 	 * to hotplug the device. To prevent that, we need to pre-register the
261 	 * pci drivers here.
262 	 */
263 	TAILQ_FOREACH(driver, &g_pci_drivers, tailq) {
264 		assert(!driver->is_registered);
265 		driver->is_registered = true;
266 		rte_pci_register(&driver->driver);
267 	}
268 #endif
269 
270 #if RTE_VERSION >= RTE_VERSION_NUM(18, 5, 0, 0)
271 	/* Register a single hotremove callback for all devices. */
272 	if (spdk_process_is_primary()) {
273 		rte_dev_event_callback_register(NULL, spdk_pci_device_rte_hotremove, NULL);
274 	}
275 #endif
276 
277 	rte_eal_alarm_set(1, _get_alarm_thread_cb, NULL);
278 	/* alarms are executed in order, so this one will be always executed
279 	 * before any real hotremove alarms and we don't need to wait for it.
280 	 */
281 }
282 
283 void
284 spdk_pci_fini(void)
285 {
286 	struct spdk_pci_device *dev;
287 	char bdf[32];
288 
289 	cleanup_pci_devices();
290 	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
291 		if (dev->internal.attached) {
292 			spdk_pci_addr_fmt(bdf, sizeof(bdf), &dev->addr);
293 			fprintf(stderr, "Device %s is still attached at shutdown!\n", bdf);
294 		}
295 	}
296 
297 #if RTE_VERSION >= RTE_VERSION_NUM(18, 5, 0, 0)
298 	if (spdk_process_is_primary()) {
299 		rte_dev_event_callback_unregister(NULL, spdk_pci_device_rte_hotremove, NULL);
300 	}
301 #endif
302 }
303 
304 int
305 spdk_pci_device_init(struct rte_pci_driver *_drv,
306 		     struct rte_pci_device *_dev)
307 {
308 	struct spdk_pci_driver *driver = (struct spdk_pci_driver *)_drv;
309 	struct spdk_pci_device *dev;
310 	int rc;
311 
312 #if RTE_VERSION < RTE_VERSION_NUM(18, 11, 0, 0)
313 	if (!driver->cb_fn) {
314 		/* Return a positive value to indicate that this device does
315 		 * not belong to this driver, but this isn't an error.
316 		 */
317 		return 1;
318 	}
319 #endif
320 
321 	dev = calloc(1, sizeof(*dev));
322 	if (dev == NULL) {
323 		return -1;
324 	}
325 
326 	dev->dev_handle = _dev;
327 
328 	dev->addr.domain = _dev->addr.domain;
329 	dev->addr.bus = _dev->addr.bus;
330 	dev->addr.dev = _dev->addr.devid;
331 	dev->addr.func = _dev->addr.function;
332 	dev->id.vendor_id = _dev->id.vendor_id;
333 	dev->id.device_id = _dev->id.device_id;
334 	dev->id.subvendor_id = _dev->id.subsystem_vendor_id;
335 	dev->id.subdevice_id = _dev->id.subsystem_device_id;
336 	dev->socket_id = _dev->device.numa_node;
337 	dev->type = "pci";
338 
339 	dev->map_bar = spdk_map_bar_rte;
340 	dev->unmap_bar = spdk_unmap_bar_rte;
341 	dev->cfg_read = spdk_cfg_read_rte;
342 	dev->cfg_write = spdk_cfg_write_rte;
343 	dev->detach = spdk_detach_rte;
344 
345 	dev->internal.driver = driver;
346 	dev->internal.claim_fd = -1;
347 
348 	if (driver->cb_fn != NULL) {
349 		rc = driver->cb_fn(driver->cb_arg, dev);
350 		if (rc != 0) {
351 			free(dev);
352 			return rc;
353 		}
354 		dev->internal.attached = true;
355 	}
356 
357 	pthread_mutex_lock(&g_pci_mutex);
358 	TAILQ_INSERT_TAIL(&g_pci_hotplugged_devices, dev, internal.tailq);
359 	pthread_mutex_unlock(&g_pci_mutex);
360 	return 0;
361 }
362 
363 int
364 spdk_pci_device_fini(struct rte_pci_device *_dev)
365 {
366 	struct spdk_pci_device *dev;
367 
368 	pthread_mutex_lock(&g_pci_mutex);
369 	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
370 		if (dev->dev_handle == _dev) {
371 			break;
372 		}
373 	}
374 
375 	if (dev == NULL || dev->internal.attached) {
376 		/* The device might be still referenced somewhere in SPDK. */
377 		pthread_mutex_unlock(&g_pci_mutex);
378 		return -1;
379 	}
380 
381 	assert(!dev->internal.removed);
382 	dev->internal.removed = true;
383 	pthread_mutex_unlock(&g_pci_mutex);
384 	return 0;
385 
386 }
387 
388 void
389 spdk_pci_device_detach(struct spdk_pci_device *dev)
390 {
391 	assert(dev->internal.attached);
392 
393 	if (dev->internal.claim_fd >= 0) {
394 		spdk_pci_device_unclaim(dev);
395 	}
396 
397 	dev->internal.attached = false;
398 	dev->detach(dev);
399 
400 	cleanup_pci_devices();
401 }
402 
403 int
404 spdk_pci_device_attach(struct spdk_pci_driver *driver,
405 		       spdk_pci_enum_cb enum_cb,
406 		       void *enum_ctx, struct spdk_pci_addr *pci_address)
407 {
408 	struct spdk_pci_device *dev;
409 	int rc;
410 	char bdf[32];
411 
412 	spdk_pci_addr_fmt(bdf, sizeof(bdf), pci_address);
413 
414 	cleanup_pci_devices();
415 
416 	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
417 		if (spdk_pci_addr_compare(&dev->addr, pci_address) == 0) {
418 			break;
419 		}
420 	}
421 
422 	if (dev != NULL && dev->internal.driver == driver) {
423 		pthread_mutex_lock(&g_pci_mutex);
424 		if (dev->internal.attached || dev->internal.pending_removal) {
425 			pthread_mutex_unlock(&g_pci_mutex);
426 			return -1;
427 		}
428 
429 		rc = enum_cb(enum_ctx, dev);
430 		if (rc == 0) {
431 			dev->internal.attached = true;
432 		}
433 		pthread_mutex_unlock(&g_pci_mutex);
434 		return rc;
435 	}
436 
437 	if (!driver->is_registered) {
438 		driver->is_registered = true;
439 		rte_pci_register(&driver->driver);
440 	}
441 
442 	driver->cb_fn = enum_cb;
443 	driver->cb_arg = enum_ctx;
444 
445 #if RTE_VERSION >= RTE_VERSION_NUM(18, 11, 0, 0)
446 	int i = 0;
447 
448 	do {
449 		rc = rte_eal_hotplug_add("pci", bdf, "");
450 	} while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT);
451 
452 	if (i > 1 && rc == -EEXIST) {
453 		/* Even though the previous request timed out, the device
454 		 * was attached successfully.
455 		 */
456 		rc = 0;
457 	}
458 #else
459 	rc = rte_eal_dev_attach(bdf, "");
460 #endif
461 
462 	driver->cb_arg = NULL;
463 	driver->cb_fn = NULL;
464 
465 	cleanup_pci_devices();
466 	return rc == 0 ? 0 : -1;
467 }
468 
469 /* Note: You can call spdk_pci_enumerate from more than one thread
470  *       simultaneously safely, but you cannot call spdk_pci_enumerate
471  *       and rte_eal_pci_probe simultaneously.
472  */
473 int
474 spdk_pci_enumerate(struct spdk_pci_driver *driver,
475 		   spdk_pci_enum_cb enum_cb,
476 		   void *enum_ctx)
477 {
478 	struct spdk_pci_device *dev;
479 	int rc;
480 
481 	cleanup_pci_devices();
482 
483 	pthread_mutex_lock(&g_pci_mutex);
484 	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
485 		if (dev->internal.attached ||
486 		    dev->internal.driver != driver ||
487 		    dev->internal.pending_removal) {
488 			continue;
489 		}
490 
491 		rc = enum_cb(enum_ctx, dev);
492 		if (rc == 0) {
493 			dev->internal.attached = true;
494 		} else if (rc < 0) {
495 			pthread_mutex_unlock(&g_pci_mutex);
496 			return -1;
497 		}
498 	}
499 	pthread_mutex_unlock(&g_pci_mutex);
500 
501 	if (!driver->is_registered) {
502 		driver->is_registered = true;
503 		rte_pci_register(&driver->driver);
504 	}
505 
506 	driver->cb_fn = enum_cb;
507 	driver->cb_arg = enum_ctx;
508 
509 	if (rte_bus_scan() != 0 || rte_bus_probe() != 0) {
510 		driver->cb_arg = NULL;
511 		driver->cb_fn = NULL;
512 		return -1;
513 	}
514 
515 	driver->cb_arg = NULL;
516 	driver->cb_fn = NULL;
517 
518 	cleanup_pci_devices();
519 	return 0;
520 }
521 
522 struct spdk_pci_device *
523 spdk_pci_get_first_device(void)
524 {
525 	return TAILQ_FIRST(&g_pci_devices);
526 }
527 
528 struct spdk_pci_device *
529 spdk_pci_get_next_device(struct spdk_pci_device *prev)
530 {
531 	return TAILQ_NEXT(prev, internal.tailq);
532 }
533 
534 int
535 spdk_pci_device_map_bar(struct spdk_pci_device *dev, uint32_t bar,
536 			void **mapped_addr, uint64_t *phys_addr, uint64_t *size)
537 {
538 	return dev->map_bar(dev, bar, mapped_addr, phys_addr, size);
539 }
540 
541 int
542 spdk_pci_device_unmap_bar(struct spdk_pci_device *dev, uint32_t bar, void *addr)
543 {
544 	return dev->unmap_bar(dev, bar, addr);
545 }
546 
547 uint32_t
548 spdk_pci_device_get_domain(struct spdk_pci_device *dev)
549 {
550 	return dev->addr.domain;
551 }
552 
553 uint8_t
554 spdk_pci_device_get_bus(struct spdk_pci_device *dev)
555 {
556 	return dev->addr.bus;
557 }
558 
559 uint8_t
560 spdk_pci_device_get_dev(struct spdk_pci_device *dev)
561 {
562 	return dev->addr.dev;
563 }
564 
565 uint8_t
566 spdk_pci_device_get_func(struct spdk_pci_device *dev)
567 {
568 	return dev->addr.func;
569 }
570 
571 uint16_t
572 spdk_pci_device_get_vendor_id(struct spdk_pci_device *dev)
573 {
574 	return dev->id.vendor_id;
575 }
576 
577 uint16_t
578 spdk_pci_device_get_device_id(struct spdk_pci_device *dev)
579 {
580 	return dev->id.device_id;
581 }
582 
583 uint16_t
584 spdk_pci_device_get_subvendor_id(struct spdk_pci_device *dev)
585 {
586 	return dev->id.subvendor_id;
587 }
588 
589 uint16_t
590 spdk_pci_device_get_subdevice_id(struct spdk_pci_device *dev)
591 {
592 	return dev->id.subdevice_id;
593 }
594 
595 struct spdk_pci_id
596 spdk_pci_device_get_id(struct spdk_pci_device *dev)
597 {
598 	return dev->id;
599 }
600 
601 int
602 spdk_pci_device_get_socket_id(struct spdk_pci_device *dev)
603 {
604 	return dev->socket_id;
605 }
606 
607 int
608 spdk_pci_device_cfg_read(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
609 {
610 	return dev->cfg_read(dev, value, len, offset);
611 }
612 
613 int
614 spdk_pci_device_cfg_write(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
615 {
616 	return dev->cfg_write(dev, value, len, offset);
617 }
618 
/* Read one byte of config space at offset into *value. */
int
spdk_pci_device_cfg_read8(struct spdk_pci_device *dev, uint8_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, sizeof(*value), offset);
}
624 
/* Write the one-byte value to config space at offset. */
int
spdk_pci_device_cfg_write8(struct spdk_pci_device *dev, uint8_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, sizeof(value), offset);
}
630 
/* Read two bytes of config space at offset into *value. */
int
spdk_pci_device_cfg_read16(struct spdk_pci_device *dev, uint16_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, sizeof(*value), offset);
}
636 
/* Write the two-byte value to config space at offset. */
int
spdk_pci_device_cfg_write16(struct spdk_pci_device *dev, uint16_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, sizeof(value), offset);
}
642 
/* Read four bytes of config space at offset into *value. */
int
spdk_pci_device_cfg_read32(struct spdk_pci_device *dev, uint32_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, sizeof(*value), offset);
}
648 
/* Write the four-byte value to config space at offset. */
int
spdk_pci_device_cfg_write32(struct spdk_pci_device *dev, uint32_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, sizeof(value), offset);
}
654 
655 int
656 spdk_pci_device_get_serial_number(struct spdk_pci_device *dev, char *sn, size_t len)
657 {
658 	int err;
659 	uint32_t pos, header = 0;
660 	uint32_t i, buf[2];
661 
662 	if (len < 17) {
663 		return -1;
664 	}
665 
666 	err = spdk_pci_device_cfg_read32(dev, &header, PCI_CFG_SIZE);
667 	if (err || !header) {
668 		return -1;
669 	}
670 
671 	pos = PCI_CFG_SIZE;
672 	while (1) {
673 		if ((header & 0x0000ffff) == PCI_EXT_CAP_ID_SN) {
674 			if (pos) {
675 				/* skip the header */
676 				pos += 4;
677 				for (i = 0; i < 2; i++) {
678 					err = spdk_pci_device_cfg_read32(dev, &buf[i], pos + 4 * i);
679 					if (err) {
680 						return -1;
681 					}
682 				}
683 				snprintf(sn, len, "%08x%08x", buf[1], buf[0]);
684 				return 0;
685 			}
686 		}
687 		pos = (header >> 20) & 0xffc;
688 		/* 0 if no other items exist */
689 		if (pos < PCI_CFG_SIZE) {
690 			return -1;
691 		}
692 		err = spdk_pci_device_cfg_read32(dev, &header, pos);
693 		if (err) {
694 			return -1;
695 		}
696 	}
697 	return -1;
698 }
699 
700 struct spdk_pci_addr
701 spdk_pci_device_get_addr(struct spdk_pci_device *dev)
702 {
703 	return dev->addr;
704 }
705 
706 bool
707 spdk_pci_device_is_removed(struct spdk_pci_device *dev)
708 {
709 	return dev->internal.pending_removal;
710 }
711 
712 int
713 spdk_pci_addr_compare(const struct spdk_pci_addr *a1, const struct spdk_pci_addr *a2)
714 {
715 	if (a1->domain > a2->domain) {
716 		return 1;
717 	} else if (a1->domain < a2->domain) {
718 		return -1;
719 	} else if (a1->bus > a2->bus) {
720 		return 1;
721 	} else if (a1->bus < a2->bus) {
722 		return -1;
723 	} else if (a1->dev > a2->dev) {
724 		return 1;
725 	} else if (a1->dev < a2->dev) {
726 		return -1;
727 	} else if (a1->func > a2->func) {
728 		return 1;
729 	} else if (a1->func < a2->func) {
730 		return -1;
731 	}
732 
733 	return 0;
734 }
735 
736 #ifdef __linux__
737 int
738 spdk_pci_device_claim(struct spdk_pci_device *dev)
739 {
740 	int dev_fd;
741 	char dev_name[64];
742 	int pid;
743 	void *dev_map;
744 	struct flock pcidev_lock = {
745 		.l_type = F_WRLCK,
746 		.l_whence = SEEK_SET,
747 		.l_start = 0,
748 		.l_len = 0,
749 	};
750 
751 	snprintf(dev_name, sizeof(dev_name), "/tmp/spdk_pci_lock_%04x:%02x:%02x.%x",
752 		 dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func);
753 
754 	dev_fd = open(dev_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
755 	if (dev_fd == -1) {
756 		fprintf(stderr, "could not open %s\n", dev_name);
757 		return -errno;
758 	}
759 
760 	if (ftruncate(dev_fd, sizeof(int)) != 0) {
761 		fprintf(stderr, "could not truncate %s\n", dev_name);
762 		close(dev_fd);
763 		return -errno;
764 	}
765 
766 	dev_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
767 		       MAP_SHARED, dev_fd, 0);
768 	if (dev_map == MAP_FAILED) {
769 		fprintf(stderr, "could not mmap dev %s (%d)\n", dev_name, errno);
770 		close(dev_fd);
771 		return -errno;
772 	}
773 
774 	if (fcntl(dev_fd, F_SETLK, &pcidev_lock) != 0) {
775 		pid = *(int *)dev_map;
776 		fprintf(stderr, "Cannot create lock on device %s, probably"
777 			" process %d has claimed it\n", dev_name, pid);
778 		munmap(dev_map, sizeof(int));
779 		close(dev_fd);
780 		/* F_SETLK returns unspecified errnos, normalize them */
781 		return -EACCES;
782 	}
783 
784 	*(int *)dev_map = (int)getpid();
785 	munmap(dev_map, sizeof(int));
786 	dev->internal.claim_fd = dev_fd;
787 	/* Keep dev_fd open to maintain the lock. */
788 	return 0;
789 }
790 
791 void
792 spdk_pci_device_unclaim(struct spdk_pci_device *dev)
793 {
794 	char dev_name[64];
795 
796 	snprintf(dev_name, sizeof(dev_name), "/tmp/spdk_pci_lock_%04x:%02x:%02x.%x",
797 		 dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func);
798 
799 	close(dev->internal.claim_fd);
800 	dev->internal.claim_fd = -1;
801 	unlink(dev_name);
802 }
803 #endif /* __linux__ */
804 
805 #ifdef __FreeBSD__
/* FreeBSD: claiming is not implemented yet, so success is always reported. */
int
spdk_pci_device_claim(struct spdk_pci_device *dev)
{
	/* TODO */
	(void)dev;

	return 0;
}
812 
/* FreeBSD: unclaiming is not implemented yet, so this does nothing. */
void
spdk_pci_device_unclaim(struct spdk_pci_device *dev)
{
	/* TODO */
	(void)dev;
}
818 #endif /* __FreeBSD__ */
819 
820 int
821 spdk_pci_addr_parse(struct spdk_pci_addr *addr, const char *bdf)
822 {
823 	unsigned domain, bus, dev, func;
824 
825 	if (addr == NULL || bdf == NULL) {
826 		return -EINVAL;
827 	}
828 
829 	if ((sscanf(bdf, "%x:%x:%x.%x", &domain, &bus, &dev, &func) == 4) ||
830 	    (sscanf(bdf, "%x.%x.%x.%x", &domain, &bus, &dev, &func) == 4)) {
831 		/* Matched a full address - all variables are initialized */
832 	} else if (sscanf(bdf, "%x:%x:%x", &domain, &bus, &dev) == 3) {
833 		func = 0;
834 	} else if ((sscanf(bdf, "%x:%x.%x", &bus, &dev, &func) == 3) ||
835 		   (sscanf(bdf, "%x.%x.%x", &bus, &dev, &func) == 3)) {
836 		domain = 0;
837 	} else if ((sscanf(bdf, "%x:%x", &bus, &dev) == 2) ||
838 		   (sscanf(bdf, "%x.%x", &bus, &dev) == 2)) {
839 		domain = 0;
840 		func = 0;
841 	} else {
842 		return -EINVAL;
843 	}
844 
845 	if (bus > 0xFF || dev > 0x1F || func > 7) {
846 		return -EINVAL;
847 	}
848 
849 	addr->domain = domain;
850 	addr->bus = bus;
851 	addr->dev = dev;
852 	addr->func = func;
853 
854 	return 0;
855 }
856 
857 int
858 spdk_pci_addr_fmt(char *bdf, size_t sz, const struct spdk_pci_addr *addr)
859 {
860 	int rc;
861 
862 	rc = snprintf(bdf, sz, "%04x:%02x:%02x.%x",
863 		      addr->domain, addr->bus,
864 		      addr->dev, addr->func);
865 
866 	if (rc > 0 && (size_t)rc < sz) {
867 		return 0;
868 	}
869 
870 	return -1;
871 }
872 
873 void
874 spdk_pci_hook_device(struct spdk_pci_driver *drv, struct spdk_pci_device *dev)
875 {
876 	assert(dev->map_bar != NULL);
877 	assert(dev->unmap_bar != NULL);
878 	assert(dev->cfg_read != NULL);
879 	assert(dev->cfg_write != NULL);
880 	assert(dev->detach != NULL);
881 	dev->internal.driver = drv;
882 	TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq);
883 }
884 
885 void
886 spdk_pci_unhook_device(struct spdk_pci_device *dev)
887 {
888 	assert(!dev->internal.attached);
889 	TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq);
890 }
891 
892 const char *
893 spdk_pci_device_get_type(const struct spdk_pci_device *dev)
894 {
895 	return dev->type;
896 }
897