xref: /spdk/lib/env_dpdk/pci.c (revision 94a84ae98590bea46939eb1dcd7a9876bd393b54)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "env_internal.h"
35 
36 #include <rte_alarm.h>
37 #include "spdk/env.h"
38 
/* sysfs path of the PCI drivers directory. */
#define SYSFS_PCI_DRIVERS		"/sys/bus/pci/drivers"

/* Config-space offset at which the extended capability walk starts. */
#define PCI_CFG_SIZE			256
/* Extended capability ID matched when reading the device serial number. */
#define PCI_EXT_CAP_ID_SN		0x03

/* Hotplug in DPDK 18.11+ is not robust: several applications starting at
 * the same moment may make the internal IPC misbehave. When that happens
 * the request is simply retried, at most this many extra times.
 */
#define DPDK_HOTPLUG_RETRY_COUNT	4
48 
/* Identity of the DPDK alarm/interrupt thread (stored by _get_alarm_thread_cb) */
static pthread_t g_dpdk_tid;

/* Taken in this file around accesses to the device lists and removal flags */
static pthread_mutex_t g_pci_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Devices known to SPDK */
static TAILQ_HEAD(, spdk_pci_device) g_pci_devices =
	TAILQ_HEAD_INITIALIZER(g_pci_devices);

/* Devices hotplugged on a dpdk thread; moved to g_pci_devices by
 * cleanup_pci_devices()
 */
static TAILQ_HEAD(, spdk_pci_device) g_pci_hotplugged_devices =
	TAILQ_HEAD_INITIALIZER(g_pci_hotplugged_devices);

/* Drivers added through spdk_pci_driver_register() */
static TAILQ_HEAD(, spdk_pci_driver) g_pci_drivers =
	TAILQ_HEAD_INITIALIZER(g_pci_drivers);
57 
58 static int
59 spdk_map_bar_rte(struct spdk_pci_device *device, uint32_t bar,
60 		 void **mapped_addr, uint64_t *phys_addr, uint64_t *size)
61 {
62 	struct rte_pci_device *dev = device->dev_handle;
63 
64 	*mapped_addr = dev->mem_resource[bar].addr;
65 	*phys_addr = (uint64_t)dev->mem_resource[bar].phys_addr;
66 	*size = (uint64_t)dev->mem_resource[bar].len;
67 
68 	return 0;
69 }
70 
/* unmap_bar implementation for DPDK-backed devices: nothing is undone
 * here, the function only reports success (0).
 */
static int
spdk_unmap_bar_rte(struct spdk_pci_device *device, uint32_t bar, void *addr)
{
	const int rc = 0;

	return rc;
}
76 
77 static int
78 spdk_cfg_read_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
79 {
80 	int rc;
81 
82 	rc = rte_pci_read_config(dev->dev_handle, value, len, offset);
83 
84 #if defined(__FreeBSD__) && RTE_VERSION < RTE_VERSION_NUM(18, 11, 0, 0)
85 	/* Older DPDKs return 0 on success and -1 on failure */
86 	return rc;
87 #endif
88 	return (rc > 0 && (uint32_t) rc == len) ? 0 : -1;
89 }
90 
91 static int
92 spdk_cfg_write_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
93 {
94 	int rc;
95 
96 	rc = rte_pci_write_config(dev->dev_handle, value, len, offset);
97 
98 #ifdef __FreeBSD__
99 	/* DPDK returns 0 on success and -1 on failure */
100 	return rc;
101 #endif
102 	return (rc > 0 && (uint32_t) rc == len) ? 0 : -1;
103 }
104 
105 static void
106 spdk_detach_rte_cb(void *_dev)
107 {
108 	struct rte_pci_device *rte_dev = _dev;
109 
110 #if RTE_VERSION >= RTE_VERSION_NUM(18, 11, 0, 0)
111 	char bdf[32];
112 	int i = 0, rc;
113 
114 	snprintf(bdf, sizeof(bdf), "%s", rte_dev->device.name);
115 	do {
116 		rc = rte_eal_hotplug_remove("pci", bdf);
117 	} while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT);
118 #else
119 	rte_eal_dev_detach(&rte_dev->device);
120 #endif
121 }
122 
123 static void
124 spdk_detach_rte(struct spdk_pci_device *dev)
125 {
126 	struct rte_pci_device *rte_dev = dev->dev_handle;
127 	int i;
128 	bool removed;
129 
130 	/* The device was already marked as available and could be attached
131 	 * again while we go asynchronous, so we explicitly forbid that.
132 	 */
133 	dev->internal.pending_removal = true;
134 	if (spdk_process_is_primary() && !pthread_equal(g_dpdk_tid, pthread_self())) {
135 		rte_eal_alarm_set(1, spdk_detach_rte_cb, rte_dev);
136 		/* wait up to 2s for the cb to finish executing */
137 		for (i = 2000; i > 0; i--) {
138 
139 			spdk_delay_us(1000);
140 			pthread_mutex_lock(&g_pci_mutex);
141 			removed = dev->internal.removed;
142 			pthread_mutex_unlock(&g_pci_mutex);
143 
144 			if (removed) {
145 				break;
146 			}
147 		}
148 
149 		/* besides checking the removed flag, we also need to wait
150 		 * for the dpdk detach function to unwind, as it's doing some
151 		 * operations even after calling our detach callback. Simply
152 		 * cancel the alarm - if it started executing already, this
153 		 * call will block and wait for it to finish.
154 		 */
155 		rte_eal_alarm_cancel(spdk_detach_rte_cb, rte_dev);
156 
157 		/* the device could have been finally removed, so just check
158 		 * it again.
159 		 */
160 		pthread_mutex_lock(&g_pci_mutex);
161 		removed = dev->internal.removed;
162 		pthread_mutex_unlock(&g_pci_mutex);
163 		if (!removed) {
164 			fprintf(stderr, "Timeout waiting for DPDK to remove PCI device %s.\n",
165 				rte_dev->name);
166 			/* If we reach this state, then the device couldn't be removed and most likely
167 			   a subsequent hot add of a device in the same BDF will fail */
168 		}
169 	} else {
170 		spdk_detach_rte_cb(rte_dev);
171 	}
172 }
173 
174 void
175 spdk_pci_driver_register(struct spdk_pci_driver *driver)
176 {
177 	TAILQ_INSERT_TAIL(&g_pci_drivers, driver, tailq);
178 }
179 
180 #if RTE_VERSION >= RTE_VERSION_NUM(18, 5, 0, 0)
/* Alarm callback scheduled by spdk_pci_device_rte_hotremove(): detach the
 * spdk_pci_device passed as `dev`.
 */
static void
spdk_pci_device_rte_hotremove_cb(void *dev)
{
	struct spdk_pci_device *pci_dev = dev;

	spdk_detach_rte(pci_dev);
}
186 
187 static void
188 spdk_pci_device_rte_hotremove(const char *device_name,
189 			      enum rte_dev_event_type event,
190 			      void *cb_arg)
191 {
192 	struct spdk_pci_device *dev;
193 	bool can_detach = false;
194 
195 	if (event != RTE_DEV_EVENT_REMOVE) {
196 		return;
197 	}
198 
199 	pthread_mutex_lock(&g_pci_mutex);
200 	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
201 		struct rte_pci_device *rte_dev = dev->dev_handle;
202 		if (strcmp(rte_dev->name, device_name) == 0 &&
203 		    !dev->internal.pending_removal) {
204 			can_detach = !dev->internal.attached;
205 			/* prevent any further attaches */
206 			dev->internal.pending_removal = true;
207 			break;
208 		}
209 	}
210 	pthread_mutex_unlock(&g_pci_mutex);
211 
212 	if (dev != NULL && can_detach) {
213 		/* If device is not attached, we can remove it right away.
214 		 *
215 		 * Because the user's callback is invoked in eal interrupt
216 		 * callback, the interrupt callback need to be finished before
217 		 * it can be unregistered when detaching device. So finish
218 		 * callback soon and use a deferred removal to detach device
219 		 * is need. It is a workaround, once the device detaching be
220 		 * moved into the eal in the future, the deferred removal could
221 		 * be deleted.
222 		 */
223 		rte_eal_alarm_set(1, spdk_pci_device_rte_hotremove_cb, dev);
224 	}
225 }
226 #endif
227 
228 static void
229 cleanup_pci_devices(void)
230 {
231 	struct spdk_pci_device *dev, *tmp;
232 
233 	pthread_mutex_lock(&g_pci_mutex);
234 	/* cleanup removed devices */
235 	TAILQ_FOREACH_SAFE(dev, &g_pci_devices, internal.tailq, tmp) {
236 		if (!dev->internal.removed) {
237 			continue;
238 		}
239 
240 		spdk_vtophys_pci_device_removed(dev->dev_handle);
241 		TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq);
242 		free(dev);
243 	}
244 
245 	/* add newly-attached devices */
246 	TAILQ_FOREACH_SAFE(dev, &g_pci_hotplugged_devices, internal.tailq, tmp) {
247 		TAILQ_REMOVE(&g_pci_hotplugged_devices, dev, internal.tailq);
248 		TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq);
249 		spdk_vtophys_pci_device_added(dev->dev_handle);
250 	}
251 	pthread_mutex_unlock(&g_pci_mutex);
252 }
253 
254 static void
255 _get_alarm_thread_cb(void *unused)
256 {
257 	g_dpdk_tid = pthread_self();
258 }
259 
260 void
261 spdk_pci_init(void)
262 {
263 #if RTE_VERSION >= RTE_VERSION_NUM(18, 11, 0, 0)
264 	struct spdk_pci_driver *driver;
265 
266 	/* We need to pre-register pci drivers for the pci devices to be
267 	 * attachable in multi-process with DPDK 18.11+.
268 	 *
269 	 * DPDK 18.11+ does its best to ensure all devices are equally
270 	 * attached or detached in all processes within a shared memory group.
271 	 * For SPDK it means that if a device is hotplugged in the primary,
272 	 * then DPDK will automatically send an IPC hotplug request to all other
273 	 * processes. Those other processes may not have the same SPDK PCI
274 	 * driver registered and may fail to attach the device. DPDK will send
275 	 * back the failure status, and the the primary process will also fail
276 	 * to hotplug the device. To prevent that, we need to pre-register the
277 	 * pci drivers here.
278 	 */
279 	TAILQ_FOREACH(driver, &g_pci_drivers, tailq) {
280 		assert(!driver->is_registered);
281 		driver->is_registered = true;
282 		rte_pci_register(&driver->driver);
283 	}
284 #endif
285 
286 #if RTE_VERSION >= RTE_VERSION_NUM(18, 5, 0, 0)
287 	/* Register a single hotremove callback for all devices. */
288 	if (spdk_process_is_primary()) {
289 		rte_dev_event_callback_register(NULL, spdk_pci_device_rte_hotremove, NULL);
290 	}
291 #endif
292 
293 	rte_eal_alarm_set(1, _get_alarm_thread_cb, NULL);
294 	/* alarms are executed in order, so this one will be always executed
295 	 * before any real hotremove alarms and we don't need to wait for it.
296 	 */
297 }
298 
299 void
300 spdk_pci_fini(void)
301 {
302 	struct spdk_pci_device *dev;
303 	char bdf[32];
304 
305 	cleanup_pci_devices();
306 	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
307 		if (dev->internal.attached) {
308 			spdk_pci_addr_fmt(bdf, sizeof(bdf), &dev->addr);
309 			fprintf(stderr, "Device %s is still attached at shutdown!\n", bdf);
310 		}
311 	}
312 
313 #if RTE_VERSION >= RTE_VERSION_NUM(18, 5, 0, 0)
314 	if (spdk_process_is_primary()) {
315 		rte_dev_event_callback_unregister(NULL, spdk_pci_device_rte_hotremove, NULL);
316 	}
317 #endif
318 }
319 
320 int
321 spdk_pci_device_init(struct rte_pci_driver *_drv,
322 		     struct rte_pci_device *_dev)
323 {
324 	struct spdk_pci_driver *driver = (struct spdk_pci_driver *)_drv;
325 	struct spdk_pci_device *dev;
326 	int rc;
327 
328 #if RTE_VERSION < RTE_VERSION_NUM(18, 11, 0, 0)
329 	if (!driver->cb_fn) {
330 		/* Return a positive value to indicate that this device does
331 		 * not belong to this driver, but this isn't an error.
332 		 */
333 		return 1;
334 	}
335 #endif
336 
337 	dev = calloc(1, sizeof(*dev));
338 	if (dev == NULL) {
339 		return -1;
340 	}
341 
342 	dev->dev_handle = _dev;
343 
344 	dev->addr.domain = _dev->addr.domain;
345 	dev->addr.bus = _dev->addr.bus;
346 	dev->addr.dev = _dev->addr.devid;
347 	dev->addr.func = _dev->addr.function;
348 	dev->id.vendor_id = _dev->id.vendor_id;
349 	dev->id.device_id = _dev->id.device_id;
350 	dev->id.subvendor_id = _dev->id.subsystem_vendor_id;
351 	dev->id.subdevice_id = _dev->id.subsystem_device_id;
352 	dev->socket_id = _dev->device.numa_node;
353 	dev->type = "pci";
354 
355 	dev->map_bar = spdk_map_bar_rte;
356 	dev->unmap_bar = spdk_unmap_bar_rte;
357 	dev->cfg_read = spdk_cfg_read_rte;
358 	dev->cfg_write = spdk_cfg_write_rte;
359 	dev->detach = spdk_detach_rte;
360 
361 	dev->internal.driver = driver;
362 	dev->internal.claim_fd = -1;
363 
364 	if (driver->cb_fn != NULL) {
365 		rc = driver->cb_fn(driver->cb_arg, dev);
366 		if (rc != 0) {
367 			free(dev);
368 			return rc;
369 		}
370 		dev->internal.attached = true;
371 	}
372 
373 	pthread_mutex_lock(&g_pci_mutex);
374 	TAILQ_INSERT_TAIL(&g_pci_hotplugged_devices, dev, internal.tailq);
375 	pthread_mutex_unlock(&g_pci_mutex);
376 	return 0;
377 }
378 
379 int
380 spdk_pci_device_fini(struct rte_pci_device *_dev)
381 {
382 	struct spdk_pci_device *dev;
383 
384 	pthread_mutex_lock(&g_pci_mutex);
385 	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
386 		if (dev->dev_handle == _dev) {
387 			break;
388 		}
389 	}
390 
391 	if (dev == NULL || dev->internal.attached) {
392 		/* The device might be still referenced somewhere in SPDK. */
393 		pthread_mutex_unlock(&g_pci_mutex);
394 		return -1;
395 	}
396 
397 	assert(!dev->internal.removed);
398 	dev->internal.removed = true;
399 	pthread_mutex_unlock(&g_pci_mutex);
400 	return 0;
401 
402 }
403 
404 void
405 spdk_pci_device_detach(struct spdk_pci_device *dev)
406 {
407 	assert(dev->internal.attached);
408 
409 	if (dev->internal.claim_fd >= 0) {
410 		spdk_pci_device_unclaim(dev);
411 	}
412 
413 	dev->internal.attached = false;
414 	dev->detach(dev);
415 
416 	cleanup_pci_devices();
417 }
418 
419 int
420 spdk_pci_device_attach(struct spdk_pci_driver *driver,
421 		       spdk_pci_enum_cb enum_cb,
422 		       void *enum_ctx, struct spdk_pci_addr *pci_address)
423 {
424 	struct spdk_pci_device *dev;
425 	int rc;
426 	char bdf[32];
427 
428 	spdk_pci_addr_fmt(bdf, sizeof(bdf), pci_address);
429 
430 	cleanup_pci_devices();
431 
432 	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
433 		if (spdk_pci_addr_compare(&dev->addr, pci_address) == 0) {
434 			break;
435 		}
436 	}
437 
438 	if (dev != NULL && dev->internal.driver == driver) {
439 		pthread_mutex_lock(&g_pci_mutex);
440 		if (dev->internal.attached || dev->internal.pending_removal) {
441 			pthread_mutex_unlock(&g_pci_mutex);
442 			return -1;
443 		}
444 
445 		rc = enum_cb(enum_ctx, dev);
446 		if (rc == 0) {
447 			dev->internal.attached = true;
448 		}
449 		pthread_mutex_unlock(&g_pci_mutex);
450 		return rc;
451 	}
452 
453 	if (!driver->is_registered) {
454 		driver->is_registered = true;
455 		rte_pci_register(&driver->driver);
456 	}
457 
458 	driver->cb_fn = enum_cb;
459 	driver->cb_arg = enum_ctx;
460 
461 #if RTE_VERSION >= RTE_VERSION_NUM(18, 11, 0, 0)
462 	int i = 0;
463 
464 	do {
465 		rc = rte_eal_hotplug_add("pci", bdf, "");
466 	} while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT);
467 
468 	if (i > 1 && rc == -EEXIST) {
469 		/* Even though the previous request timed out, the device
470 		 * was attached successfully.
471 		 */
472 		rc = 0;
473 	}
474 #else
475 	rc = rte_eal_dev_attach(bdf, "");
476 #endif
477 
478 	driver->cb_arg = NULL;
479 	driver->cb_fn = NULL;
480 
481 	cleanup_pci_devices();
482 	return rc == 0 ? 0 : -1;
483 }
484 
485 /* Note: You can call spdk_pci_enumerate from more than one thread
486  *       simultaneously safely, but you cannot call spdk_pci_enumerate
487  *       and rte_eal_pci_probe simultaneously.
488  */
489 int
490 spdk_pci_enumerate(struct spdk_pci_driver *driver,
491 		   spdk_pci_enum_cb enum_cb,
492 		   void *enum_ctx)
493 {
494 	struct spdk_pci_device *dev;
495 	int rc;
496 
497 	cleanup_pci_devices();
498 
499 	pthread_mutex_lock(&g_pci_mutex);
500 	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
501 		if (dev->internal.attached ||
502 		    dev->internal.driver != driver ||
503 		    dev->internal.pending_removal) {
504 			continue;
505 		}
506 
507 		rc = enum_cb(enum_ctx, dev);
508 		if (rc == 0) {
509 			dev->internal.attached = true;
510 		} else if (rc < 0) {
511 			pthread_mutex_unlock(&g_pci_mutex);
512 			return -1;
513 		}
514 	}
515 	pthread_mutex_unlock(&g_pci_mutex);
516 
517 	if (!driver->is_registered) {
518 		driver->is_registered = true;
519 		rte_pci_register(&driver->driver);
520 	}
521 
522 	driver->cb_fn = enum_cb;
523 	driver->cb_arg = enum_ctx;
524 
525 	if (rte_bus_scan() != 0 || rte_bus_probe() != 0) {
526 		driver->cb_arg = NULL;
527 		driver->cb_fn = NULL;
528 		return -1;
529 	}
530 
531 	driver->cb_arg = NULL;
532 	driver->cb_fn = NULL;
533 
534 	cleanup_pci_devices();
535 	return 0;
536 }
537 
538 struct spdk_pci_device *
539 spdk_pci_get_first_device(void)
540 {
541 	return TAILQ_FIRST(&g_pci_devices);
542 }
543 
544 struct spdk_pci_device *
545 spdk_pci_get_next_device(struct spdk_pci_device *prev)
546 {
547 	return TAILQ_NEXT(prev, internal.tailq);
548 }
549 
550 int
551 spdk_pci_device_map_bar(struct spdk_pci_device *dev, uint32_t bar,
552 			void **mapped_addr, uint64_t *phys_addr, uint64_t *size)
553 {
554 	return dev->map_bar(dev, bar, mapped_addr, phys_addr, size);
555 }
556 
557 int
558 spdk_pci_device_unmap_bar(struct spdk_pci_device *dev, uint32_t bar, void *addr)
559 {
560 	return dev->unmap_bar(dev, bar, addr);
561 }
562 
563 uint32_t
564 spdk_pci_device_get_domain(struct spdk_pci_device *dev)
565 {
566 	return dev->addr.domain;
567 }
568 
569 uint8_t
570 spdk_pci_device_get_bus(struct spdk_pci_device *dev)
571 {
572 	return dev->addr.bus;
573 }
574 
575 uint8_t
576 spdk_pci_device_get_dev(struct spdk_pci_device *dev)
577 {
578 	return dev->addr.dev;
579 }
580 
581 uint8_t
582 spdk_pci_device_get_func(struct spdk_pci_device *dev)
583 {
584 	return dev->addr.func;
585 }
586 
587 uint16_t
588 spdk_pci_device_get_vendor_id(struct spdk_pci_device *dev)
589 {
590 	return dev->id.vendor_id;
591 }
592 
593 uint16_t
594 spdk_pci_device_get_device_id(struct spdk_pci_device *dev)
595 {
596 	return dev->id.device_id;
597 }
598 
599 uint16_t
600 spdk_pci_device_get_subvendor_id(struct spdk_pci_device *dev)
601 {
602 	return dev->id.subvendor_id;
603 }
604 
605 uint16_t
606 spdk_pci_device_get_subdevice_id(struct spdk_pci_device *dev)
607 {
608 	return dev->id.subdevice_id;
609 }
610 
611 struct spdk_pci_id
612 spdk_pci_device_get_id(struct spdk_pci_device *dev)
613 {
614 	return dev->id;
615 }
616 
617 int
618 spdk_pci_device_get_socket_id(struct spdk_pci_device *dev)
619 {
620 	return dev->socket_id;
621 }
622 
623 int
624 spdk_pci_device_cfg_read(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
625 {
626 	return dev->cfg_read(dev, value, len, offset);
627 }
628 
629 int
630 spdk_pci_device_cfg_write(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
631 {
632 	return dev->cfg_write(dev, value, len, offset);
633 }
634 
/* Read one byte of config space at `offset` into `*value`. */
int
spdk_pci_device_cfg_read8(struct spdk_pci_device *dev, uint8_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, sizeof(*value), offset);
}
640 
/* Write the byte `value` to config space at `offset`. */
int
spdk_pci_device_cfg_write8(struct spdk_pci_device *dev, uint8_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, sizeof(value), offset);
}
646 
/* Read two bytes of config space at `offset` into `*value`. */
int
spdk_pci_device_cfg_read16(struct spdk_pci_device *dev, uint16_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, sizeof(*value), offset);
}
652 
/* Write the two-byte `value` to config space at `offset`. */
int
spdk_pci_device_cfg_write16(struct spdk_pci_device *dev, uint16_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, sizeof(value), offset);
}
658 
/* Read four bytes of config space at `offset` into `*value`. */
int
spdk_pci_device_cfg_read32(struct spdk_pci_device *dev, uint32_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, sizeof(*value), offset);
}
664 
/* Write the four-byte `value` to config space at `offset`. */
int
spdk_pci_device_cfg_write32(struct spdk_pci_device *dev, uint32_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, sizeof(value), offset);
}
670 
671 int
672 spdk_pci_device_get_serial_number(struct spdk_pci_device *dev, char *sn, size_t len)
673 {
674 	int err;
675 	uint32_t pos, header = 0;
676 	uint32_t i, buf[2];
677 
678 	if (len < 17) {
679 		return -1;
680 	}
681 
682 	err = spdk_pci_device_cfg_read32(dev, &header, PCI_CFG_SIZE);
683 	if (err || !header) {
684 		return -1;
685 	}
686 
687 	pos = PCI_CFG_SIZE;
688 	while (1) {
689 		if ((header & 0x0000ffff) == PCI_EXT_CAP_ID_SN) {
690 			if (pos) {
691 				/* skip the header */
692 				pos += 4;
693 				for (i = 0; i < 2; i++) {
694 					err = spdk_pci_device_cfg_read32(dev, &buf[i], pos + 4 * i);
695 					if (err) {
696 						return -1;
697 					}
698 				}
699 				snprintf(sn, len, "%08x%08x", buf[1], buf[0]);
700 				return 0;
701 			}
702 		}
703 		pos = (header >> 20) & 0xffc;
704 		/* 0 if no other items exist */
705 		if (pos < PCI_CFG_SIZE) {
706 			return -1;
707 		}
708 		err = spdk_pci_device_cfg_read32(dev, &header, pos);
709 		if (err) {
710 			return -1;
711 		}
712 	}
713 	return -1;
714 }
715 
716 struct spdk_pci_addr
717 spdk_pci_device_get_addr(struct spdk_pci_device *dev)
718 {
719 	return dev->addr;
720 }
721 
722 bool
723 spdk_pci_device_is_removed(struct spdk_pci_device *dev)
724 {
725 	return dev->internal.pending_removal;
726 }
727 
728 int
729 spdk_pci_addr_compare(const struct spdk_pci_addr *a1, const struct spdk_pci_addr *a2)
730 {
731 	if (a1->domain > a2->domain) {
732 		return 1;
733 	} else if (a1->domain < a2->domain) {
734 		return -1;
735 	} else if (a1->bus > a2->bus) {
736 		return 1;
737 	} else if (a1->bus < a2->bus) {
738 		return -1;
739 	} else if (a1->dev > a2->dev) {
740 		return 1;
741 	} else if (a1->dev < a2->dev) {
742 		return -1;
743 	} else if (a1->func > a2->func) {
744 		return 1;
745 	} else if (a1->func < a2->func) {
746 		return -1;
747 	}
748 
749 	return 0;
750 }
751 
752 #ifdef __linux__
753 int
754 spdk_pci_device_claim(struct spdk_pci_device *dev)
755 {
756 	int dev_fd;
757 	char dev_name[64];
758 	int pid;
759 	void *dev_map;
760 	struct flock pcidev_lock = {
761 		.l_type = F_WRLCK,
762 		.l_whence = SEEK_SET,
763 		.l_start = 0,
764 		.l_len = 0,
765 	};
766 
767 	snprintf(dev_name, sizeof(dev_name), "/tmp/spdk_pci_lock_%04x:%02x:%02x.%x",
768 		 dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func);
769 
770 	dev_fd = open(dev_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
771 	if (dev_fd == -1) {
772 		fprintf(stderr, "could not open %s\n", dev_name);
773 		return -errno;
774 	}
775 
776 	if (ftruncate(dev_fd, sizeof(int)) != 0) {
777 		fprintf(stderr, "could not truncate %s\n", dev_name);
778 		close(dev_fd);
779 		return -errno;
780 	}
781 
782 	dev_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
783 		       MAP_SHARED, dev_fd, 0);
784 	if (dev_map == MAP_FAILED) {
785 		fprintf(stderr, "could not mmap dev %s (%d)\n", dev_name, errno);
786 		close(dev_fd);
787 		return -errno;
788 	}
789 
790 	if (fcntl(dev_fd, F_SETLK, &pcidev_lock) != 0) {
791 		pid = *(int *)dev_map;
792 		fprintf(stderr, "Cannot create lock on device %s, probably"
793 			" process %d has claimed it\n", dev_name, pid);
794 		munmap(dev_map, sizeof(int));
795 		close(dev_fd);
796 		/* F_SETLK returns unspecified errnos, normalize them */
797 		return -EACCES;
798 	}
799 
800 	*(int *)dev_map = (int)getpid();
801 	munmap(dev_map, sizeof(int));
802 	dev->internal.claim_fd = dev_fd;
803 	/* Keep dev_fd open to maintain the lock. */
804 	return 0;
805 }
806 
807 void
808 spdk_pci_device_unclaim(struct spdk_pci_device *dev)
809 {
810 	char dev_name[64];
811 
812 	snprintf(dev_name, sizeof(dev_name), "/tmp/spdk_pci_lock_%04x:%02x:%02x.%x",
813 		 dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func);
814 
815 	close(dev->internal.claim_fd);
816 	dev->internal.claim_fd = -1;
817 	unlink(dev_name);
818 }
819 #endif /* __linux__ */
820 
821 #ifdef __FreeBSD__
/* FreeBSD placeholder: claiming is not implemented, always returns 0. */
int
spdk_pci_device_claim(struct spdk_pci_device *dev)
{
	/* TODO */
	const int rc = 0;

	return rc;
}
828 
/* FreeBSD placeholder: unclaiming is not implemented, does nothing. */
void
spdk_pci_device_unclaim(struct spdk_pci_device *dev)
{
	/* TODO */
	return;
}
834 #endif /* __FreeBSD__ */
835 
836 int
837 spdk_pci_addr_parse(struct spdk_pci_addr *addr, const char *bdf)
838 {
839 	unsigned domain, bus, dev, func;
840 
841 	if (addr == NULL || bdf == NULL) {
842 		return -EINVAL;
843 	}
844 
845 	if ((sscanf(bdf, "%x:%x:%x.%x", &domain, &bus, &dev, &func) == 4) ||
846 	    (sscanf(bdf, "%x.%x.%x.%x", &domain, &bus, &dev, &func) == 4)) {
847 		/* Matched a full address - all variables are initialized */
848 	} else if (sscanf(bdf, "%x:%x:%x", &domain, &bus, &dev) == 3) {
849 		func = 0;
850 	} else if ((sscanf(bdf, "%x:%x.%x", &bus, &dev, &func) == 3) ||
851 		   (sscanf(bdf, "%x.%x.%x", &bus, &dev, &func) == 3)) {
852 		domain = 0;
853 	} else if ((sscanf(bdf, "%x:%x", &bus, &dev) == 2) ||
854 		   (sscanf(bdf, "%x.%x", &bus, &dev) == 2)) {
855 		domain = 0;
856 		func = 0;
857 	} else {
858 		return -EINVAL;
859 	}
860 
861 	if (bus > 0xFF || dev > 0x1F || func > 7) {
862 		return -EINVAL;
863 	}
864 
865 	addr->domain = domain;
866 	addr->bus = bus;
867 	addr->dev = dev;
868 	addr->func = func;
869 
870 	return 0;
871 }
872 
873 int
874 spdk_pci_addr_fmt(char *bdf, size_t sz, const struct spdk_pci_addr *addr)
875 {
876 	int rc;
877 
878 	rc = snprintf(bdf, sz, "%04x:%02x:%02x.%x",
879 		      addr->domain, addr->bus,
880 		      addr->dev, addr->func);
881 
882 	if (rc > 0 && (size_t)rc < sz) {
883 		return 0;
884 	}
885 
886 	return -1;
887 }
888 
889 void
890 spdk_pci_hook_device(struct spdk_pci_driver *drv, struct spdk_pci_device *dev)
891 {
892 	assert(dev->map_bar != NULL);
893 	assert(dev->unmap_bar != NULL);
894 	assert(dev->cfg_read != NULL);
895 	assert(dev->cfg_write != NULL);
896 	assert(dev->detach != NULL);
897 	dev->internal.driver = drv;
898 	TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq);
899 }
900 
901 void
902 spdk_pci_unhook_device(struct spdk_pci_device *dev)
903 {
904 	assert(!dev->internal.attached);
905 	TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq);
906 }
907 
908 const char *
909 spdk_pci_device_get_type(const struct spdk_pci_device *dev)
910 {
911 	return dev->type;
912 }
913