xref: /spdk/lib/env_dpdk/pci.c (revision 712a3f69d32632bf6c862f00200f7f437d3f7529)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "env_internal.h"
35 
36 #include <rte_alarm.h>
37 #include "spdk/env.h"
38 
/* sysfs directory under which the kernel lists PCI drivers. */
#define SYSFS_PCI_DRIVERS	"/sys/bus/pci/drivers"

/* Config-space offset at which spdk_pci_device_get_serial_number() starts
 * walking the extended capability list.
 */
#define PCI_CFG_SIZE		256
/* Extended capability ID matched when looking up the device serial number. */
#define PCI_EXT_CAP_ID_SN	0x03

/* DPDK 18.11+ hotplug isn't robust. Multiple apps starting at the same time
 * might cause the internal IPC to misbehave. Just retry in such case.
 */
#define DPDK_HOTPLUG_RETRY_COUNT 4

/* DPDK alarm/interrupt thread; recorded by _get_alarm_thread_cb(). */
static pthread_t g_dpdk_tid;
/* Taken around accesses to the device lists and the per-device
 * removed/attached/pending_removal flags in most of this file.
 */
static pthread_mutex_t g_pci_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Devices currently known to SPDK. */
static TAILQ_HEAD(, spdk_pci_device) g_pci_devices = TAILQ_HEAD_INITIALIZER(g_pci_devices);
/* devices hotplugged on a dpdk thread; moved to g_pci_devices by
 * cleanup_pci_devices().
 */
static TAILQ_HEAD(, spdk_pci_device) g_pci_hotplugged_devices =
	TAILQ_HEAD_INITIALIZER(g_pci_hotplugged_devices);
/* Drivers added through spdk_pci_driver_register(). */
static TAILQ_HEAD(, spdk_pci_driver) g_pci_drivers = TAILQ_HEAD_INITIALIZER(g_pci_drivers);
57 
58 static int
59 spdk_map_bar_rte(struct spdk_pci_device *device, uint32_t bar,
60 		 void **mapped_addr, uint64_t *phys_addr, uint64_t *size)
61 {
62 	struct rte_pci_device *dev = device->dev_handle;
63 
64 	*mapped_addr = dev->mem_resource[bar].addr;
65 	if (*mapped_addr == NULL) {
66 		return -1;
67 	}
68 
69 	*phys_addr = (uint64_t)dev->mem_resource[bar].phys_addr;
70 	if (*phys_addr == 0) {
71 		return -1;
72 	}
73 
74 	*size = (uint64_t)dev->mem_resource[bar].len;
75 
76 	return 0;
77 }
78 
/*
 * Counterpart of spdk_map_bar_rte(). Nothing is undone here; the function
 * ignores all of its arguments and always reports success (0).
 */
static int
spdk_unmap_bar_rte(struct spdk_pci_device *device, uint32_t bar, void *addr)
{
	(void)device;
	(void)bar;
	(void)addr;

	return 0;
}
84 
85 static int
86 spdk_cfg_read_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
87 {
88 	int rc;
89 
90 	rc = rte_pci_read_config(dev->dev_handle, value, len, offset);
91 
92 #if defined(__FreeBSD__) && RTE_VERSION < RTE_VERSION_NUM(18, 11, 0, 0)
93 	/* Older DPDKs return 0 on success and -1 on failure */
94 	return rc;
95 #endif
96 	return (rc > 0 && (uint32_t) rc == len) ? 0 : -1;
97 }
98 
99 static int
100 spdk_cfg_write_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
101 {
102 	int rc;
103 
104 	rc = rte_pci_write_config(dev->dev_handle, value, len, offset);
105 
106 #ifdef __FreeBSD__
107 	/* DPDK returns 0 on success and -1 on failure */
108 	return rc;
109 #endif
110 	return (rc > 0 && (uint32_t) rc == len) ? 0 : -1;
111 }
112 
113 static void
114 spdk_detach_rte_cb(void *_dev)
115 {
116 	struct rte_pci_device *rte_dev = _dev;
117 
118 #if RTE_VERSION >= RTE_VERSION_NUM(18, 11, 0, 0)
119 	char bdf[32];
120 	int i = 0, rc;
121 
122 	snprintf(bdf, sizeof(bdf), "%s", rte_dev->device.name);
123 	do {
124 		rc = rte_eal_hotplug_remove("pci", bdf);
125 	} while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT);
126 #else
127 	rte_eal_dev_detach(&rte_dev->device);
128 #endif
129 }
130 
131 static void
132 spdk_detach_rte(struct spdk_pci_device *dev)
133 {
134 	struct rte_pci_device *rte_dev = dev->dev_handle;
135 	int i;
136 	bool removed;
137 
138 	/* The device was already marked as available and could be attached
139 	 * again while we go asynchronous, so we explicitly forbid that.
140 	 */
141 	dev->internal.pending_removal = true;
142 	if (spdk_process_is_primary() && !pthread_equal(g_dpdk_tid, pthread_self())) {
143 		rte_eal_alarm_set(1, spdk_detach_rte_cb, rte_dev);
144 		/* wait up to 20ms for the cb to start executing */
145 		for (i = 20; i > 0; i--) {
146 
147 			spdk_delay_us(1000);
148 			pthread_mutex_lock(&g_pci_mutex);
149 			removed = dev->internal.removed;
150 			pthread_mutex_unlock(&g_pci_mutex);
151 
152 			if (removed) {
153 				break;
154 			}
155 		}
156 
157 		/* besides checking the removed flag, we also need to wait
158 		 * for the dpdk detach function to unwind, as it's doing some
159 		 * operations even after calling our detach callback. Simply
160 		 * cancell the alarm - if it started executing already, this
161 		 * call will block and wait for it to finish.
162 		 */
163 		rte_eal_alarm_cancel(spdk_detach_rte_cb, rte_dev);
164 
165 		/* the device could have been finally removed, so just check
166 		 * it again.
167 		 */
168 		pthread_mutex_lock(&g_pci_mutex);
169 		removed = dev->internal.removed;
170 		pthread_mutex_unlock(&g_pci_mutex);
171 		if (!removed) {
172 			fprintf(stderr, "Timeout waiting for DPDK to remove PCI device %s.\n",
173 				rte_dev->name);
174 		}
175 	} else {
176 		spdk_detach_rte_cb(rte_dev);
177 	}
178 }
179 
180 void
181 spdk_pci_driver_register(struct spdk_pci_driver *driver)
182 {
183 	TAILQ_INSERT_TAIL(&g_pci_drivers, driver, tailq);
184 }
185 
186 #if RTE_VERSION >= RTE_VERSION_NUM(18, 5, 0, 0)
/*
 * Alarm callback used for deferred hot-removal: `dev` is the
 * struct spdk_pci_device to detach.
 */
static void
spdk_pci_device_rte_hotremove_cb(void *dev)
{
	struct spdk_pci_device *pci_dev = dev;

	spdk_detach_rte(pci_dev);
}
192 
193 static void
194 spdk_pci_device_rte_hotremove(const char *device_name,
195 			      enum rte_dev_event_type event,
196 			      void *cb_arg)
197 {
198 	struct spdk_pci_device *dev;
199 	bool can_detach = false;
200 
201 	if (event != RTE_DEV_EVENT_REMOVE) {
202 		return;
203 	}
204 
205 	pthread_mutex_lock(&g_pci_mutex);
206 	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
207 		struct rte_pci_device *rte_dev = dev->dev_handle;
208 		if (strcmp(rte_dev->name, device_name) == 0 &&
209 		    !dev->internal.pending_removal) {
210 			can_detach = !dev->internal.attached;
211 			/* prevent any further attaches */
212 			dev->internal.pending_removal = true;
213 			break;
214 		}
215 	}
216 	pthread_mutex_unlock(&g_pci_mutex);
217 
218 	if (dev != NULL && can_detach) {
219 		/* If device is not attached, we can remove it right away.
220 		 *
221 		 * Because the user's callback is invoked in eal interrupt
222 		 * callback, the interrupt callback need to be finished before
223 		 * it can be unregistered when detaching device. So finish
224 		 * callback soon and use a deferred removal to detach device
225 		 * is need. It is a workaround, once the device detaching be
226 		 * moved into the eal in the future, the deferred removal could
227 		 * be deleted.
228 		 */
229 		rte_eal_alarm_set(1, spdk_pci_device_rte_hotremove_cb, dev);
230 	}
231 }
232 #endif
233 
234 static void
235 cleanup_pci_devices(void)
236 {
237 	struct spdk_pci_device *dev, *tmp;
238 
239 	pthread_mutex_lock(&g_pci_mutex);
240 	/* cleanup removed devices */
241 	TAILQ_FOREACH_SAFE(dev, &g_pci_devices, internal.tailq, tmp) {
242 		if (!dev->internal.removed) {
243 			continue;
244 		}
245 
246 		spdk_vtophys_pci_device_removed(dev->dev_handle);
247 		TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq);
248 		free(dev);
249 	}
250 
251 	/* add newly-attached devices */
252 	TAILQ_FOREACH_SAFE(dev, &g_pci_hotplugged_devices, internal.tailq, tmp) {
253 		TAILQ_REMOVE(&g_pci_hotplugged_devices, dev, internal.tailq);
254 		TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq);
255 		spdk_vtophys_pci_device_added(dev->dev_handle);
256 	}
257 	pthread_mutex_unlock(&g_pci_mutex);
258 }
259 
260 static void
261 _get_alarm_thread_cb(void *unused)
262 {
263 	g_dpdk_tid = pthread_self();
264 }
265 
266 void
267 spdk_pci_init(void)
268 {
269 #if RTE_VERSION >= RTE_VERSION_NUM(18, 11, 0, 0)
270 	struct spdk_pci_driver *driver;
271 
272 	/* We need to pre-register pci drivers for the pci devices to be
273 	 * attachable in multi-process with DPDK 18.11+.
274 	 *
275 	 * DPDK 18.11+ does its best to ensure all devices are equally
276 	 * attached or detached in all processes within a shared memory group.
277 	 * For SPDK it means that if a device is hotplugged in the primary,
278 	 * then DPDK will automatically send an IPC hotplug request to all other
279 	 * processes. Those other processes may not have the same SPDK PCI
280 	 * driver registered and may fail to attach the device. DPDK will send
281 	 * back the failure status, and the the primary process will also fail
282 	 * to hotplug the device. To prevent that, we need to pre-register the
283 	 * pci drivers here.
284 	 */
285 	TAILQ_FOREACH(driver, &g_pci_drivers, tailq) {
286 		assert(!driver->is_registered);
287 		driver->is_registered = true;
288 		rte_pci_register(&driver->driver);
289 	}
290 #endif
291 
292 #if RTE_VERSION >= RTE_VERSION_NUM(18, 5, 0, 0)
293 	/* Register a single hotremove callback for all devices. */
294 	if (spdk_process_is_primary()) {
295 		rte_dev_event_callback_register(NULL, spdk_pci_device_rte_hotremove, NULL);
296 	}
297 #endif
298 
299 	rte_eal_alarm_set(1, _get_alarm_thread_cb, NULL);
300 	/* alarms are executed in order, so this one will be always executed
301 	 * before any real hotremove alarms and we don't need to wait for it.
302 	 */
303 }
304 
305 void
306 spdk_pci_fini(void)
307 {
308 	struct spdk_pci_device *dev;
309 	char bdf[32];
310 
311 	cleanup_pci_devices();
312 	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
313 		if (dev->internal.attached) {
314 			spdk_pci_addr_fmt(bdf, sizeof(bdf), &dev->addr);
315 			fprintf(stderr, "Device %s is still attached at shutdown!\n", bdf);
316 		}
317 	}
318 
319 #if RTE_VERSION >= RTE_VERSION_NUM(18, 5, 0, 0)
320 	if (spdk_process_is_primary()) {
321 		rte_dev_event_callback_unregister(NULL, spdk_pci_device_rte_hotremove, NULL);
322 	}
323 #endif
324 }
325 
326 int
327 spdk_pci_device_init(struct rte_pci_driver *_drv,
328 		     struct rte_pci_device *_dev)
329 {
330 	struct spdk_pci_driver *driver = (struct spdk_pci_driver *)_drv;
331 	struct spdk_pci_device *dev;
332 	int rc;
333 
334 #if RTE_VERSION < RTE_VERSION_NUM(18, 11, 0, 0)
335 	if (!driver->cb_fn) {
336 		/* Return a positive value to indicate that this device does
337 		 * not belong to this driver, but this isn't an error.
338 		 */
339 		return 1;
340 	}
341 #endif
342 
343 	dev = calloc(1, sizeof(*dev));
344 	if (dev == NULL) {
345 		return -1;
346 	}
347 
348 	dev->dev_handle = _dev;
349 
350 	dev->addr.domain = _dev->addr.domain;
351 	dev->addr.bus = _dev->addr.bus;
352 	dev->addr.dev = _dev->addr.devid;
353 	dev->addr.func = _dev->addr.function;
354 	dev->id.vendor_id = _dev->id.vendor_id;
355 	dev->id.device_id = _dev->id.device_id;
356 	dev->id.subvendor_id = _dev->id.subsystem_vendor_id;
357 	dev->id.subdevice_id = _dev->id.subsystem_device_id;
358 	dev->socket_id = _dev->device.numa_node;
359 	dev->type = "pci";
360 
361 	dev->map_bar = spdk_map_bar_rte;
362 	dev->unmap_bar = spdk_unmap_bar_rte;
363 	dev->cfg_read = spdk_cfg_read_rte;
364 	dev->cfg_write = spdk_cfg_write_rte;
365 	dev->detach = spdk_detach_rte;
366 
367 	dev->internal.driver = driver;
368 	dev->internal.claim_fd = -1;
369 
370 	if (driver->cb_fn != NULL) {
371 		rc = driver->cb_fn(driver->cb_arg, dev);
372 		if (rc != 0) {
373 			free(dev);
374 			return rc;
375 		}
376 		dev->internal.attached = true;
377 	}
378 
379 	pthread_mutex_lock(&g_pci_mutex);
380 	TAILQ_INSERT_TAIL(&g_pci_hotplugged_devices, dev, internal.tailq);
381 	pthread_mutex_unlock(&g_pci_mutex);
382 	return 0;
383 }
384 
385 int
386 spdk_pci_device_fini(struct rte_pci_device *_dev)
387 {
388 	struct spdk_pci_device *dev;
389 
390 	pthread_mutex_lock(&g_pci_mutex);
391 	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
392 		if (dev->dev_handle == _dev) {
393 			break;
394 		}
395 	}
396 
397 	if (dev == NULL || dev->internal.attached) {
398 		/* The device might be still referenced somewhere in SPDK. */
399 		pthread_mutex_unlock(&g_pci_mutex);
400 		return -1;
401 	}
402 
403 	assert(!dev->internal.removed);
404 	dev->internal.removed = true;
405 	pthread_mutex_unlock(&g_pci_mutex);
406 	return 0;
407 
408 }
409 
410 void
411 spdk_pci_device_detach(struct spdk_pci_device *dev)
412 {
413 	assert(dev->internal.attached);
414 
415 	if (dev->internal.claim_fd >= 0) {
416 		spdk_pci_device_unclaim(dev);
417 	}
418 
419 	dev->internal.attached = false;
420 	dev->detach(dev);
421 
422 	cleanup_pci_devices();
423 }
424 
425 int
426 spdk_pci_device_attach(struct spdk_pci_driver *driver,
427 		       spdk_pci_enum_cb enum_cb,
428 		       void *enum_ctx, struct spdk_pci_addr *pci_address)
429 {
430 	struct spdk_pci_device *dev;
431 	int rc;
432 	char bdf[32];
433 
434 	spdk_pci_addr_fmt(bdf, sizeof(bdf), pci_address);
435 
436 	cleanup_pci_devices();
437 
438 	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
439 		if (spdk_pci_addr_compare(&dev->addr, pci_address) == 0) {
440 			break;
441 		}
442 	}
443 
444 	if (dev != NULL && dev->internal.driver == driver) {
445 		pthread_mutex_lock(&g_pci_mutex);
446 		if (dev->internal.attached || dev->internal.pending_removal) {
447 			pthread_mutex_unlock(&g_pci_mutex);
448 			return -1;
449 		}
450 
451 		rc = enum_cb(enum_ctx, dev);
452 		if (rc == 0) {
453 			dev->internal.attached = true;
454 		}
455 		pthread_mutex_unlock(&g_pci_mutex);
456 		return rc;
457 	}
458 
459 	if (!driver->is_registered) {
460 		driver->is_registered = true;
461 		rte_pci_register(&driver->driver);
462 	}
463 
464 	driver->cb_fn = enum_cb;
465 	driver->cb_arg = enum_ctx;
466 
467 #if RTE_VERSION >= RTE_VERSION_NUM(18, 11, 0, 0)
468 	int i = 0;
469 
470 	do {
471 		rc = rte_eal_hotplug_add("pci", bdf, "");
472 	} while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT);
473 
474 	if (i > 1 && rc == -EEXIST) {
475 		/* Even though the previous request timed out, the device
476 		 * was attached successfully.
477 		 */
478 		rc = 0;
479 	}
480 #else
481 	rc = rte_eal_dev_attach(bdf, "");
482 #endif
483 
484 	driver->cb_arg = NULL;
485 	driver->cb_fn = NULL;
486 
487 	cleanup_pci_devices();
488 	return rc == 0 ? 0 : -1;
489 }
490 
491 /* Note: You can call spdk_pci_enumerate from more than one thread
492  *       simultaneously safely, but you cannot call spdk_pci_enumerate
493  *       and rte_eal_pci_probe simultaneously.
494  */
495 int
496 spdk_pci_enumerate(struct spdk_pci_driver *driver,
497 		   spdk_pci_enum_cb enum_cb,
498 		   void *enum_ctx)
499 {
500 	struct spdk_pci_device *dev;
501 	int rc;
502 
503 	cleanup_pci_devices();
504 
505 	pthread_mutex_lock(&g_pci_mutex);
506 	TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) {
507 		if (dev->internal.attached ||
508 		    dev->internal.driver != driver ||
509 		    dev->internal.pending_removal) {
510 			continue;
511 		}
512 
513 		rc = enum_cb(enum_ctx, dev);
514 		if (rc == 0) {
515 			dev->internal.attached = true;
516 		} else if (rc < 0) {
517 			pthread_mutex_unlock(&g_pci_mutex);
518 			return -1;
519 		}
520 	}
521 	pthread_mutex_unlock(&g_pci_mutex);
522 
523 	if (!driver->is_registered) {
524 		driver->is_registered = true;
525 		rte_pci_register(&driver->driver);
526 	}
527 
528 	driver->cb_fn = enum_cb;
529 	driver->cb_arg = enum_ctx;
530 
531 	if (rte_bus_scan() != 0 || rte_bus_probe() != 0) {
532 		driver->cb_arg = NULL;
533 		driver->cb_fn = NULL;
534 		return -1;
535 	}
536 
537 	driver->cb_arg = NULL;
538 	driver->cb_fn = NULL;
539 
540 	cleanup_pci_devices();
541 	return 0;
542 }
543 
544 struct spdk_pci_device *
545 spdk_pci_get_first_device(void)
546 {
547 	return TAILQ_FIRST(&g_pci_devices);
548 }
549 
550 struct spdk_pci_device *
551 spdk_pci_get_next_device(struct spdk_pci_device *prev)
552 {
553 	return TAILQ_NEXT(prev, internal.tailq);
554 }
555 
556 int
557 spdk_pci_device_map_bar(struct spdk_pci_device *dev, uint32_t bar,
558 			void **mapped_addr, uint64_t *phys_addr, uint64_t *size)
559 {
560 	return dev->map_bar(dev, bar, mapped_addr, phys_addr, size);
561 }
562 
563 int
564 spdk_pci_device_unmap_bar(struct spdk_pci_device *dev, uint32_t bar, void *addr)
565 {
566 	return dev->unmap_bar(dev, bar, addr);
567 }
568 
569 uint32_t
570 spdk_pci_device_get_domain(struct spdk_pci_device *dev)
571 {
572 	return dev->addr.domain;
573 }
574 
575 uint8_t
576 spdk_pci_device_get_bus(struct spdk_pci_device *dev)
577 {
578 	return dev->addr.bus;
579 }
580 
581 uint8_t
582 spdk_pci_device_get_dev(struct spdk_pci_device *dev)
583 {
584 	return dev->addr.dev;
585 }
586 
587 uint8_t
588 spdk_pci_device_get_func(struct spdk_pci_device *dev)
589 {
590 	return dev->addr.func;
591 }
592 
593 uint16_t
594 spdk_pci_device_get_vendor_id(struct spdk_pci_device *dev)
595 {
596 	return dev->id.vendor_id;
597 }
598 
599 uint16_t
600 spdk_pci_device_get_device_id(struct spdk_pci_device *dev)
601 {
602 	return dev->id.device_id;
603 }
604 
605 uint16_t
606 spdk_pci_device_get_subvendor_id(struct spdk_pci_device *dev)
607 {
608 	return dev->id.subvendor_id;
609 }
610 
611 uint16_t
612 spdk_pci_device_get_subdevice_id(struct spdk_pci_device *dev)
613 {
614 	return dev->id.subdevice_id;
615 }
616 
617 struct spdk_pci_id
618 spdk_pci_device_get_id(struct spdk_pci_device *dev)
619 {
620 	return dev->id;
621 }
622 
623 int
624 spdk_pci_device_get_socket_id(struct spdk_pci_device *dev)
625 {
626 	return dev->socket_id;
627 }
628 
629 int
630 spdk_pci_device_cfg_read(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
631 {
632 	return dev->cfg_read(dev, value, len, offset);
633 }
634 
635 int
636 spdk_pci_device_cfg_write(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
637 {
638 	return dev->cfg_write(dev, value, len, offset);
639 }
640 
/* Read one byte of config space at `offset` into `*value`. */
int
spdk_pci_device_cfg_read8(struct spdk_pci_device *dev, uint8_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, sizeof(*value), offset);
}
646 
/* Write the one-byte `value` to config space at `offset`. */
int
spdk_pci_device_cfg_write8(struct spdk_pci_device *dev, uint8_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, sizeof(value), offset);
}
652 
/* Read two bytes of config space at `offset` into `*value`. */
int
spdk_pci_device_cfg_read16(struct spdk_pci_device *dev, uint16_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, sizeof(*value), offset);
}
658 
/* Write the two-byte `value` to config space at `offset`. */
int
spdk_pci_device_cfg_write16(struct spdk_pci_device *dev, uint16_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, sizeof(value), offset);
}
664 
/* Read four bytes of config space at `offset` into `*value`. */
int
spdk_pci_device_cfg_read32(struct spdk_pci_device *dev, uint32_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, sizeof(*value), offset);
}
670 
/* Write the four-byte `value` to config space at `offset`. */
int
spdk_pci_device_cfg_write32(struct spdk_pci_device *dev, uint32_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, sizeof(value), offset);
}
676 
677 int
678 spdk_pci_device_get_serial_number(struct spdk_pci_device *dev, char *sn, size_t len)
679 {
680 	int err;
681 	uint32_t pos, header = 0;
682 	uint32_t i, buf[2];
683 
684 	if (len < 17) {
685 		return -1;
686 	}
687 
688 	err = spdk_pci_device_cfg_read32(dev, &header, PCI_CFG_SIZE);
689 	if (err || !header) {
690 		return -1;
691 	}
692 
693 	pos = PCI_CFG_SIZE;
694 	while (1) {
695 		if ((header & 0x0000ffff) == PCI_EXT_CAP_ID_SN) {
696 			if (pos) {
697 				/* skip the header */
698 				pos += 4;
699 				for (i = 0; i < 2; i++) {
700 					err = spdk_pci_device_cfg_read32(dev, &buf[i], pos + 4 * i);
701 					if (err) {
702 						return -1;
703 					}
704 				}
705 				snprintf(sn, len, "%08x%08x", buf[1], buf[0]);
706 				return 0;
707 			}
708 		}
709 		pos = (header >> 20) & 0xffc;
710 		/* 0 if no other items exist */
711 		if (pos < PCI_CFG_SIZE) {
712 			return -1;
713 		}
714 		err = spdk_pci_device_cfg_read32(dev, &header, pos);
715 		if (err) {
716 			return -1;
717 		}
718 	}
719 	return -1;
720 }
721 
722 struct spdk_pci_addr
723 spdk_pci_device_get_addr(struct spdk_pci_device *dev)
724 {
725 	return dev->addr;
726 }
727 
728 bool
729 spdk_pci_device_is_removed(struct spdk_pci_device *dev)
730 {
731 	return dev->internal.pending_removal;
732 }
733 
734 int
735 spdk_pci_addr_compare(const struct spdk_pci_addr *a1, const struct spdk_pci_addr *a2)
736 {
737 	if (a1->domain > a2->domain) {
738 		return 1;
739 	} else if (a1->domain < a2->domain) {
740 		return -1;
741 	} else if (a1->bus > a2->bus) {
742 		return 1;
743 	} else if (a1->bus < a2->bus) {
744 		return -1;
745 	} else if (a1->dev > a2->dev) {
746 		return 1;
747 	} else if (a1->dev < a2->dev) {
748 		return -1;
749 	} else if (a1->func > a2->func) {
750 		return 1;
751 	} else if (a1->func < a2->func) {
752 		return -1;
753 	}
754 
755 	return 0;
756 }
757 
758 #ifdef __linux__
759 int
760 spdk_pci_device_claim(struct spdk_pci_device *dev)
761 {
762 	int dev_fd;
763 	char dev_name[64];
764 	int pid;
765 	void *dev_map;
766 	struct flock pcidev_lock = {
767 		.l_type = F_WRLCK,
768 		.l_whence = SEEK_SET,
769 		.l_start = 0,
770 		.l_len = 0,
771 	};
772 
773 	snprintf(dev_name, sizeof(dev_name), "/tmp/spdk_pci_lock_%04x:%02x:%02x.%x",
774 		 dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func);
775 
776 	dev_fd = open(dev_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
777 	if (dev_fd == -1) {
778 		fprintf(stderr, "could not open %s\n", dev_name);
779 		return -errno;
780 	}
781 
782 	if (ftruncate(dev_fd, sizeof(int)) != 0) {
783 		fprintf(stderr, "could not truncate %s\n", dev_name);
784 		close(dev_fd);
785 		return -errno;
786 	}
787 
788 	dev_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
789 		       MAP_SHARED, dev_fd, 0);
790 	if (dev_map == MAP_FAILED) {
791 		fprintf(stderr, "could not mmap dev %s (%d)\n", dev_name, errno);
792 		close(dev_fd);
793 		return -errno;
794 	}
795 
796 	if (fcntl(dev_fd, F_SETLK, &pcidev_lock) != 0) {
797 		pid = *(int *)dev_map;
798 		fprintf(stderr, "Cannot create lock on device %s, probably"
799 			" process %d has claimed it\n", dev_name, pid);
800 		munmap(dev_map, sizeof(int));
801 		close(dev_fd);
802 		/* F_SETLK returns unspecified errnos, normalize them */
803 		return -EACCES;
804 	}
805 
806 	*(int *)dev_map = (int)getpid();
807 	munmap(dev_map, sizeof(int));
808 	dev->internal.claim_fd = dev_fd;
809 	/* Keep dev_fd open to maintain the lock. */
810 	return 0;
811 }
812 
813 void
814 spdk_pci_device_unclaim(struct spdk_pci_device *dev)
815 {
816 	char dev_name[64];
817 
818 	snprintf(dev_name, sizeof(dev_name), "/tmp/spdk_pci_lock_%04x:%02x:%02x.%x",
819 		 dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func);
820 
821 	close(dev->internal.claim_fd);
822 	dev->internal.claim_fd = -1;
823 	unlink(dev_name);
824 }
825 #endif /* __linux__ */
826 
827 #ifdef __FreeBSD__
/*
 * FreeBSD stub: claiming is not implemented yet, so every claim request is
 * reported as successful (0) without doing anything.
 */
int
spdk_pci_device_claim(struct spdk_pci_device *dev)
{
	/* TODO */
	(void)dev;

	return 0;
}
834 
/* FreeBSD stub: unclaiming is not implemented yet, so this does nothing. */
void
spdk_pci_device_unclaim(struct spdk_pci_device *dev)
{
	/* TODO */
	(void)dev;
}
840 #endif /* __FreeBSD__ */
841 
842 int
843 spdk_pci_addr_parse(struct spdk_pci_addr *addr, const char *bdf)
844 {
845 	unsigned domain, bus, dev, func;
846 
847 	if (addr == NULL || bdf == NULL) {
848 		return -EINVAL;
849 	}
850 
851 	if ((sscanf(bdf, "%x:%x:%x.%x", &domain, &bus, &dev, &func) == 4) ||
852 	    (sscanf(bdf, "%x.%x.%x.%x", &domain, &bus, &dev, &func) == 4)) {
853 		/* Matched a full address - all variables are initialized */
854 	} else if (sscanf(bdf, "%x:%x:%x", &domain, &bus, &dev) == 3) {
855 		func = 0;
856 	} else if ((sscanf(bdf, "%x:%x.%x", &bus, &dev, &func) == 3) ||
857 		   (sscanf(bdf, "%x.%x.%x", &bus, &dev, &func) == 3)) {
858 		domain = 0;
859 	} else if ((sscanf(bdf, "%x:%x", &bus, &dev) == 2) ||
860 		   (sscanf(bdf, "%x.%x", &bus, &dev) == 2)) {
861 		domain = 0;
862 		func = 0;
863 	} else {
864 		return -EINVAL;
865 	}
866 
867 	if (bus > 0xFF || dev > 0x1F || func > 7) {
868 		return -EINVAL;
869 	}
870 
871 	addr->domain = domain;
872 	addr->bus = bus;
873 	addr->dev = dev;
874 	addr->func = func;
875 
876 	return 0;
877 }
878 
879 int
880 spdk_pci_addr_fmt(char *bdf, size_t sz, const struct spdk_pci_addr *addr)
881 {
882 	int rc;
883 
884 	rc = snprintf(bdf, sz, "%04x:%02x:%02x.%x",
885 		      addr->domain, addr->bus,
886 		      addr->dev, addr->func);
887 
888 	if (rc > 0 && (size_t)rc < sz) {
889 		return 0;
890 	}
891 
892 	return -1;
893 }
894 
895 void
896 spdk_pci_hook_device(struct spdk_pci_driver *drv, struct spdk_pci_device *dev)
897 {
898 	assert(dev->map_bar != NULL);
899 	assert(dev->unmap_bar != NULL);
900 	assert(dev->cfg_read != NULL);
901 	assert(dev->cfg_write != NULL);
902 	assert(dev->detach != NULL);
903 	dev->internal.driver = drv;
904 	TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq);
905 }
906 
907 void
908 spdk_pci_unhook_device(struct spdk_pci_device *dev)
909 {
910 	assert(!dev->internal.attached);
911 	TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq);
912 }
913 
914 const char *
915 spdk_pci_device_get_type(const struct spdk_pci_device *dev)
916 {
917 	return dev->type;
918 }
919