xref: /spdk/lib/virtio/virtio_pci.c (revision 927f1fd57bd004df581518466ec4c1b8083e5d23)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "spdk/stdinc.h"
35 
36 #include "spdk/memory.h"
37 #include "spdk/mmio.h"
38 #include "spdk/string.h"
39 #include "spdk/env.h"
40 
41 #include "spdk_internal/virtio.h"
42 #include <linux/virtio_ids.h>
43 
/* Per-device state for a modern virtio PCI device. One instance is
 * allocated per probed device and linked into g_virtio_hws. */
struct virtio_hw {
	/* Non-zero if the device exposes an MSI-X capability. */
	uint8_t	    use_msix;
	/* Multiplier applied to queue_notify_off to locate a queue's
	 * notify register inside notify_base. */
	uint32_t    notify_off_multiplier;
	/* Mapped ISR status register (from VIRTIO_PCI_CAP_ISR_CFG). */
	uint8_t     *isr;
	/* Base of the notification region (from VIRTIO_PCI_CAP_NOTIFY_CFG). */
	uint16_t    *notify_base;

	struct {
		/** Mem-mapped resources from given PCI BAR */
		void        *vaddr;

		/** Length of the address space */
		uint32_t    len;
	} pci_bar[6];

	/* Mapped common configuration structure (VIRTIO_PCI_CAP_COMMON_CFG). */
	struct virtio_pci_common_cfg *common_cfg;
	struct spdk_pci_device *pci_dev;

	/** Device-specific PCI config space */
	void *dev_cfg;

	/* Back-pointer to the owning virtio_dev, set in virtio_pci_dev_init(). */
	struct virtio_dev *vdev;
	/* True once the SIGBUS handler replaced the BARs with anonymous memory. */
	bool is_remapped;
	/* True once hot-remove of this device has been reported. */
	bool is_removing;
	TAILQ_ENTRY(virtio_hw) tailq;
};
69 
/* Context threaded through spdk_pci_enumerate()/spdk_pci_device_attach()
 * into virtio_pci_dev_probe_cb(). */
struct virtio_pci_probe_ctx {
	virtio_pci_create_cb enum_cb;	/* user callback invoked per matching device */
	void *enum_ctx;			/* opaque argument passed to enum_cb */
	uint16_t device_id;		/* virtio device type to match */
};
75 
/* All probed virtio-pci devices; guarded by g_hw_mutex. */
static TAILQ_HEAD(, virtio_hw) g_virtio_hws = TAILQ_HEAD_INITIALIZER(g_virtio_hws);
static pthread_mutex_t g_hw_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Device this thread is currently accessing via MMIO; consulted by the
 * SIGBUS handler to decide which device's BARs to remap on hot-remove. */
__thread struct virtio_hw *g_thread_virtio_hw = NULL;
/* 0/1 lock taken with an atomic CAS to serialize the SIGBUS handler. */
static uint16_t g_signal_lock;
/* True once the SIGBUS error handler has been registered. */
static bool g_sigset = false;
81 
/*
 * Following macros are derived from linux/pci_regs.h, however,
 * we can't simply include that header here, as there is no such
 * file for non-Linux platforms.
 */
/* Config-space offset of the first capability pointer. */
#define PCI_CAPABILITY_LIST	0x34
/* Vendor-specific capability id - virtio uses it for its cfg capabilities. */
#define PCI_CAP_ID_VNDR		0x09
/* MSI-X capability id. */
#define PCI_CAP_ID_MSIX		0x11
90 
/* SIGBUS handler registered via spdk_pci_register_error_handler(). A BAR
 * access can fault when the device is surprise hot-removed; this handler
 * replaces every mapped BAR of the device this thread was touching with
 * anonymous memory at the same virtual address, so the faulting access
 * (and any later ones) succeed instead of killing the process. */
static void
virtio_pci_dev_sigbus_handler(const void *failure_addr, void *ctx)
{
	void *map_address = NULL;
	uint16_t flag = 0;
	int i;

	/* Single-entry guard: only one handler invocation may remap at a time. */
	if (!__atomic_compare_exchange_n(&g_signal_lock, &flag, 1, false, __ATOMIC_ACQUIRE,
					 __ATOMIC_RELAXED)) {
		SPDK_DEBUGLOG(virtio_pci, "request g_signal_lock failed\n");
		return;
	}

	/* Nothing to do if this thread wasn't accessing a device, or its
	 * BARs have already been remapped. */
	if (g_thread_virtio_hw == NULL || g_thread_virtio_hw->is_remapped) {
		__atomic_store_n(&g_signal_lock, 0, __ATOMIC_RELEASE);
		return;
	}

	/* Remap each BAR at its original VA (MAP_FIXED) to avoid subsequent
	 * SIGBUS errors. Because the replacement lands at the same address,
	 * derived pointers such as hw->common_cfg need no fix-up.
	 */
	for (i = 0; i < 6; ++i) {
		if (g_thread_virtio_hw->pci_bar[i].vaddr == NULL) {
			continue;
		}

		map_address = mmap(g_thread_virtio_hw->pci_bar[i].vaddr,
				   g_thread_virtio_hw->pci_bar[i].len,
				   PROT_READ | PROT_WRITE,
				   MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
		if (map_address == MAP_FAILED) {
			SPDK_ERRLOG("mmap failed\n");
			goto fail;
		}
		/* Fill with 0xFF so reads observe all-ones, as they would from
		 * a removed PCI device. */
		memset(map_address, 0xFF, g_thread_virtio_hw->pci_bar[i].len);
	}

	g_thread_virtio_hw->is_remapped = true;
	__atomic_store_n(&g_signal_lock, 0, __ATOMIC_RELEASE);
	return;
fail:
	/* Partial failure: unmap the anonymous replacements installed so far. */
	for (--i; i >= 0; i--) {
		if (g_thread_virtio_hw->pci_bar[i].vaddr == NULL) {
			continue;
		}

		munmap(g_thread_virtio_hw->pci_bar[i].vaddr, g_thread_virtio_hw->pci_bar[i].len);
	}
	__atomic_store_n(&g_signal_lock, 0, __ATOMIC_RELEASE);
}
142 
143 static struct virtio_hw *
144 virtio_pci_dev_get_by_addr(struct spdk_pci_addr *traddr)
145 {
146 	struct virtio_hw *hw;
147 	struct spdk_pci_addr addr;
148 
149 	pthread_mutex_lock(&g_hw_mutex);
150 	TAILQ_FOREACH(hw, &g_virtio_hws, tailq) {
151 		addr = spdk_pci_device_get_addr(hw->pci_dev);
152 		if (!spdk_pci_addr_compare(&addr, traddr)) {
153 			pthread_mutex_unlock(&g_hw_mutex);
154 			return hw;
155 		}
156 	}
157 	pthread_mutex_unlock(&g_hw_mutex);
158 
159 	return NULL;
160 }
161 
162 static const char *
163 virtio_pci_dev_check(struct virtio_hw *hw, uint16_t device_id_match)
164 {
165 	uint16_t pci_device_id, device_id;
166 
167 	pci_device_id = spdk_pci_device_get_device_id(hw->pci_dev);
168 	if (pci_device_id < 0x1040) {
169 		/* Transitional devices: use the PCI subsystem device id as
170 		 * virtio device id, same as legacy driver always did.
171 		 */
172 		device_id = spdk_pci_device_get_subdevice_id(hw->pci_dev);
173 	} else {
174 		/* Modern devices: simply use PCI device id, but start from 0x1040. */
175 		device_id = pci_device_id - 0x1040;
176 	}
177 
178 	if (device_id == device_id_match) {
179 		hw->is_removing = true;
180 		return hw->vdev->name;
181 	}
182 
183 	return NULL;
184 }
185 
/* Scan for a hot-removed virtio-pci device of type \p device_id.
 *
 * Two notification paths are handled:
 *  - UIO: a remove uevent read from \p fd, matched by PCI address;
 *  - VFIO: polling each known device via spdk_pci_device_is_removed().
 *
 * Returns the name of the removed virtio_dev, or NULL if none matched.
 * The matched device is flagged is_removing inside virtio_pci_dev_check()
 * so it is reported at most once. */
const char *
virtio_pci_dev_event_process(int fd, uint16_t device_id)
{
	struct spdk_pci_event event;
	struct virtio_hw *hw, *tmp;
	const char *vdev_name;

	/* UIO remove handler */
	if (spdk_pci_get_event(fd, &event) > 0) {
		if (event.action == SPDK_UEVENT_REMOVE) {
			hw = virtio_pci_dev_get_by_addr(&event.traddr);
			if (hw == NULL || hw->is_removing) {
				return NULL;
			}

			vdev_name = virtio_pci_dev_check(hw, device_id);
			if (vdev_name != NULL) {
				return vdev_name;
			}
		}
	}

	/* VFIO remove handler */
	pthread_mutex_lock(&g_hw_mutex);
	TAILQ_FOREACH_SAFE(hw, &g_virtio_hws, tailq, tmp) {
		if (spdk_pci_device_is_removed(hw->pci_dev) && !hw->is_removing) {
			vdev_name = virtio_pci_dev_check(hw, device_id);
			if (vdev_name != NULL) {
				/* Unlock before returning; the entry stays on
				 * the list until modern_destruct_dev(). */
				pthread_mutex_unlock(&g_hw_mutex);
				return vdev_name;
			}
		}
	}
	pthread_mutex_unlock(&g_hw_mutex);

	return NULL;
}
223 
224 static inline int
225 check_vq_phys_addr_ok(struct virtqueue *vq)
226 {
227 	/* Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit,
228 	 * and only accepts 32 bit page frame number.
229 	 * Check if the allocated physical memory exceeds 16TB.
230 	 */
231 	if ((vq->vq_ring_mem + vq->vq_ring_size - 1) >>
232 	    (VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) {
233 		SPDK_ERRLOG("vring address shouldn't be above 16TB!\n");
234 		return 0;
235 	}
236 
237 	return 1;
238 }
239 
240 static void
241 free_virtio_hw(struct virtio_hw *hw)
242 {
243 	unsigned i;
244 
245 	for (i = 0; i < 6; ++i) {
246 		if (hw->pci_bar[i].vaddr == NULL) {
247 			continue;
248 		}
249 
250 		spdk_pci_device_unmap_bar(hw->pci_dev, i, hw->pci_bar[i].vaddr);
251 	}
252 
253 	free(hw);
254 }
255 
256 static void
257 pci_dump_json_info(struct virtio_dev *dev, struct spdk_json_write_ctx *w)
258 {
259 	struct virtio_hw *hw = dev->ctx;
260 	struct spdk_pci_addr pci_addr = spdk_pci_device_get_addr((struct spdk_pci_device *)hw->pci_dev);
261 	char addr[32];
262 
263 	spdk_json_write_name(w, "type");
264 	if (dev->modern) {
265 		spdk_json_write_string(w, "pci-modern");
266 	} else {
267 		spdk_json_write_string(w, "pci-legacy");
268 	}
269 
270 	spdk_pci_addr_fmt(addr, sizeof(addr), &pci_addr);
271 	spdk_json_write_named_string(w, "pci_address", addr);
272 }
273 
274 static void
275 pci_write_json_config(struct virtio_dev *dev, struct spdk_json_write_ctx *w)
276 {
277 	struct virtio_hw *hw = dev->ctx;
278 	struct spdk_pci_addr pci_addr = spdk_pci_device_get_addr(hw->pci_dev);
279 	char addr[32];
280 
281 	spdk_pci_addr_fmt(addr, sizeof(addr), &pci_addr);
282 
283 	spdk_json_write_named_string(w, "trtype", "pci");
284 	spdk_json_write_named_string(w, "traddr", addr);
285 }
286 
/* Write a 64-bit value as two 32-bit MMIO stores (low half first). */
static inline void
io_write64_twopart(uint64_t val, uint32_t *lo, uint32_t *hi)
{
	spdk_mmio_write_4(lo, (uint32_t)val);
	spdk_mmio_write_4(hi, (uint32_t)(val >> 32));
}
293 
/* Read \p length bytes at \p offset from the device-specific config space
 * into \p dst. Retries the copy until the device's config generation is
 * stable, guaranteeing a consistent (non-torn) snapshot. Always returns 0. */
static int
modern_read_dev_config(struct virtio_dev *dev, size_t offset,
		       void *dst, int length)
{
	struct virtio_hw *hw = dev->ctx;
	int i;
	uint8_t *p;
	uint8_t old_gen, new_gen;

	/* Publish hw so the SIGBUS handler can remap BARs if the device is
	 * hot-removed while we touch its config space. */
	g_thread_virtio_hw = hw;
	do {
		old_gen = spdk_mmio_read_1(&hw->common_cfg->config_generation);

		p = dst;
		for (i = 0;  i < length; i++) {
			*p++ = spdk_mmio_read_1((uint8_t *)hw->dev_cfg + offset + i);
		}

		/* If the generation changed mid-copy, the device updated its
		 * config underneath us - read again. */
		new_gen = spdk_mmio_read_1(&hw->common_cfg->config_generation);
	} while (old_gen != new_gen);
	g_thread_virtio_hw = NULL;

	return 0;
}
318 
319 static int
320 modern_write_dev_config(struct virtio_dev *dev, size_t offset,
321 			const void *src, int length)
322 {
323 	struct virtio_hw *hw = dev->ctx;
324 	int i;
325 	const uint8_t *p = src;
326 
327 	g_thread_virtio_hw = hw;
328 	for (i = 0;  i < length; i++) {
329 		spdk_mmio_write_1(((uint8_t *)hw->dev_cfg) + offset + i, *p++);
330 	}
331 	g_thread_virtio_hw = NULL;
332 
333 	return 0;
334 }
335 
336 static uint64_t
337 modern_get_features(struct virtio_dev *dev)
338 {
339 	struct virtio_hw *hw = dev->ctx;
340 	uint32_t features_lo, features_hi;
341 
342 	g_thread_virtio_hw = hw;
343 	spdk_mmio_write_4(&hw->common_cfg->device_feature_select, 0);
344 	features_lo = spdk_mmio_read_4(&hw->common_cfg->device_feature);
345 
346 	spdk_mmio_write_4(&hw->common_cfg->device_feature_select, 1);
347 	features_hi = spdk_mmio_read_4(&hw->common_cfg->device_feature);
348 	g_thread_virtio_hw = NULL;
349 
350 	return ((uint64_t)features_hi << 32) | features_lo;
351 }
352 
353 static int
354 modern_set_features(struct virtio_dev *dev, uint64_t features)
355 {
356 	struct virtio_hw *hw = dev->ctx;
357 
358 	if ((features & (1ULL << VIRTIO_F_VERSION_1)) == 0) {
359 		SPDK_ERRLOG("VIRTIO_F_VERSION_1 feature is not enabled.\n");
360 		return -EINVAL;
361 	}
362 
363 	g_thread_virtio_hw = hw;
364 	spdk_mmio_write_4(&hw->common_cfg->guest_feature_select, 0);
365 	spdk_mmio_write_4(&hw->common_cfg->guest_feature, features & ((1ULL << 32) - 1));
366 
367 	spdk_mmio_write_4(&hw->common_cfg->guest_feature_select, 1);
368 	spdk_mmio_write_4(&hw->common_cfg->guest_feature, features >> 32);
369 	g_thread_virtio_hw = NULL;
370 
371 	dev->negotiated_features = features;
372 
373 	return 0;
374 }
375 
376 static void
377 modern_destruct_dev(struct virtio_dev *vdev)
378 {
379 	struct virtio_hw *hw = vdev->ctx;
380 	struct spdk_pci_device *pci_dev;
381 
382 	if (hw != NULL) {
383 		pthread_mutex_lock(&g_hw_mutex);
384 		TAILQ_REMOVE(&g_virtio_hws, hw, tailq);
385 		pthread_mutex_unlock(&g_hw_mutex);
386 		pci_dev = hw->pci_dev;
387 		free_virtio_hw(hw);
388 		if (pci_dev) {
389 			spdk_pci_device_detach(pci_dev);
390 		}
391 	}
392 }
393 
394 static uint8_t
395 modern_get_status(struct virtio_dev *dev)
396 {
397 	struct virtio_hw *hw = dev->ctx;
398 	uint8_t ret;
399 
400 	g_thread_virtio_hw = hw;
401 	ret = spdk_mmio_read_1(&hw->common_cfg->device_status);
402 	g_thread_virtio_hw = NULL;
403 
404 	return ret;
405 }
406 
407 static void
408 modern_set_status(struct virtio_dev *dev, uint8_t status)
409 {
410 	struct virtio_hw *hw = dev->ctx;
411 
412 	g_thread_virtio_hw = hw;
413 	spdk_mmio_write_1(&hw->common_cfg->device_status, status);
414 	g_thread_virtio_hw = NULL;
415 }
416 
417 static uint16_t
418 modern_get_queue_size(struct virtio_dev *dev, uint16_t queue_id)
419 {
420 	struct virtio_hw *hw = dev->ctx;
421 	uint16_t ret;
422 
423 	g_thread_virtio_hw = hw;
424 	spdk_mmio_write_2(&hw->common_cfg->queue_select, queue_id);
425 	ret = spdk_mmio_read_2(&hw->common_cfg->queue_size);
426 	g_thread_virtio_hw = NULL;
427 
428 	return ret;
429 }
430 
/* Allocate DMA memory for virtqueue \p vq and program its descriptor,
 * avail and used ring addresses into the device, then enable the queue.
 * Returns 0 on success, -ENOMEM/-EFAULT on allocation/translation failure. */
static int
modern_setup_queue(struct virtio_dev *dev, struct virtqueue *vq)
{
	struct virtio_hw *hw = dev->ctx;
	uint64_t desc_addr, avail_addr, used_addr;
	uint16_t notify_off;
	void *queue_mem;
	uint64_t queue_mem_phys_addr;

	/* To ensure physical address contiguity we make the queue occupy
	 * only a single hugepage (2MB). As of Virtio 1.0, the queue size
	 * always falls within this limit.
	 */
	if (vq->vq_ring_size > VALUE_2MB) {
		return -ENOMEM;
	}

	/* 2MB-aligned, zeroed, DMA-able allocation. */
	queue_mem = spdk_zmalloc(vq->vq_ring_size, VALUE_2MB, NULL,
				 SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
	if (queue_mem == NULL) {
		return -ENOMEM;
	}

	queue_mem_phys_addr = spdk_vtophys(queue_mem, NULL);
	if (queue_mem_phys_addr == SPDK_VTOPHYS_ERROR) {
		spdk_free(queue_mem);
		return -EFAULT;
	}

	vq->vq_ring_mem = queue_mem_phys_addr;
	vq->vq_ring_virt_mem = queue_mem;

	/* The queue PFN register cannot address memory at or above 16TB. */
	if (!check_vq_phys_addr_ok(vq)) {
		spdk_free(queue_mem);
		return -ENOMEM;
	}

	/* Ring layout: descriptor table, avail ring, then the used ring
	 * aligned up to VIRTIO_PCI_VRING_ALIGN. */
	desc_addr = vq->vq_ring_mem;
	avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc);
	used_addr = (avail_addr + offsetof(struct vring_avail, ring[vq->vq_nentries])
		     + VIRTIO_PCI_VRING_ALIGN - 1) & ~(VIRTIO_PCI_VRING_ALIGN - 1);

	/* Publish hw for the SIGBUS handler while programming the device.
	 * The three address pairs below are banked on queue_select. */
	g_thread_virtio_hw = hw;
	spdk_mmio_write_2(&hw->common_cfg->queue_select, vq->vq_queue_index);

	io_write64_twopart(desc_addr, &hw->common_cfg->queue_desc_lo,
			   &hw->common_cfg->queue_desc_hi);
	io_write64_twopart(avail_addr, &hw->common_cfg->queue_avail_lo,
			   &hw->common_cfg->queue_avail_hi);
	io_write64_twopart(used_addr, &hw->common_cfg->queue_used_lo,
			   &hw->common_cfg->queue_used_hi);

	/* Each queue notifies at notify_base + queue_notify_off * multiplier. */
	notify_off = spdk_mmio_read_2(&hw->common_cfg->queue_notify_off);
	vq->notify_addr = (void *)((uint8_t *)hw->notify_base +
				   notify_off * hw->notify_off_multiplier);

	spdk_mmio_write_2(&hw->common_cfg->queue_enable, 1);
	g_thread_virtio_hw = NULL;

	SPDK_DEBUGLOG(virtio_pci, "queue %"PRIu16" addresses:\n", vq->vq_queue_index);
	SPDK_DEBUGLOG(virtio_pci, "\t desc_addr: %" PRIx64 "\n", desc_addr);
	SPDK_DEBUGLOG(virtio_pci, "\t aval_addr: %" PRIx64 "\n", avail_addr);
	SPDK_DEBUGLOG(virtio_pci, "\t used_addr: %" PRIx64 "\n", used_addr);
	SPDK_DEBUGLOG(virtio_pci, "\t notify addr: %p (notify offset: %"PRIu16")\n",
		      vq->notify_addr, notify_off);

	return 0;
}
499 
/* Disable virtqueue \p vq on the device, clear its ring addresses, and
 * release the ring memory allocated in modern_setup_queue(). */
static void
modern_del_queue(struct virtio_dev *dev, struct virtqueue *vq)
{
	struct virtio_hw *hw = dev->ctx;

	/* Publish hw for the SIGBUS handler; the address registers below are
	 * banked on queue_select. */
	g_thread_virtio_hw = hw;
	spdk_mmio_write_2(&hw->common_cfg->queue_select, vq->vq_queue_index);

	/* Zero the ring addresses before disabling the queue. */
	io_write64_twopart(0, &hw->common_cfg->queue_desc_lo,
			   &hw->common_cfg->queue_desc_hi);
	io_write64_twopart(0, &hw->common_cfg->queue_avail_lo,
			   &hw->common_cfg->queue_avail_hi);
	io_write64_twopart(0, &hw->common_cfg->queue_used_lo,
			   &hw->common_cfg->queue_used_hi);

	spdk_mmio_write_2(&hw->common_cfg->queue_enable, 0);
	g_thread_virtio_hw = NULL;

	spdk_free(vq->vq_ring_virt_mem);
}
520 
521 static void
522 modern_notify_queue(struct virtio_dev *dev, struct virtqueue *vq)
523 {
524 	g_thread_virtio_hw = dev->ctx;
525 	spdk_mmio_write_2(vq->notify_addr, vq->vq_queue_index);
526 	g_thread_virtio_hw = NULL;
527 }
528 
/* Operation table for modern (VIRTIO 1.0+) PCI devices, installed on each
 * virtio_dev in virtio_pci_dev_init(). */
static const struct virtio_dev_ops modern_ops = {
	.read_dev_cfg	= modern_read_dev_config,
	.write_dev_cfg	= modern_write_dev_config,
	.get_status	= modern_get_status,
	.set_status	= modern_set_status,
	.get_features	= modern_get_features,
	.set_features	= modern_set_features,
	.destruct_dev	= modern_destruct_dev,
	.get_queue_size	= modern_get_queue_size,
	.setup_queue	= modern_setup_queue,
	.del_queue	= modern_del_queue,
	.notify_queue	= modern_notify_queue,
	.dump_json_info = pci_dump_json_info,
	.write_json_config = pci_write_json_config,
};
544 
545 static void *
546 get_cfg_addr(struct virtio_hw *hw, struct virtio_pci_cap *cap)
547 {
548 	uint8_t  bar    = cap->bar;
549 	uint32_t length = cap->length;
550 	uint32_t offset = cap->offset;
551 
552 	if (bar > 5) {
553 		SPDK_ERRLOG("invalid bar: %"PRIu8"\n", bar);
554 		return NULL;
555 	}
556 
557 	if (offset + length < offset) {
558 		SPDK_ERRLOG("offset(%"PRIu32") + length(%"PRIu32") overflows\n",
559 			    offset, length);
560 		return NULL;
561 	}
562 
563 	if (offset + length > hw->pci_bar[bar].len) {
564 		SPDK_ERRLOG("invalid cap: overflows bar space: %"PRIu32" > %"PRIu32"\n",
565 			    offset + length, hw->pci_bar[bar].len);
566 		return NULL;
567 	}
568 
569 	if (hw->pci_bar[bar].vaddr == NULL) {
570 		SPDK_ERRLOG("bar %"PRIu8" base addr is NULL\n", bar);
571 		return NULL;
572 	}
573 
574 	return hw->pci_bar[bar].vaddr + offset;
575 }
576 
/* Walk the PCI capability list of hw->pci_dev and record the mapped
 * addresses of the four virtio vendor capabilities (common cfg, notify,
 * device cfg, ISR) plus whether MSI-X is present.
 *
 * Returns 0 when all four structures were found; a negative errno from
 * config-space reads, or -EINVAL, when the device lacks the modern
 * virtio layout (e.g. a legacy device). */
static int
virtio_read_caps(struct virtio_hw *hw)
{
	uint8_t pos;
	struct virtio_pci_cap cap;
	int ret;

	ret = spdk_pci_device_cfg_read(hw->pci_dev, &pos, 1, PCI_CAPABILITY_LIST);
	if (ret < 0) {
		SPDK_DEBUGLOG(virtio_pci, "failed to read pci capability list\n");
		return ret;
	}

	/* pos == 0 terminates the capability chain. */
	while (pos) {
		ret = spdk_pci_device_cfg_read(hw->pci_dev, &cap, sizeof(cap), pos);
		if (ret < 0) {
			SPDK_ERRLOG("failed to read pci cap at pos: %"PRIx8"\n", pos);
			break;
		}

		if (cap.cap_vndr == PCI_CAP_ID_MSIX) {
			hw->use_msix = 1;
		}

		/* Virtio structures live only in vendor-specific capabilities. */
		if (cap.cap_vndr != PCI_CAP_ID_VNDR) {
			SPDK_DEBUGLOG(virtio_pci,
				      "[%2"PRIx8"] skipping non VNDR cap id: %02"PRIx8"\n",
				      pos, cap.cap_vndr);
			goto next;
		}

		SPDK_DEBUGLOG(virtio_pci,
			      "[%2"PRIx8"] cfg type: %"PRIu8", bar: %"PRIu8", offset: %04"PRIx32", len: %"PRIu32"\n",
			      pos, cap.cfg_type, cap.bar, cap.offset, cap.length);

		switch (cap.cfg_type) {
		case VIRTIO_PCI_CAP_COMMON_CFG:
			hw->common_cfg = get_cfg_addr(hw, &cap);
			break;
		case VIRTIO_PCI_CAP_NOTIFY_CFG:
			/* The notify multiplier follows the cap structure. */
			spdk_pci_device_cfg_read(hw->pci_dev, &hw->notify_off_multiplier,
						 4, pos + sizeof(cap));
			hw->notify_base = get_cfg_addr(hw, &cap);
			break;
		case VIRTIO_PCI_CAP_DEVICE_CFG:
			hw->dev_cfg = get_cfg_addr(hw, &cap);
			break;
		case VIRTIO_PCI_CAP_ISR_CFG:
			hw->isr = get_cfg_addr(hw, &cap);
			break;
		}

next:
		pos = cap.cap_next;
	}

	if (hw->common_cfg == NULL || hw->notify_base == NULL ||
	    hw->dev_cfg == NULL    || hw->isr == NULL) {
		SPDK_DEBUGLOG(virtio_pci, "no modern virtio pci device found.\n");
		/* Prefer the last config-read error over a generic -EINVAL. */
		if (ret < 0) {
			return ret;
		} else {
			return -EINVAL;
		}
	}

	SPDK_DEBUGLOG(virtio_pci, "found modern virtio pci device.\n");

	SPDK_DEBUGLOG(virtio_pci, "common cfg mapped at: %p\n", hw->common_cfg);
	SPDK_DEBUGLOG(virtio_pci, "device cfg mapped at: %p\n", hw->dev_cfg);
	SPDK_DEBUGLOG(virtio_pci, "isr cfg mapped at: %p\n", hw->isr);
	SPDK_DEBUGLOG(virtio_pci, "notify base: %p, notify off multiplier: %u\n",
		      hw->notify_base, hw->notify_off_multiplier);

	return 0;
}
653 
/* Set up a virtio_hw for a newly found PCI device: map all six BARs,
 * locate the virtio capabilities, hand the device to the caller's
 * enum_cb, and register it in g_virtio_hws.
 *
 * On any failure the hw (and its BAR mappings) is freed and a non-zero
 * value is returned; on enum_cb failure its return code is propagated. */
static int
virtio_pci_dev_probe(struct spdk_pci_device *pci_dev, struct virtio_pci_probe_ctx *ctx)
{
	struct virtio_hw *hw;
	uint8_t *bar_vaddr;
	uint64_t bar_paddr, bar_len;
	int rc;
	unsigned i;
	char bdf[32];
	struct spdk_pci_addr addr;

	addr = spdk_pci_device_get_addr(pci_dev);
	rc = spdk_pci_addr_fmt(bdf, sizeof(bdf), &addr);
	if (rc != 0) {
		SPDK_ERRLOG("Ignoring a device with non-parseable PCI address\n");
		return -1;
	}

	hw = calloc(1, sizeof(*hw));
	if (hw == NULL) {
		SPDK_ERRLOG("%s: calloc failed\n", bdf);
		return -1;
	}

	hw->pci_dev = pci_dev;

	for (i = 0; i < 6; ++i) {
		rc = spdk_pci_device_map_bar(pci_dev, i, (void *) &bar_vaddr, &bar_paddr,
					     &bar_len);
		if (rc != 0) {
			SPDK_ERRLOG("%s: failed to memmap PCI BAR %u\n", bdf, i);
			/* free_virtio_hw() unmaps only the BARs recorded so far. */
			free_virtio_hw(hw);
			return -1;
		}

		hw->pci_bar[i].vaddr = bar_vaddr;
		hw->pci_bar[i].len = bar_len;
	}

	/* Virtio PCI caps exist only on modern PCI devices.
	 * Legacy devices are not supported.
	 */
	if (virtio_read_caps(hw) != 0) {
		SPDK_NOTICELOG("Ignoring legacy PCI device at %s\n", bdf);
		free_virtio_hw(hw);
		return -1;
	}

	/* Hand ownership decision to the caller; on rejection, clean up. */
	rc = ctx->enum_cb((struct virtio_pci_ctx *)hw, ctx->enum_ctx);
	if (rc != 0) {
		free_virtio_hw(hw);
		return rc;
	}

	/* Register the BAR-remapping error handler once per process. */
	if (g_sigset != true) {
		spdk_pci_register_error_handler(virtio_pci_dev_sigbus_handler,
						NULL);
		g_sigset = true;
	}

	pthread_mutex_lock(&g_hw_mutex);
	TAILQ_INSERT_TAIL(&g_virtio_hws, hw, tailq);
	pthread_mutex_unlock(&g_hw_mutex);

	return 0;
}
720 
721 static int
722 virtio_pci_dev_probe_cb(void *probe_ctx, struct spdk_pci_device *pci_dev)
723 {
724 	struct virtio_pci_probe_ctx *ctx = probe_ctx;
725 	uint16_t pci_device_id = spdk_pci_device_get_device_id(pci_dev);
726 	uint16_t device_id;
727 
728 	if (pci_device_id < 0x1000 || pci_device_id > 0x107f) {
729 		SPDK_ERRLOG("Probe device is not a virtio device\n");
730 		return 1;
731 	}
732 
733 	if (pci_device_id < 0x1040) {
734 		/* Transitional devices: use the PCI subsystem device id as
735 		 * virtio device id, same as legacy driver always did.
736 		 */
737 		device_id = spdk_pci_device_get_subdevice_id(pci_dev);
738 	} else {
739 		/* Modern devices: simply use PCI device id, but start from 0x1040. */
740 		device_id = pci_device_id - 0x1040;
741 	}
742 
743 	if (device_id != ctx->device_id) {
744 		return 1;
745 	}
746 
747 	return virtio_pci_dev_probe(pci_dev, ctx);
748 }
749 
750 int
751 virtio_pci_dev_enumerate(virtio_pci_create_cb enum_cb, void *enum_ctx,
752 			 uint16_t pci_device_id)
753 {
754 	struct virtio_pci_probe_ctx ctx;
755 
756 	if (!spdk_process_is_primary()) {
757 		SPDK_WARNLOG("virtio_pci secondary process support is not implemented yet.\n");
758 		return 0;
759 	}
760 
761 	ctx.enum_cb = enum_cb;
762 	ctx.enum_ctx = enum_ctx;
763 	ctx.device_id = pci_device_id;
764 
765 	return spdk_pci_enumerate(spdk_pci_virtio_get_driver(),
766 				  virtio_pci_dev_probe_cb, &ctx);
767 }
768 
769 int
770 virtio_pci_dev_attach(virtio_pci_create_cb enum_cb, void *enum_ctx,
771 		      uint16_t device_id, struct spdk_pci_addr *pci_address)
772 {
773 	struct virtio_pci_probe_ctx ctx;
774 
775 	if (!spdk_process_is_primary()) {
776 		SPDK_WARNLOG("virtio_pci secondary process support is not implemented yet.\n");
777 		return 0;
778 	}
779 
780 	ctx.enum_cb = enum_cb;
781 	ctx.enum_ctx = enum_ctx;
782 	ctx.device_id = device_id;
783 
784 	return spdk_pci_device_attach(spdk_pci_virtio_get_driver(),
785 				      virtio_pci_dev_probe_cb, &ctx, pci_address);
786 }
787 
788 int
789 virtio_pci_dev_init(struct virtio_dev *vdev, const char *name,
790 		    struct virtio_pci_ctx *pci_ctx)
791 {
792 	int rc;
793 	struct virtio_hw *hw = (struct virtio_hw *)pci_ctx;
794 
795 	rc = virtio_dev_construct(vdev, name, &modern_ops, pci_ctx);
796 	if (rc != 0) {
797 		return rc;
798 	}
799 
800 	vdev->is_hw = 1;
801 	vdev->modern = 1;
802 	hw->vdev = vdev;
803 
804 	return 0;
805 }
806 
807 SPDK_LOG_REGISTER_COMPONENT(virtio_pci)
808