/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */
#include <stdint.h>

#ifdef RTE_EXEC_ENV_LINUX
 #include <dirent.h>
 #include <fcntl.h>
#endif

#include <rte_io.h>
#include <bus_driver.h>

#include "virtio_pci.h"
#include "virtio_logs.h"
#include "virtqueue.h"

/*
 * The following macros are derived from linux/pci_regs.h. We cannot
 * simply include that header here, as it does not exist on non-Linux
 * platforms.
 */
#define PCI_CAPABILITY_LIST	0x34
#define PCI_CAP_ID_VNDR		0x09
#define PCI_CAP_ID_MSIX		0x11

/*
 * The remaining space after the legacy virtio header is defined by each
 * driver as the per-driver configuration space. The legacy header is 20
 * bytes long; when MSI-X is enabled, the two 16-bit MSI-X vector
 * registers extend it to 24 bytes, so the device config starts there.
 */
#define VIRTIO_PCI_CONFIG(dev) \
		(((dev)->msix_status == VIRTIO_MSIX_ENABLED) ? 24 : 20)

struct virtio_pci_internal virtio_pci_internal[RTE_MAX_ETHPORTS];

#define PCI_MSIX_ENABLE 0x8000

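/*
 * Walk the PCI capability list (the offset of the first entry lives at
 * config space byte 0x34) looking for the MSI-X capability. If found,
 * bit 15 (PCI_MSIX_ENABLE) of its message control word tells whether
 * MSI-X is actually enabled or merely present.
 */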
static enum virtio_msix_status
vtpci_msix_detect(struct rte_pci_device *dev)
{
	uint8_t pos;
	int ret;

	ret = rte_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST);
	if (ret != 1) {
		PMD_INIT_LOG(DEBUG,
			     "failed to read pci capability list, ret %d", ret);
		return VIRTIO_MSIX_NONE;
	}

	while (pos) {
		uint8_t cap[2];

		ret = rte_pci_read_config(dev, cap, sizeof(cap), pos);
		if (ret != sizeof(cap)) {
			PMD_INIT_LOG(DEBUG,
				     "failed to read pci cap at pos: %x ret %d",
				     pos, ret);
			break;
		}

		if (cap[0] == PCI_CAP_ID_MSIX) {
			uint16_t flags;

			ret = rte_pci_read_config(dev, &flags, sizeof(flags),
					pos + sizeof(cap));
			if (ret != sizeof(flags)) {
				PMD_INIT_LOG(DEBUG,
					     "failed to read pci cap at pos:"
					     " %x ret %d", pos + 2, ret);
				break;
			}

			if (flags & PCI_MSIX_ENABLE)
				return VIRTIO_MSIX_ENABLED;
			else
				return VIRTIO_MSIX_DISABLED;
		}

		pos = cap[1];
	}

	return VIRTIO_MSIX_NONE;
}

/*
 * Since we are in legacy mode:
 * http://ozlabs.org/~rusty/virtio-spec/virtio-0.9.5.pdf
 *
 * "Note that this is possible because while the virtio header is PCI (i.e.
 * little) endian, the device-specific region is encoded in the native endian of
 * the guest (where such distinction is applicable)."
 *
 * For powerpc, which supports both endiannesses, qemu assumes the CPU is
 * big endian and enforces this for the virtio-net device config.
 */
static void
legacy_read_dev_config(struct virtio_hw *hw, size_t offset,
		       void *dst, int length)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);
#ifdef RTE_ARCH_PPC_64
	int size;

	while (length > 0) {
		if (length >= 4) {
			size = 4;
			rte_pci_ioport_read(VTPCI_IO(hw), dst, size,
				VIRTIO_PCI_CONFIG(dev) + offset);
			*(uint32_t *)dst = rte_be_to_cpu_32(*(uint32_t *)dst);
		} else if (length >= 2) {
			size = 2;
			rte_pci_ioport_read(VTPCI_IO(hw), dst, size,
				VIRTIO_PCI_CONFIG(dev) + offset);
			*(uint16_t *)dst = rte_be_to_cpu_16(*(uint16_t *)dst);
		} else {
			size = 1;
			rte_pci_ioport_read(VTPCI_IO(hw), dst, size,
				VIRTIO_PCI_CONFIG(dev) + offset);
		}

		dst = (char *)dst + size;
		offset += size;
		length -= size;
	}
#else
	rte_pci_ioport_read(VTPCI_IO(hw), dst, length,
		VIRTIO_PCI_CONFIG(dev) + offset);
#endif
}

static void
legacy_write_dev_config(struct virtio_hw *hw, size_t offset,
			const void *src, int length)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);
#ifdef RTE_ARCH_PPC_64
	union {
		uint32_t u32;
		uint16_t u16;
	} tmp;
	int size;

	while (length > 0) {
		if (length >= 4) {
			size = 4;
			tmp.u32 = rte_cpu_to_be_32(*(const uint32_t *)src);
			rte_pci_ioport_write(VTPCI_IO(hw), &tmp.u32, size,
				VIRTIO_PCI_CONFIG(dev) + offset);
		} else if (length >= 2) {
			size = 2;
			tmp.u16 = rte_cpu_to_be_16(*(const uint16_t *)src);
			rte_pci_ioport_write(VTPCI_IO(hw), &tmp.u16, size,
				VIRTIO_PCI_CONFIG(dev) + offset);
		} else {
			size = 1;
			rte_pci_ioport_write(VTPCI_IO(hw), src, size,
				VIRTIO_PCI_CONFIG(dev) + offset);
		}

		src = (const char *)src + size;
		offset += size;
		length -= size;
	}
#else
	rte_pci_ioport_write(VTPCI_IO(hw), src, length,
		VIRTIO_PCI_CONFIG(dev) + offset);
#endif
}

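/*
 * Legacy (virtio 0.9.x) devices expose only 32 feature bits through the
 * VIRTIO_PCI_HOST_FEATURES/VIRTIO_PCI_GUEST_FEATURES I/O registers;
 * feature bits 32 and above require a modern device, which is why
 * legacy_set_features() below rejects anything in the upper half.
 */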
static uint64_t
legacy_get_features(struct virtio_hw *hw)
{
	uint32_t dst;

	rte_pci_ioport_read(VTPCI_IO(hw), &dst, 4, VIRTIO_PCI_HOST_FEATURES);
	return dst;
}

static void
legacy_set_features(struct virtio_hw *hw, uint64_t features)
{
	if ((features >> 32) != 0) {
		PMD_DRV_LOG(ERR,
			"only 32 bit features are allowed for legacy virtio!");
		return;
	}
	rte_pci_ioport_write(VTPCI_IO(hw), &features, 4,
		VIRTIO_PCI_GUEST_FEATURES);
}

static int
legacy_features_ok(struct virtio_hw *hw __rte_unused)
{
	return 0;
}

static uint8_t
legacy_get_status(struct virtio_hw *hw)
{
	uint8_t dst;

	rte_pci_ioport_read(VTPCI_IO(hw), &dst, 1, VIRTIO_PCI_STATUS);
	return dst;
}

static void
legacy_set_status(struct virtio_hw *hw, uint8_t status)
{
	rte_pci_ioport_write(VTPCI_IO(hw), &status, 1, VIRTIO_PCI_STATUS);
}

static uint8_t
legacy_get_isr(struct virtio_hw *hw)
{
	uint8_t dst;

	rte_pci_ioport_read(VTPCI_IO(hw), &dst, 1, VIRTIO_PCI_ISR);
	return dst;
}

/* Enable one vector (0) for Link State Interrupt */
static uint16_t
legacy_set_config_irq(struct virtio_hw *hw, uint16_t vec)
{
	uint16_t dst;

	rte_pci_ioport_write(VTPCI_IO(hw), &vec, 2, VIRTIO_MSI_CONFIG_VECTOR);
	rte_pci_ioport_read(VTPCI_IO(hw), &dst, 2, VIRTIO_MSI_CONFIG_VECTOR);
	return dst;
}

static uint16_t
legacy_set_queue_irq(struct virtio_hw *hw, struct virtqueue *vq, uint16_t vec)
{
	uint16_t dst;

	rte_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
		VIRTIO_PCI_QUEUE_SEL);
	rte_pci_ioport_write(VTPCI_IO(hw), &vec, 2, VIRTIO_MSI_QUEUE_VECTOR);
	rte_pci_ioport_read(VTPCI_IO(hw), &dst, 2, VIRTIO_MSI_QUEUE_VECTOR);
	return dst;
}

static uint16_t
legacy_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
{
	uint16_t dst;

	rte_pci_ioport_write(VTPCI_IO(hw), &queue_id, 2, VIRTIO_PCI_QUEUE_SEL);
	rte_pci_ioport_read(VTPCI_IO(hw), &dst, 2, VIRTIO_PCI_QUEUE_NUM);
	return dst;
}

static int
legacy_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
{
	uint32_t src;

	/* The virtio PCI device's VIRTIO_PCI_QUEUE_PFN register is 32 bits
	 * wide and only accepts a 32-bit page frame number (the ring address
	 * shifted right by VIRTIO_PCI_QUEUE_ADDR_SHIFT). Check that the
	 * allocated physical memory does not cross the resulting 16TB limit.
	 */
	if ((vq->vq_ring_mem + vq->vq_ring_size - 1) >>
			(VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) {
		PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!");
		return -1;
	}

	rte_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
		VIRTIO_PCI_QUEUE_SEL);
	src = vq->vq_ring_mem >> VIRTIO_PCI_QUEUE_ADDR_SHIFT;
	rte_pci_ioport_write(VTPCI_IO(hw), &src, 4, VIRTIO_PCI_QUEUE_PFN);

	return 0;
}

static void
legacy_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
{
	uint32_t src = 0;

	rte_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
		VIRTIO_PCI_QUEUE_SEL);
	rte_pci_ioport_write(VTPCI_IO(hw), &src, 4, VIRTIO_PCI_QUEUE_PFN);
}

static void
legacy_notify_queue(struct virtio_hw *hw, struct virtqueue *vq)
{
	rte_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
		VIRTIO_PCI_QUEUE_NOTIFY);
}

static void
legacy_intr_detect(struct virtio_hw *hw)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);

	dev->msix_status = vtpci_msix_detect(VTPCI_DEV(hw));
	hw->intr_lsc = !!dev->msix_status;
}

static int
legacy_dev_close(struct virtio_hw *hw)
{
	rte_pci_unmap_device(VTPCI_DEV(hw));
	rte_pci_ioport_unmap(VTPCI_IO(hw));

	return 0;
}

const struct virtio_ops legacy_ops = {
	.read_dev_cfg	= legacy_read_dev_config,
	.write_dev_cfg	= legacy_write_dev_config,
	.get_status	= legacy_get_status,
	.set_status	= legacy_set_status,
	.get_features	= legacy_get_features,
	.set_features	= legacy_set_features,
	.features_ok	= legacy_features_ok,
	.get_isr	= legacy_get_isr,
	.set_config_irq	= legacy_set_config_irq,
	.set_queue_irq  = legacy_set_queue_irq,
	.get_queue_num	= legacy_get_queue_num,
	.setup_queue	= legacy_setup_queue,
	.del_queue	= legacy_del_queue,
	.notify_queue	= legacy_notify_queue,
	.intr_detect	= legacy_intr_detect,
	.dev_close	= legacy_dev_close,
};

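/*
 * The modern common config lays out each 64-bit queue address as a
 * lo/hi pair of 32-bit little-endian registers, so a 64-bit value has
 * to be written as two 32-bit MMIO accesses.
 */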
static inline void
io_write64_twopart(uint64_t val, uint32_t *lo, uint32_t *hi)
{
	rte_write32(val & ((1ULL << 32) - 1), lo);
	rte_write32(val >> 32,		     hi);
}

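/*
 * The device may change its config space while we are reading it. Per
 * the virtio 1.0 spec, config_generation is bumped whenever that
 * happens, so retry the byte-by-byte copy until the generation is the
 * same before and after, i.e. until we have a consistent snapshot.
 */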
static void
modern_read_dev_config(struct virtio_hw *hw, size_t offset,
		       void *dst, int length)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);
	int i;
	uint8_t *p;
	uint8_t old_gen, new_gen;

	do {
		old_gen = rte_read8(&dev->common_cfg->config_generation);

		p = dst;
		for (i = 0;  i < length; i++)
			*p++ = rte_read8((uint8_t *)dev->dev_cfg + offset + i);

		new_gen = rte_read8(&dev->common_cfg->config_generation);
	} while (old_gen != new_gen);
}

static void
modern_write_dev_config(struct virtio_hw *hw, size_t offset,
			const void *src, int length)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);
	int i;
	const uint8_t *p = src;

	for (i = 0;  i < length; i++)
		rte_write8((*p++), (((uint8_t *)dev->dev_cfg) + offset + i));
}

static uint64_t
modern_get_features(struct virtio_hw *hw)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);
	uint32_t features_lo, features_hi;

	rte_write32(0, &dev->common_cfg->device_feature_select);
	features_lo = rte_read32(&dev->common_cfg->device_feature);

	rte_write32(1, &dev->common_cfg->device_feature_select);
	features_hi = rte_read32(&dev->common_cfg->device_feature);

	return ((uint64_t)features_hi << 32) | features_lo;
}

static void
modern_set_features(struct virtio_hw *hw, uint64_t features)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);

	rte_write32(0, &dev->common_cfg->guest_feature_select);
	rte_write32(features & ((1ULL << 32) - 1),
		    &dev->common_cfg->guest_feature);

	rte_write32(1, &dev->common_cfg->guest_feature_select);
	rte_write32(features >> 32,
		    &dev->common_cfg->guest_feature);
}

static int
modern_features_ok(struct virtio_hw *hw)
{
	if (!virtio_with_feature(hw, VIRTIO_F_VERSION_1)) {
		PMD_INIT_LOG(ERR, "Version 1+ required with modern devices");
		return -1;
	}

	return 0;
}

static uint8_t
modern_get_status(struct virtio_hw *hw)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);

	return rte_read8(&dev->common_cfg->device_status);
}

static void
modern_set_status(struct virtio_hw *hw, uint8_t status)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);

	rte_write8(status, &dev->common_cfg->device_status);
}

static uint8_t
modern_get_isr(struct virtio_hw *hw)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);

	return rte_read8(dev->isr);
}

static uint16_t
modern_set_config_irq(struct virtio_hw *hw, uint16_t vec)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);

	rte_write16(vec, &dev->common_cfg->msix_config);
	return rte_read16(&dev->common_cfg->msix_config);
}

static uint16_t
modern_set_queue_irq(struct virtio_hw *hw, struct virtqueue *vq, uint16_t vec)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);

	rte_write16(vq->vq_queue_index, &dev->common_cfg->queue_select);
	rte_write16(vec, &dev->common_cfg->queue_msix_vector);
	return rte_read16(&dev->common_cfg->queue_msix_vector);
}

static uint16_t
modern_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);

	rte_write16(queue_id, &dev->common_cfg->queue_select);
	return rte_read16(&dev->common_cfg->queue_size);
}

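/*
 * A split virtqueue is one contiguous allocation: the descriptor table,
 * then the avail ring, then the used ring aligned to VIRTIO_VRING_ALIGN.
 * The three addresses programmed below are derived from vq_ring_mem
 * accordingly. The queue's notify (doorbell) address is notify_base
 * plus queue_notify_off scaled by the multiplier read from the notify
 * capability.
 */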
static int
modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);
	uint64_t desc_addr, avail_addr, used_addr;
	uint16_t notify_off;

	desc_addr = vq->vq_ring_mem;
	avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc);
	used_addr = RTE_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail,
							 ring[vq->vq_nentries]),
				   VIRTIO_VRING_ALIGN);

	rte_write16(vq->vq_queue_index, &dev->common_cfg->queue_select);

	io_write64_twopart(desc_addr, &dev->common_cfg->queue_desc_lo,
				      &dev->common_cfg->queue_desc_hi);
	io_write64_twopart(avail_addr, &dev->common_cfg->queue_avail_lo,
				       &dev->common_cfg->queue_avail_hi);
	io_write64_twopart(used_addr, &dev->common_cfg->queue_used_lo,
				      &dev->common_cfg->queue_used_hi);

	notify_off = rte_read16(&dev->common_cfg->queue_notify_off);
	vq->notify_addr = (void *)((uint8_t *)dev->notify_base +
				notify_off * dev->notify_off_multiplier);

	rte_write16(1, &dev->common_cfg->queue_enable);

	PMD_INIT_LOG(DEBUG, "queue %u addresses:", vq->vq_queue_index);
	PMD_INIT_LOG(DEBUG, "\t desc_addr: %" PRIx64, desc_addr);
	PMD_INIT_LOG(DEBUG, "\t avail_addr: %" PRIx64, avail_addr);
	PMD_INIT_LOG(DEBUG, "\t used_addr: %" PRIx64, used_addr);
	PMD_INIT_LOG(DEBUG, "\t notify addr: %p (notify offset: %u)",
		vq->notify_addr, notify_off);

	return 0;
}

static void
modern_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);

	rte_write16(vq->vq_queue_index, &dev->common_cfg->queue_select);

	io_write64_twopart(0, &dev->common_cfg->queue_desc_lo,
				  &dev->common_cfg->queue_desc_hi);
	io_write64_twopart(0, &dev->common_cfg->queue_avail_lo,
				  &dev->common_cfg->queue_avail_hi);
	io_write64_twopart(0, &dev->common_cfg->queue_used_lo,
				  &dev->common_cfg->queue_used_hi);

	rte_write16(0, &dev->common_cfg->queue_enable);
}

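/*
 * Kick the device. Without VIRTIO_F_NOTIFICATION_DATA, a bare 16-bit
 * queue index is enough; with it, the spec requires the driver to also
 * pass the avail index (and, for packed queues, the wrap counter) in a
 * single 32-bit write, laid out as described inline below.
 */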
static void
modern_notify_queue(struct virtio_hw *hw, struct virtqueue *vq)
{
	uint32_t notify_data;

	if (!virtio_with_feature(hw, VIRTIO_F_NOTIFICATION_DATA)) {
		rte_write16(vq->vq_queue_index, vq->notify_addr);
		return;
	}

	if (virtio_with_packed_queue(hw)) {
		/*
		 * Bit[0:15]: vq queue index
		 * Bit[16:30]: avail index
		 * Bit[31]: avail wrap counter
		 */
		notify_data = ((uint32_t)(!!(vq->vq_packed.cached_flags &
				VRING_PACKED_DESC_F_AVAIL)) << 31) |
				((uint32_t)vq->vq_avail_idx << 16) |
				vq->vq_queue_index;
	} else {
		/*
		 * Bit[0:15]: vq queue index
		 * Bit[16:31]: avail index
		 */
		notify_data = ((uint32_t)vq->vq_avail_idx << 16) |
				vq->vq_queue_index;
	}
	rte_write32(notify_data, vq->notify_addr);
}

static void
modern_intr_detect(struct virtio_hw *hw)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);

	dev->msix_status = vtpci_msix_detect(VTPCI_DEV(hw));
	hw->intr_lsc = !!dev->msix_status;
}

static int
modern_dev_close(struct virtio_hw *hw)
{
	rte_pci_unmap_device(VTPCI_DEV(hw));

	return 0;
}

const struct virtio_ops modern_ops = {
	.read_dev_cfg	= modern_read_dev_config,
	.write_dev_cfg	= modern_write_dev_config,
	.get_status	= modern_get_status,
	.set_status	= modern_set_status,
	.get_features	= modern_get_features,
	.set_features	= modern_set_features,
	.features_ok	= modern_features_ok,
	.get_isr	= modern_get_isr,
	.set_config_irq	= modern_set_config_irq,
	.set_queue_irq  = modern_set_queue_irq,
	.get_queue_num	= modern_get_queue_num,
	.setup_queue	= modern_setup_queue,
	.del_queue	= modern_del_queue,
	.notify_queue	= modern_notify_queue,
	.intr_detect	= modern_intr_detect,
	.dev_close	= modern_dev_close,
};

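/*
 * Translate a virtio PCI capability (BAR number, offset, length) into a
 * host virtual address inside the mapped BAR, rejecting capabilities
 * that overflow or point outside the BAR's resource.
 */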
static void *
get_cfg_addr(struct rte_pci_device *dev, struct virtio_pci_cap *cap)
{
	uint8_t  bar    = cap->bar;
	uint32_t length = cap->length;
	uint32_t offset = cap->offset;
	uint8_t *base;

	if (bar >= PCI_MAX_RESOURCE) {
		PMD_INIT_LOG(ERR, "invalid bar: %u", bar);
		return NULL;
	}

	if (offset + length < offset) {
		PMD_INIT_LOG(ERR, "offset(%u) + length(%u) overflows",
			offset, length);
		return NULL;
	}

	if (offset + length > dev->mem_resource[bar].len) {
		PMD_INIT_LOG(ERR,
			"invalid cap: overflows bar space: %u > %" PRIu64,
			offset + length, dev->mem_resource[bar].len);
		return NULL;
	}

	base = dev->mem_resource[bar].addr;
	if (base == NULL) {
		PMD_INIT_LOG(ERR, "bar %u base addr is NULL", bar);
		return NULL;
	}

	return base + offset;
}

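/*
 * Scan the PCI capability list for the vendor-specific (virtio)
 * capabilities and record where the common, notify, device and ISR
 * config structures are mapped. Returns -1 if the device cannot be
 * mapped or if any of the four structures is missing, in which case
 * the caller falls back to legacy handling.
 */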
static int
virtio_read_caps(struct rte_pci_device *pci_dev, struct virtio_hw *hw)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);
	uint8_t pos;
	struct virtio_pci_cap cap;
	int ret;

	if (rte_pci_map_device(pci_dev)) {
		PMD_INIT_LOG(DEBUG, "failed to map pci device!");
		return -1;
	}

	ret = rte_pci_read_config(pci_dev, &pos, 1, PCI_CAPABILITY_LIST);
	if (ret != 1) {
		PMD_INIT_LOG(DEBUG,
			     "failed to read pci capability list, ret %d", ret);
		return -1;
	}

	while (pos) {
		ret = rte_pci_read_config(pci_dev, &cap, 2, pos);
		if (ret != 2) {
			PMD_INIT_LOG(DEBUG,
				     "failed to read pci cap at pos: %x ret %d",
				     pos, ret);
			break;
		}

		if (cap.cap_vndr == PCI_CAP_ID_MSIX) {
			/* Transitional devices also expose this capability,
			 * which is why we additionally check whether MSI-X is
			 * enabled: the 1st byte is the cap ID, the 2nd byte is
			 * the offset of the next cap, and the next two bytes
			 * are the flags.
			 */
			uint16_t flags;

			ret = rte_pci_read_config(pci_dev, &flags, sizeof(flags),
					pos + 2);
			if (ret != sizeof(flags)) {
				PMD_INIT_LOG(DEBUG,
					     "failed to read pci cap at pos:"
					     " %x ret %d", pos + 2, ret);
				break;
			}

			if (flags & PCI_MSIX_ENABLE)
				dev->msix_status = VIRTIO_MSIX_ENABLED;
			else
				dev->msix_status = VIRTIO_MSIX_DISABLED;
		}

		if (cap.cap_vndr != PCI_CAP_ID_VNDR) {
			PMD_INIT_LOG(DEBUG,
				"[%2x] skipping non VNDR cap id: %02x",
				pos, cap.cap_vndr);
			goto next;
		}

		ret = rte_pci_read_config(pci_dev, &cap, sizeof(cap), pos);
		if (ret != sizeof(cap)) {
			PMD_INIT_LOG(DEBUG,
				     "failed to read pci cap at pos: %x ret %d",
				     pos, ret);
			break;
		}

		PMD_INIT_LOG(DEBUG,
			"[%2x] cfg type: %u, bar: %u, offset: %04x, len: %u",
			pos, cap.cfg_type, cap.bar, cap.offset, cap.length);

		switch (cap.cfg_type) {
		case VIRTIO_PCI_CAP_COMMON_CFG:
			dev->common_cfg = get_cfg_addr(pci_dev, &cap);
			break;
		case VIRTIO_PCI_CAP_NOTIFY_CFG:
			ret = rte_pci_read_config(pci_dev,
					&dev->notify_off_multiplier,
					4, pos + sizeof(cap));
			if (ret != 4)
				PMD_INIT_LOG(DEBUG,
					"failed to read notify_off_multiplier, ret %d",
					ret);
			else
				dev->notify_base = get_cfg_addr(pci_dev, &cap);
			break;
		case VIRTIO_PCI_CAP_DEVICE_CFG:
			dev->dev_cfg = get_cfg_addr(pci_dev, &cap);
			break;
		case VIRTIO_PCI_CAP_ISR_CFG:
			dev->isr = get_cfg_addr(pci_dev, &cap);
			break;
		}

next:
		pos = cap.cap_next;
	}

	if (dev->common_cfg == NULL || dev->notify_base == NULL ||
	    dev->dev_cfg == NULL    || dev->isr == NULL) {
		PMD_INIT_LOG(INFO, "no modern virtio pci device found.");
		return -1;
	}

	PMD_INIT_LOG(INFO, "found modern virtio pci device.");

	PMD_INIT_LOG(DEBUG, "common cfg mapped at: %p", dev->common_cfg);
	PMD_INIT_LOG(DEBUG, "device cfg mapped at: %p", dev->dev_cfg);
	PMD_INIT_LOG(DEBUG, "isr cfg mapped at: %p", dev->isr);
	PMD_INIT_LOG(DEBUG, "notify base: %p, notify off multiplier: %u",
		dev->notify_base, dev->notify_off_multiplier);

	return 0;
}

/*
 * Return -1:
 *   if there is an error mapping the device with VFIO/UIO.
 *   if the ioport map fails when the driver type is KDRV_NONE.
 *   if the device is marked as allowed but the driver type is KDRV_UNKNOWN.
 * Return 1 if a kernel driver is managing the device.
 * Return 0 on success.
 */
int
vtpci_init(struct rte_pci_device *pci_dev, struct virtio_pci_dev *dev)
{
	struct virtio_hw *hw = &dev->hw;

	RTE_BUILD_BUG_ON(offsetof(struct virtio_pci_dev, hw) != 0);

	/*
	 * Try to read the virtio PCI capabilities, which exist only on
	 * modern PCI devices. If that fails, fall back to legacy virtio
	 * handling.
	 */
	if (virtio_read_caps(pci_dev, hw) == 0) {
		PMD_INIT_LOG(INFO, "modern virtio pci detected.");
		VIRTIO_OPS(hw) = &modern_ops;
		dev->modern = true;
		goto msix_detect;
	}

	PMD_INIT_LOG(INFO, "trying with legacy virtio pci.");
	if (rte_pci_ioport_map(pci_dev, 0, VTPCI_IO(hw)) < 0) {
		rte_pci_unmap_device(pci_dev);
		if (pci_dev->kdrv == RTE_PCI_KDRV_UNKNOWN &&
		    (!pci_dev->device.devargs ||
		     pci_dev->device.devargs->bus !=
		     rte_bus_find_by_name("pci"))) {
			PMD_INIT_LOG(INFO,
				"skip kernel managed virtio device.");
			return 1;
		}
		return -1;
	}

	VIRTIO_OPS(hw) = &legacy_ops;
	dev->modern = false;

msix_detect:
	VIRTIO_OPS(hw)->intr_detect(hw);

	return 0;
}

void vtpci_legacy_ioport_unmap(struct virtio_hw *hw)
{
	rte_pci_ioport_unmap(VTPCI_IO(hw));
}

int vtpci_legacy_ioport_map(struct virtio_hw *hw)
{
	return rte_pci_ioport_map(VTPCI_DEV(hw), 0, VTPCI_IO(hw));
}