xref: /dpdk/drivers/net/virtio/virtio_pci.c (revision 6c02043e9967a9d8f6e8c058256e257efe1d6d1a)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */
#include <stdint.h>

#ifdef RTE_EXEC_ENV_LINUX
 #include <dirent.h>
 #include <fcntl.h>
#endif

#include <rte_io.h>
#include <rte_bus.h>

#include "virtio_pci.h"
#include "virtio_logs.h"
#include "virtqueue.h"

/*
 * The following macros are derived from linux/pci_regs.h. We can't simply
 * include that header here, as it does not exist on non-Linux platforms.
 */
#define PCI_CAPABILITY_LIST	0x34
#define PCI_CAP_ID_VNDR		0x09
#define PCI_CAP_ID_MSIX		0x11

/*
 * The remaining space is defined by each driver as the per-driver
 * configuration space.
 */
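/*
 * In legacy mode the device-specific config space follows the common virtio
 * header, which is 20 bytes long, or 24 bytes when MSI-X is enabled (two
 * additional 16-bit vector registers).
 */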
#define VIRTIO_PCI_CONFIG(dev) \
		(((dev)->msix_status == VIRTIO_MSIX_ENABLED) ? 24 : 20)


struct virtio_pci_internal {
	struct rte_pci_ioport io;
};

#define VTPCI_IO(hw) (&virtio_pci_internal[(hw)->port_id].io)

struct virtio_pci_internal virtio_pci_internal[RTE_MAX_ETHPORTS];

static inline int
check_vq_phys_addr_ok(struct virtqueue *vq)
{
	/* The virtio PCI VIRTIO_PCI_QUEUE_PFN register is 32 bits wide and
	 * only accepts a 32-bit page frame number. With 4KiB pages this
	 * limits the ring to the first 2^32 * 4KiB = 16TB of physical
	 * memory, so check that the allocated memory does not exceed 16TB.
	 */
	if ((vq->vq_ring_mem + vq->vq_ring_size - 1) >>
			(VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) {
		PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!");
		return 0;
	}

	return 1;
}

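/* MSI-X Enable bit of the Message Control field in the MSI-X capability */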
#define PCI_MSIX_ENABLE 0x8000

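/*
 * Walk the PCI capability list looking for the MSI-X capability, and report
 * whether MSI-X is absent, present but disabled, or enabled. Each capability
 * starts with a one-byte ID followed by a one-byte offset to the next one.
 */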
static enum virtio_msix_status
vtpci_msix_detect(struct rte_pci_device *dev)
{
	uint8_t pos;
	int ret;

	ret = rte_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST);
	if (ret != 1) {
		PMD_INIT_LOG(DEBUG,
			     "failed to read pci capability list, ret %d", ret);
		return VIRTIO_MSIX_NONE;
	}

	while (pos) {
		uint8_t cap[2];

		ret = rte_pci_read_config(dev, cap, sizeof(cap), pos);
		if (ret != sizeof(cap)) {
			PMD_INIT_LOG(DEBUG,
				     "failed to read pci cap at pos: %x ret %d",
				     pos, ret);
			break;
		}

		if (cap[0] == PCI_CAP_ID_MSIX) {
			uint16_t flags;

			ret = rte_pci_read_config(dev, &flags, sizeof(flags),
					pos + sizeof(cap));
			if (ret != sizeof(flags)) {
				PMD_INIT_LOG(DEBUG,
					     "failed to read pci cap at pos:"
					     " %x ret %d", pos + 2, ret);
				break;
			}

			if (flags & PCI_MSIX_ENABLE)
				return VIRTIO_MSIX_ENABLED;
			else
				return VIRTIO_MSIX_DISABLED;
		}

		pos = cap[1];
	}

	return VIRTIO_MSIX_NONE;
}

/*
 * Since we are in legacy mode:
 * http://ozlabs.org/~rusty/virtio-spec/virtio-0.9.5.pdf
 *
 * "Note that this is possible because while the virtio header is PCI (i.e.
 * little) endian, the device-specific region is encoded in the native endian of
 * the guest (where such distinction is applicable)."
 *
 * For powerpc, which supports both endiannesses, QEMU assumes the CPU is
 * big endian and enforces this for the virtio-net device.
 */
static void
legacy_read_dev_config(struct virtio_hw *hw, size_t offset,
		       void *dst, int length)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);
#ifdef RTE_ARCH_PPC_64
	int size;

	while (length > 0) {
		if (length >= 4) {
			size = 4;
			rte_pci_ioport_read(VTPCI_IO(hw), dst, size,
				VIRTIO_PCI_CONFIG(dev) + offset);
			*(uint32_t *)dst = rte_be_to_cpu_32(*(uint32_t *)dst);
		} else if (length >= 2) {
			size = 2;
			rte_pci_ioport_read(VTPCI_IO(hw), dst, size,
				VIRTIO_PCI_CONFIG(dev) + offset);
			*(uint16_t *)dst = rte_be_to_cpu_16(*(uint16_t *)dst);
		} else {
			size = 1;
			rte_pci_ioport_read(VTPCI_IO(hw), dst, size,
				VIRTIO_PCI_CONFIG(dev) + offset);
		}

		dst = (char *)dst + size;
		offset += size;
		length -= size;
	}
#else
	rte_pci_ioport_read(VTPCI_IO(hw), dst, length,
		VIRTIO_PCI_CONFIG(dev) + offset);
#endif
}

static void
legacy_write_dev_config(struct virtio_hw *hw, size_t offset,
			const void *src, int length)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);
#ifdef RTE_ARCH_PPC_64
	union {
		uint32_t u32;
		uint16_t u16;
	} tmp;
	int size;

	while (length > 0) {
		if (length >= 4) {
			size = 4;
			tmp.u32 = rte_cpu_to_be_32(*(const uint32_t *)src);
			rte_pci_ioport_write(VTPCI_IO(hw), &tmp.u32, size,
				VIRTIO_PCI_CONFIG(dev) + offset);
		} else if (length >= 2) {
			size = 2;
			tmp.u16 = rte_cpu_to_be_16(*(const uint16_t *)src);
			rte_pci_ioport_write(VTPCI_IO(hw), &tmp.u16, size,
				VIRTIO_PCI_CONFIG(dev) + offset);
		} else {
			size = 1;
			rte_pci_ioport_write(VTPCI_IO(hw), src, size,
				VIRTIO_PCI_CONFIG(dev) + offset);
		}

		src = (const char *)src + size;
		offset += size;
		length -= size;
	}
#else
	rte_pci_ioport_write(VTPCI_IO(hw), src, length,
		VIRTIO_PCI_CONFIG(dev) + offset);
#endif
}

static uint64_t
legacy_get_features(struct virtio_hw *hw)
{
	uint32_t dst;

	rte_pci_ioport_read(VTPCI_IO(hw), &dst, 4, VIRTIO_PCI_HOST_FEATURES);
	return dst;
}

static void
legacy_set_features(struct virtio_hw *hw, uint64_t features)
{
	if ((features >> 32) != 0) {
		PMD_DRV_LOG(ERR,
			"only 32 bit features are allowed for legacy virtio!");
		return;
	}
	rte_pci_ioport_write(VTPCI_IO(hw), &features, 4,
		VIRTIO_PCI_GUEST_FEATURES);
}

static int
legacy_features_ok(struct virtio_hw *hw __rte_unused)
{
	return 0;
}

static uint8_t
legacy_get_status(struct virtio_hw *hw)
{
	uint8_t dst;

	rte_pci_ioport_read(VTPCI_IO(hw), &dst, 1, VIRTIO_PCI_STATUS);
	return dst;
}

static void
legacy_set_status(struct virtio_hw *hw, uint8_t status)
{
	rte_pci_ioport_write(VTPCI_IO(hw), &status, 1, VIRTIO_PCI_STATUS);
}

static uint8_t
legacy_get_isr(struct virtio_hw *hw)
{
	uint8_t dst;

	rte_pci_ioport_read(VTPCI_IO(hw), &dst, 1, VIRTIO_PCI_ISR);
	return dst;
}

/* Enable one vector (0) for the Link State Interrupt */
static uint16_t
legacy_set_config_irq(struct virtio_hw *hw, uint16_t vec)
{
	uint16_t dst;

	rte_pci_ioport_write(VTPCI_IO(hw), &vec, 2, VIRTIO_MSI_CONFIG_VECTOR);
	rte_pci_ioport_read(VTPCI_IO(hw), &dst, 2, VIRTIO_MSI_CONFIG_VECTOR);
	return dst;
}

static uint16_t
legacy_set_queue_irq(struct virtio_hw *hw, struct virtqueue *vq, uint16_t vec)
{
	uint16_t dst;

	rte_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
		VIRTIO_PCI_QUEUE_SEL);
	rte_pci_ioport_write(VTPCI_IO(hw), &vec, 2, VIRTIO_MSI_QUEUE_VECTOR);
	rte_pci_ioport_read(VTPCI_IO(hw), &dst, 2, VIRTIO_MSI_QUEUE_VECTOR);
	return dst;
}

static uint16_t
legacy_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
{
	uint16_t dst;

	rte_pci_ioport_write(VTPCI_IO(hw), &queue_id, 2, VIRTIO_PCI_QUEUE_SEL);
	rte_pci_ioport_read(VTPCI_IO(hw), &dst, 2, VIRTIO_PCI_QUEUE_NUM);
	return dst;
}

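/*
 * Program the ring address by writing its page frame number (the physical
 * address shifted right by VIRTIO_PCI_QUEUE_ADDR_SHIFT) into the 32-bit
 * VIRTIO_PCI_QUEUE_PFN register of the selected queue.
 */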
static int
legacy_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
{
	uint32_t src;

	if (!check_vq_phys_addr_ok(vq))
		return -1;

	rte_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
		VIRTIO_PCI_QUEUE_SEL);
	src = vq->vq_ring_mem >> VIRTIO_PCI_QUEUE_ADDR_SHIFT;
	rte_pci_ioport_write(VTPCI_IO(hw), &src, 4, VIRTIO_PCI_QUEUE_PFN);

	return 0;
}

static void
legacy_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
{
	uint32_t src = 0;

	rte_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
		VIRTIO_PCI_QUEUE_SEL);
	rte_pci_ioport_write(VTPCI_IO(hw), &src, 4, VIRTIO_PCI_QUEUE_PFN);
}

static void
legacy_notify_queue(struct virtio_hw *hw, struct virtqueue *vq)
{
	rte_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
		VIRTIO_PCI_QUEUE_NOTIFY);
}

static void
legacy_intr_detect(struct virtio_hw *hw)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);

	dev->msix_status = vtpci_msix_detect(dev->pci_dev);
	hw->intr_lsc = !!dev->msix_status;
}

static int
legacy_dev_close(struct virtio_hw *hw)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);

	rte_pci_unmap_device(dev->pci_dev);
	rte_pci_ioport_unmap(VTPCI_IO(hw));

	return 0;
}

const struct virtio_ops legacy_ops = {
	.read_dev_cfg	= legacy_read_dev_config,
	.write_dev_cfg	= legacy_write_dev_config,
	.get_status	= legacy_get_status,
	.set_status	= legacy_set_status,
	.get_features	= legacy_get_features,
	.set_features	= legacy_set_features,
	.features_ok	= legacy_features_ok,
	.get_isr	= legacy_get_isr,
	.set_config_irq	= legacy_set_config_irq,
	.set_queue_irq  = legacy_set_queue_irq,
	.get_queue_num	= legacy_get_queue_num,
	.setup_queue	= legacy_setup_queue,
	.del_queue	= legacy_del_queue,
	.notify_queue	= legacy_notify_queue,
	.intr_detect	= legacy_intr_detect,
	.dev_close	= legacy_dev_close,
};

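/* Write a 64-bit value to a register pair as two 32-bit writes, low first. */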
static inline void
io_write64_twopart(uint64_t val, uint32_t *lo, uint32_t *hi)
{
	rte_write32(val & ((1ULL << 32) - 1), lo);
	rte_write32(val >> 32,		     hi);
}

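/*
 * Read the device-specific config space byte by byte. If the device changes
 * its config generation counter while we read, the snapshot may be
 * inconsistent, so retry until the generation is stable.
 */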
static void
modern_read_dev_config(struct virtio_hw *hw, size_t offset,
		       void *dst, int length)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);
	int i;
	uint8_t *p;
	uint8_t old_gen, new_gen;

	do {
		old_gen = rte_read8(&dev->common_cfg->config_generation);

		p = dst;
		for (i = 0;  i < length; i++)
			*p++ = rte_read8((uint8_t *)dev->dev_cfg + offset + i);

		new_gen = rte_read8(&dev->common_cfg->config_generation);
	} while (old_gen != new_gen);
}

static void
modern_write_dev_config(struct virtio_hw *hw, size_t offset,
			const void *src, int length)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);
	int i;
	const uint8_t *p = src;

	for (i = 0;  i < length; i++)
		rte_write8((*p++), (((uint8_t *)dev->dev_cfg) + offset + i));
}

static uint64_t
modern_get_features(struct virtio_hw *hw)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);
	uint32_t features_lo, features_hi;

	rte_write32(0, &dev->common_cfg->device_feature_select);
	features_lo = rte_read32(&dev->common_cfg->device_feature);

	rte_write32(1, &dev->common_cfg->device_feature_select);
	features_hi = rte_read32(&dev->common_cfg->device_feature);

	return ((uint64_t)features_hi << 32) | features_lo;
}

static void
modern_set_features(struct virtio_hw *hw, uint64_t features)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);

	rte_write32(0, &dev->common_cfg->guest_feature_select);
	rte_write32(features & ((1ULL << 32) - 1),
		    &dev->common_cfg->guest_feature);

	rte_write32(1, &dev->common_cfg->guest_feature_select);
	rte_write32(features >> 32,
		    &dev->common_cfg->guest_feature);
}

static int
modern_features_ok(struct virtio_hw *hw)
{
	if (!virtio_with_feature(hw, VIRTIO_F_VERSION_1)) {
		PMD_INIT_LOG(ERR, "Version 1+ required with modern devices");
		return -1;
	}

	return 0;
}

static uint8_t
modern_get_status(struct virtio_hw *hw)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);

	return rte_read8(&dev->common_cfg->device_status);
}

static void
modern_set_status(struct virtio_hw *hw, uint8_t status)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);

	rte_write8(status, &dev->common_cfg->device_status);
}

static uint8_t
modern_get_isr(struct virtio_hw *hw)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);

	return rte_read8(dev->isr);
}

static uint16_t
modern_set_config_irq(struct virtio_hw *hw, uint16_t vec)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);

	rte_write16(vec, &dev->common_cfg->msix_config);
	return rte_read16(&dev->common_cfg->msix_config);
}

static uint16_t
modern_set_queue_irq(struct virtio_hw *hw, struct virtqueue *vq, uint16_t vec)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);

	rte_write16(vq->vq_queue_index, &dev->common_cfg->queue_select);
	rte_write16(vec, &dev->common_cfg->queue_msix_vector);
	return rte_read16(&dev->common_cfg->queue_msix_vector);
}

static uint16_t
modern_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);

	rte_write16(queue_id, &dev->common_cfg->queue_select);
	return rte_read16(&dev->common_cfg->queue_size);
}

static int
modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);
	uint64_t desc_addr, avail_addr, used_addr;
	uint16_t notify_off;

	if (!check_vq_phys_addr_ok(vq))
		return -1;

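	/*
	 * Split-ring layout: the avail ring immediately follows the
	 * descriptor table, and the used ring starts at the next
	 * VIRTIO_VRING_ALIGN boundary after the avail ring.
	 */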
	desc_addr = vq->vq_ring_mem;
	avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc);
	used_addr = RTE_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail,
							 ring[vq->vq_nentries]),
				   VIRTIO_VRING_ALIGN);

	rte_write16(vq->vq_queue_index, &dev->common_cfg->queue_select);

	io_write64_twopart(desc_addr, &dev->common_cfg->queue_desc_lo,
				      &dev->common_cfg->queue_desc_hi);
	io_write64_twopart(avail_addr, &dev->common_cfg->queue_avail_lo,
				       &dev->common_cfg->queue_avail_hi);
	io_write64_twopart(used_addr, &dev->common_cfg->queue_used_lo,
				      &dev->common_cfg->queue_used_hi);

	notify_off = rte_read16(&dev->common_cfg->queue_notify_off);
	vq->notify_addr = (void *)((uint8_t *)dev->notify_base +
				notify_off * dev->notify_off_multiplier);

	rte_write16(1, &dev->common_cfg->queue_enable);

	PMD_INIT_LOG(DEBUG, "queue %u addresses:", vq->vq_queue_index);
	PMD_INIT_LOG(DEBUG, "\t desc_addr: %" PRIx64, desc_addr);
	PMD_INIT_LOG(DEBUG, "\t avail_addr: %" PRIx64, avail_addr);
	PMD_INIT_LOG(DEBUG, "\t used_addr: %" PRIx64, used_addr);
	PMD_INIT_LOG(DEBUG, "\t notify addr: %p (notify offset: %u)",
		vq->notify_addr, notify_off);

	return 0;
}

static void
modern_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);

	rte_write16(vq->vq_queue_index, &dev->common_cfg->queue_select);

	io_write64_twopart(0, &dev->common_cfg->queue_desc_lo,
				  &dev->common_cfg->queue_desc_hi);
	io_write64_twopart(0, &dev->common_cfg->queue_avail_lo,
				  &dev->common_cfg->queue_avail_hi);
	io_write64_twopart(0, &dev->common_cfg->queue_used_lo,
				  &dev->common_cfg->queue_used_hi);

	rte_write16(0, &dev->common_cfg->queue_enable);
}

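/*
 * Kick the device for the given queue. Without VIRTIO_F_NOTIFICATION_DATA
 * only the 16-bit queue index is written; with it, a 32-bit value that also
 * carries the avail index (and, for packed rings, the wrap counter) is used.
 */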
static void
modern_notify_queue(struct virtio_hw *hw, struct virtqueue *vq)
{
	uint32_t notify_data;

	if (!virtio_with_feature(hw, VIRTIO_F_NOTIFICATION_DATA)) {
		rte_write16(vq->vq_queue_index, vq->notify_addr);
		return;
	}

	if (virtio_with_packed_queue(hw)) {
		/*
		 * Bit[0:15]: vq queue index
		 * Bit[16:30]: avail index
		 * Bit[31]: avail wrap counter
		 */
		notify_data = ((uint32_t)(!!(vq->vq_packed.cached_flags &
				VRING_PACKED_DESC_F_AVAIL)) << 31) |
				((uint32_t)vq->vq_avail_idx << 16) |
				vq->vq_queue_index;
	} else {
		/*
		 * Bit[0:15]: vq queue index
		 * Bit[16:31]: avail index
		 */
		notify_data = ((uint32_t)vq->vq_avail_idx << 16) |
				vq->vq_queue_index;
	}
	rte_write32(notify_data, vq->notify_addr);
}

static void
modern_intr_detect(struct virtio_hw *hw)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);

	dev->msix_status = vtpci_msix_detect(dev->pci_dev);
	hw->intr_lsc = !!dev->msix_status;
}

static int
modern_dev_close(struct virtio_hw *hw)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);

	rte_pci_unmap_device(dev->pci_dev);

	return 0;
}

const struct virtio_ops modern_ops = {
	.read_dev_cfg	= modern_read_dev_config,
	.write_dev_cfg	= modern_write_dev_config,
	.get_status	= modern_get_status,
	.set_status	= modern_set_status,
	.get_features	= modern_get_features,
	.set_features	= modern_set_features,
	.features_ok	= modern_features_ok,
	.get_isr	= modern_get_isr,
	.set_config_irq	= modern_set_config_irq,
	.set_queue_irq  = modern_set_queue_irq,
	.get_queue_num	= modern_get_queue_num,
	.setup_queue	= modern_setup_queue,
	.del_queue	= modern_del_queue,
	.notify_queue	= modern_notify_queue,
	.intr_detect	= modern_intr_detect,
	.dev_close	= modern_dev_close,
};

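/*
 * Translate a virtio PCI capability (BAR index, offset and length) into a
 * virtual address within the mapped BAR, validating that the region fits
 * inside the BAR's resource. Returns NULL on any inconsistency.
 */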
static void *
get_cfg_addr(struct rte_pci_device *dev, struct virtio_pci_cap *cap)
{
	uint8_t  bar    = cap->bar;
	uint32_t length = cap->length;
	uint32_t offset = cap->offset;
	uint8_t *base;

	if (bar >= PCI_MAX_RESOURCE) {
		PMD_INIT_LOG(ERR, "invalid bar: %u", bar);
		return NULL;
	}

	if (offset + length < offset) {
		PMD_INIT_LOG(ERR, "offset(%u) + length(%u) overflows",
			offset, length);
		return NULL;
	}

	if (offset + length > dev->mem_resource[bar].len) {
		PMD_INIT_LOG(ERR,
			"invalid cap: overflows bar space: %u > %" PRIu64,
			offset + length, dev->mem_resource[bar].len);
		return NULL;
	}

	base = dev->mem_resource[bar].addr;
	if (base == NULL) {
		PMD_INIT_LOG(ERR, "bar %u base addr is NULL", bar);
		return NULL;
	}

	return base + offset;
}

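/*
 * Map the device and scan its PCI capability list for the vendor-specific
 * virtio capabilities (common, notify, device and ISR config), recording
 * where each one is mapped. Returns 0 if all of them were found (a modern
 * device), -1 otherwise.
 */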
static int
virtio_read_caps(struct rte_pci_device *pci_dev, struct virtio_hw *hw)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);
	uint8_t pos;
	struct virtio_pci_cap cap;
	int ret;

	if (rte_pci_map_device(pci_dev)) {
		PMD_INIT_LOG(DEBUG, "failed to map pci device!");
		return -1;
	}

	ret = rte_pci_read_config(pci_dev, &pos, 1, PCI_CAPABILITY_LIST);
	if (ret != 1) {
		PMD_INIT_LOG(DEBUG,
			     "failed to read pci capability list, ret %d", ret);
		return -1;
	}

	while (pos) {
		ret = rte_pci_read_config(pci_dev, &cap, 2, pos);
		if (ret != 2) {
			PMD_INIT_LOG(DEBUG,
				     "failed to read pci cap at pos: %x ret %d",
				     pos, ret);
			break;
		}

		if (cap.cap_vndr == PCI_CAP_ID_MSIX) {
			/* Transitional devices also expose this capability,
			 * which is why we also check whether MSI-X is enabled.
			 * The 1st byte is the cap ID, the 2nd byte is the
			 * offset of the next cap, and the next two bytes hold
			 * the MSI-X flags.
			 */
			uint16_t flags;

			ret = rte_pci_read_config(pci_dev, &flags, sizeof(flags),
					pos + 2);
			if (ret != sizeof(flags)) {
				PMD_INIT_LOG(DEBUG,
					     "failed to read pci cap at pos:"
					     " %x ret %d", pos + 2, ret);
				break;
			}

			if (flags & PCI_MSIX_ENABLE)
				dev->msix_status = VIRTIO_MSIX_ENABLED;
			else
				dev->msix_status = VIRTIO_MSIX_DISABLED;
		}

		if (cap.cap_vndr != PCI_CAP_ID_VNDR) {
			PMD_INIT_LOG(DEBUG,
				"[%2x] skipping non VNDR cap id: %02x",
				pos, cap.cap_vndr);
			goto next;
		}

		ret = rte_pci_read_config(pci_dev, &cap, sizeof(cap), pos);
		if (ret != sizeof(cap)) {
			PMD_INIT_LOG(DEBUG,
				     "failed to read pci cap at pos: %x ret %d",
				     pos, ret);
			break;
		}

		PMD_INIT_LOG(DEBUG,
			"[%2x] cfg type: %u, bar: %u, offset: %04x, len: %u",
			pos, cap.cfg_type, cap.bar, cap.offset, cap.length);

		switch (cap.cfg_type) {
		case VIRTIO_PCI_CAP_COMMON_CFG:
			dev->common_cfg = get_cfg_addr(pci_dev, &cap);
			break;
		case VIRTIO_PCI_CAP_NOTIFY_CFG:
			ret = rte_pci_read_config(pci_dev,
					&dev->notify_off_multiplier,
					4, pos + sizeof(cap));
			if (ret != 4)
				PMD_INIT_LOG(DEBUG,
					"failed to read notify_off_multiplier, ret %d",
					ret);
			else
				dev->notify_base = get_cfg_addr(pci_dev, &cap);
			break;
		case VIRTIO_PCI_CAP_DEVICE_CFG:
			dev->dev_cfg = get_cfg_addr(pci_dev, &cap);
			break;
		case VIRTIO_PCI_CAP_ISR_CFG:
			dev->isr = get_cfg_addr(pci_dev, &cap);
			break;
		}

next:
		pos = cap.cap_next;
	}

	if (dev->common_cfg == NULL || dev->notify_base == NULL ||
	    dev->dev_cfg == NULL    || dev->isr == NULL) {
		PMD_INIT_LOG(INFO, "no modern virtio pci device found.");
		return -1;
	}

	PMD_INIT_LOG(INFO, "found modern virtio pci device.");

	PMD_INIT_LOG(DEBUG, "common cfg mapped at: %p", dev->common_cfg);
	PMD_INIT_LOG(DEBUG, "device cfg mapped at: %p", dev->dev_cfg);
	PMD_INIT_LOG(DEBUG, "isr cfg mapped at: %p", dev->isr);
	PMD_INIT_LOG(DEBUG, "notify base: %p, notify off multiplier: %u",
		dev->notify_base, dev->notify_off_multiplier);

	return 0;
}

/*
 * Return -1:
 *   if there is an error mapping the device with VFIO/UIO;
 *   if the port map fails when the driver type is KDRV_NONE;
 *   if the device is marked as allowed but the driver type is KDRV_UNKNOWN.
 * Return 1 if a kernel driver is managing the device.
 * Return 0 on success.
 */
int
vtpci_init(struct rte_pci_device *pci_dev, struct virtio_pci_dev *dev)
{
	struct virtio_hw *hw = &dev->hw;

	RTE_BUILD_BUG_ON(offsetof(struct virtio_pci_dev, hw) != 0);

	dev->pci_dev = pci_dev;

	/*
	 * Check whether we can read the virtio PCI capabilities, which exist
	 * only on modern PCI devices. If that fails, fall back to legacy
	 * virtio handling.
	 */
	if (virtio_read_caps(pci_dev, hw) == 0) {
		PMD_INIT_LOG(INFO, "modern virtio pci detected.");
		VIRTIO_OPS(hw) = &modern_ops;
		dev->modern = true;
		goto msix_detect;
	}

	PMD_INIT_LOG(INFO, "trying with legacy virtio pci.");
	if (rte_pci_ioport_map(pci_dev, 0, VTPCI_IO(hw)) < 0) {
		rte_pci_unmap_device(pci_dev);
		if (pci_dev->kdrv == RTE_PCI_KDRV_UNKNOWN &&
		    (!pci_dev->device.devargs ||
		     pci_dev->device.devargs->bus !=
		     rte_bus_find_by_name("pci"))) {
			PMD_INIT_LOG(INFO,
				"skip kernel managed virtio device.");
			return 1;
		}
		return -1;
	}

	VIRTIO_OPS(hw) = &legacy_ops;
	dev->modern = false;

msix_detect:
	VIRTIO_OPS(hw)->intr_detect(hw);

	return 0;
}

void vtpci_legacy_ioport_unmap(struct virtio_hw *hw)
{
	rte_pci_ioport_unmap(VTPCI_IO(hw));
}

int vtpci_legacy_ioport_map(struct virtio_hw *hw)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);

	return rte_pci_ioport_map(dev->pci_dev, 0, VTPCI_IO(hw));
}
823