/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */
#include <stdint.h>

#ifdef RTE_EXEC_ENV_LINUX
 #include <dirent.h>
 #include <fcntl.h>
#endif

#include <rte_io.h>
#include <rte_bus.h>

#include "virtio_pci.h"
#include "virtio_logs.h"
#include "virtqueue.h"

/*
 * The following macros are derived from linux/pci_regs.h; we can't
 * simply include that header here, as it does not exist on non-Linux
 * platforms.
 */
#define PCI_CAPABILITY_LIST	0x34
#define PCI_CAP_ID_VNDR		0x09
#define PCI_CAP_ID_MSIX		0x11

/*
 * The remaining space is defined by each driver as the per-driver
 * configuration space.
 */
#define VIRTIO_PCI_CONFIG(dev) \
		(((dev)->msix_status == VIRTIO_MSIX_ENABLED) ? 24 : 20)
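
/*
 * In the legacy interface the device-specific configuration immediately
 * follows the virtio header: the header occupies 20 bytes, or 24 bytes
 * when MSI-X is enabled, because the MSI-X config/queue vector registers
 * are then present. Hence the 24 : 20 offsets above.
 */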

struct virtio_pci_internal virtio_pci_internal[RTE_MAX_ETHPORTS];

static inline int
check_vq_phys_addr_ok(struct virtqueue *vq)
{
	/* The virtio PCI device VIRTIO_PCI_QUEUE_PFN register is 32 bits
	 * wide and only accepts a 32-bit page frame number.
	 * Check whether the allocated physical memory exceeds 16 TB.
	 */
	if ((vq->vq_ring_mem + vq->vq_ring_size - 1) >>
			(VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) {
		PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!");
		return 0;
	}

	return 1;
}
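
/*
 * Worked example: with the usual VIRTIO_PCI_QUEUE_ADDR_SHIFT of 12
 * (4 KiB pages), the 32-bit PFN register can address up to
 * 2^(12 + 32) bytes = 16 TB, so any ring byte at or above that
 * boundary fails the check above.
 */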

#define PCI_MSIX_ENABLE 0x8000

static enum virtio_msix_status
vtpci_msix_detect(struct rte_pci_device *dev)
{
	uint8_t pos;
	int ret;

	ret = rte_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST);
	if (ret != 1) {
		PMD_INIT_LOG(DEBUG,
			     "failed to read pci capability list, ret %d", ret);
		return VIRTIO_MSIX_NONE;
	}

	while (pos) {
		uint8_t cap[2];

		ret = rte_pci_read_config(dev, cap, sizeof(cap), pos);
		if (ret != sizeof(cap)) {
			PMD_INIT_LOG(DEBUG,
				     "failed to read pci cap at pos: %x ret %d",
				     pos, ret);
			break;
		}

		if (cap[0] == PCI_CAP_ID_MSIX) {
			uint16_t flags;

			ret = rte_pci_read_config(dev, &flags, sizeof(flags),
					pos + sizeof(cap));
			if (ret != sizeof(flags)) {
				PMD_INIT_LOG(DEBUG,
					     "failed to read pci cap at pos:"
					     " %x ret %d", pos + 2, ret);
				break;
			}

			if (flags & PCI_MSIX_ENABLE)
				return VIRTIO_MSIX_ENABLED;
			else
				return VIRTIO_MSIX_DISABLED;
		}

		pos = cap[1];
	}

	return VIRTIO_MSIX_NONE;
}
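
/*
 * For reference, the capability layout walked above (per the PCI spec):
 * byte 0 holds the capability ID, byte 1 the offset of the next
 * capability, and for MSI-X bytes 2-3 hold the Message Control word,
 * whose top bit (PCI_MSIX_ENABLE, 0x8000) is the MSI-X Enable flag.
 */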

/*
 * Since we are in legacy mode:
 * http://ozlabs.org/~rusty/virtio-spec/virtio-0.9.5.pdf
 *
 * "Note that this is possible because while the virtio header is PCI (i.e.
 * little) endian, the device-specific region is encoded in the native endian of
 * the guest (where such distinction is applicable)."
 *
 * For powerpc, which supports both endiannesses, qemu assumes the cpu is
 * big endian and enforces this for the virtio-net device.
 */
static void
legacy_read_dev_config(struct virtio_hw *hw, size_t offset,
		       void *dst, int length)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);
#ifdef RTE_ARCH_PPC_64
	int size;

	while (length > 0) {
		if (length >= 4) {
			size = 4;
			rte_pci_ioport_read(VTPCI_IO(hw), dst, size,
				VIRTIO_PCI_CONFIG(dev) + offset);
			*(uint32_t *)dst = rte_be_to_cpu_32(*(uint32_t *)dst);
		} else if (length >= 2) {
			size = 2;
			rte_pci_ioport_read(VTPCI_IO(hw), dst, size,
				VIRTIO_PCI_CONFIG(dev) + offset);
			*(uint16_t *)dst = rte_be_to_cpu_16(*(uint16_t *)dst);
		} else {
			size = 1;
			rte_pci_ioport_read(VTPCI_IO(hw), dst, size,
				VIRTIO_PCI_CONFIG(dev) + offset);
		}

		dst = (char *)dst + size;
		offset += size;
		length -= size;
	}
#else
	rte_pci_ioport_read(VTPCI_IO(hw), dst, length,
		VIRTIO_PCI_CONFIG(dev) + offset);
#endif
}

static void
legacy_write_dev_config(struct virtio_hw *hw, size_t offset,
			const void *src, int length)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);
#ifdef RTE_ARCH_PPC_64
	union {
		uint32_t u32;
		uint16_t u16;
	} tmp;
	int size;

	while (length > 0) {
		if (length >= 4) {
			size = 4;
			tmp.u32 = rte_cpu_to_be_32(*(const uint32_t *)src);
			rte_pci_ioport_write(VTPCI_IO(hw), &tmp.u32, size,
				VIRTIO_PCI_CONFIG(dev) + offset);
		} else if (length >= 2) {
			size = 2;
			tmp.u16 = rte_cpu_to_be_16(*(const uint16_t *)src);
			rte_pci_ioport_write(VTPCI_IO(hw), &tmp.u16, size,
				VIRTIO_PCI_CONFIG(dev) + offset);
		} else {
			size = 1;
			rte_pci_ioport_write(VTPCI_IO(hw), src, size,
				VIRTIO_PCI_CONFIG(dev) + offset);
		}

		src = (const char *)src + size;
		offset += size;
		length -= size;
	}
#else
	rte_pci_ioport_write(VTPCI_IO(hw), src, length,
		VIRTIO_PCI_CONFIG(dev) + offset);
#endif
}

static uint64_t
legacy_get_features(struct virtio_hw *hw)
{
	uint32_t dst;

	rte_pci_ioport_read(VTPCI_IO(hw), &dst, 4, VIRTIO_PCI_HOST_FEATURES);
	return dst;
}

static void
legacy_set_features(struct virtio_hw *hw, uint64_t features)
{
	if ((features >> 32) != 0) {
		PMD_DRV_LOG(ERR,
			"only 32 bit features are allowed for legacy virtio!");
		return;
	}
	rte_pci_ioport_write(VTPCI_IO(hw), &features, 4,
		VIRTIO_PCI_GUEST_FEATURES);
}

static int
legacy_features_ok(struct virtio_hw *hw __rte_unused)
{
	return 0;
}

static uint8_t
legacy_get_status(struct virtio_hw *hw)
{
	uint8_t dst;

	rte_pci_ioport_read(VTPCI_IO(hw), &dst, 1, VIRTIO_PCI_STATUS);
	return dst;
}

static void
legacy_set_status(struct virtio_hw *hw, uint8_t status)
{
	rte_pci_ioport_write(VTPCI_IO(hw), &status, 1, VIRTIO_PCI_STATUS);
}

static uint8_t
legacy_get_isr(struct virtio_hw *hw)
{
	uint8_t dst;

	rte_pci_ioport_read(VTPCI_IO(hw), &dst, 1, VIRTIO_PCI_ISR);
	return dst;
}

/* Enable one vector (0) for Link State Interrupt */
static uint16_t
legacy_set_config_irq(struct virtio_hw *hw, uint16_t vec)
{
	uint16_t dst;

	rte_pci_ioport_write(VTPCI_IO(hw), &vec, 2, VIRTIO_MSI_CONFIG_VECTOR);
	rte_pci_ioport_read(VTPCI_IO(hw), &dst, 2, VIRTIO_MSI_CONFIG_VECTOR);
	return dst;
}

static uint16_t
legacy_set_queue_irq(struct virtio_hw *hw, struct virtqueue *vq, uint16_t vec)
{
	uint16_t dst;

	rte_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
		VIRTIO_PCI_QUEUE_SEL);
	rte_pci_ioport_write(VTPCI_IO(hw), &vec, 2, VIRTIO_MSI_QUEUE_VECTOR);
	rte_pci_ioport_read(VTPCI_IO(hw), &dst, 2, VIRTIO_MSI_QUEUE_VECTOR);
	return dst;
}

static uint16_t
legacy_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
{
	uint16_t dst;

	rte_pci_ioport_write(VTPCI_IO(hw), &queue_id, 2, VIRTIO_PCI_QUEUE_SEL);
	rte_pci_ioport_read(VTPCI_IO(hw), &dst, 2, VIRTIO_PCI_QUEUE_NUM);
	return dst;
}

static int
legacy_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
{
	uint32_t src;

	if (!check_vq_phys_addr_ok(vq))
		return -1;

	rte_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
		VIRTIO_PCI_QUEUE_SEL);
	src = vq->vq_ring_mem >> VIRTIO_PCI_QUEUE_ADDR_SHIFT;
	rte_pci_ioport_write(VTPCI_IO(hw), &src, 4, VIRTIO_PCI_QUEUE_PFN);

	return 0;
}
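
/*
 * Example: a ring whose physical address is 0x100000000 (4 GB) is
 * programmed above as PFN 0x100000000 >> VIRTIO_PCI_QUEUE_ADDR_SHIFT,
 * i.e. 0x100000 with the usual shift of 12.
 */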

static void
legacy_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
{
	uint32_t src = 0;

	rte_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
		VIRTIO_PCI_QUEUE_SEL);
	rte_pci_ioport_write(VTPCI_IO(hw), &src, 4, VIRTIO_PCI_QUEUE_PFN);
}

static void
legacy_notify_queue(struct virtio_hw *hw, struct virtqueue *vq)
{
	rte_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
		VIRTIO_PCI_QUEUE_NOTIFY);
}

static void
legacy_intr_detect(struct virtio_hw *hw)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);

	dev->msix_status = vtpci_msix_detect(VTPCI_DEV(hw));
	hw->intr_lsc = !!dev->msix_status;
}

static int
legacy_dev_close(struct virtio_hw *hw)
{
	rte_pci_unmap_device(VTPCI_DEV(hw));
	rte_pci_ioport_unmap(VTPCI_IO(hw));

	return 0;
}

const struct virtio_ops legacy_ops = {
	.read_dev_cfg	= legacy_read_dev_config,
	.write_dev_cfg	= legacy_write_dev_config,
	.get_status	= legacy_get_status,
	.set_status	= legacy_set_status,
	.get_features	= legacy_get_features,
	.set_features	= legacy_set_features,
	.features_ok	= legacy_features_ok,
	.get_isr	= legacy_get_isr,
	.set_config_irq	= legacy_set_config_irq,
	.set_queue_irq	= legacy_set_queue_irq,
	.get_queue_num	= legacy_get_queue_num,
	.setup_queue	= legacy_setup_queue,
	.del_queue	= legacy_del_queue,
	.notify_queue	= legacy_notify_queue,
	.intr_detect	= legacy_intr_detect,
	.dev_close	= legacy_dev_close,
};

static inline void
io_write64_twopart(uint64_t val, uint32_t *lo, uint32_t *hi)
{
	rte_write32(val & ((1ULL << 32) - 1), lo);
	rte_write32(val >> 32, hi);
}

static void
modern_read_dev_config(struct virtio_hw *hw, size_t offset,
		       void *dst, int length)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);
	int i;
	uint8_t *p;
	uint8_t old_gen, new_gen;

	do {
		old_gen = rte_read8(&dev->common_cfg->config_generation);

		p = dst;
		for (i = 0; i < length; i++)
			*p++ = rte_read8((uint8_t *)dev->dev_cfg + offset + i);

		new_gen = rte_read8(&dev->common_cfg->config_generation);
	} while (old_gen != new_gen);
}
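
/*
 * The generation counter above guards against torn reads: if the device
 * changes its config while we copy it byte by byte, config_generation
 * differs before and after the copy and the loop retries with a fresh
 * snapshot (virtio 1.x spec, device configuration space).
 */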

static void
modern_write_dev_config(struct virtio_hw *hw, size_t offset,
			const void *src, int length)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);
	int i;
	const uint8_t *p = src;

	for (i = 0; i < length; i++)
		rte_write8((*p++), (((uint8_t *)dev->dev_cfg) + offset + i));
}

static uint64_t
modern_get_features(struct virtio_hw *hw)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);
	uint32_t features_lo, features_hi;

	rte_write32(0, &dev->common_cfg->device_feature_select);
	features_lo = rte_read32(&dev->common_cfg->device_feature);

	rte_write32(1, &dev->common_cfg->device_feature_select);
	features_hi = rte_read32(&dev->common_cfg->device_feature);

	return ((uint64_t)features_hi << 32) | features_lo;
}

static void
modern_set_features(struct virtio_hw *hw, uint64_t features)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);

	rte_write32(0, &dev->common_cfg->guest_feature_select);
	rte_write32(features & ((1ULL << 32) - 1),
		    &dev->common_cfg->guest_feature);

	rte_write32(1, &dev->common_cfg->guest_feature_select);
	rte_write32(features >> 32,
		    &dev->common_cfg->guest_feature);
}
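
/*
 * Both feature accessors use the 32-bit select/window pair from the
 * common config: writing 0 or 1 to the *_feature_select register picks
 * which half of the 64-bit feature word the *_feature register exposes.
 */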

static int
modern_features_ok(struct virtio_hw *hw)
{
	if (!virtio_with_feature(hw, VIRTIO_F_VERSION_1)) {
		PMD_INIT_LOG(ERR, "Version 1+ required with modern devices");
		return -1;
	}

	return 0;
}

static uint8_t
modern_get_status(struct virtio_hw *hw)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);

	return rte_read8(&dev->common_cfg->device_status);
}

static void
modern_set_status(struct virtio_hw *hw, uint8_t status)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);

	rte_write8(status, &dev->common_cfg->device_status);
}

static uint8_t
modern_get_isr(struct virtio_hw *hw)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);

	return rte_read8(dev->isr);
}

static uint16_t
modern_set_config_irq(struct virtio_hw *hw, uint16_t vec)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);

	rte_write16(vec, &dev->common_cfg->msix_config);
	return rte_read16(&dev->common_cfg->msix_config);
}

static uint16_t
modern_set_queue_irq(struct virtio_hw *hw, struct virtqueue *vq, uint16_t vec)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);

	rte_write16(vq->vq_queue_index, &dev->common_cfg->queue_select);
	rte_write16(vec, &dev->common_cfg->queue_msix_vector);
	return rte_read16(&dev->common_cfg->queue_msix_vector);
}

static uint16_t
modern_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);

	rte_write16(queue_id, &dev->common_cfg->queue_select);
	return rte_read16(&dev->common_cfg->queue_size);
}

static int
modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);
	uint64_t desc_addr, avail_addr, used_addr;
	uint16_t notify_off;

	if (!check_vq_phys_addr_ok(vq))
		return -1;

	desc_addr = vq->vq_ring_mem;
	avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc);
	used_addr = RTE_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail,
							 ring[vq->vq_nentries]),
				   VIRTIO_VRING_ALIGN);

	rte_write16(vq->vq_queue_index, &dev->common_cfg->queue_select);

	io_write64_twopart(desc_addr, &dev->common_cfg->queue_desc_lo,
				      &dev->common_cfg->queue_desc_hi);
	io_write64_twopart(avail_addr, &dev->common_cfg->queue_avail_lo,
				       &dev->common_cfg->queue_avail_hi);
	io_write64_twopart(used_addr, &dev->common_cfg->queue_used_lo,
				      &dev->common_cfg->queue_used_hi);

	notify_off = rte_read16(&dev->common_cfg->queue_notify_off);
	vq->notify_addr = (void *)((uint8_t *)dev->notify_base +
				notify_off * dev->notify_off_multiplier);

	rte_write16(1, &dev->common_cfg->queue_enable);

	PMD_INIT_LOG(DEBUG, "queue %u addresses:", vq->vq_queue_index);
	PMD_INIT_LOG(DEBUG, "\t desc_addr: %" PRIx64, desc_addr);
	PMD_INIT_LOG(DEBUG, "\t avail_addr: %" PRIx64, avail_addr);
	PMD_INIT_LOG(DEBUG, "\t used_addr: %" PRIx64, used_addr);
	PMD_INIT_LOG(DEBUG, "\t notify addr: %p (notify offset: %u)",
		vq->notify_addr, notify_off);

	return 0;
}
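
/*
 * Notify address example: with a notify_off_multiplier of 4096 and a
 * queue_notify_off of 1 read above, the doorbell for this queue lives
 * at notify_base + 1 * 4096.
 */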

static void
modern_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);

	rte_write16(vq->vq_queue_index, &dev->common_cfg->queue_select);

	io_write64_twopart(0, &dev->common_cfg->queue_desc_lo,
				  &dev->common_cfg->queue_desc_hi);
	io_write64_twopart(0, &dev->common_cfg->queue_avail_lo,
				  &dev->common_cfg->queue_avail_hi);
	io_write64_twopart(0, &dev->common_cfg->queue_used_lo,
				  &dev->common_cfg->queue_used_hi);

	rte_write16(0, &dev->common_cfg->queue_enable);
}

static void
modern_notify_queue(struct virtio_hw *hw, struct virtqueue *vq)
{
	uint32_t notify_data;

	if (!virtio_with_feature(hw, VIRTIO_F_NOTIFICATION_DATA)) {
		rte_write16(vq->vq_queue_index, vq->notify_addr);
		return;
	}

	if (virtio_with_packed_queue(hw)) {
		/*
		 * Bit[0:15]: vq queue index
		 * Bit[16:30]: avail index
		 * Bit[31]: avail wrap counter
		 */
		notify_data = ((uint32_t)(!!(vq->vq_packed.cached_flags &
				VRING_PACKED_DESC_F_AVAIL)) << 31) |
				((uint32_t)vq->vq_avail_idx << 16) |
				vq->vq_queue_index;
	} else {
		/*
		 * Bit[0:15]: vq queue index
		 * Bit[16:31]: avail index
		 */
		notify_data = ((uint32_t)vq->vq_avail_idx << 16) |
				vq->vq_queue_index;
	}
	rte_write32(notify_data, vq->notify_addr);
}
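
/*
 * Encoding example for a packed queue: queue index 1, avail index
 * 0x1234, wrap counter set gives
 * (1u << 31) | (0x1234u << 16) | 1 = 0x92340001.
 */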

static void
modern_intr_detect(struct virtio_hw *hw)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);

	dev->msix_status = vtpci_msix_detect(VTPCI_DEV(hw));
	hw->intr_lsc = !!dev->msix_status;
}

static int
modern_dev_close(struct virtio_hw *hw)
{
	rte_pci_unmap_device(VTPCI_DEV(hw));

	return 0;
}

const struct virtio_ops modern_ops = {
	.read_dev_cfg	= modern_read_dev_config,
	.write_dev_cfg	= modern_write_dev_config,
	.get_status	= modern_get_status,
	.set_status	= modern_set_status,
	.get_features	= modern_get_features,
	.set_features	= modern_set_features,
	.features_ok	= modern_features_ok,
	.get_isr	= modern_get_isr,
	.set_config_irq	= modern_set_config_irq,
	.set_queue_irq	= modern_set_queue_irq,
	.get_queue_num	= modern_get_queue_num,
	.setup_queue	= modern_setup_queue,
	.del_queue	= modern_del_queue,
	.notify_queue	= modern_notify_queue,
	.intr_detect	= modern_intr_detect,
	.dev_close	= modern_dev_close,
};

static void *
get_cfg_addr(struct rte_pci_device *dev, struct virtio_pci_cap *cap)
{
	uint8_t  bar    = cap->bar;
	uint32_t length = cap->length;
	uint32_t offset = cap->offset;
	uint8_t *base;

	if (bar >= PCI_MAX_RESOURCE) {
		PMD_INIT_LOG(ERR, "invalid bar: %u", bar);
		return NULL;
	}

	if (offset + length < offset) {
		PMD_INIT_LOG(ERR, "offset(%u) + length(%u) overflows",
			offset, length);
		return NULL;
	}

	if (offset + length > dev->mem_resource[bar].len) {
		PMD_INIT_LOG(ERR,
			"invalid cap: overflows bar space: %u > %" PRIu64,
			offset + length, dev->mem_resource[bar].len);
		return NULL;
	}

	base = dev->mem_resource[bar].addr;
	if (base == NULL) {
		PMD_INIT_LOG(ERR, "bar %u base addr is NULL", bar);
		return NULL;
	}

	return base + offset;
}

static int
virtio_read_caps(struct rte_pci_device *pci_dev, struct virtio_hw *hw)
{
	struct virtio_pci_dev *dev = virtio_pci_get_dev(hw);
	uint8_t pos;
	struct virtio_pci_cap cap;
	int ret;

	if (rte_pci_map_device(pci_dev)) {
		PMD_INIT_LOG(DEBUG, "failed to map pci device!");
		return -1;
	}

	ret = rte_pci_read_config(pci_dev, &pos, 1, PCI_CAPABILITY_LIST);
	if (ret != 1) {
		PMD_INIT_LOG(DEBUG,
			     "failed to read pci capability list, ret %d", ret);
		return -1;
	}

	while (pos) {
		ret = rte_pci_read_config(pci_dev, &cap, 2, pos);
		if (ret != 2) {
			PMD_INIT_LOG(DEBUG,
				     "failed to read pci cap at pos: %x ret %d",
				     pos, ret);
			break;
		}

		if (cap.cap_vndr == PCI_CAP_ID_MSIX) {
			/* Transitional devices also expose this capability,
			 * which is why we additionally check whether MSI-X
			 * is enabled.
			 * The 1st byte is the cap ID; the 2nd byte is the
			 * position of the next cap; the next two bytes are
			 * the flags.
			 */
			uint16_t flags;

			ret = rte_pci_read_config(pci_dev, &flags, sizeof(flags),
					pos + 2);
			if (ret != sizeof(flags)) {
				PMD_INIT_LOG(DEBUG,
					     "failed to read pci cap at pos:"
					     " %x ret %d", pos + 2, ret);
				break;
			}

			if (flags & PCI_MSIX_ENABLE)
				dev->msix_status = VIRTIO_MSIX_ENABLED;
			else
				dev->msix_status = VIRTIO_MSIX_DISABLED;
		}

		if (cap.cap_vndr != PCI_CAP_ID_VNDR) {
			PMD_INIT_LOG(DEBUG,
				"[%2x] skipping non VNDR cap id: %02x",
				pos, cap.cap_vndr);
			goto next;
		}

		ret = rte_pci_read_config(pci_dev, &cap, sizeof(cap), pos);
		if (ret != sizeof(cap)) {
			PMD_INIT_LOG(DEBUG,
				     "failed to read pci cap at pos: %x ret %d",
				     pos, ret);
			break;
		}

		PMD_INIT_LOG(DEBUG,
			"[%2x] cfg type: %u, bar: %u, offset: %04x, len: %u",
			pos, cap.cfg_type, cap.bar, cap.offset, cap.length);

		switch (cap.cfg_type) {
		case VIRTIO_PCI_CAP_COMMON_CFG:
			dev->common_cfg = get_cfg_addr(pci_dev, &cap);
			break;
		case VIRTIO_PCI_CAP_NOTIFY_CFG:
			ret = rte_pci_read_config(pci_dev,
					&dev->notify_off_multiplier,
					4, pos + sizeof(cap));
			if (ret != 4)
				PMD_INIT_LOG(DEBUG,
					"failed to read notify_off_multiplier, ret %d",
					ret);
			else
				dev->notify_base = get_cfg_addr(pci_dev, &cap);
			break;
		case VIRTIO_PCI_CAP_DEVICE_CFG:
			dev->dev_cfg = get_cfg_addr(pci_dev, &cap);
			break;
		case VIRTIO_PCI_CAP_ISR_CFG:
			dev->isr = get_cfg_addr(pci_dev, &cap);
			break;
		}

next:
		pos = cap.cap_next;
	}

	if (dev->common_cfg == NULL || dev->notify_base == NULL ||
	    dev->dev_cfg == NULL    || dev->isr == NULL) {
		PMD_INIT_LOG(INFO, "no modern virtio pci device found.");
		return -1;
	}

	PMD_INIT_LOG(INFO, "found modern virtio pci device.");

	PMD_INIT_LOG(DEBUG, "common cfg mapped at: %p", dev->common_cfg);
	PMD_INIT_LOG(DEBUG, "device cfg mapped at: %p", dev->dev_cfg);
	PMD_INIT_LOG(DEBUG, "isr cfg mapped at: %p", dev->isr);
	PMD_INIT_LOG(DEBUG, "notify base: %p, notify off multiplier: %u",
		dev->notify_base, dev->notify_off_multiplier);

	return 0;
}
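
/*
 * For reference, the vendor capability parsed above follows the
 * virtio 1.x spec layout (struct virtio_pci_cap in virtio_pci.h):
 * cap_vndr, cap_next, cap_len and cfg_type bytes, the BAR index,
 * 3 bytes of padding, then 32-bit offset and length fields locating
 * the structure inside the BAR.
 */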

/*
 * Return -1:
 *   if there is an error mapping with VFIO/UIO.
 *   if the ioport map fails when the driver type is KDRV_NONE.
 *   if the device is marked as allowed but the driver type is KDRV_UNKNOWN.
 * Return 1 if a kernel driver is managing the device.
 * Return 0 on success.
 */
int
vtpci_init(struct rte_pci_device *pci_dev, struct virtio_pci_dev *dev)
{
	struct virtio_hw *hw = &dev->hw;

	RTE_BUILD_BUG_ON(offsetof(struct virtio_pci_dev, hw) != 0);

	/*
	 * Try to read the virtio pci caps, which exist only on modern
	 * pci devices. If that fails, fall back to legacy virtio handling.
	 */
	if (virtio_read_caps(pci_dev, hw) == 0) {
		PMD_INIT_LOG(INFO, "modern virtio pci detected.");
		VIRTIO_OPS(hw) = &modern_ops;
		dev->modern = true;
		goto msix_detect;
	}

	PMD_INIT_LOG(INFO, "trying with legacy virtio pci.");
	if (rte_pci_ioport_map(pci_dev, 0, VTPCI_IO(hw)) < 0) {
		rte_pci_unmap_device(pci_dev);
		if (pci_dev->kdrv == RTE_PCI_KDRV_UNKNOWN &&
		    (!pci_dev->device.devargs ||
		     pci_dev->device.devargs->bus !=
		     rte_bus_find_by_name("pci"))) {
			PMD_INIT_LOG(INFO,
				"skip kernel managed virtio device.");
			return 1;
		}
		return -1;
	}

	VIRTIO_OPS(hw) = &legacy_ops;
	dev->modern = false;

msix_detect:
	VIRTIO_OPS(hw)->intr_detect(hw);

	return 0;
}

void vtpci_legacy_ioport_unmap(struct virtio_hw *hw)
{
	rte_pci_ioport_unmap(VTPCI_IO(hw));
}

int vtpci_legacy_ioport_map(struct virtio_hw *hw)
{
	return rte_pci_ioport_map(VTPCI_DEV(hw), 0, VTPCI_IO(hw));
}