xref: /dpdk/drivers/net/virtio/virtio_pci.c (revision 3e0ceb9f17fff027fc6c8f18de35e11719ffa61e)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 #include <stdint.h>
34 
35 #ifdef RTE_EXEC_ENV_LINUXAPP
36  #include <dirent.h>
37  #include <fcntl.h>
38 #endif
39 
40 #include <rte_io.h>
41 #include <rte_bus.h>
42 
43 #include "virtio_pci.h"
44 #include "virtio_logs.h"
45 #include "virtqueue.h"
46 
/*
 * Following macros are derived from linux/pci_regs.h, however,
 * we can't simply include that header here, as there is no such
 * file for non-Linux platform.
 */
/* Config-space offset of the first capability list pointer. */
#define PCI_CAPABILITY_LIST	0x34
/* Vendor-specific capability ID (virtio uses it for its config caps). */
#define PCI_CAP_ID_VNDR		0x09
/* MSI-X capability ID. */
#define PCI_CAP_ID_MSIX		0x11

/*
 * The remaining space is defined by each driver as the per-driver
 * configuration space.
 * In legacy I/O port layout, the device-specific region starts after
 * the MSI-X vector fields (24 bytes) when MSI-X is in use, otherwise
 * right after the common header (20 bytes).
 */
#define VIRTIO_PCI_CONFIG(hw) (((hw)->use_msix) ? 24 : 20)
61 
62 static inline int
63 check_vq_phys_addr_ok(struct virtqueue *vq)
64 {
65 	/* Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit,
66 	 * and only accepts 32 bit page frame number.
67 	 * Check if the allocated physical memory exceeds 16TB.
68 	 */
69 	if ((vq->vq_ring_mem + vq->vq_ring_size - 1) >>
70 			(VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) {
71 		PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!");
72 		return 0;
73 	}
74 
75 	return 1;
76 }
77 
78 /*
79  * Since we are in legacy mode:
80  * http://ozlabs.org/~rusty/virtio-spec/virtio-0.9.5.pdf
81  *
82  * "Note that this is possible because while the virtio header is PCI (i.e.
83  * little) endian, the device-specific region is encoded in the native endian of
84  * the guest (where such distinction is applicable)."
85  *
86  * For powerpc which supports both, qemu supposes that cpu is big endian and
87  * enforces this for the virtio-net stuff.
88  */
/* Read 'length' bytes of legacy device-specific config into 'dst'. */
static void
legacy_read_dev_config(struct virtio_hw *hw, size_t offset,
		       void *dst, int length)
{
#ifdef RTE_ARCH_PPC_64
	/*
	 * On ppc64 qemu serves the (guest-native-endian) device-specific
	 * region big endian, so read in the largest naturally aligned
	 * chunks and byte-swap each chunk back to CPU order.
	 */
	int size;

	while (length > 0) {
		if (length >= 4) {
			size = 4;
			rte_pci_ioport_read(VTPCI_IO(hw), dst, size,
				VIRTIO_PCI_CONFIG(hw) + offset);
			*(uint32_t *)dst = rte_be_to_cpu_32(*(uint32_t *)dst);
		} else if (length >= 2) {
			size = 2;
			rte_pci_ioport_read(VTPCI_IO(hw), dst, size,
				VIRTIO_PCI_CONFIG(hw) + offset);
			*(uint16_t *)dst = rte_be_to_cpu_16(*(uint16_t *)dst);
		} else {
			size = 1;
			rte_pci_ioport_read(VTPCI_IO(hw), dst, size,
				VIRTIO_PCI_CONFIG(hw) + offset);
		}

		/* Advance to the next chunk. */
		dst = (char *)dst + size;
		offset += size;
		length -= size;
	}
#else
	/* Little-endian hosts can copy the region verbatim. */
	rte_pci_ioport_read(VTPCI_IO(hw), dst, length,
		VIRTIO_PCI_CONFIG(hw) + offset);
#endif
}
122 
/* Write 'length' bytes from 'src' into legacy device-specific config. */
static void
legacy_write_dev_config(struct virtio_hw *hw, size_t offset,
			const void *src, int length)
{
#ifdef RTE_ARCH_PPC_64
	/* Scratch buffer so the caller's data is not byte-swapped
	 * in place (src is const).
	 */
	union {
		uint32_t u32;
		uint16_t u16;
	} tmp;
	int size;

	while (length > 0) {
		if (length >= 4) {
			size = 4;
			/* Swap to the big-endian layout qemu expects. */
			tmp.u32 = rte_cpu_to_be_32(*(const uint32_t *)src);
			rte_pci_ioport_write(VTPCI_IO(hw), &tmp.u32, size,
				VIRTIO_PCI_CONFIG(hw) + offset);
		} else if (length >= 2) {
			size = 2;
			tmp.u16 = rte_cpu_to_be_16(*(const uint16_t *)src);
			rte_pci_ioport_write(VTPCI_IO(hw), &tmp.u16, size,
				VIRTIO_PCI_CONFIG(hw) + offset);
		} else {
			size = 1;
			rte_pci_ioport_write(VTPCI_IO(hw), src, size,
				VIRTIO_PCI_CONFIG(hw) + offset);
		}

		/* Advance to the next chunk. */
		src = (const char *)src + size;
		offset += size;
		length -= size;
	}
#else
	/* Little-endian hosts can copy the region verbatim. */
	rte_pci_ioport_write(VTPCI_IO(hw), src, length,
		VIRTIO_PCI_CONFIG(hw) + offset);
#endif
}
160 
161 static uint64_t
162 legacy_get_features(struct virtio_hw *hw)
163 {
164 	uint32_t dst;
165 
166 	rte_pci_ioport_read(VTPCI_IO(hw), &dst, 4, VIRTIO_PCI_HOST_FEATURES);
167 	return dst;
168 }
169 
170 static void
171 legacy_set_features(struct virtio_hw *hw, uint64_t features)
172 {
173 	if ((features >> 32) != 0) {
174 		PMD_DRV_LOG(ERR,
175 			"only 32 bit features are allowed for legacy virtio!");
176 		return;
177 	}
178 	rte_pci_ioport_write(VTPCI_IO(hw), &features, 4,
179 		VIRTIO_PCI_GUEST_FEATURES);
180 }
181 
182 static uint8_t
183 legacy_get_status(struct virtio_hw *hw)
184 {
185 	uint8_t dst;
186 
187 	rte_pci_ioport_read(VTPCI_IO(hw), &dst, 1, VIRTIO_PCI_STATUS);
188 	return dst;
189 }
190 
191 static void
192 legacy_set_status(struct virtio_hw *hw, uint8_t status)
193 {
194 	rte_pci_ioport_write(VTPCI_IO(hw), &status, 1, VIRTIO_PCI_STATUS);
195 }
196 
197 static void
198 legacy_reset(struct virtio_hw *hw)
199 {
200 	legacy_set_status(hw, VIRTIO_CONFIG_STATUS_RESET);
201 }
202 
203 static uint8_t
204 legacy_get_isr(struct virtio_hw *hw)
205 {
206 	uint8_t dst;
207 
208 	rte_pci_ioport_read(VTPCI_IO(hw), &dst, 1, VIRTIO_PCI_ISR);
209 	return dst;
210 }
211 
212 /* Enable one vector (0) for Link State Intrerrupt */
213 static uint16_t
214 legacy_set_config_irq(struct virtio_hw *hw, uint16_t vec)
215 {
216 	uint16_t dst;
217 
218 	rte_pci_ioport_write(VTPCI_IO(hw), &vec, 2, VIRTIO_MSI_CONFIG_VECTOR);
219 	rte_pci_ioport_read(VTPCI_IO(hw), &dst, 2, VIRTIO_MSI_CONFIG_VECTOR);
220 	return dst;
221 }
222 
223 static uint16_t
224 legacy_set_queue_irq(struct virtio_hw *hw, struct virtqueue *vq, uint16_t vec)
225 {
226 	uint16_t dst;
227 
228 	rte_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
229 		VIRTIO_PCI_QUEUE_SEL);
230 	rte_pci_ioport_write(VTPCI_IO(hw), &vec, 2, VIRTIO_MSI_QUEUE_VECTOR);
231 	rte_pci_ioport_read(VTPCI_IO(hw), &dst, 2, VIRTIO_MSI_QUEUE_VECTOR);
232 	return dst;
233 }
234 
235 static uint16_t
236 legacy_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
237 {
238 	uint16_t dst;
239 
240 	rte_pci_ioport_write(VTPCI_IO(hw), &queue_id, 2, VIRTIO_PCI_QUEUE_SEL);
241 	rte_pci_ioport_read(VTPCI_IO(hw), &dst, 2, VIRTIO_PCI_QUEUE_NUM);
242 	return dst;
243 }
244 
245 static int
246 legacy_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
247 {
248 	uint32_t src;
249 
250 	if (!check_vq_phys_addr_ok(vq))
251 		return -1;
252 
253 	rte_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
254 		VIRTIO_PCI_QUEUE_SEL);
255 	src = vq->vq_ring_mem >> VIRTIO_PCI_QUEUE_ADDR_SHIFT;
256 	rte_pci_ioport_write(VTPCI_IO(hw), &src, 4, VIRTIO_PCI_QUEUE_PFN);
257 
258 	return 0;
259 }
260 
261 static void
262 legacy_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
263 {
264 	uint32_t src = 0;
265 
266 	rte_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
267 		VIRTIO_PCI_QUEUE_SEL);
268 	rte_pci_ioport_write(VTPCI_IO(hw), &src, 4, VIRTIO_PCI_QUEUE_PFN);
269 }
270 
271 static void
272 legacy_notify_queue(struct virtio_hw *hw, struct virtqueue *vq)
273 {
274 	rte_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
275 		VIRTIO_PCI_QUEUE_NOTIFY);
276 }
277 
/* Dispatch table for legacy (virtio 0.9.5) devices, driven through
 * PCI I/O ports.
 */
const struct virtio_pci_ops legacy_ops = {
	.read_dev_cfg	= legacy_read_dev_config,
	.write_dev_cfg	= legacy_write_dev_config,
	.reset		= legacy_reset,
	.get_status	= legacy_get_status,
	.set_status	= legacy_set_status,
	.get_features	= legacy_get_features,
	.set_features	= legacy_set_features,
	.get_isr	= legacy_get_isr,
	.set_config_irq	= legacy_set_config_irq,
	.set_queue_irq  = legacy_set_queue_irq,
	.get_queue_num	= legacy_get_queue_num,
	.setup_queue	= legacy_setup_queue,
	.del_queue	= legacy_del_queue,
	.notify_queue	= legacy_notify_queue,
};
294 
295 static inline void
296 io_write64_twopart(uint64_t val, uint32_t *lo, uint32_t *hi)
297 {
298 	rte_write32(val & ((1ULL << 32) - 1), lo);
299 	rte_write32(val >> 32,		     hi);
300 }
301 
302 static void
303 modern_read_dev_config(struct virtio_hw *hw, size_t offset,
304 		       void *dst, int length)
305 {
306 	int i;
307 	uint8_t *p;
308 	uint8_t old_gen, new_gen;
309 
310 	do {
311 		old_gen = rte_read8(&hw->common_cfg->config_generation);
312 
313 		p = dst;
314 		for (i = 0;  i < length; i++)
315 			*p++ = rte_read8((uint8_t *)hw->dev_cfg + offset + i);
316 
317 		new_gen = rte_read8(&hw->common_cfg->config_generation);
318 	} while (old_gen != new_gen);
319 }
320 
321 static void
322 modern_write_dev_config(struct virtio_hw *hw, size_t offset,
323 			const void *src, int length)
324 {
325 	int i;
326 	const uint8_t *p = src;
327 
328 	for (i = 0;  i < length; i++)
329 		rte_write8((*p++), (((uint8_t *)hw->dev_cfg) + offset + i));
330 }
331 
332 static uint64_t
333 modern_get_features(struct virtio_hw *hw)
334 {
335 	uint32_t features_lo, features_hi;
336 
337 	rte_write32(0, &hw->common_cfg->device_feature_select);
338 	features_lo = rte_read32(&hw->common_cfg->device_feature);
339 
340 	rte_write32(1, &hw->common_cfg->device_feature_select);
341 	features_hi = rte_read32(&hw->common_cfg->device_feature);
342 
343 	return ((uint64_t)features_hi << 32) | features_lo;
344 }
345 
346 static void
347 modern_set_features(struct virtio_hw *hw, uint64_t features)
348 {
349 	rte_write32(0, &hw->common_cfg->guest_feature_select);
350 	rte_write32(features & ((1ULL << 32) - 1),
351 		    &hw->common_cfg->guest_feature);
352 
353 	rte_write32(1, &hw->common_cfg->guest_feature_select);
354 	rte_write32(features >> 32,
355 		    &hw->common_cfg->guest_feature);
356 }
357 
358 static uint8_t
359 modern_get_status(struct virtio_hw *hw)
360 {
361 	return rte_read8(&hw->common_cfg->device_status);
362 }
363 
364 static void
365 modern_set_status(struct virtio_hw *hw, uint8_t status)
366 {
367 	rte_write8(status, &hw->common_cfg->device_status);
368 }
369 
370 static void
371 modern_reset(struct virtio_hw *hw)
372 {
373 	modern_set_status(hw, VIRTIO_CONFIG_STATUS_RESET);
374 	modern_get_status(hw);
375 }
376 
377 static uint8_t
378 modern_get_isr(struct virtio_hw *hw)
379 {
380 	return rte_read8(hw->isr);
381 }
382 
383 static uint16_t
384 modern_set_config_irq(struct virtio_hw *hw, uint16_t vec)
385 {
386 	rte_write16(vec, &hw->common_cfg->msix_config);
387 	return rte_read16(&hw->common_cfg->msix_config);
388 }
389 
390 static uint16_t
391 modern_set_queue_irq(struct virtio_hw *hw, struct virtqueue *vq, uint16_t vec)
392 {
393 	rte_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);
394 	rte_write16(vec, &hw->common_cfg->queue_msix_vector);
395 	return rte_read16(&hw->common_cfg->queue_msix_vector);
396 }
397 
398 static uint16_t
399 modern_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
400 {
401 	rte_write16(queue_id, &hw->common_cfg->queue_select);
402 	return rte_read16(&hw->common_cfg->queue_size);
403 }
404 
405 static int
406 modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
407 {
408 	uint64_t desc_addr, avail_addr, used_addr;
409 	uint16_t notify_off;
410 
411 	if (!check_vq_phys_addr_ok(vq))
412 		return -1;
413 
414 	desc_addr = vq->vq_ring_mem;
415 	avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc);
416 	used_addr = RTE_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail,
417 							 ring[vq->vq_nentries]),
418 				   VIRTIO_PCI_VRING_ALIGN);
419 
420 	rte_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);
421 
422 	io_write64_twopart(desc_addr, &hw->common_cfg->queue_desc_lo,
423 				      &hw->common_cfg->queue_desc_hi);
424 	io_write64_twopart(avail_addr, &hw->common_cfg->queue_avail_lo,
425 				       &hw->common_cfg->queue_avail_hi);
426 	io_write64_twopart(used_addr, &hw->common_cfg->queue_used_lo,
427 				      &hw->common_cfg->queue_used_hi);
428 
429 	notify_off = rte_read16(&hw->common_cfg->queue_notify_off);
430 	vq->notify_addr = (void *)((uint8_t *)hw->notify_base +
431 				notify_off * hw->notify_off_multiplier);
432 
433 	rte_write16(1, &hw->common_cfg->queue_enable);
434 
435 	PMD_INIT_LOG(DEBUG, "queue %u addresses:", vq->vq_queue_index);
436 	PMD_INIT_LOG(DEBUG, "\t desc_addr: %" PRIx64, desc_addr);
437 	PMD_INIT_LOG(DEBUG, "\t aval_addr: %" PRIx64, avail_addr);
438 	PMD_INIT_LOG(DEBUG, "\t used_addr: %" PRIx64, used_addr);
439 	PMD_INIT_LOG(DEBUG, "\t notify addr: %p (notify offset: %u)",
440 		vq->notify_addr, notify_off);
441 
442 	return 0;
443 }
444 
445 static void
446 modern_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
447 {
448 	rte_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);
449 
450 	io_write64_twopart(0, &hw->common_cfg->queue_desc_lo,
451 				  &hw->common_cfg->queue_desc_hi);
452 	io_write64_twopart(0, &hw->common_cfg->queue_avail_lo,
453 				  &hw->common_cfg->queue_avail_hi);
454 	io_write64_twopart(0, &hw->common_cfg->queue_used_lo,
455 				  &hw->common_cfg->queue_used_hi);
456 
457 	rte_write16(0, &hw->common_cfg->queue_enable);
458 }
459 
460 static void
461 modern_notify_queue(struct virtio_hw *hw __rte_unused, struct virtqueue *vq)
462 {
463 	rte_write16(vq->vq_queue_index, vq->notify_addr);
464 }
465 
/* Dispatch table for modern (virtio 1.0) devices, driven through
 * memory-mapped capability regions.
 */
const struct virtio_pci_ops modern_ops = {
	.read_dev_cfg	= modern_read_dev_config,
	.write_dev_cfg	= modern_write_dev_config,
	.reset		= modern_reset,
	.get_status	= modern_get_status,
	.set_status	= modern_set_status,
	.get_features	= modern_get_features,
	.set_features	= modern_set_features,
	.get_isr	= modern_get_isr,
	.set_config_irq	= modern_set_config_irq,
	.set_queue_irq  = modern_set_queue_irq,
	.get_queue_num	= modern_get_queue_num,
	.setup_queue	= modern_setup_queue,
	.del_queue	= modern_del_queue,
	.notify_queue	= modern_notify_queue,
};
482 
483 
484 void
485 vtpci_read_dev_config(struct virtio_hw *hw, size_t offset,
486 		      void *dst, int length)
487 {
488 	VTPCI_OPS(hw)->read_dev_cfg(hw, offset, dst, length);
489 }
490 
491 void
492 vtpci_write_dev_config(struct virtio_hw *hw, size_t offset,
493 		       const void *src, int length)
494 {
495 	VTPCI_OPS(hw)->write_dev_cfg(hw, offset, src, length);
496 }
497 
498 uint64_t
499 vtpci_negotiate_features(struct virtio_hw *hw, uint64_t host_features)
500 {
501 	uint64_t features;
502 
503 	/*
504 	 * Limit negotiated features to what the driver, virtqueue, and
505 	 * host all support.
506 	 */
507 	features = host_features & hw->guest_features;
508 	VTPCI_OPS(hw)->set_features(hw, features);
509 
510 	return features;
511 }
512 
513 void
514 vtpci_reset(struct virtio_hw *hw)
515 {
516 	VTPCI_OPS(hw)->set_status(hw, VIRTIO_CONFIG_STATUS_RESET);
517 	/* flush status write */
518 	VTPCI_OPS(hw)->get_status(hw);
519 }
520 
521 void
522 vtpci_reinit_complete(struct virtio_hw *hw)
523 {
524 	vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER_OK);
525 }
526 
527 void
528 vtpci_set_status(struct virtio_hw *hw, uint8_t status)
529 {
530 	if (status != VIRTIO_CONFIG_STATUS_RESET)
531 		status |= VTPCI_OPS(hw)->get_status(hw);
532 
533 	VTPCI_OPS(hw)->set_status(hw, status);
534 }
535 
536 uint8_t
537 vtpci_get_status(struct virtio_hw *hw)
538 {
539 	return VTPCI_OPS(hw)->get_status(hw);
540 }
541 
542 uint8_t
543 vtpci_isr(struct virtio_hw *hw)
544 {
545 	return VTPCI_OPS(hw)->get_isr(hw);
546 }
547 
548 static void *
549 get_cfg_addr(struct rte_pci_device *dev, struct virtio_pci_cap *cap)
550 {
551 	uint8_t  bar    = cap->bar;
552 	uint32_t length = cap->length;
553 	uint32_t offset = cap->offset;
554 	uint8_t *base;
555 
556 	if (bar >= PCI_MAX_RESOURCE) {
557 		PMD_INIT_LOG(ERR, "invalid bar: %u", bar);
558 		return NULL;
559 	}
560 
561 	if (offset + length < offset) {
562 		PMD_INIT_LOG(ERR, "offset(%u) + length(%u) overflows",
563 			offset, length);
564 		return NULL;
565 	}
566 
567 	if (offset + length > dev->mem_resource[bar].len) {
568 		PMD_INIT_LOG(ERR,
569 			"invalid cap: overflows bar space: %u > %" PRIu64,
570 			offset + length, dev->mem_resource[bar].len);
571 		return NULL;
572 	}
573 
574 	base = dev->mem_resource[bar].addr;
575 	if (base == NULL) {
576 		PMD_INIT_LOG(ERR, "bar %u base addr is NULL", bar);
577 		return NULL;
578 	}
579 
580 	return base + offset;
581 }
582 
/* Enable bit in the MSI-X capability's message control word. */
#define PCI_MSIX_ENABLE 0x8000
584 
585 static int
586 virtio_read_caps(struct rte_pci_device *dev, struct virtio_hw *hw)
587 {
588 	uint8_t pos;
589 	struct virtio_pci_cap cap;
590 	int ret;
591 
592 	if (rte_pci_map_device(dev)) {
593 		PMD_INIT_LOG(DEBUG, "failed to map pci device!");
594 		return -1;
595 	}
596 
597 	ret = rte_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST);
598 	if (ret < 0) {
599 		PMD_INIT_LOG(DEBUG, "failed to read pci capability list");
600 		return -1;
601 	}
602 
603 	while (pos) {
604 		ret = rte_pci_read_config(dev, &cap, sizeof(cap), pos);
605 		if (ret < 0) {
606 			PMD_INIT_LOG(ERR,
607 				"failed to read pci cap at pos: %x", pos);
608 			break;
609 		}
610 
611 		if (cap.cap_vndr == PCI_CAP_ID_MSIX) {
612 			/* Transitional devices would also have this capability,
613 			 * that's why we also check if msix is enabled.
614 			 * 1st byte is cap ID; 2nd byte is the position of next
615 			 * cap; next two bytes are the flags.
616 			 */
617 			uint16_t flags = ((uint16_t *)&cap)[1];
618 
619 			if (flags & PCI_MSIX_ENABLE)
620 				hw->use_msix = 1;
621 		}
622 
623 		if (cap.cap_vndr != PCI_CAP_ID_VNDR) {
624 			PMD_INIT_LOG(DEBUG,
625 				"[%2x] skipping non VNDR cap id: %02x",
626 				pos, cap.cap_vndr);
627 			goto next;
628 		}
629 
630 		PMD_INIT_LOG(DEBUG,
631 			"[%2x] cfg type: %u, bar: %u, offset: %04x, len: %u",
632 			pos, cap.cfg_type, cap.bar, cap.offset, cap.length);
633 
634 		switch (cap.cfg_type) {
635 		case VIRTIO_PCI_CAP_COMMON_CFG:
636 			hw->common_cfg = get_cfg_addr(dev, &cap);
637 			break;
638 		case VIRTIO_PCI_CAP_NOTIFY_CFG:
639 			rte_pci_read_config(dev, &hw->notify_off_multiplier,
640 					4, pos + sizeof(cap));
641 			hw->notify_base = get_cfg_addr(dev, &cap);
642 			break;
643 		case VIRTIO_PCI_CAP_DEVICE_CFG:
644 			hw->dev_cfg = get_cfg_addr(dev, &cap);
645 			break;
646 		case VIRTIO_PCI_CAP_ISR_CFG:
647 			hw->isr = get_cfg_addr(dev, &cap);
648 			break;
649 		}
650 
651 next:
652 		pos = cap.cap_next;
653 	}
654 
655 	if (hw->common_cfg == NULL || hw->notify_base == NULL ||
656 	    hw->dev_cfg == NULL    || hw->isr == NULL) {
657 		PMD_INIT_LOG(INFO, "no modern virtio pci device found.");
658 		return -1;
659 	}
660 
661 	PMD_INIT_LOG(INFO, "found modern virtio pci device.");
662 
663 	PMD_INIT_LOG(DEBUG, "common cfg mapped at: %p", hw->common_cfg);
664 	PMD_INIT_LOG(DEBUG, "device cfg mapped at: %p", hw->dev_cfg);
665 	PMD_INIT_LOG(DEBUG, "isr cfg mapped at: %p", hw->isr);
666 	PMD_INIT_LOG(DEBUG, "notify base: %p, notify off multiplier: %u",
667 		hw->notify_base, hw->notify_off_multiplier);
668 
669 	return 0;
670 }
671 
/*
 * Probe a virtio PCI device and install the matching backend ops.
 *
 * Return -1:
 *   if there is error mapping with VFIO/UIO.
 *   if port map error when driver type is KDRV_NONE.
 *   if whitelisted but driver type is KDRV_UNKNOWN.
 * Return 1 if kernel driver is managing the device.
 * Return 0 on success.
 */
int
vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw)
{
	/*
	 * Try if we can succeed reading virtio pci caps, which exists
	 * only on modern pci device. If failed, we fallback to legacy
	 * virtio handling.
	 */
	if (virtio_read_caps(dev, hw) == 0) {
		PMD_INIT_LOG(INFO, "modern virtio pci detected.");
		virtio_hw_internal[hw->port_id].vtpci_ops = &modern_ops;
		hw->modern = 1;
		return 0;
	}

	PMD_INIT_LOG(INFO, "trying with legacy virtio pci.");
	if (rte_pci_ioport_map(dev, 0, VTPCI_IO(hw)) < 0) {
		/* A device still bound to a kernel driver — and not
		 * explicitly handed to the PCI bus via devargs — is
		 * skipped (return 1) rather than treated as an error.
		 */
		if (dev->kdrv == RTE_KDRV_UNKNOWN &&
		    (!dev->device.devargs ||
		     dev->device.devargs->bus !=
		     rte_bus_find_by_name("pci"))) {
			PMD_INIT_LOG(INFO,
				"skip kernel managed virtio device.");
			return 1;
		}
		return -1;
	}

	virtio_hw_internal[hw->port_id].vtpci_ops = &legacy_ops;
	hw->modern   = 0;

	return 0;
}
713