xref: /dpdk/drivers/net/virtio/virtio_pci.c (revision 131a75b6e4df60586103d71defb85dcf9f77fb17)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 #include <stdint.h>
34 
35 #ifdef RTE_EXEC_ENV_LINUXAPP
36  #include <dirent.h>
37  #include <fcntl.h>
38 #endif
39 
40 #include <rte_io.h>
41 #include <rte_bus.h>
42 
43 #include "virtio_pci.h"
44 #include "virtio_logs.h"
45 #include "virtqueue.h"
46 
47 /*
48  * Following macros are derived from linux/pci_regs.h, however,
49  * we can't simply include that header here, as there is no such
50  * file for non-Linux platform.
51  */
52 #define PCI_CAPABILITY_LIST	0x34
53 #define PCI_CAP_ID_VNDR		0x09
54 #define PCI_CAP_ID_MSIX		0x11
55 
56 /*
57  * The remaining space is defined by each driver as the per-driver
58  * configuration space.
59  */
60 #define VIRTIO_PCI_CONFIG(hw) \
61 		(((hw)->use_msix == VIRTIO_MSIX_ENABLED) ? 24 : 20)
62 
63 static inline int
64 check_vq_phys_addr_ok(struct virtqueue *vq)
65 {
66 	/* Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit,
67 	 * and only accepts 32 bit page frame number.
68 	 * Check if the allocated physical memory exceeds 16TB.
69 	 */
70 	if ((vq->vq_ring_mem + vq->vq_ring_size - 1) >>
71 			(VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) {
72 		PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!");
73 		return 0;
74 	}
75 
76 	return 1;
77 }
78 
79 /*
80  * Since we are in legacy mode:
81  * http://ozlabs.org/~rusty/virtio-spec/virtio-0.9.5.pdf
82  *
83  * "Note that this is possible because while the virtio header is PCI (i.e.
84  * little) endian, the device-specific region is encoded in the native endian of
85  * the guest (where such distinction is applicable)."
86  *
87  * For powerpc which supports both, qemu supposes that cpu is big endian and
88  * enforces this for the virtio-net stuff.
89  */
/*
 * Read `length` bytes of the device-specific config region, starting at
 * `offset`, into `dst`, using legacy (virtio 0.9.5) I/O port accesses.
 *
 * On PPC64 the device-specific region is guest-native (big) endian while
 * transport accesses are little endian (see the note above), so each
 * 4- or 2-byte chunk is byte-swapped after reading; an odd trailing byte
 * is copied as-is.
 */
static void
legacy_read_dev_config(struct virtio_hw *hw, size_t offset,
		       void *dst, int length)
{
#ifdef RTE_ARCH_PPC_64
	int size;

	while (length > 0) {
		if (length >= 4) {
			size = 4;
			rte_pci_ioport_read(VTPCI_IO(hw), dst, size,
				VIRTIO_PCI_CONFIG(hw) + offset);
			/* device region is big endian on ppc64 */
			*(uint32_t *)dst = rte_be_to_cpu_32(*(uint32_t *)dst);
		} else if (length >= 2) {
			size = 2;
			rte_pci_ioport_read(VTPCI_IO(hw), dst, size,
				VIRTIO_PCI_CONFIG(hw) + offset);
			*(uint16_t *)dst = rte_be_to_cpu_16(*(uint16_t *)dst);
		} else {
			size = 1;
			rte_pci_ioport_read(VTPCI_IO(hw), dst, size,
				VIRTIO_PCI_CONFIG(hw) + offset);
		}

		/* advance destination, device offset and remaining count */
		dst = (char *)dst + size;
		offset += size;
		length -= size;
	}
#else
	rte_pci_ioport_read(VTPCI_IO(hw), dst, length,
		VIRTIO_PCI_CONFIG(hw) + offset);
#endif
}
123 
/*
 * Write `length` bytes from `src` into the device-specific config region
 * at `offset`, using legacy I/O port accesses.
 *
 * On PPC64 each 4- or 2-byte chunk is converted to big endian before the
 * write (the legacy device-specific region is guest-native endian, see
 * the note above legacy_read_dev_config); an odd trailing byte is
 * written unmodified.
 */
static void
legacy_write_dev_config(struct virtio_hw *hw, size_t offset,
			const void *src, int length)
{
#ifdef RTE_ARCH_PPC_64
	/* staging buffer for the byte-swapped chunk */
	union {
		uint32_t u32;
		uint16_t u16;
	} tmp;
	int size;

	while (length > 0) {
		if (length >= 4) {
			size = 4;
			tmp.u32 = rte_cpu_to_be_32(*(const uint32_t *)src);
			rte_pci_ioport_write(VTPCI_IO(hw), &tmp.u32, size,
				VIRTIO_PCI_CONFIG(hw) + offset);
		} else if (length >= 2) {
			size = 2;
			tmp.u16 = rte_cpu_to_be_16(*(const uint16_t *)src);
			rte_pci_ioport_write(VTPCI_IO(hw), &tmp.u16, size,
				VIRTIO_PCI_CONFIG(hw) + offset);
		} else {
			size = 1;
			rte_pci_ioport_write(VTPCI_IO(hw), src, size,
				VIRTIO_PCI_CONFIG(hw) + offset);
		}

		/* advance source, device offset and remaining count */
		src = (const char *)src + size;
		offset += size;
		length -= size;
	}
#else
	rte_pci_ioport_write(VTPCI_IO(hw), src, length,
		VIRTIO_PCI_CONFIG(hw) + offset);
#endif
}
161 
162 static uint64_t
163 legacy_get_features(struct virtio_hw *hw)
164 {
165 	uint32_t dst;
166 
167 	rte_pci_ioport_read(VTPCI_IO(hw), &dst, 4, VIRTIO_PCI_HOST_FEATURES);
168 	return dst;
169 }
170 
171 static void
172 legacy_set_features(struct virtio_hw *hw, uint64_t features)
173 {
174 	if ((features >> 32) != 0) {
175 		PMD_DRV_LOG(ERR,
176 			"only 32 bit features are allowed for legacy virtio!");
177 		return;
178 	}
179 	rte_pci_ioport_write(VTPCI_IO(hw), &features, 4,
180 		VIRTIO_PCI_GUEST_FEATURES);
181 }
182 
183 static uint8_t
184 legacy_get_status(struct virtio_hw *hw)
185 {
186 	uint8_t dst;
187 
188 	rte_pci_ioport_read(VTPCI_IO(hw), &dst, 1, VIRTIO_PCI_STATUS);
189 	return dst;
190 }
191 
/* Write the one-byte device status register. */
static void
legacy_set_status(struct virtio_hw *hw, uint8_t status)
{
	rte_pci_ioport_write(VTPCI_IO(hw), &status, 1, VIRTIO_PCI_STATUS);
}
197 
/* Reset the device by writing 0 (VIRTIO_CONFIG_STATUS_RESET) to status. */
static void
legacy_reset(struct virtio_hw *hw)
{
	legacy_set_status(hw, VIRTIO_CONFIG_STATUS_RESET);
}
203 
204 static uint8_t
205 legacy_get_isr(struct virtio_hw *hw)
206 {
207 	uint8_t dst;
208 
209 	rte_pci_ioport_read(VTPCI_IO(hw), &dst, 1, VIRTIO_PCI_ISR);
210 	return dst;
211 }
212 
213 /* Enable one vector (0) for Link State Intrerrupt */
214 static uint16_t
215 legacy_set_config_irq(struct virtio_hw *hw, uint16_t vec)
216 {
217 	uint16_t dst;
218 
219 	rte_pci_ioport_write(VTPCI_IO(hw), &vec, 2, VIRTIO_MSI_CONFIG_VECTOR);
220 	rte_pci_ioport_read(VTPCI_IO(hw), &dst, 2, VIRTIO_MSI_CONFIG_VECTOR);
221 	return dst;
222 }
223 
224 static uint16_t
225 legacy_set_queue_irq(struct virtio_hw *hw, struct virtqueue *vq, uint16_t vec)
226 {
227 	uint16_t dst;
228 
229 	rte_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
230 		VIRTIO_PCI_QUEUE_SEL);
231 	rte_pci_ioport_write(VTPCI_IO(hw), &vec, 2, VIRTIO_MSI_QUEUE_VECTOR);
232 	rte_pci_ioport_read(VTPCI_IO(hw), &dst, 2, VIRTIO_MSI_QUEUE_VECTOR);
233 	return dst;
234 }
235 
236 static uint16_t
237 legacy_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
238 {
239 	uint16_t dst;
240 
241 	rte_pci_ioport_write(VTPCI_IO(hw), &queue_id, 2, VIRTIO_PCI_QUEUE_SEL);
242 	rte_pci_ioport_read(VTPCI_IO(hw), &dst, 2, VIRTIO_PCI_QUEUE_NUM);
243 	return dst;
244 }
245 
246 static int
247 legacy_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
248 {
249 	uint32_t src;
250 
251 	if (!check_vq_phys_addr_ok(vq))
252 		return -1;
253 
254 	rte_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
255 		VIRTIO_PCI_QUEUE_SEL);
256 	src = vq->vq_ring_mem >> VIRTIO_PCI_QUEUE_ADDR_SHIFT;
257 	rte_pci_ioport_write(VTPCI_IO(hw), &src, 4, VIRTIO_PCI_QUEUE_PFN);
258 
259 	return 0;
260 }
261 
262 static void
263 legacy_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
264 {
265 	uint32_t src = 0;
266 
267 	rte_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
268 		VIRTIO_PCI_QUEUE_SEL);
269 	rte_pci_ioport_write(VTPCI_IO(hw), &src, 4, VIRTIO_PCI_QUEUE_PFN);
270 }
271 
/* Kick the device: write the queue index to the notify register. */
static void
legacy_notify_queue(struct virtio_hw *hw, struct virtqueue *vq)
{
	rte_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
		VIRTIO_PCI_QUEUE_NOTIFY);
}
278 
/* Dispatch table for legacy (virtio 0.9.5, I/O-port based) devices. */
const struct virtio_pci_ops legacy_ops = {
	.read_dev_cfg	= legacy_read_dev_config,
	.write_dev_cfg	= legacy_write_dev_config,
	.reset		= legacy_reset,
	.get_status	= legacy_get_status,
	.set_status	= legacy_set_status,
	.get_features	= legacy_get_features,
	.set_features	= legacy_set_features,
	.get_isr	= legacy_get_isr,
	.set_config_irq	= legacy_set_config_irq,
	.set_queue_irq  = legacy_set_queue_irq,
	.get_queue_num	= legacy_get_queue_num,
	.setup_queue	= legacy_setup_queue,
	.del_queue	= legacy_del_queue,
	.notify_queue	= legacy_notify_queue,
};
295 
/* Write a 64-bit value through two 32-bit registers (low then high). */
static inline void
io_write64_twopart(uint64_t val, uint32_t *lo, uint32_t *hi)
{
	rte_write32((uint32_t)val, lo);
	rte_write32((uint32_t)(val >> 32), hi);
}
302 
/*
 * Read `length` bytes at `offset` from the device-specific config region.
 *
 * The copy is bracketed by reads of config_generation and retried until
 * the generation is unchanged, so a torn read (device updating the
 * config mid-copy) is never returned to the caller.
 */
static void
modern_read_dev_config(struct virtio_hw *hw, size_t offset,
		       void *dst, int length)
{
	int i;
	uint8_t *p;
	uint8_t old_gen, new_gen;

	do {
		old_gen = rte_read8(&hw->common_cfg->config_generation);

		/* byte-wise copy out of the mapped config region */
		p = dst;
		for (i = 0;  i < length; i++)
			*p++ = rte_read8((uint8_t *)hw->dev_cfg + offset + i);

		new_gen = rte_read8(&hw->common_cfg->config_generation);
	} while (old_gen != new_gen);
}
321 
322 static void
323 modern_write_dev_config(struct virtio_hw *hw, size_t offset,
324 			const void *src, int length)
325 {
326 	int i;
327 	const uint8_t *p = src;
328 
329 	for (i = 0;  i < length; i++)
330 		rte_write8((*p++), (((uint8_t *)hw->dev_cfg) + offset + i));
331 }
332 
333 static uint64_t
334 modern_get_features(struct virtio_hw *hw)
335 {
336 	uint32_t features_lo, features_hi;
337 
338 	rte_write32(0, &hw->common_cfg->device_feature_select);
339 	features_lo = rte_read32(&hw->common_cfg->device_feature);
340 
341 	rte_write32(1, &hw->common_cfg->device_feature_select);
342 	features_hi = rte_read32(&hw->common_cfg->device_feature);
343 
344 	return ((uint64_t)features_hi << 32) | features_lo;
345 }
346 
347 static void
348 modern_set_features(struct virtio_hw *hw, uint64_t features)
349 {
350 	rte_write32(0, &hw->common_cfg->guest_feature_select);
351 	rte_write32(features & ((1ULL << 32) - 1),
352 		    &hw->common_cfg->guest_feature);
353 
354 	rte_write32(1, &hw->common_cfg->guest_feature_select);
355 	rte_write32(features >> 32,
356 		    &hw->common_cfg->guest_feature);
357 }
358 
/* Fetch the device status byte from common config. */
static uint8_t
modern_get_status(struct virtio_hw *hw)
{
	return rte_read8(&hw->common_cfg->device_status);
}
364 
/* Write the device status byte into common config. */
static void
modern_set_status(struct virtio_hw *hw, uint8_t status)
{
	rte_write8(status, &hw->common_cfg->device_status);
}
370 
/* Reset the device by writing 0 to status; read back to flush the write. */
static void
modern_reset(struct virtio_hw *hw)
{
	modern_set_status(hw, VIRTIO_CONFIG_STATUS_RESET);
	/* flush the status write */
	modern_get_status(hw);
}
377 
/* Read the interrupt status byte from the mapped ISR region. */
static uint8_t
modern_get_isr(struct virtio_hw *hw)
{
	return rte_read8(hw->isr);
}
383 
/* Assign MSI-X vector `vec` for config changes; return the readback. */
static uint16_t
modern_set_config_irq(struct virtio_hw *hw, uint16_t vec)
{
	rte_write16(vec, &hw->common_cfg->msix_config);
	/* read back what the device actually latched */
	return rte_read16(&hw->common_cfg->msix_config);
}
390 
/* Assign MSI-X vector `vec` to `vq`; return the device's readback. */
static uint16_t
modern_set_queue_irq(struct virtio_hw *hw, struct virtqueue *vq, uint16_t vec)
{
	/* the vector register is per-queue: select the queue first */
	rte_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);
	rte_write16(vec, &hw->common_cfg->queue_msix_vector);
	return rte_read16(&hw->common_cfg->queue_msix_vector);
}
398 
/* Return the size (in descriptors) of queue `queue_id`. */
static uint16_t
modern_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
{
	/* queue_size reads as the size of the currently selected queue */
	rte_write16(queue_id, &hw->common_cfg->queue_select);
	return rte_read16(&hw->common_cfg->queue_size);
}
405 
406 static int
407 modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
408 {
409 	uint64_t desc_addr, avail_addr, used_addr;
410 	uint16_t notify_off;
411 
412 	if (!check_vq_phys_addr_ok(vq))
413 		return -1;
414 
415 	desc_addr = vq->vq_ring_mem;
416 	avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc);
417 	used_addr = RTE_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail,
418 							 ring[vq->vq_nentries]),
419 				   VIRTIO_PCI_VRING_ALIGN);
420 
421 	rte_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);
422 
423 	io_write64_twopart(desc_addr, &hw->common_cfg->queue_desc_lo,
424 				      &hw->common_cfg->queue_desc_hi);
425 	io_write64_twopart(avail_addr, &hw->common_cfg->queue_avail_lo,
426 				       &hw->common_cfg->queue_avail_hi);
427 	io_write64_twopart(used_addr, &hw->common_cfg->queue_used_lo,
428 				      &hw->common_cfg->queue_used_hi);
429 
430 	notify_off = rte_read16(&hw->common_cfg->queue_notify_off);
431 	vq->notify_addr = (void *)((uint8_t *)hw->notify_base +
432 				notify_off * hw->notify_off_multiplier);
433 
434 	rte_write16(1, &hw->common_cfg->queue_enable);
435 
436 	PMD_INIT_LOG(DEBUG, "queue %u addresses:", vq->vq_queue_index);
437 	PMD_INIT_LOG(DEBUG, "\t desc_addr: %" PRIx64, desc_addr);
438 	PMD_INIT_LOG(DEBUG, "\t aval_addr: %" PRIx64, avail_addr);
439 	PMD_INIT_LOG(DEBUG, "\t used_addr: %" PRIx64, used_addr);
440 	PMD_INIT_LOG(DEBUG, "\t notify addr: %p (notify offset: %u)",
441 		vq->notify_addr, notify_off);
442 
443 	return 0;
444 }
445 
/* Tear down `vq`: zero its ring addresses and disable the queue. */
static void
modern_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
{
	rte_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);

	io_write64_twopart(0, &hw->common_cfg->queue_desc_lo,
				  &hw->common_cfg->queue_desc_hi);
	io_write64_twopart(0, &hw->common_cfg->queue_avail_lo,
				  &hw->common_cfg->queue_avail_hi);
	io_write64_twopart(0, &hw->common_cfg->queue_used_lo,
				  &hw->common_cfg->queue_used_hi);

	rte_write16(0, &hw->common_cfg->queue_enable);
}
460 
/* Kick the device via the per-queue notify address cached at setup. */
static void
modern_notify_queue(struct virtio_hw *hw __rte_unused, struct virtqueue *vq)
{
	rte_write16(vq->vq_queue_index, vq->notify_addr);
}
466 
/* Dispatch table for modern (virtio 1.0, memory-mapped) devices. */
const struct virtio_pci_ops modern_ops = {
	.read_dev_cfg	= modern_read_dev_config,
	.write_dev_cfg	= modern_write_dev_config,
	.reset		= modern_reset,
	.get_status	= modern_get_status,
	.set_status	= modern_set_status,
	.get_features	= modern_get_features,
	.set_features	= modern_set_features,
	.get_isr	= modern_get_isr,
	.set_config_irq	= modern_set_config_irq,
	.set_queue_irq  = modern_set_queue_irq,
	.get_queue_num	= modern_get_queue_num,
	.setup_queue	= modern_setup_queue,
	.del_queue	= modern_del_queue,
	.notify_queue	= modern_notify_queue,
};
483 
484 
/* Read device-specific config via the active (legacy/modern) ops table. */
void
vtpci_read_dev_config(struct virtio_hw *hw, size_t offset,
		      void *dst, int length)
{
	VTPCI_OPS(hw)->read_dev_cfg(hw, offset, dst, length);
}
491 
/* Write device-specific config via the active (legacy/modern) ops table. */
void
vtpci_write_dev_config(struct virtio_hw *hw, size_t offset,
		       const void *src, int length)
{
	VTPCI_OPS(hw)->write_dev_cfg(hw, offset, src, length);
}
498 
499 uint64_t
500 vtpci_negotiate_features(struct virtio_hw *hw, uint64_t host_features)
501 {
502 	uint64_t features;
503 
504 	/*
505 	 * Limit negotiated features to what the driver, virtqueue, and
506 	 * host all support.
507 	 */
508 	features = host_features & hw->guest_features;
509 	VTPCI_OPS(hw)->set_features(hw, features);
510 
511 	return features;
512 }
513 
/* Reset the device (status = 0) and flush the write with a readback. */
void
vtpci_reset(struct virtio_hw *hw)
{
	VTPCI_OPS(hw)->set_status(hw, VIRTIO_CONFIG_STATUS_RESET);
	/* flush status write */
	VTPCI_OPS(hw)->get_status(hw);
}
521 
/* Mark driver initialization complete by setting DRIVER_OK in status. */
void
vtpci_reinit_complete(struct virtio_hw *hw)
{
	vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER_OK);
}
527 
528 void
529 vtpci_set_status(struct virtio_hw *hw, uint8_t status)
530 {
531 	if (status != VIRTIO_CONFIG_STATUS_RESET)
532 		status |= VTPCI_OPS(hw)->get_status(hw);
533 
534 	VTPCI_OPS(hw)->set_status(hw, status);
535 }
536 
/* Return the raw device status byte via the active ops table. */
uint8_t
vtpci_get_status(struct virtio_hw *hw)
{
	return VTPCI_OPS(hw)->get_status(hw);
}
542 
/* Return the interrupt status byte via the active ops table. */
uint8_t
vtpci_isr(struct virtio_hw *hw)
{
	return VTPCI_OPS(hw)->get_isr(hw);
}
548 
549 static void *
550 get_cfg_addr(struct rte_pci_device *dev, struct virtio_pci_cap *cap)
551 {
552 	uint8_t  bar    = cap->bar;
553 	uint32_t length = cap->length;
554 	uint32_t offset = cap->offset;
555 	uint8_t *base;
556 
557 	if (bar >= PCI_MAX_RESOURCE) {
558 		PMD_INIT_LOG(ERR, "invalid bar: %u", bar);
559 		return NULL;
560 	}
561 
562 	if (offset + length < offset) {
563 		PMD_INIT_LOG(ERR, "offset(%u) + length(%u) overflows",
564 			offset, length);
565 		return NULL;
566 	}
567 
568 	if (offset + length > dev->mem_resource[bar].len) {
569 		PMD_INIT_LOG(ERR,
570 			"invalid cap: overflows bar space: %u > %" PRIu64,
571 			offset + length, dev->mem_resource[bar].len);
572 		return NULL;
573 	}
574 
575 	base = dev->mem_resource[bar].addr;
576 	if (base == NULL) {
577 		PMD_INIT_LOG(ERR, "bar %u base addr is NULL", bar);
578 		return NULL;
579 	}
580 
581 	return base + offset;
582 }
583 
584 #define PCI_MSIX_ENABLE 0x8000
585 
586 static int
587 virtio_read_caps(struct rte_pci_device *dev, struct virtio_hw *hw)
588 {
589 	uint8_t pos;
590 	struct virtio_pci_cap cap;
591 	int ret;
592 
593 	if (rte_pci_map_device(dev)) {
594 		PMD_INIT_LOG(DEBUG, "failed to map pci device!");
595 		return -1;
596 	}
597 
598 	ret = rte_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST);
599 	if (ret < 0) {
600 		PMD_INIT_LOG(DEBUG, "failed to read pci capability list");
601 		return -1;
602 	}
603 
604 	while (pos) {
605 		ret = rte_pci_read_config(dev, &cap, sizeof(cap), pos);
606 		if (ret < 0) {
607 			PMD_INIT_LOG(ERR,
608 				"failed to read pci cap at pos: %x", pos);
609 			break;
610 		}
611 
612 		if (cap.cap_vndr == PCI_CAP_ID_MSIX) {
613 			/* Transitional devices would also have this capability,
614 			 * that's why we also check if msix is enabled.
615 			 * 1st byte is cap ID; 2nd byte is the position of next
616 			 * cap; next two bytes are the flags.
617 			 */
618 			uint16_t flags = ((uint16_t *)&cap)[1];
619 
620 			if (flags & PCI_MSIX_ENABLE)
621 				hw->use_msix = VIRTIO_MSIX_ENABLED;
622 			else
623 				hw->use_msix = VIRTIO_MSIX_DISABLED;
624 		}
625 
626 		if (cap.cap_vndr != PCI_CAP_ID_VNDR) {
627 			PMD_INIT_LOG(DEBUG,
628 				"[%2x] skipping non VNDR cap id: %02x",
629 				pos, cap.cap_vndr);
630 			goto next;
631 		}
632 
633 		PMD_INIT_LOG(DEBUG,
634 			"[%2x] cfg type: %u, bar: %u, offset: %04x, len: %u",
635 			pos, cap.cfg_type, cap.bar, cap.offset, cap.length);
636 
637 		switch (cap.cfg_type) {
638 		case VIRTIO_PCI_CAP_COMMON_CFG:
639 			hw->common_cfg = get_cfg_addr(dev, &cap);
640 			break;
641 		case VIRTIO_PCI_CAP_NOTIFY_CFG:
642 			rte_pci_read_config(dev, &hw->notify_off_multiplier,
643 					4, pos + sizeof(cap));
644 			hw->notify_base = get_cfg_addr(dev, &cap);
645 			break;
646 		case VIRTIO_PCI_CAP_DEVICE_CFG:
647 			hw->dev_cfg = get_cfg_addr(dev, &cap);
648 			break;
649 		case VIRTIO_PCI_CAP_ISR_CFG:
650 			hw->isr = get_cfg_addr(dev, &cap);
651 			break;
652 		}
653 
654 next:
655 		pos = cap.cap_next;
656 	}
657 
658 	if (hw->common_cfg == NULL || hw->notify_base == NULL ||
659 	    hw->dev_cfg == NULL    || hw->isr == NULL) {
660 		PMD_INIT_LOG(INFO, "no modern virtio pci device found.");
661 		return -1;
662 	}
663 
664 	PMD_INIT_LOG(INFO, "found modern virtio pci device.");
665 
666 	PMD_INIT_LOG(DEBUG, "common cfg mapped at: %p", hw->common_cfg);
667 	PMD_INIT_LOG(DEBUG, "device cfg mapped at: %p", hw->dev_cfg);
668 	PMD_INIT_LOG(DEBUG, "isr cfg mapped at: %p", hw->isr);
669 	PMD_INIT_LOG(DEBUG, "notify base: %p, notify off multiplier: %u",
670 		hw->notify_base, hw->notify_off_multiplier);
671 
672 	return 0;
673 }
674 
675 /*
676  * Return -1:
677  *   if there is error mapping with VFIO/UIO.
678  *   if port map error when driver type is KDRV_NONE.
679  *   if whitelisted but driver type is KDRV_UNKNOWN.
680  * Return 1 if kernel driver is managing the device.
681  * Return 0 on success.
682  */
/*
 * Probe `dev` and install the matching ops table for `hw`: modern
 * (virtio 1.0 PCI caps) is tried first, then legacy I/O-port access.
 * Return values are documented in the comment above.
 */
int
vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw)
{
	/*
	 * Try if we can succeed reading virtio pci caps, which exists
	 * only on modern pci device. If failed, we fallback to legacy
	 * virtio handling.
	 */
	if (virtio_read_caps(dev, hw) == 0) {
		PMD_INIT_LOG(INFO, "modern virtio pci detected.");
		virtio_hw_internal[hw->port_id].vtpci_ops = &modern_ops;
		hw->modern = 1;
		return 0;
	}

	PMD_INIT_LOG(INFO, "trying with legacy virtio pci.");
	if (rte_pci_ioport_map(dev, 0, VTPCI_IO(hw)) < 0) {
		/* Port map failed: treat the device as kernel-managed
		 * (return 1) only when the driver type is unknown and it
		 * was not explicitly bound to the PCI bus via devargs;
		 * otherwise report a hard error.
		 */
		if (dev->kdrv == RTE_KDRV_UNKNOWN &&
		    (!dev->device.devargs ||
		     dev->device.devargs->bus !=
		     rte_bus_find_by_name("pci"))) {
			PMD_INIT_LOG(INFO,
				"skip kernel managed virtio device.");
			return 1;
		}
		return -1;
	}

	virtio_hw_internal[hw->port_id].vtpci_ops = &legacy_ops;
	hw->modern   = 0;

	return 0;
}
716 
717 enum virtio_msix_status
718 vtpci_msix_detect(struct rte_pci_device *dev)
719 {
720 	uint8_t pos;
721 	struct virtio_pci_cap cap;
722 	int ret;
723 
724 	ret = rte_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST);
725 	if (ret < 0) {
726 		PMD_INIT_LOG(DEBUG, "failed to read pci capability list");
727 		return VIRTIO_MSIX_NONE;
728 	}
729 
730 	while (pos) {
731 		ret = rte_pci_read_config(dev, &cap, sizeof(cap), pos);
732 		if (ret < 0) {
733 			PMD_INIT_LOG(ERR,
734 				"failed to read pci cap at pos: %x", pos);
735 			break;
736 		}
737 
738 		if (cap.cap_vndr == PCI_CAP_ID_MSIX) {
739 			uint16_t flags = ((uint16_t *)&cap)[1];
740 
741 			if (flags & PCI_MSIX_ENABLE)
742 				return VIRTIO_MSIX_ENABLED;
743 			else
744 				return VIRTIO_MSIX_DISABLED;
745 		}
746 
747 		pos = cap.cap_next;
748 	}
749 
750 	return VIRTIO_MSIX_NONE;
751 }
752