xref: /dpdk/drivers/net/virtio/virtio_pci.c (revision 4e30ead5e7ca886535e2b30632b2948d2aac1681)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 #include <stdint.h>
34 
35 #ifdef RTE_EXEC_ENV_LINUXAPP
36  #include <dirent.h>
37  #include <fcntl.h>
38 #endif
39 
40 #include <rte_io.h>
41 
42 #include "virtio_pci.h"
43 #include "virtio_logs.h"
44 #include "virtqueue.h"
45 
46 /*
47  * Following macros are derived from linux/pci_regs.h, however,
48  * we can't simply include that header here, as there is no such
49  * file for non-Linux platform.
50  */
51 #define PCI_CAPABILITY_LIST	0x34
52 #define PCI_CAP_ID_VNDR		0x09
53 
54 /*
55  * The remaining space is defined by each driver as the per-driver
56  * configuration space.
57  */
58 #define VIRTIO_PCI_CONFIG(hw) (((hw)->use_msix) ? 24 : 20)
59 
60 static inline int
61 check_vq_phys_addr_ok(struct virtqueue *vq)
62 {
63 	/* Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit,
64 	 * and only accepts 32 bit page frame number.
65 	 * Check if the allocated physical memory exceeds 16TB.
66 	 */
67 	if ((vq->vq_ring_mem + vq->vq_ring_size - 1) >>
68 			(VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) {
69 		PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!");
70 		return 0;
71 	}
72 
73 	return 1;
74 }
75 
76 /*
77  * Since we are in legacy mode:
78  * http://ozlabs.org/~rusty/virtio-spec/virtio-0.9.5.pdf
79  *
80  * "Note that this is possible because while the virtio header is PCI (i.e.
81  * little) endian, the device-specific region is encoded in the native endian of
82  * the guest (where such distinction is applicable)."
83  *
84  * For powerpc which supports both, qemu supposes that cpu is big endian and
85  * enforces this for the virtio-net stuff.
86  */
/*
 * Read 'length' bytes of the device-specific config space at 'offset'
 * into 'dst' through legacy I/O ports.
 *
 * On PPC64 (big-endian guest, little-endian virtio header) the range is
 * read in 4/2/1-byte chunks and each chunk is byte-swapped to host
 * order; other architectures read the whole range in one ioport call.
 */
static void
legacy_read_dev_config(struct virtio_hw *hw, size_t offset,
		       void *dst, int length)
{
#ifdef RTE_ARCH_PPC_64
	int size;

	while (length > 0) {
		if (length >= 4) {
			size = 4;
			rte_eal_pci_ioport_read(VTPCI_IO(hw), dst, size,
				VIRTIO_PCI_CONFIG(hw) + offset);
			*(uint32_t *)dst = rte_be_to_cpu_32(*(uint32_t *)dst);
		} else if (length >= 2) {
			size = 2;
			rte_eal_pci_ioport_read(VTPCI_IO(hw), dst, size,
				VIRTIO_PCI_CONFIG(hw) + offset);
			*(uint16_t *)dst = rte_be_to_cpu_16(*(uint16_t *)dst);
		} else {
			/* Single byte: no swap needed. */
			size = 1;
			rte_eal_pci_ioport_read(VTPCI_IO(hw), dst, size,
				VIRTIO_PCI_CONFIG(hw) + offset);
		}

		/* Advance destination, device offset and remaining count. */
		dst = (char *)dst + size;
		offset += size;
		length -= size;
	}
#else
	rte_eal_pci_ioport_read(VTPCI_IO(hw), dst, length,
				VIRTIO_PCI_CONFIG(hw) + offset);
#endif
}
120 
/*
 * Write 'length' bytes from 'src' into the device-specific config
 * space at 'offset' through legacy I/O ports.
 *
 * Mirror of legacy_read_dev_config(): on PPC64, multi-byte chunks are
 * converted to the header's little-endian byte order via a temporary
 * before being written; other architectures write the range directly.
 */
static void
legacy_write_dev_config(struct virtio_hw *hw, size_t offset,
			const void *src, int length)
{
#ifdef RTE_ARCH_PPC_64
	/* Scratch space so 'src' is never modified. */
	union {
		uint32_t u32;
		uint16_t u16;
	} tmp;
	int size;

	while (length > 0) {
		if (length >= 4) {
			size = 4;
			tmp.u32 = rte_cpu_to_be_32(*(const uint32_t *)src);
			rte_eal_pci_ioport_write(VTPCI_IO(hw), &tmp.u32, size,
				VIRTIO_PCI_CONFIG(hw) + offset);
		} else if (length >= 2) {
			size = 2;
			tmp.u16 = rte_cpu_to_be_16(*(const uint16_t *)src);
			rte_eal_pci_ioport_write(VTPCI_IO(hw), &tmp.u16, size,
				VIRTIO_PCI_CONFIG(hw) + offset);
		} else {
			/* Single byte: write straight from the source. */
			size = 1;
			rte_eal_pci_ioport_write(VTPCI_IO(hw), src, size,
				VIRTIO_PCI_CONFIG(hw) + offset);
		}

		src = (const char *)src + size;
		offset += size;
		length -= size;
	}
#else
	rte_eal_pci_ioport_write(VTPCI_IO(hw), src, length,
				 VIRTIO_PCI_CONFIG(hw) + offset);
#endif
}
158 
159 static uint64_t
160 legacy_get_features(struct virtio_hw *hw)
161 {
162 	uint32_t dst;
163 
164 	rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 4,
165 				VIRTIO_PCI_HOST_FEATURES);
166 	return dst;
167 }
168 
169 static void
170 legacy_set_features(struct virtio_hw *hw, uint64_t features)
171 {
172 	if ((features >> 32) != 0) {
173 		PMD_DRV_LOG(ERR,
174 			"only 32 bit features are allowed for legacy virtio!");
175 		return;
176 	}
177 	rte_eal_pci_ioport_write(VTPCI_IO(hw), &features, 4,
178 				 VIRTIO_PCI_GUEST_FEATURES);
179 }
180 
181 static uint8_t
182 legacy_get_status(struct virtio_hw *hw)
183 {
184 	uint8_t dst;
185 
186 	rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 1, VIRTIO_PCI_STATUS);
187 	return dst;
188 }
189 
/* Write the device status byte; caller ORs in cumulative bits. */
static void
legacy_set_status(struct virtio_hw *hw, uint8_t status)
{
	rte_eal_pci_ioport_write(VTPCI_IO(hw), &status, 1, VIRTIO_PCI_STATUS);
}
195 
/* Reset the device by writing status 0 (VIRTIO_CONFIG_STATUS_RESET). */
static void
legacy_reset(struct virtio_hw *hw)
{
	legacy_set_status(hw, VIRTIO_CONFIG_STATUS_RESET);
}
201 
202 static uint8_t
203 legacy_get_isr(struct virtio_hw *hw)
204 {
205 	uint8_t dst;
206 
207 	rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 1, VIRTIO_PCI_ISR);
208 	return dst;
209 }
210 
211 /* Enable one vector (0) for Link State Intrerrupt */
212 static uint16_t
213 legacy_set_config_irq(struct virtio_hw *hw, uint16_t vec)
214 {
215 	uint16_t dst;
216 
217 	rte_eal_pci_ioport_write(VTPCI_IO(hw), &vec, 2,
218 				 VIRTIO_MSI_CONFIG_VECTOR);
219 	rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 2,
220 				VIRTIO_MSI_CONFIG_VECTOR);
221 	return dst;
222 }
223 
224 static uint16_t
225 legacy_set_queue_irq(struct virtio_hw *hw, struct virtqueue *vq, uint16_t vec)
226 {
227 	uint16_t dst;
228 
229 	rte_eal_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
230 				 VIRTIO_PCI_QUEUE_SEL);
231 	rte_eal_pci_ioport_write(VTPCI_IO(hw), &vec, 2,
232 				 VIRTIO_MSI_QUEUE_VECTOR);
233 	rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 2, VIRTIO_MSI_QUEUE_VECTOR);
234 	return dst;
235 }
236 
237 static uint16_t
238 legacy_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
239 {
240 	uint16_t dst;
241 
242 	rte_eal_pci_ioport_write(VTPCI_IO(hw), &queue_id, 2,
243 				 VIRTIO_PCI_QUEUE_SEL);
244 	rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 2, VIRTIO_PCI_QUEUE_NUM);
245 	return dst;
246 }
247 
/*
 * Program the vring physical address of 'vq' into the legacy
 * VIRTIO_PCI_QUEUE_PFN register (a 32-bit page frame number).
 * Returns 0 on success, -1 if the ring lies above the 16TB limit.
 */
static int
legacy_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
{
	uint32_t src;

	if (!check_vq_phys_addr_ok(vq))
		return -1;

	/* Select the queue before touching its PFN register. */
	rte_eal_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
			 VIRTIO_PCI_QUEUE_SEL);
	src = vq->vq_ring_mem >> VIRTIO_PCI_QUEUE_ADDR_SHIFT;
	rte_eal_pci_ioport_write(VTPCI_IO(hw), &src, 4, VIRTIO_PCI_QUEUE_PFN);

	return 0;
}
263 
264 static void
265 legacy_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
266 {
267 	uint32_t src = 0;
268 
269 	rte_eal_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
270 			 VIRTIO_PCI_QUEUE_SEL);
271 	rte_eal_pci_ioport_write(VTPCI_IO(hw), &src, 4, VIRTIO_PCI_QUEUE_PFN);
272 }
273 
274 static void
275 legacy_notify_queue(struct virtio_hw *hw, struct virtqueue *vq)
276 {
277 	rte_eal_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
278 			 VIRTIO_PCI_QUEUE_NOTIFY);
279 }
280 
#ifdef RTE_EXEC_ENV_LINUXAPP
/*
 * Detect whether the kernel enabled MSI-X for this device by probing
 * the sysfs "msi_irqs" directory of the PCI address. Returns non-zero
 * when the directory exists.
 */
static int
legacy_virtio_has_msix(const struct rte_pci_addr *loc)
{
	DIR *d;
	char dirname[PATH_MAX];

	snprintf(dirname, sizeof(dirname),
		     "%s/" PCI_PRI_FMT "/msi_irqs", pci_get_sysfs_path(),
		     loc->domain, loc->bus, loc->devid, loc->function);

	/* Only existence matters; the directory contents are not read. */
	d = opendir(dirname);
	if (d)
		closedir(d);

	return d != NULL;
}
#else
static int
legacy_virtio_has_msix(const struct rte_pci_addr *loc __rte_unused)
{
	/* nic_uio does not enable interrupts, return 0 (false). */
	return 0;
}
#endif
306 
307 static int
308 legacy_virtio_resource_init(struct rte_pci_device *pci_dev,
309 			    struct virtio_hw *hw, uint32_t *dev_flags)
310 {
311 	if (rte_eal_pci_ioport_map(pci_dev, 0, VTPCI_IO(hw)) < 0)
312 		return -1;
313 
314 	if (pci_dev->intr_handle.type != RTE_INTR_HANDLE_UNKNOWN)
315 		*dev_flags |= RTE_ETH_DEV_INTR_LSC;
316 	else
317 		*dev_flags &= ~RTE_ETH_DEV_INTR_LSC;
318 
319 	return 0;
320 }
321 
/* Ops table for legacy (pre-1.0) virtio devices, driven via I/O ports. */
const struct virtio_pci_ops legacy_ops = {
	.read_dev_cfg	= legacy_read_dev_config,
	.write_dev_cfg	= legacy_write_dev_config,
	.reset		= legacy_reset,
	.get_status	= legacy_get_status,
	.set_status	= legacy_set_status,
	.get_features	= legacy_get_features,
	.set_features	= legacy_set_features,
	.get_isr	= legacy_get_isr,
	.set_config_irq	= legacy_set_config_irq,
	.set_queue_irq  = legacy_set_queue_irq,
	.get_queue_num	= legacy_get_queue_num,
	.setup_queue	= legacy_setup_queue,
	.del_queue	= legacy_del_queue,
	.notify_queue	= legacy_notify_queue,
};
338 
/* Write a 64-bit value as two 32-bit MMIO stores: low half, then high. */
static inline void
io_write64_twopart(uint64_t val, uint32_t *lo, uint32_t *hi)
{
	rte_write32((uint32_t)val, lo);
	rte_write32((uint32_t)(val >> 32), hi);
}
345 
/*
 * Read 'length' bytes of device-specific config at 'offset' into 'dst'.
 *
 * The config generation counter is sampled before and after the copy;
 * if the device changed its config in between, the generations differ
 * and the whole read is retried until a consistent snapshot is taken.
 */
static void
modern_read_dev_config(struct virtio_hw *hw, size_t offset,
		       void *dst, int length)
{
	int i;
	uint8_t *p;
	uint8_t old_gen, new_gen;

	do {
		old_gen = rte_read8(&hw->common_cfg->config_generation);

		/* Byte-wise copy from the mapped device config region. */
		p = dst;
		for (i = 0;  i < length; i++)
			*p++ = rte_read8((uint8_t *)hw->dev_cfg + offset + i);

		new_gen = rte_read8(&hw->common_cfg->config_generation);
	} while (old_gen != new_gen);
}
364 
365 static void
366 modern_write_dev_config(struct virtio_hw *hw, size_t offset,
367 			const void *src, int length)
368 {
369 	int i;
370 	const uint8_t *p = src;
371 
372 	for (i = 0;  i < length; i++)
373 		rte_write8((*p++), (((uint8_t *)hw->dev_cfg) + offset + i));
374 }
375 
376 static uint64_t
377 modern_get_features(struct virtio_hw *hw)
378 {
379 	uint32_t features_lo, features_hi;
380 
381 	rte_write32(0, &hw->common_cfg->device_feature_select);
382 	features_lo = rte_read32(&hw->common_cfg->device_feature);
383 
384 	rte_write32(1, &hw->common_cfg->device_feature_select);
385 	features_hi = rte_read32(&hw->common_cfg->device_feature);
386 
387 	return ((uint64_t)features_hi << 32) | features_lo;
388 }
389 
390 static void
391 modern_set_features(struct virtio_hw *hw, uint64_t features)
392 {
393 	rte_write32(0, &hw->common_cfg->guest_feature_select);
394 	rte_write32(features & ((1ULL << 32) - 1),
395 		    &hw->common_cfg->guest_feature);
396 
397 	rte_write32(1, &hw->common_cfg->guest_feature_select);
398 	rte_write32(features >> 32,
399 		    &hw->common_cfg->guest_feature);
400 }
401 
/* Return the device status byte from the common config structure. */
static uint8_t
modern_get_status(struct virtio_hw *hw)
{
	return rte_read8(&hw->common_cfg->device_status);
}
407 
/* Write the device status byte; caller ORs in cumulative bits. */
static void
modern_set_status(struct virtio_hw *hw, uint8_t status)
{
	rte_write8(status, &hw->common_cfg->device_status);
}
413 
/* Reset the device (status 0); the read-back flushes the write. */
static void
modern_reset(struct virtio_hw *hw)
{
	modern_set_status(hw, VIRTIO_CONFIG_STATUS_RESET);
	modern_get_status(hw);
}
420 
/* Read the ISR status byte from the mapped ISR capability region. */
static uint8_t
modern_get_isr(struct virtio_hw *hw)
{
	return rte_read8(hw->isr);
}
426 
/*
 * Route config-change interrupts to MSI-X vector 'vec'. The register
 * is read back so the caller can detect VIRTIO_MSI_NO_VECTOR.
 */
static uint16_t
modern_set_config_irq(struct virtio_hw *hw, uint16_t vec)
{
	rte_write16(vec, &hw->common_cfg->msix_config);
	return rte_read16(&hw->common_cfg->msix_config);
}
433 
/*
 * Bind MSI-X vector 'vec' to queue 'vq'. The queue is selected first;
 * the vector register is read back for failure detection.
 */
static uint16_t
modern_set_queue_irq(struct virtio_hw *hw, struct virtqueue *vq, uint16_t vec)
{
	rte_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);
	rte_write16(vec, &hw->common_cfg->queue_msix_vector);
	return rte_read16(&hw->common_cfg->queue_msix_vector);
}
441 
/* Select queue 'queue_id' and return its size (number of entries). */
static uint16_t
modern_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
{
	rte_write16(queue_id, &hw->common_cfg->queue_select);
	return rte_read16(&hw->common_cfg->queue_size);
}
448 
449 static int
450 modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
451 {
452 	uint64_t desc_addr, avail_addr, used_addr;
453 	uint16_t notify_off;
454 
455 	if (!check_vq_phys_addr_ok(vq))
456 		return -1;
457 
458 	desc_addr = vq->vq_ring_mem;
459 	avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc);
460 	used_addr = RTE_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail,
461 							 ring[vq->vq_nentries]),
462 				   VIRTIO_PCI_VRING_ALIGN);
463 
464 	rte_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);
465 
466 	io_write64_twopart(desc_addr, &hw->common_cfg->queue_desc_lo,
467 				      &hw->common_cfg->queue_desc_hi);
468 	io_write64_twopart(avail_addr, &hw->common_cfg->queue_avail_lo,
469 				       &hw->common_cfg->queue_avail_hi);
470 	io_write64_twopart(used_addr, &hw->common_cfg->queue_used_lo,
471 				      &hw->common_cfg->queue_used_hi);
472 
473 	notify_off = rte_read16(&hw->common_cfg->queue_notify_off);
474 	vq->notify_addr = (void *)((uint8_t *)hw->notify_base +
475 				notify_off * hw->notify_off_multiplier);
476 
477 	rte_write16(1, &hw->common_cfg->queue_enable);
478 
479 	PMD_INIT_LOG(DEBUG, "queue %u addresses:", vq->vq_queue_index);
480 	PMD_INIT_LOG(DEBUG, "\t desc_addr: %" PRIx64, desc_addr);
481 	PMD_INIT_LOG(DEBUG, "\t aval_addr: %" PRIx64, avail_addr);
482 	PMD_INIT_LOG(DEBUG, "\t used_addr: %" PRIx64, used_addr);
483 	PMD_INIT_LOG(DEBUG, "\t notify addr: %p (notify offset: %u)",
484 		vq->notify_addr, notify_off);
485 
486 	return 0;
487 }
488 
/*
 * Tear down queue 'vq': zero all ring addresses and disable the queue
 * through the common config capability.
 */
static void
modern_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
{
	rte_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);

	io_write64_twopart(0, &hw->common_cfg->queue_desc_lo,
				  &hw->common_cfg->queue_desc_hi);
	io_write64_twopart(0, &hw->common_cfg->queue_avail_lo,
				  &hw->common_cfg->queue_avail_hi);
	io_write64_twopart(0, &hw->common_cfg->queue_used_lo,
				  &hw->common_cfg->queue_used_hi);

	rte_write16(0, &hw->common_cfg->queue_enable);
}
503 
/* Kick the device by writing the queue index to its notify address,
 * which was cached by modern_setup_queue().
 */
static void
modern_notify_queue(struct virtio_hw *hw __rte_unused, struct virtqueue *vq)
{
	rte_write16(vq->vq_queue_index, vq->notify_addr);
}
509 
/* Ops table for modern (virtio 1.0) devices, driven via mapped BARs. */
const struct virtio_pci_ops modern_ops = {
	.read_dev_cfg	= modern_read_dev_config,
	.write_dev_cfg	= modern_write_dev_config,
	.reset		= modern_reset,
	.get_status	= modern_get_status,
	.set_status	= modern_set_status,
	.get_features	= modern_get_features,
	.set_features	= modern_set_features,
	.get_isr	= modern_get_isr,
	.set_config_irq	= modern_set_config_irq,
	.set_queue_irq  = modern_set_queue_irq,
	.get_queue_num	= modern_get_queue_num,
	.setup_queue	= modern_setup_queue,
	.del_queue	= modern_del_queue,
	.notify_queue	= modern_notify_queue,
};
526 
527 
/* Read device-specific config via the per-port ops (legacy or modern). */
void
vtpci_read_dev_config(struct virtio_hw *hw, size_t offset,
		      void *dst, int length)
{
	VTPCI_OPS(hw)->read_dev_cfg(hw, offset, dst, length);
}
534 
/* Write device-specific config via the per-port ops (legacy or modern). */
void
vtpci_write_dev_config(struct virtio_hw *hw, size_t offset,
		       const void *src, int length)
{
	VTPCI_OPS(hw)->write_dev_cfg(hw, offset, src, length);
}
541 
542 uint64_t
543 vtpci_negotiate_features(struct virtio_hw *hw, uint64_t host_features)
544 {
545 	uint64_t features;
546 
547 	/*
548 	 * Limit negotiated features to what the driver, virtqueue, and
549 	 * host all support.
550 	 */
551 	features = host_features & hw->guest_features;
552 	VTPCI_OPS(hw)->set_features(hw, features);
553 
554 	return features;
555 }
556 
/* Reset the device; the status read-back flushes the write. */
void
vtpci_reset(struct virtio_hw *hw)
{
	VTPCI_OPS(hw)->set_status(hw, VIRTIO_CONFIG_STATUS_RESET);
	/* flush status write */
	VTPCI_OPS(hw)->get_status(hw);
}
564 
/* Signal DRIVER_OK: device initialization/reinit is complete. */
void
vtpci_reinit_complete(struct virtio_hw *hw)
{
	vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER_OK);
}
570 
/*
 * Set status bits. Status is cumulative per the virtio spec, so the
 * current bits are ORed in first — except for RESET (0), which must
 * be written as-is to clear the device.
 */
void
vtpci_set_status(struct virtio_hw *hw, uint8_t status)
{
	if (status != VIRTIO_CONFIG_STATUS_RESET)
		status |= VTPCI_OPS(hw)->get_status(hw);

	VTPCI_OPS(hw)->set_status(hw, status);
}
579 
/* Return the current device status byte. */
uint8_t
vtpci_get_status(struct virtio_hw *hw)
{
	return VTPCI_OPS(hw)->get_status(hw);
}
585 
/* Return the ISR status byte (device clears it on read). */
uint8_t
vtpci_isr(struct virtio_hw *hw)
{
	return VTPCI_OPS(hw)->get_isr(hw);
}
591 
592 static void *
593 get_cfg_addr(struct rte_pci_device *dev, struct virtio_pci_cap *cap)
594 {
595 	uint8_t  bar    = cap->bar;
596 	uint32_t length = cap->length;
597 	uint32_t offset = cap->offset;
598 	uint8_t *base;
599 
600 	if (bar > 5) {
601 		PMD_INIT_LOG(ERR, "invalid bar: %u", bar);
602 		return NULL;
603 	}
604 
605 	if (offset + length < offset) {
606 		PMD_INIT_LOG(ERR, "offset(%u) + length(%u) overflows",
607 			offset, length);
608 		return NULL;
609 	}
610 
611 	if (offset + length > dev->mem_resource[bar].len) {
612 		PMD_INIT_LOG(ERR,
613 			"invalid cap: overflows bar space: %u > %" PRIu64,
614 			offset + length, dev->mem_resource[bar].len);
615 		return NULL;
616 	}
617 
618 	base = dev->mem_resource[bar].addr;
619 	if (base == NULL) {
620 		PMD_INIT_LOG(ERR, "bar %u base addr is NULL", bar);
621 		return NULL;
622 	}
623 
624 	return base + offset;
625 }
626 
/*
 * Walk the PCI capability list looking for the virtio vendor-specific
 * capabilities of a modern (virtio 1.0) device, and map the common,
 * notify, device and ISR config regions into 'hw'.
 *
 * Returns 0 when all four regions were found (modern device),
 * -1 otherwise (caller falls back to legacy handling).
 */
static int
virtio_read_caps(struct rte_pci_device *dev, struct virtio_hw *hw)
{
	uint8_t pos;
	struct virtio_pci_cap cap;
	int ret;

	if (rte_eal_pci_map_device(dev)) {
		PMD_INIT_LOG(DEBUG, "failed to map pci device!");
		return -1;
	}

	/* Start of the capability linked list in PCI config space. */
	ret = rte_eal_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST);
	if (ret < 0) {
		PMD_INIT_LOG(DEBUG, "failed to read pci capability list");
		return -1;
	}

	/* pos == 0 terminates the list. */
	while (pos) {
		ret = rte_eal_pci_read_config(dev, &cap, sizeof(cap), pos);
		if (ret < 0) {
			PMD_INIT_LOG(ERR,
				"failed to read pci cap at pos: %x", pos);
			break;
		}

		/* Virtio regions are described by vendor-specific caps. */
		if (cap.cap_vndr != PCI_CAP_ID_VNDR) {
			PMD_INIT_LOG(DEBUG,
				"[%2x] skipping non VNDR cap id: %02x",
				pos, cap.cap_vndr);
			goto next;
		}

		PMD_INIT_LOG(DEBUG,
			"[%2x] cfg type: %u, bar: %u, offset: %04x, len: %u",
			pos, cap.cfg_type, cap.bar, cap.offset, cap.length);

		switch (cap.cfg_type) {
		case VIRTIO_PCI_CAP_COMMON_CFG:
			hw->common_cfg = get_cfg_addr(dev, &cap);
			break;
		case VIRTIO_PCI_CAP_NOTIFY_CFG:
			/* The multiplier follows the generic cap header. */
			rte_eal_pci_read_config(dev, &hw->notify_off_multiplier,
						4, pos + sizeof(cap));
			hw->notify_base = get_cfg_addr(dev, &cap);
			break;
		case VIRTIO_PCI_CAP_DEVICE_CFG:
			hw->dev_cfg = get_cfg_addr(dev, &cap);
			break;
		case VIRTIO_PCI_CAP_ISR_CFG:
			hw->isr = get_cfg_addr(dev, &cap);
			break;
		}

next:
		pos = cap.cap_next;
	}

	/* A modern device must expose all four regions. */
	if (hw->common_cfg == NULL || hw->notify_base == NULL ||
	    hw->dev_cfg == NULL    || hw->isr == NULL) {
		PMD_INIT_LOG(INFO, "no modern virtio pci device found.");
		return -1;
	}

	PMD_INIT_LOG(INFO, "found modern virtio pci device.");

	PMD_INIT_LOG(DEBUG, "common cfg mapped at: %p", hw->common_cfg);
	PMD_INIT_LOG(DEBUG, "device cfg mapped at: %p", hw->dev_cfg);
	PMD_INIT_LOG(DEBUG, "isr cfg mapped at: %p", hw->isr);
	PMD_INIT_LOG(DEBUG, "notify base: %p, notify off multiplier: %u",
		hw->notify_base, hw->notify_off_multiplier);

	return 0;
}
701 
702 /*
703  * Return -1:
704  *   if there is error mapping with VFIO/UIO.
705  *   if port map error when driver type is KDRV_NONE.
706  *   if whitelisted but driver type is KDRV_UNKNOWN.
707  * Return 1 if kernel driver is managing the device.
708  * Return 0 on success.
709  */
int
vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw,
	   uint32_t *dev_flags)
{
	/*
	 * Try if we can succeed reading virtio pci caps, which exists
	 * only on modern pci device. If failed, we fallback to legacy
	 * virtio handling.
	 */
	if (virtio_read_caps(dev, hw) == 0) {
		PMD_INIT_LOG(INFO, "modern virtio pci detected.");
		virtio_hw_internal[hw->port_id].vtpci_ops = &modern_ops;
		hw->modern = 1;
		*dev_flags |= RTE_ETH_DEV_INTR_LSC;
		return 0;
	}

	PMD_INIT_LOG(INFO, "trying with legacy virtio pci.");
	if (legacy_virtio_resource_init(dev, hw, dev_flags) < 0) {
		/* A device bound to no DPDK-usable driver and not
		 * explicitly whitelisted is assumed kernel-managed.
		 */
		if (dev->kdrv == RTE_KDRV_UNKNOWN &&
		    (!dev->device.devargs ||
		     dev->device.devargs->type !=
			RTE_DEVTYPE_WHITELISTED_PCI)) {
			PMD_INIT_LOG(INFO,
				"skip kernel managed virtio device.");
			return 1;
		}
		return -1;
	}

	/* Legacy path: I/O port access, optional MSI-X from sysfs probe. */
	virtio_hw_internal[hw->port_id].vtpci_ops = &legacy_ops;
	hw->use_msix = legacy_virtio_has_msix(&dev->addr);
	hw->modern   = 0;

	return 0;
}
746