xref: /dpdk/drivers/net/virtio/virtio_pci.c (revision 0e2db9d3cd6499374326f48e57b6abc5d28116a5)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 #include <stdint.h>
34 
35 #ifdef RTE_EXEC_ENV_LINUXAPP
36  #include <dirent.h>
37  #include <fcntl.h>
38 #endif
39 
40 #include "virtio_pci.h"
41 #include "virtio_logs.h"
42 #include "virtqueue.h"
43 
/*
 * Following macros are derived from linux/pci_regs.h, however,
 * we can't simply include that header here, as there is no such
 * file for non-Linux platform.
 */
/* Offset in PCI config space of the pointer to the first capability. */
#define PCI_CAPABILITY_LIST	0x34
/* Vendor-specific capability ID; virtio uses it for its modern cfg caps. */
#define PCI_CAP_ID_VNDR		0x09

/*
 * The remaining space is defined by each driver as the per-driver
 * configuration space.  With MSI-X enabled the legacy header grows by
 * 4 bytes (config/queue vector registers), hence 24 vs 20.
 */
#define VIRTIO_PCI_CONFIG(hw) (((hw)->use_msix) ? 24 : 20)
57 
58 static inline int
59 check_vq_phys_addr_ok(struct virtqueue *vq)
60 {
61 	/* Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit,
62 	 * and only accepts 32 bit page frame number.
63 	 * Check if the allocated physical memory exceeds 16TB.
64 	 */
65 	if ((vq->vq_ring_mem + vq->vq_ring_size - 1) >>
66 			(VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) {
67 		PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!");
68 		return 0;
69 	}
70 
71 	return 1;
72 }
73 
/*
 * Since we are in legacy mode:
 * http://ozlabs.org/~rusty/virtio-spec/virtio-0.9.5.pdf
 *
 * "Note that this is possible because while the virtio header is PCI (i.e.
 * little) endian, the device-specific region is encoded in the native endian of
 * the guest (where such distinction is applicable)."
 *
 * For powerpc which supports both, qemu supposes that cpu is big endian and
 * enforces this for the virtio-net stuff.
 */
static void
legacy_read_dev_config(struct virtio_hw *hw, size_t offset,
		       void *dst, int length)
{
#ifdef RTE_ARCH_PPC_64
	int size;

	/*
	 * Read in the largest possible chunks (4/2/1 bytes) and convert
	 * each chunk from the device's big-endian layout to CPU order;
	 * single bytes need no swap.
	 */
	while (length > 0) {
		if (length >= 4) {
			size = 4;
			rte_eal_pci_ioport_read(VTPCI_IO(hw), dst, size,
				VIRTIO_PCI_CONFIG(hw) + offset);
			*(uint32_t *)dst = rte_be_to_cpu_32(*(uint32_t *)dst);
		} else if (length >= 2) {
			size = 2;
			rte_eal_pci_ioport_read(VTPCI_IO(hw), dst, size,
				VIRTIO_PCI_CONFIG(hw) + offset);
			*(uint16_t *)dst = rte_be_to_cpu_16(*(uint16_t *)dst);
		} else {
			size = 1;
			rte_eal_pci_ioport_read(VTPCI_IO(hw), dst, size,
				VIRTIO_PCI_CONFIG(hw) + offset);
		}

		dst = (char *)dst + size;
		offset += size;
		length -= size;
	}
#else
	/* Little-endian guests can copy the device config verbatim. */
	rte_eal_pci_ioport_read(VTPCI_IO(hw), dst, length,
				VIRTIO_PCI_CONFIG(hw) + offset);
#endif
}
118 
/*
 * Write @length bytes from @src into the legacy device-specific config
 * space at @offset.  Mirror image of legacy_read_dev_config(): on PPC64
 * each 4/2-byte chunk is converted to big endian before being written.
 */
static void
legacy_write_dev_config(struct virtio_hw *hw, size_t offset,
			const void *src, int length)
{
#ifdef RTE_ARCH_PPC_64
	/* Scratch space so the caller's buffer is never modified. */
	union {
		uint32_t u32;
		uint16_t u16;
	} tmp;
	int size;

	while (length > 0) {
		if (length >= 4) {
			size = 4;
			tmp.u32 = rte_cpu_to_be_32(*(const uint32_t *)src);
			rte_eal_pci_ioport_write(VTPCI_IO(hw), &tmp.u32, size,
				VIRTIO_PCI_CONFIG(hw) + offset);
		} else if (length >= 2) {
			size = 2;
			tmp.u16 = rte_cpu_to_be_16(*(const uint16_t *)src);
			rte_eal_pci_ioport_write(VTPCI_IO(hw), &tmp.u16, size,
				VIRTIO_PCI_CONFIG(hw) + offset);
		} else {
			size = 1;
			rte_eal_pci_ioport_write(VTPCI_IO(hw), src, size,
				VIRTIO_PCI_CONFIG(hw) + offset);
		}

		src = (const char *)src + size;
		offset += size;
		length -= size;
	}
#else
	/* Little-endian guests can copy the buffer out verbatim. */
	rte_eal_pci_ioport_write(VTPCI_IO(hw), src, length,
				 VIRTIO_PCI_CONFIG(hw) + offset);
#endif
}
156 
157 static uint64_t
158 legacy_get_features(struct virtio_hw *hw)
159 {
160 	uint32_t dst;
161 
162 	rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 4,
163 				VIRTIO_PCI_HOST_FEATURES);
164 	return dst;
165 }
166 
167 static void
168 legacy_set_features(struct virtio_hw *hw, uint64_t features)
169 {
170 	if ((features >> 32) != 0) {
171 		PMD_DRV_LOG(ERR,
172 			"only 32 bit features are allowed for legacy virtio!");
173 		return;
174 	}
175 	rte_eal_pci_ioport_write(VTPCI_IO(hw), &features, 4,
176 				 VIRTIO_PCI_GUEST_FEATURES);
177 }
178 
179 static uint8_t
180 legacy_get_status(struct virtio_hw *hw)
181 {
182 	uint8_t dst;
183 
184 	rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 1, VIRTIO_PCI_STATUS);
185 	return dst;
186 }
187 
/* Write the device status byte through the legacy I/O port. */
static void
legacy_set_status(struct virtio_hw *hw, uint8_t status)
{
	rte_eal_pci_ioport_write(VTPCI_IO(hw), &status, 1, VIRTIO_PCI_STATUS);
}
193 
/* Reset the device by writing status 0 (VIRTIO_CONFIG_STATUS_RESET). */
static void
legacy_reset(struct virtio_hw *hw)
{
	legacy_set_status(hw, VIRTIO_CONFIG_STATUS_RESET);
}
199 
200 static uint8_t
201 legacy_get_isr(struct virtio_hw *hw)
202 {
203 	uint8_t dst;
204 
205 	rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 1, VIRTIO_PCI_ISR);
206 	return dst;
207 }
208 
209 /* Enable one vector (0) for Link State Intrerrupt */
210 static uint16_t
211 legacy_set_config_irq(struct virtio_hw *hw, uint16_t vec)
212 {
213 	uint16_t dst;
214 
215 	rte_eal_pci_ioport_write(VTPCI_IO(hw), &vec, 2,
216 				 VIRTIO_MSI_CONFIG_VECTOR);
217 	rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 2,
218 				VIRTIO_MSI_CONFIG_VECTOR);
219 	return dst;
220 }
221 
222 static uint16_t
223 legacy_set_queue_irq(struct virtio_hw *hw, struct virtqueue *vq, uint16_t vec)
224 {
225 	uint16_t dst;
226 
227 	rte_eal_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
228 				 VIRTIO_PCI_QUEUE_SEL);
229 	rte_eal_pci_ioport_write(VTPCI_IO(hw), &vec, 2,
230 				 VIRTIO_MSI_QUEUE_VECTOR);
231 	rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 2, VIRTIO_MSI_QUEUE_VECTOR);
232 	return dst;
233 }
234 
235 static uint16_t
236 legacy_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
237 {
238 	uint16_t dst;
239 
240 	rte_eal_pci_ioport_write(VTPCI_IO(hw), &queue_id, 2,
241 				 VIRTIO_PCI_QUEUE_SEL);
242 	rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 2, VIRTIO_PCI_QUEUE_NUM);
243 	return dst;
244 }
245 
246 static int
247 legacy_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
248 {
249 	uint32_t src;
250 
251 	if (!check_vq_phys_addr_ok(vq))
252 		return -1;
253 
254 	rte_eal_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
255 			 VIRTIO_PCI_QUEUE_SEL);
256 	src = vq->vq_ring_mem >> VIRTIO_PCI_QUEUE_ADDR_SHIFT;
257 	rte_eal_pci_ioport_write(VTPCI_IO(hw), &src, 4, VIRTIO_PCI_QUEUE_PFN);
258 
259 	return 0;
260 }
261 
262 static void
263 legacy_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
264 {
265 	uint32_t src = 0;
266 
267 	rte_eal_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
268 			 VIRTIO_PCI_QUEUE_SEL);
269 	rte_eal_pci_ioport_write(VTPCI_IO(hw), &src, 4, VIRTIO_PCI_QUEUE_PFN);
270 }
271 
/* Kick the device: write the queue index to the notify register. */
static void
legacy_notify_queue(struct virtio_hw *hw, struct virtqueue *vq)
{
	rte_eal_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
			 VIRTIO_PCI_QUEUE_NOTIFY);
}
278 
279 #ifdef RTE_EXEC_ENV_LINUXAPP
280 static int
281 legacy_virtio_has_msix(const struct rte_pci_addr *loc)
282 {
283 	DIR *d;
284 	char dirname[PATH_MAX];
285 
286 	snprintf(dirname, sizeof(dirname),
287 		     "%s/" PCI_PRI_FMT "/msi_irqs", pci_get_sysfs_path(),
288 		     loc->domain, loc->bus, loc->devid, loc->function);
289 
290 	d = opendir(dirname);
291 	if (d)
292 		closedir(d);
293 
294 	return d != NULL;
295 }
296 #else
297 static int
298 legacy_virtio_has_msix(const struct rte_pci_addr *loc __rte_unused)
299 {
300 	/* nic_uio does not enable interrupts, return 0 (false). */
301 	return 0;
302 }
303 #endif
304 
305 static int
306 legacy_virtio_resource_init(struct rte_pci_device *pci_dev,
307 			    struct virtio_hw *hw, uint32_t *dev_flags)
308 {
309 	if (rte_eal_pci_ioport_map(pci_dev, 0, VTPCI_IO(hw)) < 0)
310 		return -1;
311 
312 	if (pci_dev->intr_handle.type != RTE_INTR_HANDLE_UNKNOWN)
313 		*dev_flags |= RTE_ETH_DEV_INTR_LSC;
314 	else
315 		*dev_flags &= ~RTE_ETH_DEV_INTR_LSC;
316 
317 	return 0;
318 }
319 
/* Dispatch table used when the device only supports the 0.9.5 (legacy)
 * interface; all accesses go through I/O ports.
 */
const struct virtio_pci_ops legacy_ops = {
	.read_dev_cfg	= legacy_read_dev_config,
	.write_dev_cfg	= legacy_write_dev_config,
	.reset		= legacy_reset,
	.get_status	= legacy_get_status,
	.set_status	= legacy_set_status,
	.get_features	= legacy_get_features,
	.set_features	= legacy_set_features,
	.get_isr	= legacy_get_isr,
	.set_config_irq	= legacy_set_config_irq,
	.set_queue_irq  = legacy_set_queue_irq,
	.get_queue_num	= legacy_get_queue_num,
	.setup_queue	= legacy_setup_queue,
	.del_queue	= legacy_del_queue,
	.notify_queue	= legacy_notify_queue,
};
336 
337 
/* MMIO byte load; the volatile access keeps the compiler from caching it. */
static inline uint8_t
io_read8(uint8_t *addr)
{
	volatile uint8_t *reg = addr;

	return *reg;
}
343 
/* MMIO byte store; volatile prevents the write from being elided. */
static inline void
io_write8(uint8_t val, uint8_t *addr)
{
	volatile uint8_t *reg = addr;

	*reg = val;
}
349 
/* MMIO 16-bit load through a volatile access. */
static inline uint16_t
io_read16(uint16_t *addr)
{
	volatile uint16_t *reg = addr;

	return *reg;
}
355 
/* MMIO 16-bit store through a volatile access. */
static inline void
io_write16(uint16_t val, uint16_t *addr)
{
	volatile uint16_t *reg = addr;

	*reg = val;
}
361 
/* MMIO 32-bit load through a volatile access. */
static inline uint32_t
io_read32(uint32_t *addr)
{
	volatile uint32_t *reg = addr;

	return *reg;
}
367 
/* MMIO 32-bit store through a volatile access. */
static inline void
io_write32(uint32_t val, uint32_t *addr)
{
	volatile uint32_t *reg = addr;

	*reg = val;
}
373 
/* Store a 64-bit value as two 32-bit MMIO writes, low half first. */
static inline void
io_write64_twopart(uint64_t val, uint32_t *lo, uint32_t *hi)
{
	*(volatile uint32_t *)lo = (uint32_t)val;
	*(volatile uint32_t *)hi = (uint32_t)(val >> 32);
}
380 
/*
 * Read @length bytes of device-specific config into @dst.
 * Device config can change while we copy it byte by byte, so the
 * config_generation counter is sampled before and after the copy and
 * the read is retried until both samples match, guaranteeing a
 * consistent snapshot.
 */
static void
modern_read_dev_config(struct virtio_hw *hw, size_t offset,
		       void *dst, int length)
{
	int i;
	uint8_t *p;
	uint8_t old_gen, new_gen;

	do {
		old_gen = io_read8(&hw->common_cfg->config_generation);

		p = dst;
		for (i = 0;  i < length; i++)
			*p++ = io_read8((uint8_t *)hw->dev_cfg + offset + i);

		new_gen = io_read8(&hw->common_cfg->config_generation);
	} while (old_gen != new_gen);
}
399 
400 static void
401 modern_write_dev_config(struct virtio_hw *hw, size_t offset,
402 			const void *src, int length)
403 {
404 	int i;
405 	const uint8_t *p = src;
406 
407 	for (i = 0;  i < length; i++)
408 		io_write8(*p++, (uint8_t *)hw->dev_cfg + offset + i);
409 }
410 
411 static uint64_t
412 modern_get_features(struct virtio_hw *hw)
413 {
414 	uint32_t features_lo, features_hi;
415 
416 	io_write32(0, &hw->common_cfg->device_feature_select);
417 	features_lo = io_read32(&hw->common_cfg->device_feature);
418 
419 	io_write32(1, &hw->common_cfg->device_feature_select);
420 	features_hi = io_read32(&hw->common_cfg->device_feature);
421 
422 	return ((uint64_t)features_hi << 32) | features_lo;
423 }
424 
425 static void
426 modern_set_features(struct virtio_hw *hw, uint64_t features)
427 {
428 	io_write32(0, &hw->common_cfg->guest_feature_select);
429 	io_write32(features & ((1ULL << 32) - 1),
430 		&hw->common_cfg->guest_feature);
431 
432 	io_write32(1, &hw->common_cfg->guest_feature_select);
433 	io_write32(features >> 32,
434 		&hw->common_cfg->guest_feature);
435 }
436 
/* Read the device status byte from the common config structure. */
static uint8_t
modern_get_status(struct virtio_hw *hw)
{
	return io_read8(&hw->common_cfg->device_status);
}
442 
/* Write the device status byte into the common config structure. */
static void
modern_set_status(struct virtio_hw *hw, uint8_t status)
{
	io_write8(status, &hw->common_cfg->device_status);
}
448 
/* Reset the device by writing status 0; the follow-up read flushes the
 * write (the device has finished resetting once status reads back 0).
 */
static void
modern_reset(struct virtio_hw *hw)
{
	modern_set_status(hw, VIRTIO_CONFIG_STATUS_RESET);
	modern_get_status(hw);
}
455 
/* Read (and thereby acknowledge) the ISR status byte. */
static uint8_t
modern_get_isr(struct virtio_hw *hw)
{
	return io_read8(hw->isr);
}
461 
462 static uint16_t
463 modern_set_config_irq(struct virtio_hw *hw, uint16_t vec)
464 {
465 	io_write16(vec, &hw->common_cfg->msix_config);
466 	return io_read16(&hw->common_cfg->msix_config);
467 }
468 
469 static uint16_t
470 modern_set_queue_irq(struct virtio_hw *hw, struct virtqueue *vq, uint16_t vec)
471 {
472 	io_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);
473 	io_write16(vec, &hw->common_cfg->queue_msix_vector);
474 	return io_read16(&hw->common_cfg->queue_msix_vector);
475 }
476 
477 static uint16_t
478 modern_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
479 {
480 	io_write16(queue_id, &hw->common_cfg->queue_select);
481 	return io_read16(&hw->common_cfg->queue_size);
482 }
483 
484 static int
485 modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
486 {
487 	uint64_t desc_addr, avail_addr, used_addr;
488 	uint16_t notify_off;
489 
490 	if (!check_vq_phys_addr_ok(vq))
491 		return -1;
492 
493 	desc_addr = vq->vq_ring_mem;
494 	avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc);
495 	used_addr = RTE_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail,
496 							 ring[vq->vq_nentries]),
497 				   VIRTIO_PCI_VRING_ALIGN);
498 
499 	io_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);
500 
501 	io_write64_twopart(desc_addr, &hw->common_cfg->queue_desc_lo,
502 				      &hw->common_cfg->queue_desc_hi);
503 	io_write64_twopart(avail_addr, &hw->common_cfg->queue_avail_lo,
504 				       &hw->common_cfg->queue_avail_hi);
505 	io_write64_twopart(used_addr, &hw->common_cfg->queue_used_lo,
506 				      &hw->common_cfg->queue_used_hi);
507 
508 	notify_off = io_read16(&hw->common_cfg->queue_notify_off);
509 	vq->notify_addr = (void *)((uint8_t *)hw->notify_base +
510 				notify_off * hw->notify_off_multiplier);
511 
512 	io_write16(1, &hw->common_cfg->queue_enable);
513 
514 	PMD_INIT_LOG(DEBUG, "queue %u addresses:", vq->vq_queue_index);
515 	PMD_INIT_LOG(DEBUG, "\t desc_addr: %" PRIx64, desc_addr);
516 	PMD_INIT_LOG(DEBUG, "\t aval_addr: %" PRIx64, avail_addr);
517 	PMD_INIT_LOG(DEBUG, "\t used_addr: %" PRIx64, used_addr);
518 	PMD_INIT_LOG(DEBUG, "\t notify addr: %p (notify offset: %u)",
519 		vq->notify_addr, notify_off);
520 
521 	return 0;
522 }
523 
524 static void
525 modern_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
526 {
527 	io_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);
528 
529 	io_write64_twopart(0, &hw->common_cfg->queue_desc_lo,
530 				  &hw->common_cfg->queue_desc_hi);
531 	io_write64_twopart(0, &hw->common_cfg->queue_avail_lo,
532 				  &hw->common_cfg->queue_avail_hi);
533 	io_write64_twopart(0, &hw->common_cfg->queue_used_lo,
534 				  &hw->common_cfg->queue_used_hi);
535 
536 	io_write16(0, &hw->common_cfg->queue_enable);
537 }
538 
/* Kick the device by writing the value 1 to the queue's notify address
 * (computed earlier in modern_setup_queue()).
 */
static void
modern_notify_queue(struct virtio_hw *hw __rte_unused, struct virtqueue *vq)
{
	io_write16(1, vq->notify_addr);
}
544 
/* Dispatch table used when the device exposes the virtio 1.0 (modern)
 * capability layout; all accesses go through memory-mapped BARs.
 */
const struct virtio_pci_ops modern_ops = {
	.read_dev_cfg	= modern_read_dev_config,
	.write_dev_cfg	= modern_write_dev_config,
	.reset		= modern_reset,
	.get_status	= modern_get_status,
	.set_status	= modern_set_status,
	.get_features	= modern_get_features,
	.set_features	= modern_set_features,
	.get_isr	= modern_get_isr,
	.set_config_irq	= modern_set_config_irq,
	.set_queue_irq  = modern_set_queue_irq,
	.get_queue_num	= modern_get_queue_num,
	.setup_queue	= modern_setup_queue,
	.del_queue	= modern_del_queue,
	.notify_queue	= modern_notify_queue,
};
561 
562 
/* Read device-specific config via the legacy or modern backend. */
void
vtpci_read_dev_config(struct virtio_hw *hw, size_t offset,
		      void *dst, int length)
{
	VTPCI_OPS(hw)->read_dev_cfg(hw, offset, dst, length);
}
569 
/* Write device-specific config via the legacy or modern backend. */
void
vtpci_write_dev_config(struct virtio_hw *hw, size_t offset,
		       const void *src, int length)
{
	VTPCI_OPS(hw)->write_dev_cfg(hw, offset, src, length);
}
576 
577 uint64_t
578 vtpci_negotiate_features(struct virtio_hw *hw, uint64_t host_features)
579 {
580 	uint64_t features;
581 
582 	/*
583 	 * Limit negotiated features to what the driver, virtqueue, and
584 	 * host all support.
585 	 */
586 	features = host_features & hw->guest_features;
587 	VTPCI_OPS(hw)->set_features(hw, features);
588 
589 	return features;
590 }
591 
/* Reset the device (status 0); the status read-back flushes the write. */
void
vtpci_reset(struct virtio_hw *hw)
{
	VTPCI_OPS(hw)->set_status(hw, VIRTIO_CONFIG_STATUS_RESET);
	/* flush status write */
	VTPCI_OPS(hw)->get_status(hw);
}
599 
/* Mark initialization complete by OR-ing DRIVER_OK into the status. */
void
vtpci_reinit_complete(struct virtio_hw *hw)
{
	vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER_OK);
}
605 
/*
 * Set a device status bit.  Except for a reset (status 0, which must be
 * written as-is), the new bits are OR-ed into the current status so
 * previously set bits are preserved.
 */
void
vtpci_set_status(struct virtio_hw *hw, uint8_t status)
{
	if (status != VIRTIO_CONFIG_STATUS_RESET)
		status |= VTPCI_OPS(hw)->get_status(hw);

	VTPCI_OPS(hw)->set_status(hw, status);
}
614 
/* Read the current device status byte. */
uint8_t
vtpci_get_status(struct virtio_hw *hw)
{
	return VTPCI_OPS(hw)->get_status(hw);
}
620 
/* Read (and acknowledge) the ISR status byte. */
uint8_t
vtpci_isr(struct virtio_hw *hw)
{
	return VTPCI_OPS(hw)->get_isr(hw);
}
626 
627 static void *
628 get_cfg_addr(struct rte_pci_device *dev, struct virtio_pci_cap *cap)
629 {
630 	uint8_t  bar    = cap->bar;
631 	uint32_t length = cap->length;
632 	uint32_t offset = cap->offset;
633 	uint8_t *base;
634 
635 	if (bar > 5) {
636 		PMD_INIT_LOG(ERR, "invalid bar: %u", bar);
637 		return NULL;
638 	}
639 
640 	if (offset + length < offset) {
641 		PMD_INIT_LOG(ERR, "offset(%u) + length(%u) overflows",
642 			offset, length);
643 		return NULL;
644 	}
645 
646 	if (offset + length > dev->mem_resource[bar].len) {
647 		PMD_INIT_LOG(ERR,
648 			"invalid cap: overflows bar space: %u > %" PRIu64,
649 			offset + length, dev->mem_resource[bar].len);
650 		return NULL;
651 	}
652 
653 	base = dev->mem_resource[bar].addr;
654 	if (base == NULL) {
655 		PMD_INIT_LOG(ERR, "bar %u base addr is NULL", bar);
656 		return NULL;
657 	}
658 
659 	return base + offset;
660 }
661 
/*
 * Walk the PCI capability list looking for the four virtio vendor
 * capabilities (common/notify/device/isr cfg) and record their mapped
 * addresses in @hw.  Returns 0 when all four were found (modern device),
 * -1 otherwise (caller falls back to the legacy interface).
 *
 * NOTE(review): on the failure paths the BARs mapped by
 * rte_eal_pci_map_device() are not unmapped here — presumably released
 * elsewhere on teardown; verify against the callers.
 */
static int
virtio_read_caps(struct rte_pci_device *dev, struct virtio_hw *hw)
{
	uint8_t pos;
	struct virtio_pci_cap cap;
	int ret;

	if (rte_eal_pci_map_device(dev)) {
		PMD_INIT_LOG(DEBUG, "failed to map pci device!");
		return -1;
	}

	/* Start at the head of the capability chain. */
	ret = rte_eal_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST);
	if (ret < 0) {
		PMD_INIT_LOG(DEBUG, "failed to read pci capability list");
		return -1;
	}

	/* pos == 0 terminates the chain. */
	while (pos) {
		ret = rte_eal_pci_read_config(dev, &cap, sizeof(cap), pos);
		if (ret < 0) {
			PMD_INIT_LOG(ERR,
				"failed to read pci cap at pos: %x", pos);
			break;
		}

		/* Only vendor-specific capabilities belong to virtio. */
		if (cap.cap_vndr != PCI_CAP_ID_VNDR) {
			PMD_INIT_LOG(DEBUG,
				"[%2x] skipping non VNDR cap id: %02x",
				pos, cap.cap_vndr);
			goto next;
		}

		PMD_INIT_LOG(DEBUG,
			"[%2x] cfg type: %u, bar: %u, offset: %04x, len: %u",
			pos, cap.cfg_type, cap.bar, cap.offset, cap.length);

		switch (cap.cfg_type) {
		case VIRTIO_PCI_CAP_COMMON_CFG:
			hw->common_cfg = get_cfg_addr(dev, &cap);
			break;
		case VIRTIO_PCI_CAP_NOTIFY_CFG:
			/* The notify multiplier follows the cap structure.
			 * NOTE(review): this read's return value is not
			 * checked, unlike the other config reads above.
			 */
			rte_eal_pci_read_config(dev, &hw->notify_off_multiplier,
						4, pos + sizeof(cap));
			hw->notify_base = get_cfg_addr(dev, &cap);
			break;
		case VIRTIO_PCI_CAP_DEVICE_CFG:
			hw->dev_cfg = get_cfg_addr(dev, &cap);
			break;
		case VIRTIO_PCI_CAP_ISR_CFG:
			hw->isr = get_cfg_addr(dev, &cap);
			break;
		}

next:
		pos = cap.cap_next;
	}

	/* A modern device must expose all four capability regions. */
	if (hw->common_cfg == NULL || hw->notify_base == NULL ||
	    hw->dev_cfg == NULL    || hw->isr == NULL) {
		PMD_INIT_LOG(INFO, "no modern virtio pci device found.");
		return -1;
	}

	PMD_INIT_LOG(INFO, "found modern virtio pci device.");

	PMD_INIT_LOG(DEBUG, "common cfg mapped at: %p", hw->common_cfg);
	PMD_INIT_LOG(DEBUG, "device cfg mapped at: %p", hw->dev_cfg);
	PMD_INIT_LOG(DEBUG, "isr cfg mapped at: %p", hw->isr);
	PMD_INIT_LOG(DEBUG, "notify base: %p, notify off multiplier: %u",
		hw->notify_base, hw->notify_off_multiplier);

	return 0;
}
736 
/*
 * Return -1:
 *   if there is error mapping with VFIO/UIO.
 *   if port map error when driver type is KDRV_NONE.
 *   if whitelisted but driver type is KDRV_UNKNOWN.
 * Return 1 if kernel driver is managing the device.
 * Return 0 on success.
 */
int
vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw,
	   uint32_t *dev_flags)
{
	/*
	 * Try if we can succeed reading virtio pci caps, which exists
	 * only on modern pci device. If failed, we fallback to legacy
	 * virtio handling.
	 */
	if (virtio_read_caps(dev, hw) == 0) {
		PMD_INIT_LOG(INFO, "modern virtio pci detected.");
		virtio_hw_internal[hw->port_id].vtpci_ops = &modern_ops;
		hw->modern = 1;
		/* Modern devices always support link-state interrupts. */
		*dev_flags |= RTE_ETH_DEV_INTR_LSC;
		return 0;
	}

	PMD_INIT_LOG(INFO, "trying with legacy virtio pci.");
	if (legacy_virtio_resource_init(dev, hw, dev_flags) < 0) {
		/* An unbound device that was not explicitly whitelisted is
		 * assumed to be managed by a kernel driver: skip, don't fail.
		 */
		if (dev->kdrv == RTE_KDRV_UNKNOWN &&
		    (!dev->device.devargs ||
		     dev->device.devargs->type !=
			RTE_DEVTYPE_WHITELISTED_PCI)) {
			PMD_INIT_LOG(INFO,
				"skip kernel managed virtio device.");
			return 1;
		}
		return -1;
	}

	virtio_hw_internal[hw->port_id].vtpci_ops = &legacy_ops;
	hw->use_msix = legacy_virtio_has_msix(&dev->addr);
	hw->modern   = 0;

	return 0;
}
781