/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2018 Intel Corporation
 */

#ifndef _VHOST_NET_CDEV_H_
#define _VHOST_NET_CDEV_H_
#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>
#include <stdlib.h>
#include <sys/queue.h>
#include <unistd.h>
#include <linux/virtio_net.h>
#include <sys/socket.h>
#include <linux/if.h>
#include <sys/mman.h>

#include <rte_log.h>
#include <rte_ether.h>
#include <rte_malloc.h>
#include <rte_dmadev.h>

#include "rte_vhost.h"
#include "vdpa_driver.h"

#include "rte_vhost_async.h"

/* Used to indicate that the device is running on a data core */
#define VIRTIO_DEV_RUNNING ((uint32_t)1 << 0)
/* Used to indicate that the device is ready to operate */
#define VIRTIO_DEV_READY ((uint32_t)1 << 1)
/* Used to indicate that the built-in vhost net device backend is enabled */
#define VIRTIO_DEV_BUILTIN_VIRTIO_NET ((uint32_t)1 << 2)
/* Used to indicate that the device has its own data path and is configured */
#define VIRTIO_DEV_VDPA_CONFIGURED ((uint32_t)1 << 3)
/* Used to indicate that the feature negotiation failed */
#define VIRTIO_DEV_FEATURES_FAILED ((uint32_t)1 << 4)
/* Used to indicate that the virtio_net tx code should fill TX ol_flags */
#define VIRTIO_DEV_LEGACY_OL_FLAGS ((uint32_t)1 << 5)
/* Used to indicate that the application has requested statistics collection */
#define VIRTIO_DEV_STATS_ENABLED ((uint32_t)1 << 6)
/* Used to indicate that the application has requested IOMMU support */
#define VIRTIO_DEV_SUPPORT_IOMMU ((uint32_t)1 << 7)

/* Backend value set by guest. */
#define VIRTIO_DEV_STOPPED -1

#define BUF_VECTOR_MAX 256

#define VHOST_LOG_CACHE_NR 32

#define MAX_PKT_BURST 32

#define VHOST_MAX_ASYNC_IT (MAX_PKT_BURST)
#define VHOST_MAX_ASYNC_VEC 2048
#define VIRTIO_MAX_RX_PKTLEN 9728U
#define VHOST_DMA_MAX_COPY_COMPLETE ((VIRTIO_MAX_RX_PKTLEN / RTE_MBUF_DEFAULT_DATAROOM) \
		* MAX_PKT_BURST)

#define PACKED_DESC_ENQUEUE_USED_FLAG(w)	\
	((w) ? (VRING_DESC_F_AVAIL | VRING_DESC_F_USED | VRING_DESC_F_WRITE) : \
		VRING_DESC_F_WRITE)
#define PACKED_DESC_DEQUEUE_USED_FLAG(w)	\
	((w) ? (VRING_DESC_F_AVAIL | VRING_DESC_F_USED) : 0x0)
#define PACKED_DESC_SINGLE_DEQUEUE_FLAG (VRING_DESC_F_NEXT | \
					 VRING_DESC_F_INDIRECT)

#define PACKED_BATCH_SIZE (RTE_CACHE_LINE_SIZE / \
			    sizeof(struct vring_packed_desc))
#define PACKED_BATCH_MASK (PACKED_BATCH_SIZE - 1)
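
/*
 * As a worked example, on a platform with 64-byte cache lines and
 * 16-byte packed descriptors, PACKED_BATCH_SIZE is 64 / 16 = 4 and
 * PACKED_BATCH_MASK is 3, so one batch covers exactly one cache line
 * of descriptors.
 */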

#ifdef VHOST_GCC_UNROLL_PRAGMA
#define vhost_for_each_try_unroll(iter, val, size) _Pragma("GCC unroll 4") \
	for (iter = val; iter < size; iter++)
#endif

#ifdef VHOST_CLANG_UNROLL_PRAGMA
#define vhost_for_each_try_unroll(iter, val, size) _Pragma("unroll 4") \
	for (iter = val; iter < size; iter++)
#endif

#ifdef VHOST_ICC_UNROLL_PRAGMA
#define vhost_for_each_try_unroll(iter, val, size) _Pragma("unroll (4)") \
	for (iter = val; iter < size; iter++)
#endif

#ifndef vhost_for_each_try_unroll
#define vhost_for_each_try_unroll(iter, val, num) \
	for (iter = val; iter < num; iter++)
#endif
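
/*
 * Illustrative usage only: batched processing typically iterates over
 * PACKED_BATCH_SIZE descriptors with this helper, letting the compiler
 * unroll the loop where one of the pragmas above is available:
 *
 *	uint16_t i;
 *
 *	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
 *		// per-descriptor work, e.g. availability or flag checks
 *	}
 */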

struct virtio_net;
struct vhost_virtqueue;

typedef void (*vhost_iotlb_remove_notify)(uint64_t addr, uint64_t off, uint64_t size);

typedef int (*vhost_iotlb_miss_cb)(struct virtio_net *dev, uint64_t iova, uint8_t perm);

typedef int (*vhost_vring_inject_irq_cb)(struct virtio_net *dev, struct vhost_virtqueue *vq);

/**
 * Structure that contains backend-specific ops.
 */
struct vhost_backend_ops {
	vhost_iotlb_remove_notify iotlb_remove_notify;
	vhost_iotlb_miss_cb iotlb_miss;
	vhost_vring_inject_irq_cb inject_irq;
};
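
/*
 * A backend (e.g. vhost-user or VDUSE) provides these callbacks when it
 * creates a device. A minimal sketch, with hypothetical callback
 * implementations, would be:
 *
 *	static struct vhost_backend_ops example_backend_ops = {
 *		.iotlb_remove_notify = example_iotlb_remove_notify,
 *		.iotlb_miss = example_iotlb_miss,
 *		.inject_irq = example_inject_irq,
 *	};
 *
 *	// Passed to vhost_new_device(), declared later in this header.
 *	int vid = vhost_new_device(&example_backend_ops);
 */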

/**
 * Structure that contains the buffer address, length and descriptor
 * index from the vring, used for scatter RX.
 */
struct buf_vector {
	uint64_t buf_iova;
	uint64_t buf_addr;
	uint32_t buf_len;
	uint32_t desc_idx;
};

/*
 * Structure that contains the info for each batched memory copy.
 */
struct batch_copy_elem {
	void *dst;
	void *src;
	uint32_t len;
	uint64_t log_addr;
};

/*
 * Structure that contains the info for batched dirty logging.
 */
struct log_cache_entry {
	uint32_t offset;
	unsigned long val;
};
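
/*
 * Each entry caches one word of the dirty log bitmap: 'offset' is the
 * word index within the log, and 'val' accumulates the dirty page bits
 * to be OR-ed into the log when the cache is flushed. A sketch of the
 * mapping, assuming 4 KiB log pages and 64-bit words:
 *
 *	uint64_t page = addr / 4096;	// dirty page number
 *	uint32_t offset = page / 64;	// bitmap word index
 *	uint32_t bit = page % 64;	// bit within that word
 *
 *	entry->offset = offset;
 *	entry->val |= 1UL << bit;
 */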

struct vring_used_elem_packed {
	uint16_t id;
	uint16_t flags;
	uint32_t len;
	uint32_t count;
};

/**
 * Virtqueue statistics
 */
struct virtqueue_stats {
	uint64_t packets;
	uint64_t bytes;
	uint64_t multicast;
	uint64_t broadcast;
	/* Size bins in array per RFC 2819: undersized [0], 64 [1], etc. */
	uint64_t size_bins[8];
	uint64_t iotlb_hits;
	uint64_t iotlb_misses;
	uint64_t inflight_submitted;
	uint64_t inflight_completed;
	uint64_t mbuf_alloc_failed;
	uint64_t guest_notifications_suppressed;
	/* Counters below are atomic, and should be incremented as such. */
	RTE_ATOMIC(uint64_t) guest_notifications;
	RTE_ATOMIC(uint64_t) guest_notifications_offloaded;
	RTE_ATOMIC(uint64_t) guest_notifications_error;
};
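
/*
 * A sketch of how a packet length could be mapped to 'size_bins'
 * following the RFC 2819 buckets (illustrative only, not the datapath
 * implementation):
 *
 *	static inline uint32_t pkt_len_to_bin(uint32_t len)
 *	{
 *		if (len < 64)
 *			return 0;	// undersized
 *		if (len == 64)
 *			return 1;
 *		if (len <= 127)
 *			return 2;
 *		if (len <= 255)
 *			return 3;
 *		if (len <= 511)
 *			return 4;
 *		if (len <= 1023)
 *			return 5;
 *		if (len <= 1518)
 *			return 6;
 *		return 7;		// oversized
 *	}
 */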

/**
 * iovec
 */
struct vhost_iovec {
	void *src_addr;
	void *dst_addr;
	size_t len;
};

/**
 * iovec iterator
 */
struct vhost_iov_iter {
	/** pointer to the iovec array */
	struct vhost_iovec *iov;
	/** number of iovecs in this iterator */
	unsigned long nr_segs;
};

struct async_dma_vchan_info {
	/* circular array to track whether each packet's copies have completed */
	bool **pkts_cmpl_flag_addr;

	/* max elements in 'pkts_cmpl_flag_addr' */
	uint16_t ring_size;
	/* ring index mask for 'pkts_cmpl_flag_addr' */
	uint16_t ring_mask;

	/**
	 * DMA virtual channel lock. Although DMA virtual channels can be
	 * bound to data plane threads, the vhost control plane thread may
	 * call data plane functions too, causing DMA device contention.
	 *
	 * For example, in a VM exit case, the vhost control plane thread
	 * needs to clear in-flight packets before disabling a vring, but
	 * another data plane thread could be enqueuing packets to the same
	 * vring with the same DMA virtual channel. As dmadev PMD functions
	 * are lock-free, the control plane and data plane threads could
	 * operate on the same DMA virtual channel at the same time.
	 */
	rte_spinlock_t dma_lock;
};
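
/*
 * A minimal sketch of the locking scheme described above: any thread
 * issuing copies on a DMA virtual channel takes 'dma_lock' around the
 * dmadev calls ('dma_id' and 'vchan_id' are assumed to come from the
 * caller; dma_copy_track is declared below):
 *
 *	struct async_dma_vchan_info *info =
 *		&dma_copy_track[dma_id].vchans[vchan_id];
 *
 *	rte_spinlock_lock(&info->dma_lock);
 *	rte_dma_copy(dma_id, vchan_id, src_iova, dst_iova, copy_len, 0);
 *	rte_dma_submit(dma_id, vchan_id);
 *	rte_spinlock_unlock(&info->dma_lock);
 */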

struct async_dma_info {
	struct async_dma_vchan_info *vchans;
	/* number of registered virtual channels */
	uint16_t nr_vchans;
};

extern struct async_dma_info dma_copy_track[RTE_DMADEV_DEFAULT_MAX];

/**
 * inflight async packet information
 */
struct async_inflight_info {
	struct rte_mbuf *mbuf;
	uint16_t descs; /* num of descs inflight */
	uint16_t nr_buffers; /* num of buffers inflight for packed ring */
	struct virtio_net_hdr nethdr;
};

struct vhost_async {
	struct vhost_iov_iter iov_iter[VHOST_MAX_ASYNC_IT];
	struct vhost_iovec iovec[VHOST_MAX_ASYNC_VEC];
	uint16_t iter_idx;
	uint16_t iovec_idx;

	/* data transfer status */
	struct async_inflight_info *pkts_info;
	/**
	 * Packet reorder array. "true" indicates that the DMA device
	 * has completed all copies for the packet.
	 *
	 * Note that this array could be written by multiple threads
	 * simultaneously. For example, if thread0 and thread1 receive
	 * packets from the NIC and then enqueue them to vring0 and
	 * vring1 with their own DMA devices DMA0 and DMA1, it is
	 * possible for thread0 to get completed copies belonging to
	 * vring1 from DMA0 while thread0 is calling
	 * rte_vhost_poll_enqueue_completed() for vring0 and thread1 is
	 * calling rte_vhost_submit_enqueue_burst() for vring1. In this
	 * case, vq->access_lock cannot protect pkts_cmpl_flag of vring1.
	 *
	 * However, since offloading is done on a per-packet basis, each
	 * packet flag will only be written by one thread. And a single
	 * byte write is atomic, so no lock on pkts_cmpl_flag is needed.
	 */
	bool *pkts_cmpl_flag;
	uint16_t pkts_idx;
	uint16_t pkts_inflight_n;
	union {
		struct vring_used_elem  *descs_split;
		struct vring_used_elem_packed *buffers_packed;
	};
	union {
		uint16_t desc_idx_split;
		uint16_t buffer_idx_packed;
	};
	union {
		uint16_t last_desc_idx_split;
		uint16_t last_buffer_idx_packed;
	};
};
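
/*
 * Completion polling walks 'pkts_cmpl_flag' in submission order, and
 * the array is sized to the vring, so indices wrap modulo the ring
 * size. A sketch, assuming 'from' is the oldest in-flight slot and
 * 'vq_size' is the ring size:
 *
 *	uint16_t nr_done = 0;
 *
 *	while (nr_done < async->pkts_inflight_n) {
 *		uint16_t slot = (from + nr_done) % vq_size;
 *
 *		if (!async->pkts_cmpl_flag[slot])
 *			break;		// preserve packet ordering
 *		async->pkts_cmpl_flag[slot] = false;
 *		nr_done++;
 *	}
 */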

#define VHOST_RECONNECT_VERSION		0x0
#define VHOST_MAX_VRING			0x100
#define VHOST_MAX_QUEUE_PAIRS		0x80

struct __rte_cache_aligned vhost_reconnect_vring {
	uint16_t last_avail_idx;
	bool avail_wrap_counter;
};

struct vhost_reconnect_data {
	uint32_t version;
	uint64_t features;
	uint8_t status;
	struct virtio_net_config config;
	uint32_t nr_vrings;
	struct vhost_reconnect_vring vring[VHOST_MAX_VRING];
};

/**
 * Structure that contains variables relevant to RX/TX virtqueues.
 */
struct __rte_cache_aligned vhost_virtqueue {
	union {
		struct vring_desc	*desc;
		struct vring_packed_desc   *desc_packed;
	};
	union {
		struct vring_avail	*avail;
		struct vring_packed_desc_event *driver_event;
	};
	union {
		struct vring_used	*used;
		struct vring_packed_desc_event *device_event;
	};
	uint16_t		size;

	uint16_t		last_avail_idx;
	uint16_t		last_used_idx;
	/* Last used index we notified to the front end. */
	uint16_t		signalled_used;
	bool			signalled_used_valid;
#define VIRTIO_INVALID_EVENTFD		(-1)
#define VIRTIO_UNINITIALIZED_EVENTFD	(-2)

	bool			enabled;
	/* Protected by vq->access_lock */
	bool			access_ok __rte_guarded_var;
	bool			ready;

	rte_rwlock_t		access_lock;

	union {
		struct vring_used_elem  *shadow_used_split;
		struct vring_used_elem_packed *shadow_used_packed;
	};
	uint16_t		shadow_used_idx;
	/* Records the latest cache-aligned desc index for packed ring enqueue */
	uint16_t		shadow_aligned_idx;
	/* Records the first dequeued desc index for the packed ring */
	uint16_t		shadow_last_used_idx;

	uint16_t		batch_copy_nb_elems;
	struct batch_copy_elem	*batch_copy_elems;
	int			numa_node;
	bool			used_wrap_counter;
	bool			avail_wrap_counter;

	uint16_t		log_cache_nb_elem;
	/* Physical address of the used ring, for logging */
	uint64_t		log_guest_addr;
	struct log_cache_entry	*log_cache;

	rte_rwlock_t	iotlb_lock;

	/* Used to notify the guest (trigger interrupt) */
	int			callfd;
	/* Currently unused as polling mode is enabled */
	int			kickfd;

	/* Index of this vq in dev->virtqueue[] */
	uint32_t		index;

	/* inflight shared memory info */
	union {
		struct rte_vhost_inflight_info_split *inflight_split;
		struct rte_vhost_inflight_info_packed *inflight_packed;
	};
	struct rte_vhost_resubmit_info *resubmit_inflight;
	uint64_t		global_counter;

	struct vhost_async	*async __rte_guarded_var;

	int			notif_enable;
#define VIRTIO_UNINITIALIZED_NOTIF	(-1)

	struct vhost_vring_addr ring_addrs;
	struct virtqueue_stats	stats;

	RTE_ATOMIC(bool) irq_pending;
	struct vhost_reconnect_vring *reconnect_log;
};

/* Virtio device status as per Virtio specification */
#define VIRTIO_DEVICE_STATUS_RESET		0x00
#define VIRTIO_DEVICE_STATUS_ACK		0x01
#define VIRTIO_DEVICE_STATUS_DRIVER		0x02
#define VIRTIO_DEVICE_STATUS_DRIVER_OK		0x04
#define VIRTIO_DEVICE_STATUS_FEATURES_OK	0x08
#define VIRTIO_DEVICE_STATUS_DEV_NEED_RESET	0x40
#define VIRTIO_DEVICE_STATUS_FAILED		0x80
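
/*
 * For example, a device is only fully initialized once the front end
 * has set DRIVER_OK:
 *
 *	if (dev->status & VIRTIO_DEVICE_STATUS_DRIVER_OK)
 *		... // the datapath may run
 */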

/* Declare IOMMU related bits for older kernels */
#ifndef VIRTIO_F_IOMMU_PLATFORM

#define VIRTIO_F_IOMMU_PLATFORM 33

struct vhost_iotlb_msg {
	__u64 iova;
	__u64 size;
	__u64 uaddr;
#define VHOST_ACCESS_RO      0x1
#define VHOST_ACCESS_WO      0x2
#define VHOST_ACCESS_RW      0x3
	__u8 perm;
#define VHOST_IOTLB_MISS           1
#define VHOST_IOTLB_UPDATE         2
#define VHOST_IOTLB_INVALIDATE     3
#define VHOST_IOTLB_ACCESS_FAIL    4
	__u8 type;
};

#define VHOST_IOTLB_MSG 0x1

struct vhost_msg {
	int type;
	union {
		struct vhost_iotlb_msg iotlb;
		__u8 padding[64];
	};
};
#endif

/*
 * Define virtio 1.0 for older kernels
 */
#ifndef VIRTIO_F_VERSION_1
#define VIRTIO_F_VERSION_1 32
#endif

/* Declare packed ring related bits for older kernels */
#ifndef VIRTIO_F_RING_PACKED

#define VIRTIO_F_RING_PACKED 34

struct vring_packed_desc {
	uint64_t addr;
	uint32_t len;
	uint16_t id;
	uint16_t flags;
};

struct vring_packed_desc_event {
	uint16_t off_wrap;
	uint16_t flags;
};
#endif

/*
 * Declare the packed ring defines below unconditionally,
 * as the kernel header might use different names.
 */
#define VRING_DESC_F_AVAIL	(1ULL << 7)
#define VRING_DESC_F_USED	(1ULL << 15)

#define VRING_EVENT_F_ENABLE 0x0
#define VRING_EVENT_F_DISABLE 0x1
#define VRING_EVENT_F_DESC 0x2

/*
 * Available and used descs are in the same order
 */
#ifndef VIRTIO_F_IN_ORDER
#define VIRTIO_F_IN_ORDER      35
#endif

/* Features supported by this builtin vhost-user net driver. */
#define VIRTIO_NET_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \
				(1ULL << VIRTIO_F_ANY_LAYOUT) | \
				(1ULL << VIRTIO_NET_F_CTRL_VQ) | \
				(1ULL << VIRTIO_NET_F_MQ)      | \
				(1ULL << VIRTIO_F_VERSION_1)   | \
				(1ULL << VIRTIO_NET_F_GSO) | \
				(1ULL << VIRTIO_NET_F_HOST_TSO4) | \
				(1ULL << VIRTIO_NET_F_HOST_TSO6) | \
				(1ULL << VIRTIO_NET_F_HOST_UFO) | \
				(1ULL << VIRTIO_NET_F_HOST_ECN) | \
				(1ULL << VIRTIO_NET_F_CSUM)    | \
				(1ULL << VIRTIO_NET_F_GUEST_CSUM) | \
				(1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
				(1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
				(1ULL << VIRTIO_NET_F_GUEST_UFO) | \
				(1ULL << VIRTIO_NET_F_GUEST_ECN) | \
				(1ULL << VIRTIO_RING_F_INDIRECT_DESC) | \
				(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
				(1ULL << VIRTIO_F_IN_ORDER) | \
				(1ULL << VIRTIO_F_IOMMU_PLATFORM))

struct guest_page {
	uint64_t guest_phys_addr;
	uint64_t host_iova;
	uint64_t host_user_addr;
	uint64_t size;
};

struct inflight_mem_info {
	int		fd;
	void		*addr;
	uint64_t	size;
};

/**
 * Device structure that contains all configuration information
 * related to the device.
 */
struct __rte_cache_aligned virtio_net {
	/* Frontend (QEMU) memory and memory region information */
	struct rte_vhost_memory	*mem;
	uint64_t		features;
	uint64_t		protocol_features;
	int			vid;
	uint32_t		flags;
	uint16_t		vhost_hlen;
	/* to tell if we need to broadcast a RARP packet */
	RTE_ATOMIC(int16_t)	broadcast_rarp;
	uint32_t		nr_vring;
	int			async_copy;

	int			extbuf;
	int			linearbuf;
	struct vhost_virtqueue	*virtqueue[VHOST_MAX_QUEUE_PAIRS * 2];

	rte_rwlock_t	iotlb_pending_lock;
	struct vhost_iotlb_entry *iotlb_pool;
	TAILQ_HEAD(, vhost_iotlb_entry) iotlb_list;
	TAILQ_HEAD(, vhost_iotlb_entry) iotlb_pending_list;
	int				iotlb_cache_nr;
	rte_spinlock_t	iotlb_free_lock;
	SLIST_HEAD(, vhost_iotlb_entry) iotlb_free_list;

	struct inflight_mem_info *inflight_info;
#define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
	char			ifname[IF_NAME_SZ];
	uint64_t		log_size;
	uint64_t		log_base;
	uint64_t		log_addr;
	struct rte_ether_addr	mac;
	uint16_t		mtu;
	uint8_t			status;

	struct rte_vhost_device_ops const *notify_ops;

	uint32_t		nr_guest_pages;
	uint32_t		max_guest_pages;
	struct guest_page       *guest_pages;

	int			backend_req_fd;
	rte_spinlock_t		backend_req_lock;

	int			postcopy_ufd;
	int			postcopy_listening;
	int			vduse_ctrl_fd;
	int			vduse_dev_fd;

	struct vhost_virtqueue	*cvq;

	struct rte_vdpa_device *vdpa_dev;

	/* context data for the external message handlers */
	void			*extern_data;
	/* pre and post vhost-user message handlers for the device */
	struct rte_vhost_user_extern_ops extern_ops;

	struct vhost_backend_ops *backend_ops;

	struct vhost_reconnect_data *reconnect_log;
};

static __rte_always_inline void
vhost_virtqueue_reconnect_log_split(struct vhost_virtqueue *vq)
{
	if (vq->reconnect_log != NULL)
		vq->reconnect_log->last_avail_idx = vq->last_avail_idx;
}

static __rte_always_inline void
vhost_virtqueue_reconnect_log_packed(struct vhost_virtqueue *vq)
{
	if (vq->reconnect_log != NULL) {
		vq->reconnect_log->last_avail_idx = vq->last_avail_idx;
		vq->reconnect_log->avail_wrap_counter = vq->avail_wrap_counter;
	}
}

static inline void
vq_assert_lock__(struct virtio_net *dev, struct vhost_virtqueue *vq, const char *func)
	__rte_assert_exclusive_lock(&vq->access_lock)
{
	if (unlikely(!rte_rwlock_write_is_locked(&vq->access_lock)))
		rte_panic("VHOST_CONFIG: (%s) %s() called without access lock taken.\n",
			dev->ifname, func);
}
#define vq_assert_lock(dev, vq) vq_assert_lock__(dev, vq, __func__)

static __rte_always_inline bool
vq_is_packed(struct virtio_net *dev)
{
	return dev->features & (1ull << VIRTIO_F_RING_PACKED);
}

static inline bool
desc_is_avail(struct vring_packed_desc *desc, bool wrap_counter)
{
	uint16_t flags = rte_atomic_load_explicit((unsigned short __rte_atomic *)&desc->flags,
		rte_memory_order_acquire);

	return wrap_counter == !!(flags & VRING_DESC_F_AVAIL) &&
		wrap_counter != !!(flags & VRING_DESC_F_USED);
}

static inline void
vq_inc_last_used_packed(struct vhost_virtqueue *vq, uint16_t num)
{
	vq->last_used_idx += num;
	if (vq->last_used_idx >= vq->size) {
		vq->used_wrap_counter ^= 1;
		vq->last_used_idx -= vq->size;
	}
}

static inline void
vq_inc_last_avail_packed(struct vhost_virtqueue *vq, uint16_t num)
{
	vq->last_avail_idx += num;
	if (vq->last_avail_idx >= vq->size) {
		vq->avail_wrap_counter ^= 1;
		vq->last_avail_idx -= vq->size;
	}
	vhost_virtqueue_reconnect_log_packed(vq);
}

void __vhost_log_cache_write(struct virtio_net *dev,
		struct vhost_virtqueue *vq,
		uint64_t addr, uint64_t len);
void __vhost_log_cache_write_iova(struct virtio_net *dev,
		struct vhost_virtqueue *vq,
		uint64_t iova, uint64_t len)
	__rte_shared_locks_required(&vq->iotlb_lock);
void __vhost_log_cache_sync(struct virtio_net *dev,
		struct vhost_virtqueue *vq);

void __vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len);
void __vhost_log_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
			    uint64_t iova, uint64_t len)
	__rte_shared_locks_required(&vq->iotlb_lock);

static __rte_always_inline void
vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
{
	if (unlikely(dev->features & (1ULL << VHOST_F_LOG_ALL)))
		__vhost_log_write(dev, addr, len);
}

static __rte_always_inline void
vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
	if (unlikely(dev->features & (1ULL << VHOST_F_LOG_ALL)))
		__vhost_log_cache_sync(dev, vq);
}

static __rte_always_inline void
vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
			uint64_t addr, uint64_t len)
{
	if (unlikely(dev->features & (1ULL << VHOST_F_LOG_ALL)))
		__vhost_log_cache_write(dev, vq, addr, len);
}

static __rte_always_inline void
vhost_log_cache_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq,
			uint64_t offset, uint64_t len)
{
	if (unlikely(dev->features & (1ULL << VHOST_F_LOG_ALL))) {
		if (unlikely(vq->log_guest_addr == 0))
			return;
		__vhost_log_cache_write(dev, vq, vq->log_guest_addr + offset,
					len);
	}
}

static __rte_always_inline void
vhost_log_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq,
		     uint64_t offset, uint64_t len)
{
	if (unlikely(dev->features & (1ULL << VHOST_F_LOG_ALL))) {
		if (unlikely(vq->log_guest_addr == 0))
			return;
		__vhost_log_write(dev, vq->log_guest_addr + offset, len);
	}
}

static __rte_always_inline void
vhost_log_cache_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
			   uint64_t iova, uint64_t len)
	__rte_shared_locks_required(&vq->iotlb_lock)
{
	if (likely(!(dev->features & (1ULL << VHOST_F_LOG_ALL))))
		return;

	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
		__vhost_log_cache_write_iova(dev, vq, iova, len);
	else
		__vhost_log_cache_write(dev, vq, iova, len);
}

static __rte_always_inline void
vhost_log_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
			   uint64_t iova, uint64_t len)
	__rte_shared_locks_required(&vq->iotlb_lock)
{
	if (likely(!(dev->features & (1ULL << VHOST_F_LOG_ALL))))
		return;

	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
		__vhost_log_write_iova(dev, vq, iova, len);
	else
		__vhost_log_write(dev, iova, len);
}

extern int vhost_config_log_level;
#define RTE_LOGTYPE_VHOST_CONFIG vhost_config_log_level
extern int vhost_data_log_level;
#define RTE_LOGTYPE_VHOST_DATA vhost_data_log_level

#define VHOST_CONFIG_LOG(prefix, level, ...) \
	RTE_LOG_LINE_PREFIX(level, VHOST_CONFIG, "(%s) ", prefix, __VA_ARGS__)

#define VHOST_DATA_LOG(prefix, level, ...) \
	RTE_LOG_DP_LINE_PREFIX(level, VHOST_DATA, "(%s) ", prefix, __VA_ARGS__)
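
/*
 * Both macros prefix the message with an interface name, e.g.:
 *
 *	VHOST_CONFIG_LOG(dev->ifname, INFO,
 *		"negotiated Virtio features: 0x%" PRIx64, dev->features);
 *	VHOST_DATA_LOG(dev->ifname, DEBUG, "failed to copy descriptor");
 *
 * VHOST_DATA_LOG uses the data path logging macros, which can be
 * compiled out, so it is safe to use on hot paths.
 */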

#ifdef RTE_LIBRTE_VHOST_DEBUG
#define VHOST_MAX_PRINT_BUFF 6072
#define PRINT_PACKET(device, addr, size, header) do { \
	char *pkt_addr = (char *)(addr); \
	unsigned int index; \
	char packet[VHOST_MAX_PRINT_BUFF]; \
	\
	if ((header)) \
		snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%d) Header size %d: ", (device->vid), (size)); \
	else \
		snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%d) Packet size %d: ", (device->vid), (size)); \
	for (index = 0; index < (size); index++) { \
		snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), \
			"%02hhx ", pkt_addr[index]); \
	} \
	snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), "\n"); \
	\
	RTE_LOG_DP(DEBUG, VHOST_DATA, "(%s) %s", device->ifname, packet); \
} while (0)
#else
#define PRINT_PACKET(device, addr, size, header) do {} while (0)
#endif

extern struct virtio_net *vhost_devices[RTE_MAX_VHOST_DEVICE];

#define VHOST_BINARY_SEARCH_THRESH 256

static __rte_always_inline int guest_page_addrcmp(const void *p1,
						const void *p2)
{
	const struct guest_page *page1 = (const struct guest_page *)p1;
	const struct guest_page *page2 = (const struct guest_page *)p2;

	if (page1->guest_phys_addr > page2->guest_phys_addr)
		return 1;
	if (page1->guest_phys_addr < page2->guest_phys_addr)
		return -1;

	return 0;
}

static __rte_always_inline int guest_page_rangecmp(const void *p1, const void *p2)
{
	const struct guest_page *page1 = (const struct guest_page *)p1;
	const struct guest_page *page2 = (const struct guest_page *)p2;

	if (page1->guest_phys_addr >= page2->guest_phys_addr) {
		if (page1->guest_phys_addr < page2->guest_phys_addr + page2->size)
			return 0;
		else
			return 1;
	} else
		return -1;
}

static __rte_always_inline rte_iova_t
gpa_to_first_hpa(struct virtio_net *dev, uint64_t gpa,
	uint64_t gpa_size, uint64_t *hpa_size)
{
	uint32_t i;
	struct guest_page *page;
	struct guest_page key;

	*hpa_size = gpa_size;
	if (dev->nr_guest_pages >= VHOST_BINARY_SEARCH_THRESH) {
		key.guest_phys_addr = gpa;
		page = bsearch(&key, dev->guest_pages, dev->nr_guest_pages,
			       sizeof(struct guest_page), guest_page_rangecmp);
		if (page) {
			if (gpa + gpa_size <=
					page->guest_phys_addr + page->size) {
				return gpa - page->guest_phys_addr +
					page->host_iova;
			} else if (gpa < page->guest_phys_addr +
						page->size) {
				*hpa_size = page->guest_phys_addr +
					page->size - gpa;
				return gpa - page->guest_phys_addr +
					page->host_iova;
			}
		}
	} else {
		for (i = 0; i < dev->nr_guest_pages; i++) {
			page = &dev->guest_pages[i];

			if (gpa >= page->guest_phys_addr) {
				if (gpa + gpa_size <=
					page->guest_phys_addr + page->size) {
					return gpa - page->guest_phys_addr +
						page->host_iova;
				} else if (gpa < page->guest_phys_addr +
							page->size) {
					*hpa_size = page->guest_phys_addr +
						page->size - gpa;
					return gpa - page->guest_phys_addr +
						page->host_iova;
				}
			}
		}
	}

	*hpa_size = 0;
	return 0;
}

/* Convert guest physical address to host physical address */
static __rte_always_inline rte_iova_t
gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size)
{
	rte_iova_t hpa;
	uint64_t hpa_size;

	hpa = gpa_to_first_hpa(dev, gpa, size, &hpa_size);
	return hpa_size == size ? hpa : 0;
}

static __rte_always_inline uint64_t
hva_to_gpa(struct virtio_net *dev, uint64_t vva, uint64_t len)
{
	struct rte_vhost_mem_region *r;
	uint32_t i;

	if (unlikely(!dev || !dev->mem))
		return 0;

	for (i = 0; i < dev->mem->nregions; i++) {
		r = &dev->mem->regions[i];

		if (vva >= r->host_user_addr &&
		    vva + len <  r->host_user_addr + r->size) {
			return r->guest_phys_addr + vva - r->host_user_addr;
		}
	}
	return 0;
}

static __rte_always_inline struct virtio_net *
get_device(int vid)
{
	struct virtio_net *dev = NULL;

	if (likely(vid >= 0 && vid < RTE_MAX_VHOST_DEVICE))
		dev = vhost_devices[vid];

	if (unlikely(!dev)) {
		VHOST_CONFIG_LOG("device", ERR, "(%d) device not found.", vid);
	}

	return dev;
}

int vhost_new_device(struct vhost_backend_ops *ops);
void cleanup_device(struct virtio_net *dev, int destroy);
void reset_device(struct virtio_net *dev);
void vhost_destroy_device(int);
void vhost_destroy_device_notify(struct virtio_net *dev);

void cleanup_vq(struct vhost_virtqueue *vq, int destroy);
void cleanup_vq_inflight(struct virtio_net *dev, struct vhost_virtqueue *vq);
void free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq);

int alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx);

void vhost_attach_vdpa_device(int vid, struct rte_vdpa_device *dev);

void vhost_set_ifname(int, const char *if_name, unsigned int if_len);
void vhost_setup_virtio_net(int vid, bool enable, bool legacy_ol_flags, bool stats_enabled,
	bool support_iommu);
void vhost_enable_extbuf(int vid);
void vhost_enable_linearbuf(int vid);
int vhost_enable_guest_notification(struct virtio_net *dev,
		struct vhost_virtqueue *vq, int enable);

struct rte_vhost_device_ops const *vhost_driver_callback_get(const char *path);

/*
 * Backend-specific cleanup.
 *
 * TODO: fix it; we have one backend now
 */
void vhost_backend_cleanup(struct virtio_net *dev);

uint64_t __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
			uint64_t iova, uint64_t *len, uint8_t perm)
	__rte_shared_locks_required(&vq->iotlb_lock);
void *vhost_alloc_copy_ind_table(struct virtio_net *dev,
			struct vhost_virtqueue *vq,
			uint64_t desc_addr, uint64_t desc_len)
	__rte_shared_locks_required(&vq->iotlb_lock);
int vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
	__rte_exclusive_locks_required(&vq->access_lock)
	__rte_shared_locks_required(&vq->iotlb_lock);
uint64_t translate_log_addr(struct virtio_net *dev, struct vhost_virtqueue *vq,
		uint64_t log_addr)
	__rte_shared_locks_required(&vq->iotlb_lock);
void vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq)
	__rte_exclusive_locks_required(&vq->access_lock);

static __rte_always_inline uint64_t
vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
			uint64_t iova, uint64_t *len, uint8_t perm)
	__rte_shared_locks_required(&vq->iotlb_lock)
{
	if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
		return rte_vhost_va_from_guest_pa(dev->mem, iova, len);

	return __vhost_iova_to_vva(dev, vq, iova, len, perm);
}

#define vhost_avail_event(vr) \
	(*(volatile uint16_t*)&(vr)->used->ring[(vr)->size])
#define vhost_used_event(vr) \
	(*(volatile uint16_t*)&(vr)->avail->ring[(vr)->size])

/*
 * The following is used with VIRTIO_RING_F_EVENT_IDX.
 * Assuming a given event_idx value from the other side, if we have
 * just incremented index from old to new_idx, should we trigger an
 * event?
 */
static __rte_always_inline int
vhost_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old)
{
	return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old);
}
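
/*
 * Worked example: if the other side wrote event_idx = 5 and we have
 * just moved the index from old = 4 to new_idx = 6, then
 * (uint16_t)(6 - 5 - 1) = 0 is below (uint16_t)(6 - 4) = 2, so an
 * event must be triggered. With event_idx = 8 instead,
 * (uint16_t)(6 - 8 - 1) wraps to 65533, which is not below 2, so no
 * event is needed yet. The unsigned wrap-around keeps the comparison
 * correct across 16-bit index overflow.
 */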

static __rte_always_inline void
vhost_vring_inject_irq(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
	bool expected = false;

	if (dev->notify_ops->guest_notify) {
		if (rte_atomic_compare_exchange_strong_explicit(&vq->irq_pending, &expected, true,
				  rte_memory_order_release, rte_memory_order_relaxed)) {
			if (dev->notify_ops->guest_notify(dev->vid, vq->index)) {
				if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
					rte_atomic_fetch_add_explicit(
						&vq->stats.guest_notifications_offloaded,
						1, rte_memory_order_relaxed);
				return;
			}

			/* Offloading failed, fall back to direct IRQ injection */
			rte_atomic_store_explicit(&vq->irq_pending, false,
				rte_memory_order_release);
		} else {
			vq->stats.guest_notifications_suppressed++;
			return;
		}
	}

	if (dev->backend_ops->inject_irq(dev, vq)) {
		if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
			rte_atomic_fetch_add_explicit(&vq->stats.guest_notifications_error,
				1, rte_memory_order_relaxed);
		return;
	}

	if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
		rte_atomic_fetch_add_explicit(&vq->stats.guest_notifications,
			1, rte_memory_order_relaxed);
	if (dev->notify_ops->guest_notified)
		dev->notify_ops->guest_notified(dev->vid);
}

static __rte_always_inline void
vhost_vring_call_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
	/* Flush used->idx update before we read avail->flags. */
	rte_atomic_thread_fence(rte_memory_order_seq_cst);

	/* Don't kick the guest if we haven't reached the index it specified. */
	if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) {
		uint16_t old = vq->signalled_used;
		uint16_t new = vq->last_used_idx;
		bool signalled_used_valid = vq->signalled_used_valid;

		vq->signalled_used = new;
		vq->signalled_used_valid = true;

		VHOST_DATA_LOG(dev->ifname, DEBUG,
			"%s: used_event_idx=%d, old=%d, new=%d",
			__func__, vhost_used_event(vq), old, new);

		if (vhost_need_event(vhost_used_event(vq), new, old) ||
				unlikely(!signalled_used_valid))
			vhost_vring_inject_irq(dev, vq);
	} else {
		/* Kick the guest if necessary. */
		if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
			vhost_vring_inject_irq(dev, vq);
	}
}

static __rte_always_inline void
vhost_vring_call_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
	uint16_t old, new, off, off_wrap;
	bool signalled_used_valid, kick = false;

	/* Flush used desc update. */
	rte_atomic_thread_fence(rte_memory_order_seq_cst);

	if (!(dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))) {
		if (vq->driver_event->flags !=
				VRING_EVENT_F_DISABLE)
			kick = true;
		goto kick;
	}

	old = vq->signalled_used;
	new = vq->last_used_idx;
	vq->signalled_used = new;
	signalled_used_valid = vq->signalled_used_valid;
	vq->signalled_used_valid = true;

	if (vq->driver_event->flags != VRING_EVENT_F_DESC) {
		if (vq->driver_event->flags != VRING_EVENT_F_DISABLE)
			kick = true;
		goto kick;
	}

	if (unlikely(!signalled_used_valid)) {
		kick = true;
		goto kick;
	}

	rte_atomic_thread_fence(rte_memory_order_acquire);

	off_wrap = vq->driver_event->off_wrap;
	off = off_wrap & ~(1 << 15);

	if (new <= old)
		old -= vq->size;

	if (vq->used_wrap_counter != off_wrap >> 15)
		off -= vq->size;

	if (vhost_need_event(off, new, old))
		kick = true;
kick:
	if (kick)
		vhost_vring_inject_irq(dev, vq);
}

static __rte_always_inline void
free_ind_table(void *idesc)
{
	rte_free(idesc);
}

static __rte_always_inline void
restore_mbuf(struct rte_mbuf *m)
{
	uint32_t mbuf_size, priv_size;

	while (m) {
		priv_size = rte_pktmbuf_priv_size(m->pool);
		mbuf_size = sizeof(struct rte_mbuf) + priv_size;
		/* start of buffer is after the mbuf structure and priv data */

		m->buf_addr = (char *)m + mbuf_size;
		rte_mbuf_iova_set(m, rte_mempool_virt2iova(m) + mbuf_size);
		m = m->next;
	}
}

static __rte_always_inline bool
mbuf_is_consumed(struct rte_mbuf *m)
{
	while (m) {
		if (rte_mbuf_refcnt_read(m) > 1)
			return false;
		m = m->next;
	}

	return true;
}

void mem_set_dump(struct virtio_net *dev, void *ptr, size_t size, bool enable, uint64_t alignment);

#endif /* _VHOST_NET_CDEV_H_ */