xref: /dpdk/lib/vhost/virtio_net.c (revision 6d7e741be18ab1e6ecce46edb2516318305c3c73)
199a2dd95SBruce Richardson /* SPDX-License-Identifier: BSD-3-Clause
299a2dd95SBruce Richardson  * Copyright(c) 2010-2016 Intel Corporation
399a2dd95SBruce Richardson  */
499a2dd95SBruce Richardson 
599a2dd95SBruce Richardson #include <stdint.h>
699a2dd95SBruce Richardson #include <stdbool.h>
799a2dd95SBruce Richardson #include <linux/virtio_net.h>
899a2dd95SBruce Richardson 
999a2dd95SBruce Richardson #include <rte_mbuf.h>
1099a2dd95SBruce Richardson #include <rte_memcpy.h>
11ca7036b4SDavid Marchand #include <rte_net.h>
1299a2dd95SBruce Richardson #include <rte_ether.h>
1399a2dd95SBruce Richardson #include <rte_ip.h>
1453d3f477SJiayu Hu #include <rte_dmadev.h>
1599a2dd95SBruce Richardson #include <rte_vhost.h>
1699a2dd95SBruce Richardson #include <rte_tcp.h>
1799a2dd95SBruce Richardson #include <rte_udp.h>
1899a2dd95SBruce Richardson #include <rte_sctp.h>
1999a2dd95SBruce Richardson #include <rte_arp.h>
2099a2dd95SBruce Richardson #include <rte_spinlock.h>
2199a2dd95SBruce Richardson #include <rte_malloc.h>
2299a2dd95SBruce Richardson #include <rte_vhost_async.h>
2399a2dd95SBruce Richardson 
2499a2dd95SBruce Richardson #include "iotlb.h"
2599a2dd95SBruce Richardson #include "vhost.h"
2699a2dd95SBruce Richardson 
2799a2dd95SBruce Richardson #define MAX_BATCH_LEN 256
2899a2dd95SBruce Richardson 
293753ebf0SYuan Wang static __rte_always_inline uint16_t
30fe8477ebSCheng Jiang async_poll_dequeue_completed(struct virtio_net *dev, struct vhost_virtqueue *vq,
313753ebf0SYuan Wang 		struct rte_mbuf **pkts, uint16_t count, int16_t dma_id,
323753ebf0SYuan Wang 		uint16_t vchan_id, bool legacy_ol_flags);
333753ebf0SYuan Wang 
3453d3f477SJiayu Hu /* DMA device copy operation tracking array. */
3553d3f477SJiayu Hu struct async_dma_info dma_copy_track[RTE_DMADEV_DEFAULT_MAX];
3653d3f477SJiayu Hu 
3799a2dd95SBruce Richardson static  __rte_always_inline bool
3899a2dd95SBruce Richardson rxvq_is_mergeable(struct virtio_net *dev)
3999a2dd95SBruce Richardson {
4099a2dd95SBruce Richardson 	return dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF);
4199a2dd95SBruce Richardson }
4299a2dd95SBruce Richardson 
4399a2dd95SBruce Richardson static  __rte_always_inline bool
4499a2dd95SBruce Richardson virtio_net_is_inorder(struct virtio_net *dev)
4599a2dd95SBruce Richardson {
4699a2dd95SBruce Richardson 	return dev->features & (1ULL << VIRTIO_F_IN_ORDER);
4799a2dd95SBruce Richardson }
4899a2dd95SBruce Richardson 
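/*
 * Virtio-net queue indices alternate between RX and TX (receiveq1 = 0,
 * transmitq1 = 1, ...), so RX queues use even indices and TX queues odd
 * ones. The parity check below verifies that idx matches the requested
 * direction, and idx must also be below the number of vrings.
 */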
4999a2dd95SBruce Richardson static bool
5099a2dd95SBruce Richardson is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring)
5199a2dd95SBruce Richardson {
5299a2dd95SBruce Richardson 	return (is_tx ^ (idx & 1)) == 0 && idx < nr_vring;
5399a2dd95SBruce Richardson }
5499a2dd95SBruce Richardson 
55be75dc99SMaxime Coquelin static inline void
5610be3321SMorten Brørup vhost_queue_stats_update(const struct virtio_net *dev, struct vhost_virtqueue *vq,
57be75dc99SMaxime Coquelin 		struct rte_mbuf **pkts, uint16_t count)
5803f77d66SEelco Chaudron 	__rte_shared_locks_required(&vq->access_lock)
59be75dc99SMaxime Coquelin {
60be75dc99SMaxime Coquelin 	struct virtqueue_stats *stats = &vq->stats;
61be75dc99SMaxime Coquelin 	int i;
62be75dc99SMaxime Coquelin 
63be75dc99SMaxime Coquelin 	if (!(dev->flags & VIRTIO_DEV_STATS_ENABLED))
64be75dc99SMaxime Coquelin 		return;
65be75dc99SMaxime Coquelin 
66be75dc99SMaxime Coquelin 	for (i = 0; i < count; i++) {
6710be3321SMorten Brørup 		const struct rte_ether_addr *ea;
6810be3321SMorten Brørup 		const struct rte_mbuf *pkt = pkts[i];
69be75dc99SMaxime Coquelin 		uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt);
70be75dc99SMaxime Coquelin 
71be75dc99SMaxime Coquelin 		stats->packets++;
72be75dc99SMaxime Coquelin 		stats->bytes += pkt_len;
73be75dc99SMaxime Coquelin 
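		/*
		 * Packet size histogram: bin 0 counts frames shorter than
		 * 64 bytes, bin 1 exactly 64 bytes, bins 2-5 cover 65-127,
		 * 128-255, 256-511 and 512-1023 bytes (derived from the
		 * highest set bit), and bins 6-7 cover 1024-1518 and larger
		 * frames.
		 */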
7410be3321SMorten Brørup 		if (pkt_len >= 1024)
7510be3321SMorten Brørup 			stats->size_bins[6 + (pkt_len > 1518)]++;
7610be3321SMorten Brørup 		else if (pkt_len <= 64)
7710be3321SMorten Brørup 			stats->size_bins[pkt_len >> 6]++;
78be75dc99SMaxime Coquelin 		else
7910be3321SMorten Brørup 			stats->size_bins[32UL - rte_clz32(pkt_len) - 5]++;
80be75dc99SMaxime Coquelin 
8110be3321SMorten Brørup 		ea = rte_pktmbuf_mtod(pkt, const struct rte_ether_addr *);
8210be3321SMorten Brørup 		RTE_BUILD_BUG_ON(offsetof(struct virtqueue_stats, broadcast) !=
8310be3321SMorten Brørup 				offsetof(struct virtqueue_stats, multicast) + sizeof(uint64_t));
8410be3321SMorten Brørup 		if (unlikely(rte_is_multicast_ether_addr(ea)))
8510be3321SMorten Brørup 			(&stats->multicast)[rte_is_broadcast_ether_addr(ea)]++;
86be75dc99SMaxime Coquelin 	}
87be75dc99SMaxime Coquelin }
88be75dc99SMaxime Coquelin 
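/*
 * Enqueue all segments of one packet on the given DMA virtual channel.
 * Returns the number of copies enqueued, or -1 if the channel lacks burst
 * capacity or a copy submission fails. The packet's completion flag address
 * is recorded in the ring slot of its last copy so that completion polling
 * can mark the whole packet as done.
 */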
8953d3f477SJiayu Hu static __rte_always_inline int64_t
9053d3f477SJiayu Hu vhost_async_dma_transfer_one(struct virtio_net *dev, struct vhost_virtqueue *vq,
9153d3f477SJiayu Hu 		int16_t dma_id, uint16_t vchan_id, uint16_t flag_idx,
9253d3f477SJiayu Hu 		struct vhost_iov_iter *pkt)
9303f77d66SEelco Chaudron 	__rte_shared_locks_required(&vq->access_lock)
9453d3f477SJiayu Hu {
9553d3f477SJiayu Hu 	struct async_dma_vchan_info *dma_info = &dma_copy_track[dma_id].vchans[vchan_id];
9653d3f477SJiayu Hu 	uint16_t ring_mask = dma_info->ring_mask;
9753d3f477SJiayu Hu 	static bool vhost_async_dma_copy_log;
9853d3f477SJiayu Hu 
9953d3f477SJiayu Hu 
10053d3f477SJiayu Hu 	struct vhost_iovec *iov = pkt->iov;
10153d3f477SJiayu Hu 	int copy_idx = 0;
10253d3f477SJiayu Hu 	uint32_t nr_segs = pkt->nr_segs;
10353d3f477SJiayu Hu 	uint16_t i;
10453d3f477SJiayu Hu 
10553d3f477SJiayu Hu 	if (rte_dma_burst_capacity(dma_id, vchan_id) < nr_segs)
10653d3f477SJiayu Hu 		return -1;
10753d3f477SJiayu Hu 
10853d3f477SJiayu Hu 	for (i = 0; i < nr_segs; i++) {
10953d3f477SJiayu Hu 		copy_idx = rte_dma_copy(dma_id, vchan_id, (rte_iova_t)iov[i].src_addr,
11053d3f477SJiayu Hu 				(rte_iova_t)iov[i].dst_addr, iov[i].len, RTE_DMA_OP_FLAG_LLC);
11153d3f477SJiayu Hu 		/**
11253d3f477SJiayu Hu 		 * Since all memory is pinned and the DMA vChannel
11353d3f477SJiayu Hu 		 * ring has enough space, failure should be rare.
11453d3f477SJiayu Hu 		 * If a failure does happen, the DMA device has hit
11553d3f477SJiayu Hu 		 * a serious error; in that case, stop the async
11653d3f477SJiayu Hu 		 * data path and check what has happened to the
11753d3f477SJiayu Hu 		 * DMA device.
11853d3f477SJiayu Hu 		 */

11953d3f477SJiayu Hu 		if (unlikely(copy_idx < 0)) {
12053d3f477SJiayu Hu 			if (!vhost_async_dma_copy_log) {
1210e21c7c0SDavid Marchand 				VHOST_DATA_LOG(dev->ifname, ERR,
1220e21c7c0SDavid Marchand 					"DMA copy failed for channel %d:%u",
12336c525a0SDavid Marchand 					dma_id, vchan_id);
12453d3f477SJiayu Hu 				vhost_async_dma_copy_log = true;
12553d3f477SJiayu Hu 			}
12653d3f477SJiayu Hu 			return -1;
12753d3f477SJiayu Hu 		}
12853d3f477SJiayu Hu 	}
12953d3f477SJiayu Hu 
13053d3f477SJiayu Hu 	/**
13153d3f477SJiayu Hu 	 * Only store the packet completion flag address in the last
13253d3f477SJiayu Hu 	 * copy's slot; the other slots are left as NULL.
13353d3f477SJiayu Hu 	 */
13453d3f477SJiayu Hu 	dma_info->pkts_cmpl_flag_addr[copy_idx & ring_mask] = &vq->async->pkts_cmpl_flag[flag_idx];
13553d3f477SJiayu Hu 
13653d3f477SJiayu Hu 	return nr_segs;
13753d3f477SJiayu Hu }
13853d3f477SJiayu Hu 
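/*
 * Enqueue the copies of up to nr_pkts packets on the DMA virtual channel,
 * starting from completion-flag index head_idx (wrapping at the ring size),
 * then submit them in one batch. Returns the number of packets whose
 * segments were all successfully enqueued.
 */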
13953d3f477SJiayu Hu static __rte_always_inline uint16_t
14053d3f477SJiayu Hu vhost_async_dma_transfer(struct virtio_net *dev, struct vhost_virtqueue *vq,
14153d3f477SJiayu Hu 		int16_t dma_id, uint16_t vchan_id, uint16_t head_idx,
14253d3f477SJiayu Hu 		struct vhost_iov_iter *pkts, uint16_t nr_pkts)
14303f77d66SEelco Chaudron 	__rte_shared_locks_required(&vq->access_lock)
14453d3f477SJiayu Hu {
14553d3f477SJiayu Hu 	struct async_dma_vchan_info *dma_info = &dma_copy_track[dma_id].vchans[vchan_id];
14653d3f477SJiayu Hu 	int64_t ret, nr_copies = 0;
14753d3f477SJiayu Hu 	uint16_t pkt_idx;
14853d3f477SJiayu Hu 
14953d3f477SJiayu Hu 	rte_spinlock_lock(&dma_info->dma_lock);
15053d3f477SJiayu Hu 
15153d3f477SJiayu Hu 	for (pkt_idx = 0; pkt_idx < nr_pkts; pkt_idx++) {
15253d3f477SJiayu Hu 		ret = vhost_async_dma_transfer_one(dev, vq, dma_id, vchan_id, head_idx,
15353d3f477SJiayu Hu 				&pkts[pkt_idx]);
15453d3f477SJiayu Hu 		if (unlikely(ret < 0))
15553d3f477SJiayu Hu 			break;
15653d3f477SJiayu Hu 
15753d3f477SJiayu Hu 		nr_copies += ret;
15853d3f477SJiayu Hu 		head_idx++;
15953d3f477SJiayu Hu 		if (head_idx >= vq->size)
16053d3f477SJiayu Hu 			head_idx -= vq->size;
16153d3f477SJiayu Hu 	}
16253d3f477SJiayu Hu 
16353d3f477SJiayu Hu 	if (likely(nr_copies > 0))
16453d3f477SJiayu Hu 		rte_dma_submit(dma_id, vchan_id);
16553d3f477SJiayu Hu 
16653d3f477SJiayu Hu 	rte_spinlock_unlock(&dma_info->dma_lock);
16753d3f477SJiayu Hu 
16853d3f477SJiayu Hu 	return pkt_idx;
16953d3f477SJiayu Hu }
17053d3f477SJiayu Hu 
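/*
 * Poll the DMA virtual channel for completed copies and set the per-packet
 * completion flags recorded for the last copy of each packet. Returns the
 * number of completed copy operations (not packets).
 */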
17153d3f477SJiayu Hu static __rte_always_inline uint16_t
17253d3f477SJiayu Hu vhost_async_dma_check_completed(struct virtio_net *dev, int16_t dma_id, uint16_t vchan_id,
17353d3f477SJiayu Hu 		uint16_t max_pkts)
17453d3f477SJiayu Hu {
17553d3f477SJiayu Hu 	struct async_dma_vchan_info *dma_info = &dma_copy_track[dma_id].vchans[vchan_id];
17653d3f477SJiayu Hu 	uint16_t ring_mask = dma_info->ring_mask;
17753d3f477SJiayu Hu 	uint16_t last_idx = 0;
17853d3f477SJiayu Hu 	uint16_t nr_copies;
17953d3f477SJiayu Hu 	uint16_t copy_idx;
18053d3f477SJiayu Hu 	uint16_t i;
18153d3f477SJiayu Hu 	bool has_error = false;
18253d3f477SJiayu Hu 	static bool vhost_async_dma_complete_log;
18353d3f477SJiayu Hu 
18453d3f477SJiayu Hu 	rte_spinlock_lock(&dma_info->dma_lock);
18553d3f477SJiayu Hu 
18653d3f477SJiayu Hu 	/**
18753d3f477SJiayu Hu 	 * Print an error log for debugging if the DMA device reports an
18853d3f477SJiayu Hu 	 * error during the transfer. Errors are not handled at the vhost level.
18953d3f477SJiayu Hu 	 */
19053d3f477SJiayu Hu 	nr_copies = rte_dma_completed(dma_id, vchan_id, max_pkts, &last_idx, &has_error);
19153d3f477SJiayu Hu 	if (unlikely(!vhost_async_dma_complete_log && has_error)) {
1920e21c7c0SDavid Marchand 		VHOST_DATA_LOG(dev->ifname, ERR,
1930e21c7c0SDavid Marchand 			"DMA completion failure on channel %d:%u",
19453d3f477SJiayu Hu 			dma_id, vchan_id);
19553d3f477SJiayu Hu 		vhost_async_dma_complete_log = true;
19653d3f477SJiayu Hu 	} else if (nr_copies == 0) {
19753d3f477SJiayu Hu 		goto out;
19853d3f477SJiayu Hu 	}
19953d3f477SJiayu Hu 
20053d3f477SJiayu Hu 	copy_idx = last_idx - nr_copies + 1;
20153d3f477SJiayu Hu 	for (i = 0; i < nr_copies; i++) {
20253d3f477SJiayu Hu 		bool *flag;
20353d3f477SJiayu Hu 
20453d3f477SJiayu Hu 		flag = dma_info->pkts_cmpl_flag_addr[copy_idx & ring_mask];
20553d3f477SJiayu Hu 		if (flag) {
20653d3f477SJiayu Hu 			/**
20753d3f477SJiayu Hu 			 * Mark the packet flag as received. The flag
20853d3f477SJiayu Hu 			 * could belong to another virtqueue, but the
20953d3f477SJiayu Hu 			 * write is atomic.
21053d3f477SJiayu Hu 			 */
21153d3f477SJiayu Hu 			*flag = true;
21253d3f477SJiayu Hu 			dma_info->pkts_cmpl_flag_addr[copy_idx & ring_mask] = NULL;
21353d3f477SJiayu Hu 		}
21453d3f477SJiayu Hu 		copy_idx++;
21553d3f477SJiayu Hu 	}
21653d3f477SJiayu Hu 
21753d3f477SJiayu Hu out:
21853d3f477SJiayu Hu 	rte_spinlock_unlock(&dma_info->dma_lock);
21953d3f477SJiayu Hu 	return nr_copies;
22053d3f477SJiayu Hu }
22153d3f477SJiayu Hu 
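/*
 * Flush the batched enqueue copies accumulated in vq->batch_copy_elems:
 * perform each copy, log the written guest memory for dirty-page tracking
 * (live migration), and reset the element counter.
 */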
22299a2dd95SBruce Richardson static inline void
22399a2dd95SBruce Richardson do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq)
224bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
22599a2dd95SBruce Richardson {
22699a2dd95SBruce Richardson 	struct batch_copy_elem *elem = vq->batch_copy_elems;
22799a2dd95SBruce Richardson 	uint16_t count = vq->batch_copy_nb_elems;
22899a2dd95SBruce Richardson 	int i;
22999a2dd95SBruce Richardson 
23099a2dd95SBruce Richardson 	for (i = 0; i < count; i++) {
23199a2dd95SBruce Richardson 		rte_memcpy(elem[i].dst, elem[i].src, elem[i].len);
23299a2dd95SBruce Richardson 		vhost_log_cache_write_iova(dev, vq, elem[i].log_addr,
23399a2dd95SBruce Richardson 					   elem[i].len);
23499a2dd95SBruce Richardson 		PRINT_PACKET(dev, (uintptr_t)elem[i].dst, elem[i].len, 0);
23599a2dd95SBruce Richardson 	}
23699a2dd95SBruce Richardson 
23799a2dd95SBruce Richardson 	vq->batch_copy_nb_elems = 0;
23899a2dd95SBruce Richardson }
23999a2dd95SBruce Richardson 
24099a2dd95SBruce Richardson static inline void
24199a2dd95SBruce Richardson do_data_copy_dequeue(struct vhost_virtqueue *vq)
24299a2dd95SBruce Richardson {
24399a2dd95SBruce Richardson 	struct batch_copy_elem *elem = vq->batch_copy_elems;
24499a2dd95SBruce Richardson 	uint16_t count = vq->batch_copy_nb_elems;
24599a2dd95SBruce Richardson 	int i;
24699a2dd95SBruce Richardson 
24799a2dd95SBruce Richardson 	for (i = 0; i < count; i++)
24899a2dd95SBruce Richardson 		rte_memcpy(elem[i].dst, elem[i].src, elem[i].len);
24999a2dd95SBruce Richardson 
25099a2dd95SBruce Richardson 	vq->batch_copy_nb_elems = 0;
25199a2dd95SBruce Richardson }
25299a2dd95SBruce Richardson 
25399a2dd95SBruce Richardson static __rte_always_inline void
25499a2dd95SBruce Richardson do_flush_shadow_used_ring_split(struct virtio_net *dev,
25599a2dd95SBruce Richardson 			struct vhost_virtqueue *vq,
25699a2dd95SBruce Richardson 			uint16_t to, uint16_t from, uint16_t size)
25799a2dd95SBruce Richardson {
25899a2dd95SBruce Richardson 	rte_memcpy(&vq->used->ring[to],
25999a2dd95SBruce Richardson 			&vq->shadow_used_split[from],
26099a2dd95SBruce Richardson 			size * sizeof(struct vring_used_elem));
26199a2dd95SBruce Richardson 	vhost_log_cache_used_vring(dev, vq,
26299a2dd95SBruce Richardson 			offsetof(struct vring_used, ring[to]),
26399a2dd95SBruce Richardson 			size * sizeof(struct vring_used_elem));
26499a2dd95SBruce Richardson }
26599a2dd95SBruce Richardson 
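/*
 * Flush the shadow used entries into the guest-visible used ring, splitting
 * the copy in two when it wraps past the end of the ring, then publish the
 * new used index with a release store. For example, with a 256-entry ring,
 * used_idx 250 and 10 shadow entries, entries 0-5 land in ring[250..255]
 * and entries 6-9 in ring[0..3].
 */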
26699a2dd95SBruce Richardson static __rte_always_inline void
26799a2dd95SBruce Richardson flush_shadow_used_ring_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
26899a2dd95SBruce Richardson {
26999a2dd95SBruce Richardson 	uint16_t used_idx = vq->last_used_idx & (vq->size - 1);
27099a2dd95SBruce Richardson 
27199a2dd95SBruce Richardson 	if (used_idx + vq->shadow_used_idx <= vq->size) {
27299a2dd95SBruce Richardson 		do_flush_shadow_used_ring_split(dev, vq, used_idx, 0,
27399a2dd95SBruce Richardson 					  vq->shadow_used_idx);
27499a2dd95SBruce Richardson 	} else {
27599a2dd95SBruce Richardson 		uint16_t size;
27699a2dd95SBruce Richardson 
27799a2dd95SBruce Richardson 		/* update the used ring interval [used_idx, vq->size) */
27899a2dd95SBruce Richardson 		size = vq->size - used_idx;
27999a2dd95SBruce Richardson 		do_flush_shadow_used_ring_split(dev, vq, used_idx, 0, size);
28099a2dd95SBruce Richardson 
28199a2dd95SBruce Richardson 		/* update the wrapped-around interval [0, shadow_used_idx - size) at the ring start */
28299a2dd95SBruce Richardson 		do_flush_shadow_used_ring_split(dev, vq, 0, size,
28399a2dd95SBruce Richardson 					  vq->shadow_used_idx - size);
28499a2dd95SBruce Richardson 	}
28599a2dd95SBruce Richardson 	vq->last_used_idx += vq->shadow_used_idx;
28699a2dd95SBruce Richardson 
28799a2dd95SBruce Richardson 	vhost_log_cache_sync(dev, vq);
28899a2dd95SBruce Richardson 
2895147b641STyler Retzlaff 	rte_atomic_fetch_add_explicit((unsigned short __rte_atomic *)&vq->used->idx,
2905147b641STyler Retzlaff 		vq->shadow_used_idx, rte_memory_order_release);
29199a2dd95SBruce Richardson 	vq->shadow_used_idx = 0;
29299a2dd95SBruce Richardson 	vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
29399a2dd95SBruce Richardson 		sizeof(vq->used->idx));
29499a2dd95SBruce Richardson }
29599a2dd95SBruce Richardson 
29699a2dd95SBruce Richardson static __rte_always_inline void
29799a2dd95SBruce Richardson update_shadow_used_ring_split(struct vhost_virtqueue *vq,
29899a2dd95SBruce Richardson 			 uint16_t desc_idx, uint32_t len)
29999a2dd95SBruce Richardson {
30099a2dd95SBruce Richardson 	uint16_t i = vq->shadow_used_idx++;
30199a2dd95SBruce Richardson 
30299a2dd95SBruce Richardson 	vq->shadow_used_split[i].id  = desc_idx;
30399a2dd95SBruce Richardson 	vq->shadow_used_split[i].len = len;
30499a2dd95SBruce Richardson }
30599a2dd95SBruce Richardson 
30699a2dd95SBruce Richardson static __rte_always_inline void
30799a2dd95SBruce Richardson vhost_flush_enqueue_shadow_packed(struct virtio_net *dev,
30899a2dd95SBruce Richardson 				  struct vhost_virtqueue *vq)
30999a2dd95SBruce Richardson {
31099a2dd95SBruce Richardson 	int i;
31199a2dd95SBruce Richardson 	uint16_t used_idx = vq->last_used_idx;
31299a2dd95SBruce Richardson 	uint16_t head_idx = vq->last_used_idx;
31399a2dd95SBruce Richardson 	uint16_t head_flags = 0;
31499a2dd95SBruce Richardson 
31599a2dd95SBruce Richardson 	/* Split loop in two to save memory barriers */
31699a2dd95SBruce Richardson 	for (i = 0; i < vq->shadow_used_idx; i++) {
31799a2dd95SBruce Richardson 		vq->desc_packed[used_idx].id = vq->shadow_used_packed[i].id;
31899a2dd95SBruce Richardson 		vq->desc_packed[used_idx].len = vq->shadow_used_packed[i].len;
31999a2dd95SBruce Richardson 
32099a2dd95SBruce Richardson 		used_idx += vq->shadow_used_packed[i].count;
32199a2dd95SBruce Richardson 		if (used_idx >= vq->size)
32299a2dd95SBruce Richardson 			used_idx -= vq->size;
32399a2dd95SBruce Richardson 	}
32499a2dd95SBruce Richardson 
32599a2dd95SBruce Richardson 	/* The ordering for storing desc flags needs to be enforced. */
3265147b641STyler Retzlaff 	rte_atomic_thread_fence(rte_memory_order_release);
32799a2dd95SBruce Richardson 
32899a2dd95SBruce Richardson 	for (i = 0; i < vq->shadow_used_idx; i++) {
32999a2dd95SBruce Richardson 		uint16_t flags;
33099a2dd95SBruce Richardson 
33199a2dd95SBruce Richardson 		if (vq->shadow_used_packed[i].len)
33299a2dd95SBruce Richardson 			flags = VRING_DESC_F_WRITE;
33399a2dd95SBruce Richardson 		else
33499a2dd95SBruce Richardson 			flags = 0;
33599a2dd95SBruce Richardson 
33699a2dd95SBruce Richardson 		if (vq->used_wrap_counter) {
33799a2dd95SBruce Richardson 			flags |= VRING_DESC_F_USED;
33899a2dd95SBruce Richardson 			flags |= VRING_DESC_F_AVAIL;
33999a2dd95SBruce Richardson 		} else {
34099a2dd95SBruce Richardson 			flags &= ~VRING_DESC_F_USED;
34199a2dd95SBruce Richardson 			flags &= ~VRING_DESC_F_AVAIL;
34299a2dd95SBruce Richardson 		}
34399a2dd95SBruce Richardson 
34499a2dd95SBruce Richardson 		if (i > 0) {
34599a2dd95SBruce Richardson 			vq->desc_packed[vq->last_used_idx].flags = flags;
34699a2dd95SBruce Richardson 
34799a2dd95SBruce Richardson 			vhost_log_cache_used_vring(dev, vq,
34899a2dd95SBruce Richardson 					vq->last_used_idx *
34999a2dd95SBruce Richardson 					sizeof(struct vring_packed_desc),
35099a2dd95SBruce Richardson 					sizeof(struct vring_packed_desc));
35199a2dd95SBruce Richardson 		} else {
35299a2dd95SBruce Richardson 			head_idx = vq->last_used_idx;
35399a2dd95SBruce Richardson 			head_flags = flags;
35499a2dd95SBruce Richardson 		}
35599a2dd95SBruce Richardson 
35699a2dd95SBruce Richardson 		vq_inc_last_used_packed(vq, vq->shadow_used_packed[i].count);
35799a2dd95SBruce Richardson 	}
35899a2dd95SBruce Richardson 
35999a2dd95SBruce Richardson 	vq->desc_packed[head_idx].flags = head_flags;
36099a2dd95SBruce Richardson 
36199a2dd95SBruce Richardson 	vhost_log_cache_used_vring(dev, vq,
36299a2dd95SBruce Richardson 				head_idx *
36399a2dd95SBruce Richardson 				sizeof(struct vring_packed_desc),
36499a2dd95SBruce Richardson 				sizeof(struct vring_packed_desc));
36599a2dd95SBruce Richardson 
36699a2dd95SBruce Richardson 	vq->shadow_used_idx = 0;
36799a2dd95SBruce Richardson 	vhost_log_cache_sync(dev, vq);
36899a2dd95SBruce Richardson }
36999a2dd95SBruce Richardson 
37099a2dd95SBruce Richardson static __rte_always_inline void
37199a2dd95SBruce Richardson vhost_flush_dequeue_shadow_packed(struct virtio_net *dev,
37299a2dd95SBruce Richardson 				  struct vhost_virtqueue *vq)
37399a2dd95SBruce Richardson {
37499a2dd95SBruce Richardson 	struct vring_used_elem_packed *used_elem = &vq->shadow_used_packed[0];
37599a2dd95SBruce Richardson 
37699a2dd95SBruce Richardson 	vq->desc_packed[vq->shadow_last_used_idx].id = used_elem->id;
37799a2dd95SBruce Richardson 	/* desc flags are the synchronization point for the virtio packed vring */
3785147b641STyler Retzlaff 	rte_atomic_store_explicit(
3795147b641STyler Retzlaff 		(unsigned short __rte_atomic *)&vq->desc_packed[vq->shadow_last_used_idx].flags,
3805147b641STyler Retzlaff 		used_elem->flags, rte_memory_order_release);
38199a2dd95SBruce Richardson 
38299a2dd95SBruce Richardson 	vhost_log_cache_used_vring(dev, vq, vq->shadow_last_used_idx *
38399a2dd95SBruce Richardson 				   sizeof(struct vring_packed_desc),
38499a2dd95SBruce Richardson 				   sizeof(struct vring_packed_desc));
38599a2dd95SBruce Richardson 	vq->shadow_used_idx = 0;
38699a2dd95SBruce Richardson 	vhost_log_cache_sync(dev, vq);
38799a2dd95SBruce Richardson }
38899a2dd95SBruce Richardson 
38999a2dd95SBruce Richardson static __rte_always_inline void
39099a2dd95SBruce Richardson vhost_flush_enqueue_batch_packed(struct virtio_net *dev,
39199a2dd95SBruce Richardson 				 struct vhost_virtqueue *vq,
39299a2dd95SBruce Richardson 				 uint64_t *lens,
39399a2dd95SBruce Richardson 				 uint16_t *ids)
39499a2dd95SBruce Richardson {
39599a2dd95SBruce Richardson 	uint16_t i;
39699a2dd95SBruce Richardson 	uint16_t flags;
3973ad55b8eSBalazs Nemeth 	uint16_t last_used_idx;
3983ad55b8eSBalazs Nemeth 	struct vring_packed_desc *desc_base;
39999a2dd95SBruce Richardson 
4003ad55b8eSBalazs Nemeth 	last_used_idx = vq->last_used_idx;
4013ad55b8eSBalazs Nemeth 	desc_base = &vq->desc_packed[last_used_idx];
4023ad55b8eSBalazs Nemeth 
40399a2dd95SBruce Richardson 	flags = PACKED_DESC_ENQUEUE_USED_FLAG(vq->used_wrap_counter);
40499a2dd95SBruce Richardson 
40599a2dd95SBruce Richardson 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
406d18db804SBalazs Nemeth 		desc_base[i].id = ids[i];
407d18db804SBalazs Nemeth 		desc_base[i].len = lens[i];
40899a2dd95SBruce Richardson 	}
40999a2dd95SBruce Richardson 
4105147b641STyler Retzlaff 	rte_atomic_thread_fence(rte_memory_order_release);
41199a2dd95SBruce Richardson 
412d18db804SBalazs Nemeth 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
413d18db804SBalazs Nemeth 		desc_base[i].flags = flags;
414d18db804SBalazs Nemeth 	}
41599a2dd95SBruce Richardson 
416d18db804SBalazs Nemeth 	vhost_log_cache_used_vring(dev, vq, last_used_idx *
41799a2dd95SBruce Richardson 				   sizeof(struct vring_packed_desc),
41899a2dd95SBruce Richardson 				   sizeof(struct vring_packed_desc) *
41999a2dd95SBruce Richardson 				   PACKED_BATCH_SIZE);
42099a2dd95SBruce Richardson 	vhost_log_cache_sync(dev, vq);
42199a2dd95SBruce Richardson 
42299a2dd95SBruce Richardson 	vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE);
42399a2dd95SBruce Richardson }
42499a2dd95SBruce Richardson 
42599a2dd95SBruce Richardson static __rte_always_inline void
426f9ebb02bSCheng Jiang vhost_async_shadow_enqueue_packed_batch(struct vhost_virtqueue *vq,
427f9ebb02bSCheng Jiang 				 uint64_t *lens,
428f9ebb02bSCheng Jiang 				 uint16_t *ids)
4294b02c267SDavid Marchand 	__rte_exclusive_locks_required(&vq->access_lock)
430f9ebb02bSCheng Jiang {
431f9ebb02bSCheng Jiang 	uint16_t i;
432f9ebb02bSCheng Jiang 	struct vhost_async *async = vq->async;
433f9ebb02bSCheng Jiang 
434f9ebb02bSCheng Jiang 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
435f9ebb02bSCheng Jiang 		async->buffers_packed[async->buffer_idx_packed].id  = ids[i];
436f9ebb02bSCheng Jiang 		async->buffers_packed[async->buffer_idx_packed].len = lens[i];
437f9ebb02bSCheng Jiang 		async->buffers_packed[async->buffer_idx_packed].count = 1;
438f9ebb02bSCheng Jiang 		async->buffer_idx_packed++;
439f9ebb02bSCheng Jiang 		if (async->buffer_idx_packed >= vq->size)
440f9ebb02bSCheng Jiang 			async->buffer_idx_packed -= vq->size;
441f9ebb02bSCheng Jiang 	}
442f9ebb02bSCheng Jiang }
443f9ebb02bSCheng Jiang 
444f9ebb02bSCheng Jiang static __rte_always_inline void
445c2fa52bfSCheng Jiang vhost_async_shadow_dequeue_packed_batch(struct vhost_virtqueue *vq, uint16_t *ids)
44603f77d66SEelco Chaudron 	__rte_shared_locks_required(&vq->access_lock)
447c2fa52bfSCheng Jiang {
448c2fa52bfSCheng Jiang 	uint16_t i;
449c2fa52bfSCheng Jiang 	struct vhost_async *async = vq->async;
450c2fa52bfSCheng Jiang 
451c2fa52bfSCheng Jiang 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
452c2fa52bfSCheng Jiang 		async->buffers_packed[async->buffer_idx_packed].id  = ids[i];
453c2fa52bfSCheng Jiang 		async->buffers_packed[async->buffer_idx_packed].len = 0;
454c2fa52bfSCheng Jiang 		async->buffers_packed[async->buffer_idx_packed].count = 1;
455c2fa52bfSCheng Jiang 
456c2fa52bfSCheng Jiang 		async->buffer_idx_packed++;
457c2fa52bfSCheng Jiang 		if (async->buffer_idx_packed >= vq->size)
458c2fa52bfSCheng Jiang 			async->buffer_idx_packed -= vq->size;
459c2fa52bfSCheng Jiang 	}
460c2fa52bfSCheng Jiang }
461c2fa52bfSCheng Jiang 
462c2fa52bfSCheng Jiang static __rte_always_inline void
46399a2dd95SBruce Richardson vhost_shadow_dequeue_batch_packed_inorder(struct vhost_virtqueue *vq,
46499a2dd95SBruce Richardson 					  uint16_t id)
46599a2dd95SBruce Richardson {
46699a2dd95SBruce Richardson 	vq->shadow_used_packed[0].id = id;
46799a2dd95SBruce Richardson 
46899a2dd95SBruce Richardson 	if (!vq->shadow_used_idx) {
46999a2dd95SBruce Richardson 		vq->shadow_last_used_idx = vq->last_used_idx;
47099a2dd95SBruce Richardson 		vq->shadow_used_packed[0].flags =
47199a2dd95SBruce Richardson 			PACKED_DESC_DEQUEUE_USED_FLAG(vq->used_wrap_counter);
47299a2dd95SBruce Richardson 		vq->shadow_used_packed[0].len = 0;
47399a2dd95SBruce Richardson 		vq->shadow_used_packed[0].count = 1;
47499a2dd95SBruce Richardson 		vq->shadow_used_idx++;
47599a2dd95SBruce Richardson 	}
47699a2dd95SBruce Richardson 
47799a2dd95SBruce Richardson 	vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE);
47899a2dd95SBruce Richardson }
47999a2dd95SBruce Richardson 
48099a2dd95SBruce Richardson static __rte_always_inline void
48199a2dd95SBruce Richardson vhost_shadow_dequeue_batch_packed(struct virtio_net *dev,
48299a2dd95SBruce Richardson 				  struct vhost_virtqueue *vq,
48399a2dd95SBruce Richardson 				  uint16_t *ids)
48499a2dd95SBruce Richardson {
48599a2dd95SBruce Richardson 	uint16_t flags;
48699a2dd95SBruce Richardson 	uint16_t i;
48799a2dd95SBruce Richardson 	uint16_t begin;
48899a2dd95SBruce Richardson 
48999a2dd95SBruce Richardson 	flags = PACKED_DESC_DEQUEUE_USED_FLAG(vq->used_wrap_counter);
49099a2dd95SBruce Richardson 
49199a2dd95SBruce Richardson 	if (!vq->shadow_used_idx) {
49299a2dd95SBruce Richardson 		vq->shadow_last_used_idx = vq->last_used_idx;
49399a2dd95SBruce Richardson 		vq->shadow_used_packed[0].id  = ids[0];
49499a2dd95SBruce Richardson 		vq->shadow_used_packed[0].len = 0;
49599a2dd95SBruce Richardson 		vq->shadow_used_packed[0].count = 1;
49699a2dd95SBruce Richardson 		vq->shadow_used_packed[0].flags = flags;
49799a2dd95SBruce Richardson 		vq->shadow_used_idx++;
49899a2dd95SBruce Richardson 		begin = 1;
49999a2dd95SBruce Richardson 	} else
50099a2dd95SBruce Richardson 		begin = 0;
50199a2dd95SBruce Richardson 
50299a2dd95SBruce Richardson 	vhost_for_each_try_unroll(i, begin, PACKED_BATCH_SIZE) {
50399a2dd95SBruce Richardson 		vq->desc_packed[vq->last_used_idx + i].id = ids[i];
50499a2dd95SBruce Richardson 		vq->desc_packed[vq->last_used_idx + i].len = 0;
50599a2dd95SBruce Richardson 	}
50699a2dd95SBruce Richardson 
5075147b641STyler Retzlaff 	rte_atomic_thread_fence(rte_memory_order_release);
50899a2dd95SBruce Richardson 	vhost_for_each_try_unroll(i, begin, PACKED_BATCH_SIZE)
50999a2dd95SBruce Richardson 		vq->desc_packed[vq->last_used_idx + i].flags = flags;
51099a2dd95SBruce Richardson 
51199a2dd95SBruce Richardson 	vhost_log_cache_used_vring(dev, vq, vq->last_used_idx *
51299a2dd95SBruce Richardson 				   sizeof(struct vring_packed_desc),
51399a2dd95SBruce Richardson 				   sizeof(struct vring_packed_desc) *
51499a2dd95SBruce Richardson 				   PACKED_BATCH_SIZE);
51599a2dd95SBruce Richardson 	vhost_log_cache_sync(dev, vq);
51699a2dd95SBruce Richardson 
51799a2dd95SBruce Richardson 	vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE);
51899a2dd95SBruce Richardson }
51999a2dd95SBruce Richardson 
52099a2dd95SBruce Richardson static __rte_always_inline void
52199a2dd95SBruce Richardson vhost_shadow_dequeue_single_packed(struct vhost_virtqueue *vq,
52299a2dd95SBruce Richardson 				   uint16_t buf_id,
52399a2dd95SBruce Richardson 				   uint16_t count)
52499a2dd95SBruce Richardson {
52599a2dd95SBruce Richardson 	uint16_t flags;
52699a2dd95SBruce Richardson 
52799a2dd95SBruce Richardson 	flags = vq->desc_packed[vq->last_used_idx].flags;
52899a2dd95SBruce Richardson 	if (vq->used_wrap_counter) {
52999a2dd95SBruce Richardson 		flags |= VRING_DESC_F_USED;
53099a2dd95SBruce Richardson 		flags |= VRING_DESC_F_AVAIL;
53199a2dd95SBruce Richardson 	} else {
53299a2dd95SBruce Richardson 		flags &= ~VRING_DESC_F_USED;
53399a2dd95SBruce Richardson 		flags &= ~VRING_DESC_F_AVAIL;
53499a2dd95SBruce Richardson 	}
53599a2dd95SBruce Richardson 
53699a2dd95SBruce Richardson 	if (!vq->shadow_used_idx) {
53799a2dd95SBruce Richardson 		vq->shadow_last_used_idx = vq->last_used_idx;
53899a2dd95SBruce Richardson 
53999a2dd95SBruce Richardson 		vq->shadow_used_packed[0].id  = buf_id;
54099a2dd95SBruce Richardson 		vq->shadow_used_packed[0].len = 0;
54199a2dd95SBruce Richardson 		vq->shadow_used_packed[0].flags = flags;
54299a2dd95SBruce Richardson 		vq->shadow_used_idx++;
54399a2dd95SBruce Richardson 	} else {
54499a2dd95SBruce Richardson 		vq->desc_packed[vq->last_used_idx].id = buf_id;
54599a2dd95SBruce Richardson 		vq->desc_packed[vq->last_used_idx].len = 0;
54699a2dd95SBruce Richardson 		vq->desc_packed[vq->last_used_idx].flags = flags;
54799a2dd95SBruce Richardson 	}
54899a2dd95SBruce Richardson 
54999a2dd95SBruce Richardson 	vq_inc_last_used_packed(vq, count);
55099a2dd95SBruce Richardson }
55199a2dd95SBruce Richardson 
55299a2dd95SBruce Richardson static __rte_always_inline void
55399a2dd95SBruce Richardson vhost_shadow_dequeue_single_packed_inorder(struct vhost_virtqueue *vq,
55499a2dd95SBruce Richardson 					   uint16_t buf_id,
55599a2dd95SBruce Richardson 					   uint16_t count)
55699a2dd95SBruce Richardson {
55799a2dd95SBruce Richardson 	uint16_t flags;
55899a2dd95SBruce Richardson 
55999a2dd95SBruce Richardson 	vq->shadow_used_packed[0].id = buf_id;
56099a2dd95SBruce Richardson 
56199a2dd95SBruce Richardson 	flags = vq->desc_packed[vq->last_used_idx].flags;
56299a2dd95SBruce Richardson 	if (vq->used_wrap_counter) {
56399a2dd95SBruce Richardson 		flags |= VRING_DESC_F_USED;
56499a2dd95SBruce Richardson 		flags |= VRING_DESC_F_AVAIL;
56599a2dd95SBruce Richardson 	} else {
56699a2dd95SBruce Richardson 		flags &= ~VRING_DESC_F_USED;
56799a2dd95SBruce Richardson 		flags &= ~VRING_DESC_F_AVAIL;
56899a2dd95SBruce Richardson 	}
56999a2dd95SBruce Richardson 
57099a2dd95SBruce Richardson 	if (!vq->shadow_used_idx) {
57199a2dd95SBruce Richardson 		vq->shadow_last_used_idx = vq->last_used_idx;
57299a2dd95SBruce Richardson 		vq->shadow_used_packed[0].len = 0;
57399a2dd95SBruce Richardson 		vq->shadow_used_packed[0].flags = flags;
57499a2dd95SBruce Richardson 		vq->shadow_used_idx++;
57599a2dd95SBruce Richardson 	}
57699a2dd95SBruce Richardson 
57799a2dd95SBruce Richardson 	vq_inc_last_used_packed(vq, count);
57899a2dd95SBruce Richardson }
57999a2dd95SBruce Richardson 
58099a2dd95SBruce Richardson static __rte_always_inline void
581873e8dadSCheng Jiang vhost_shadow_enqueue_packed(struct vhost_virtqueue *vq,
582873e8dadSCheng Jiang 				   uint32_t *len,
583873e8dadSCheng Jiang 				   uint16_t *id,
584873e8dadSCheng Jiang 				   uint16_t *count,
58599a2dd95SBruce Richardson 				   uint16_t num_buffers)
58699a2dd95SBruce Richardson {
58799a2dd95SBruce Richardson 	uint16_t i;
588873e8dadSCheng Jiang 
58999a2dd95SBruce Richardson 	for (i = 0; i < num_buffers; i++) {
59099a2dd95SBruce Richardson 		/* when starting a new shadow batch, record the batch offset so flushes stay batch-aligned */
59199a2dd95SBruce Richardson 		if (!vq->shadow_used_idx)
59299a2dd95SBruce Richardson 			vq->shadow_aligned_idx = vq->last_used_idx &
59399a2dd95SBruce Richardson 				PACKED_BATCH_MASK;
59499a2dd95SBruce Richardson 		vq->shadow_used_packed[vq->shadow_used_idx].id  = id[i];
59599a2dd95SBruce Richardson 		vq->shadow_used_packed[vq->shadow_used_idx].len = len[i];
59699a2dd95SBruce Richardson 		vq->shadow_used_packed[vq->shadow_used_idx].count = count[i];
59799a2dd95SBruce Richardson 		vq->shadow_aligned_idx += count[i];
59899a2dd95SBruce Richardson 		vq->shadow_used_idx++;
59999a2dd95SBruce Richardson 	}
600873e8dadSCheng Jiang }
601873e8dadSCheng Jiang 
602873e8dadSCheng Jiang static __rte_always_inline void
603637711f0SCheng Jiang vhost_async_shadow_enqueue_packed(struct vhost_virtqueue *vq,
604637711f0SCheng Jiang 				   uint32_t *len,
605637711f0SCheng Jiang 				   uint16_t *id,
606637711f0SCheng Jiang 				   uint16_t *count,
607637711f0SCheng Jiang 				   uint16_t num_buffers)
6084b02c267SDavid Marchand 	__rte_exclusive_locks_required(&vq->access_lock)
609637711f0SCheng Jiang {
610637711f0SCheng Jiang 	uint16_t i;
611637711f0SCheng Jiang 	struct vhost_async *async = vq->async;
612637711f0SCheng Jiang 
613637711f0SCheng Jiang 	for (i = 0; i < num_buffers; i++) {
614637711f0SCheng Jiang 		async->buffers_packed[async->buffer_idx_packed].id  = id[i];
615637711f0SCheng Jiang 		async->buffers_packed[async->buffer_idx_packed].len = len[i];
616637711f0SCheng Jiang 		async->buffers_packed[async->buffer_idx_packed].count = count[i];
617637711f0SCheng Jiang 		async->buffer_idx_packed++;
618637711f0SCheng Jiang 		if (async->buffer_idx_packed >= vq->size)
619637711f0SCheng Jiang 			async->buffer_idx_packed -= vq->size;
620637711f0SCheng Jiang 	}
621637711f0SCheng Jiang }
622637711f0SCheng Jiang 
623637711f0SCheng Jiang static __rte_always_inline void
624873e8dadSCheng Jiang vhost_shadow_enqueue_single_packed(struct virtio_net *dev,
625873e8dadSCheng Jiang 				   struct vhost_virtqueue *vq,
626873e8dadSCheng Jiang 				   uint32_t *len,
627873e8dadSCheng Jiang 				   uint16_t *id,
628873e8dadSCheng Jiang 				   uint16_t *count,
629873e8dadSCheng Jiang 				   uint16_t num_buffers)
630bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
631873e8dadSCheng Jiang {
632873e8dadSCheng Jiang 	vhost_shadow_enqueue_packed(vq, len, id, count, num_buffers);
63399a2dd95SBruce Richardson 
63499a2dd95SBruce Richardson 	if (vq->shadow_aligned_idx >= PACKED_BATCH_SIZE) {
63599a2dd95SBruce Richardson 		do_data_copy_enqueue(dev, vq);
63699a2dd95SBruce Richardson 		vhost_flush_enqueue_shadow_packed(dev, vq);
63799a2dd95SBruce Richardson 	}
63899a2dd95SBruce Richardson }
63999a2dd95SBruce Richardson 
64099a2dd95SBruce Richardson /* avoid the write when the value is already set, to lessen cache issues */
64199a2dd95SBruce Richardson #define ASSIGN_UNLESS_EQUAL(var, val) do {	\
64299a2dd95SBruce Richardson 	if ((var) != (val))			\
64399a2dd95SBruce Richardson 		(var) = (val);			\
64499a2dd95SBruce Richardson } while (0)
64599a2dd95SBruce Richardson 
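/*
 * Translate the mbuf Tx offload flags into the virtio-net header seen by
 * the guest: request L4 checksum completion via VIRTIO_NET_HDR_F_NEEDS_CSUM
 * (with csum_start/csum_offset), compute the IPv4 header checksum in
 * software, and fill in the GSO type, size and header length for TSO/UFO.
 */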
64699a2dd95SBruce Richardson static __rte_always_inline void
64799a2dd95SBruce Richardson virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
64899a2dd95SBruce Richardson {
649daa02b5cSOlivier Matz 	uint64_t csum_l4 = m_buf->ol_flags & RTE_MBUF_F_TX_L4_MASK;
65099a2dd95SBruce Richardson 
651daa02b5cSOlivier Matz 	if (m_buf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)
652daa02b5cSOlivier Matz 		csum_l4 |= RTE_MBUF_F_TX_TCP_CKSUM;
65399a2dd95SBruce Richardson 
65499a2dd95SBruce Richardson 	if (csum_l4) {
6557316b4fdSMaxime Coquelin 		/*
6567316b4fdSMaxime Coquelin 		 * Pseudo-header checksum must be set as per Virtio spec.
6577316b4fdSMaxime Coquelin 		 *
6587316b4fdSMaxime Coquelin 		 * Note: We don't propagate rte_net_intel_cksum_prepare()
6597316b4fdSMaxime Coquelin 		 * errors, as it would have an impact on performance, and an
6607316b4fdSMaxime Coquelin 		 * error would mean the packet is dropped by the guest instead
6617316b4fdSMaxime Coquelin 		 * of being dropped here.
6627316b4fdSMaxime Coquelin 		 */
6637316b4fdSMaxime Coquelin 		rte_net_intel_cksum_prepare(m_buf);
6647316b4fdSMaxime Coquelin 
66599a2dd95SBruce Richardson 		net_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
66699a2dd95SBruce Richardson 		net_hdr->csum_start = m_buf->l2_len + m_buf->l3_len;
66799a2dd95SBruce Richardson 
66899a2dd95SBruce Richardson 		switch (csum_l4) {
669daa02b5cSOlivier Matz 		case RTE_MBUF_F_TX_TCP_CKSUM:
67099a2dd95SBruce Richardson 			net_hdr->csum_offset = (offsetof(struct rte_tcp_hdr,
67199a2dd95SBruce Richardson 						cksum));
67299a2dd95SBruce Richardson 			break;
673daa02b5cSOlivier Matz 		case RTE_MBUF_F_TX_UDP_CKSUM:
67499a2dd95SBruce Richardson 			net_hdr->csum_offset = (offsetof(struct rte_udp_hdr,
67599a2dd95SBruce Richardson 						dgram_cksum));
67699a2dd95SBruce Richardson 			break;
677daa02b5cSOlivier Matz 		case RTE_MBUF_F_TX_SCTP_CKSUM:
67899a2dd95SBruce Richardson 			net_hdr->csum_offset = (offsetof(struct rte_sctp_hdr,
67999a2dd95SBruce Richardson 						cksum));
68099a2dd95SBruce Richardson 			break;
68199a2dd95SBruce Richardson 		}
68299a2dd95SBruce Richardson 	} else {
68399a2dd95SBruce Richardson 		ASSIGN_UNLESS_EQUAL(net_hdr->csum_start, 0);
68499a2dd95SBruce Richardson 		ASSIGN_UNLESS_EQUAL(net_hdr->csum_offset, 0);
68599a2dd95SBruce Richardson 		ASSIGN_UNLESS_EQUAL(net_hdr->flags, 0);
68699a2dd95SBruce Richardson 	}
68799a2dd95SBruce Richardson 
68899a2dd95SBruce Richardson 	/* The IP checksum cannot be offloaded to the guest, so calculate it here */
689daa02b5cSOlivier Matz 	if (m_buf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM) {
69099a2dd95SBruce Richardson 		struct rte_ipv4_hdr *ipv4_hdr;
69199a2dd95SBruce Richardson 
69299a2dd95SBruce Richardson 		ipv4_hdr = rte_pktmbuf_mtod_offset(m_buf, struct rte_ipv4_hdr *,
69399a2dd95SBruce Richardson 						   m_buf->l2_len);
69499a2dd95SBruce Richardson 		ipv4_hdr->hdr_checksum = 0;
69599a2dd95SBruce Richardson 		ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr);
69699a2dd95SBruce Richardson 	}
69799a2dd95SBruce Richardson 
698daa02b5cSOlivier Matz 	if (m_buf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
699daa02b5cSOlivier Matz 		if (m_buf->ol_flags & RTE_MBUF_F_TX_IPV4)
70099a2dd95SBruce Richardson 			net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
70199a2dd95SBruce Richardson 		else
70299a2dd95SBruce Richardson 			net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
70399a2dd95SBruce Richardson 		net_hdr->gso_size = m_buf->tso_segsz;
70499a2dd95SBruce Richardson 		net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len
70599a2dd95SBruce Richardson 					+ m_buf->l4_len;
706daa02b5cSOlivier Matz 	} else if (m_buf->ol_flags & RTE_MBUF_F_TX_UDP_SEG) {
70799a2dd95SBruce Richardson 		net_hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
70899a2dd95SBruce Richardson 		net_hdr->gso_size = m_buf->tso_segsz;
70999a2dd95SBruce Richardson 		net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len +
71099a2dd95SBruce Richardson 			m_buf->l4_len;
71199a2dd95SBruce Richardson 	} else {
71299a2dd95SBruce Richardson 		ASSIGN_UNLESS_EQUAL(net_hdr->gso_type, 0);
71399a2dd95SBruce Richardson 		ASSIGN_UNLESS_EQUAL(net_hdr->gso_size, 0);
71499a2dd95SBruce Richardson 		ASSIGN_UNLESS_EQUAL(net_hdr->hdr_len, 0);
71599a2dd95SBruce Richardson 	}
71699a2dd95SBruce Richardson }
71799a2dd95SBruce Richardson 
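/*
 * Map one guest descriptor (IOVA range) into host virtual addresses,
 * splitting it into multiple buf_vec entries when the range is not
 * contiguous in host VA space (e.g. it spans guest memory regions).
 */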
71899a2dd95SBruce Richardson static __rte_always_inline int
71999a2dd95SBruce Richardson map_one_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
72099a2dd95SBruce Richardson 		struct buf_vector *buf_vec, uint16_t *vec_idx,
72199a2dd95SBruce Richardson 		uint64_t desc_iova, uint64_t desc_len, uint8_t perm)
722bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
72399a2dd95SBruce Richardson {
72499a2dd95SBruce Richardson 	uint16_t vec_id = *vec_idx;
72599a2dd95SBruce Richardson 
72699a2dd95SBruce Richardson 	while (desc_len) {
72799a2dd95SBruce Richardson 		uint64_t desc_addr;
72899a2dd95SBruce Richardson 		uint64_t desc_chunck_len = desc_len;
72999a2dd95SBruce Richardson 
73099a2dd95SBruce Richardson 		if (unlikely(vec_id >= BUF_VECTOR_MAX))
73199a2dd95SBruce Richardson 			return -1;
73299a2dd95SBruce Richardson 
73399a2dd95SBruce Richardson 		desc_addr = vhost_iova_to_vva(dev, vq,
73499a2dd95SBruce Richardson 				desc_iova,
73599a2dd95SBruce Richardson 				&desc_chunck_len,
73699a2dd95SBruce Richardson 				perm);
73799a2dd95SBruce Richardson 		if (unlikely(!desc_addr))
73899a2dd95SBruce Richardson 			return -1;
73999a2dd95SBruce Richardson 
74099a2dd95SBruce Richardson 		rte_prefetch0((void *)(uintptr_t)desc_addr);
74199a2dd95SBruce Richardson 
74299a2dd95SBruce Richardson 		buf_vec[vec_id].buf_iova = desc_iova;
74399a2dd95SBruce Richardson 		buf_vec[vec_id].buf_addr = desc_addr;
74499a2dd95SBruce Richardson 		buf_vec[vec_id].buf_len  = desc_chunck_len;
74599a2dd95SBruce Richardson 
74699a2dd95SBruce Richardson 		desc_len -= desc_chunck_len;
74799a2dd95SBruce Richardson 		desc_iova += desc_chunck_len;
74899a2dd95SBruce Richardson 		vec_id++;
74999a2dd95SBruce Richardson 	}
75099a2dd95SBruce Richardson 	*vec_idx = vec_id;
75199a2dd95SBruce Richardson 
75299a2dd95SBruce Richardson 	return 0;
75399a2dd95SBruce Richardson }
75499a2dd95SBruce Richardson 
75599a2dd95SBruce Richardson static __rte_always_inline int
75699a2dd95SBruce Richardson fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
75799a2dd95SBruce Richardson 			 uint32_t avail_idx, uint16_t *vec_idx,
75899a2dd95SBruce Richardson 			 struct buf_vector *buf_vec, uint16_t *desc_chain_head,
75999a2dd95SBruce Richardson 			 uint32_t *desc_chain_len, uint8_t perm)
760bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
76199a2dd95SBruce Richardson {
76299a2dd95SBruce Richardson 	uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)];
76399a2dd95SBruce Richardson 	uint16_t vec_id = *vec_idx;
76499a2dd95SBruce Richardson 	uint32_t len    = 0;
76599a2dd95SBruce Richardson 	uint64_t dlen;
76699a2dd95SBruce Richardson 	uint32_t nr_descs = vq->size;
76799a2dd95SBruce Richardson 	uint32_t cnt    = 0;
76899a2dd95SBruce Richardson 	struct vring_desc *descs = vq->desc;
76999a2dd95SBruce Richardson 	struct vring_desc *idesc = NULL;
77099a2dd95SBruce Richardson 
77199a2dd95SBruce Richardson 	if (unlikely(idx >= vq->size))
77299a2dd95SBruce Richardson 		return -1;
77399a2dd95SBruce Richardson 
77499a2dd95SBruce Richardson 	*desc_chain_head = idx;
77599a2dd95SBruce Richardson 
77699a2dd95SBruce Richardson 	if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) {
77799a2dd95SBruce Richardson 		dlen = vq->desc[idx].len;
77899a2dd95SBruce Richardson 		nr_descs = dlen / sizeof(struct vring_desc);
77999a2dd95SBruce Richardson 		if (unlikely(nr_descs > vq->size))
78099a2dd95SBruce Richardson 			return -1;
78199a2dd95SBruce Richardson 
78299a2dd95SBruce Richardson 		descs = (struct vring_desc *)(uintptr_t)
78399a2dd95SBruce Richardson 			vhost_iova_to_vva(dev, vq, vq->desc[idx].addr,
78499a2dd95SBruce Richardson 						&dlen,
78599a2dd95SBruce Richardson 						VHOST_ACCESS_RO);
78699a2dd95SBruce Richardson 		if (unlikely(!descs))
78799a2dd95SBruce Richardson 			return -1;
78899a2dd95SBruce Richardson 
78999a2dd95SBruce Richardson 		if (unlikely(dlen < vq->desc[idx].len)) {
79099a2dd95SBruce Richardson 			/*
79199a2dd95SBruce Richardson 			 * The indirect desc table is not contiguous
79299a2dd95SBruce Richardson 			 * in process VA space, so we have to copy it.
79399a2dd95SBruce Richardson 			 */
79499a2dd95SBruce Richardson 			idesc = vhost_alloc_copy_ind_table(dev, vq,
79599a2dd95SBruce Richardson 					vq->desc[idx].addr, vq->desc[idx].len);
79699a2dd95SBruce Richardson 			if (unlikely(!idesc))
79799a2dd95SBruce Richardson 				return -1;
79899a2dd95SBruce Richardson 
79999a2dd95SBruce Richardson 			descs = idesc;
80099a2dd95SBruce Richardson 		}
80199a2dd95SBruce Richardson 
80299a2dd95SBruce Richardson 		idx = 0;
80399a2dd95SBruce Richardson 	}
80499a2dd95SBruce Richardson 
80599a2dd95SBruce Richardson 	while (1) {
80699a2dd95SBruce Richardson 		if (unlikely(idx >= nr_descs || cnt++ >= nr_descs)) {
80799a2dd95SBruce Richardson 			free_ind_table(idesc);
80899a2dd95SBruce Richardson 			return -1;
80999a2dd95SBruce Richardson 		}
81099a2dd95SBruce Richardson 
81199a2dd95SBruce Richardson 		dlen = descs[idx].len;
81299a2dd95SBruce Richardson 		len += dlen;
81399a2dd95SBruce Richardson 
81499a2dd95SBruce Richardson 		if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id,
81599a2dd95SBruce Richardson 						descs[idx].addr, dlen,
81699a2dd95SBruce Richardson 						perm))) {
81799a2dd95SBruce Richardson 			free_ind_table(idesc);
81899a2dd95SBruce Richardson 			return -1;
81999a2dd95SBruce Richardson 		}
82099a2dd95SBruce Richardson 
82199a2dd95SBruce Richardson 		if ((descs[idx].flags & VRING_DESC_F_NEXT) == 0)
82299a2dd95SBruce Richardson 			break;
82399a2dd95SBruce Richardson 
82499a2dd95SBruce Richardson 		idx = descs[idx].next;
82599a2dd95SBruce Richardson 	}
82699a2dd95SBruce Richardson 
82799a2dd95SBruce Richardson 	*desc_chain_len = len;
82899a2dd95SBruce Richardson 	*vec_idx = vec_id;
82999a2dd95SBruce Richardson 
83099a2dd95SBruce Richardson 	if (unlikely(!!idesc))
83199a2dd95SBruce Richardson 		free_ind_table(idesc);
83299a2dd95SBruce Richardson 
83399a2dd95SBruce Richardson 	return 0;
83499a2dd95SBruce Richardson }
83599a2dd95SBruce Richardson 
83699a2dd95SBruce Richardson /*
83799a2dd95SBruce Richardson  * Returns -1 on failure, 0 on success
83899a2dd95SBruce Richardson  */
83999a2dd95SBruce Richardson static inline int
84099a2dd95SBruce Richardson reserve_avail_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
8414226aa9cSMaxime Coquelin 				uint64_t size, struct buf_vector *buf_vec,
84299a2dd95SBruce Richardson 				uint16_t *num_buffers, uint16_t avail_head,
84399a2dd95SBruce Richardson 				uint16_t *nr_vec)
844bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
84599a2dd95SBruce Richardson {
84699a2dd95SBruce Richardson 	uint16_t cur_idx;
84799a2dd95SBruce Richardson 	uint16_t vec_idx = 0;
84899a2dd95SBruce Richardson 	uint16_t max_tries, tries = 0;
84999a2dd95SBruce Richardson 
85099a2dd95SBruce Richardson 	uint16_t head_idx = 0;
85199a2dd95SBruce Richardson 	uint32_t len = 0;
85299a2dd95SBruce Richardson 
85399a2dd95SBruce Richardson 	*num_buffers = 0;
85499a2dd95SBruce Richardson 	cur_idx  = vq->last_avail_idx;
85599a2dd95SBruce Richardson 
85699a2dd95SBruce Richardson 	if (rxvq_is_mergeable(dev))
85799a2dd95SBruce Richardson 		max_tries = vq->size - 1;
85899a2dd95SBruce Richardson 	else
85999a2dd95SBruce Richardson 		max_tries = 1;
86099a2dd95SBruce Richardson 
86199a2dd95SBruce Richardson 	while (size > 0) {
86299a2dd95SBruce Richardson 		if (unlikely(cur_idx == avail_head))
86399a2dd95SBruce Richardson 			return -1;
86499a2dd95SBruce Richardson 		/*
86599a2dd95SBruce Richardson 		 * If we have tried all available ring items and still
86699a2dd95SBruce Richardson 		 * cannot get enough buffers, something abnormal has
86799a2dd95SBruce Richardson 		 * happened.
86899a2dd95SBruce Richardson 		 */
86999a2dd95SBruce Richardson 		if (unlikely(++tries > max_tries))
87099a2dd95SBruce Richardson 			return -1;
87199a2dd95SBruce Richardson 
87299a2dd95SBruce Richardson 		if (unlikely(fill_vec_buf_split(dev, vq, cur_idx,
87399a2dd95SBruce Richardson 						&vec_idx, buf_vec,
87499a2dd95SBruce Richardson 						&head_idx, &len,
87599a2dd95SBruce Richardson 						VHOST_ACCESS_RW) < 0))
87699a2dd95SBruce Richardson 			return -1;
87799a2dd95SBruce Richardson 		len = RTE_MIN(len, size);
87899a2dd95SBruce Richardson 		update_shadow_used_ring_split(vq, head_idx, len);
87999a2dd95SBruce Richardson 		size -= len;
88099a2dd95SBruce Richardson 
88199a2dd95SBruce Richardson 		cur_idx++;
88299a2dd95SBruce Richardson 		*num_buffers += 1;
88399a2dd95SBruce Richardson 	}
88499a2dd95SBruce Richardson 
88599a2dd95SBruce Richardson 	*nr_vec = vec_idx;
88699a2dd95SBruce Richardson 
88799a2dd95SBruce Richardson 	return 0;
88899a2dd95SBruce Richardson }
88999a2dd95SBruce Richardson 
89099a2dd95SBruce Richardson static __rte_always_inline int
89199a2dd95SBruce Richardson fill_vec_buf_packed_indirect(struct virtio_net *dev,
89299a2dd95SBruce Richardson 			struct vhost_virtqueue *vq,
89399a2dd95SBruce Richardson 			struct vring_packed_desc *desc, uint16_t *vec_idx,
89499a2dd95SBruce Richardson 			struct buf_vector *buf_vec, uint32_t *len, uint8_t perm)
895bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
89699a2dd95SBruce Richardson {
89799a2dd95SBruce Richardson 	uint16_t i;
89899a2dd95SBruce Richardson 	uint32_t nr_descs;
89999a2dd95SBruce Richardson 	uint16_t vec_id = *vec_idx;
90099a2dd95SBruce Richardson 	uint64_t dlen;
90199a2dd95SBruce Richardson 	struct vring_packed_desc *descs, *idescs = NULL;
90299a2dd95SBruce Richardson 
90399a2dd95SBruce Richardson 	dlen = desc->len;
90499a2dd95SBruce Richardson 	descs = (struct vring_packed_desc *)(uintptr_t)
90599a2dd95SBruce Richardson 		vhost_iova_to_vva(dev, vq, desc->addr, &dlen, VHOST_ACCESS_RO);
90699a2dd95SBruce Richardson 	if (unlikely(!descs))
90799a2dd95SBruce Richardson 		return -1;
90899a2dd95SBruce Richardson 
90999a2dd95SBruce Richardson 	if (unlikely(dlen < desc->len)) {
91099a2dd95SBruce Richardson 		/*
91199a2dd95SBruce Richardson 		 * The indirect desc table is not contiguous
91299a2dd95SBruce Richardson 		 * in process VA space, so we have to copy it.
91399a2dd95SBruce Richardson 		 */
91499a2dd95SBruce Richardson 		idescs = vhost_alloc_copy_ind_table(dev,
91599a2dd95SBruce Richardson 				vq, desc->addr, desc->len);
91699a2dd95SBruce Richardson 		if (unlikely(!idescs))
91799a2dd95SBruce Richardson 			return -1;
91899a2dd95SBruce Richardson 
91999a2dd95SBruce Richardson 		descs = idescs;
92099a2dd95SBruce Richardson 	}
92199a2dd95SBruce Richardson 
92299a2dd95SBruce Richardson 	nr_descs =  desc->len / sizeof(struct vring_packed_desc);
92399a2dd95SBruce Richardson 	if (unlikely(nr_descs >= vq->size)) {
92499a2dd95SBruce Richardson 		free_ind_table(idescs);
92599a2dd95SBruce Richardson 		return -1;
92699a2dd95SBruce Richardson 	}
92799a2dd95SBruce Richardson 
92899a2dd95SBruce Richardson 	for (i = 0; i < nr_descs; i++) {
92999a2dd95SBruce Richardson 		if (unlikely(vec_id >= BUF_VECTOR_MAX)) {
93099a2dd95SBruce Richardson 			free_ind_table(idescs);
93199a2dd95SBruce Richardson 			return -1;
93299a2dd95SBruce Richardson 		}
93399a2dd95SBruce Richardson 
93499a2dd95SBruce Richardson 		dlen = descs[i].len;
93599a2dd95SBruce Richardson 		*len += dlen;
93699a2dd95SBruce Richardson 		if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id,
93799a2dd95SBruce Richardson 						descs[i].addr, dlen,
93899a2dd95SBruce Richardson 						perm)))
93999a2dd95SBruce Richardson 			return -1;
94099a2dd95SBruce Richardson 	}
94199a2dd95SBruce Richardson 	*vec_idx = vec_id;
94299a2dd95SBruce Richardson 
94399a2dd95SBruce Richardson 	if (unlikely(!!idescs))
94499a2dd95SBruce Richardson 		free_ind_table(idescs);
94599a2dd95SBruce Richardson 
94699a2dd95SBruce Richardson 	return 0;
94799a2dd95SBruce Richardson }
94899a2dd95SBruce Richardson 
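/*
 * Walk the packed-ring descriptor chain starting at avail_idx, following
 * indirect descriptor tables when present, and fill buf_vec with the mapped
 * buffers. On success, the chain's buffer id, total length and descriptor
 * count are returned through the output parameters.
 */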
94999a2dd95SBruce Richardson static __rte_always_inline int
95099a2dd95SBruce Richardson fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
95199a2dd95SBruce Richardson 				uint16_t avail_idx, uint16_t *desc_count,
95299a2dd95SBruce Richardson 				struct buf_vector *buf_vec, uint16_t *vec_idx,
95399a2dd95SBruce Richardson 				uint16_t *buf_id, uint32_t *len, uint8_t perm)
954bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
95599a2dd95SBruce Richardson {
95699a2dd95SBruce Richardson 	bool wrap_counter = vq->avail_wrap_counter;
95799a2dd95SBruce Richardson 	struct vring_packed_desc *descs = vq->desc_packed;
95899a2dd95SBruce Richardson 	uint16_t vec_id = *vec_idx;
95999a2dd95SBruce Richardson 	uint64_t dlen;
96099a2dd95SBruce Richardson 
96199a2dd95SBruce Richardson 	if (avail_idx < vq->last_avail_idx)
96299a2dd95SBruce Richardson 		wrap_counter ^= 1;
96399a2dd95SBruce Richardson 
96499a2dd95SBruce Richardson 	/*
96599a2dd95SBruce Richardson 	 * Perform a load-acquire barrier in desc_is_avail to
96699a2dd95SBruce Richardson 	 * enforce the ordering between desc flags and desc
96799a2dd95SBruce Richardson 	 * content.
96899a2dd95SBruce Richardson 	 */
96999a2dd95SBruce Richardson 	if (unlikely(!desc_is_avail(&descs[avail_idx], wrap_counter)))
97099a2dd95SBruce Richardson 		return -1;
97199a2dd95SBruce Richardson 
97299a2dd95SBruce Richardson 	*desc_count = 0;
97399a2dd95SBruce Richardson 	*len = 0;
97499a2dd95SBruce Richardson 
97599a2dd95SBruce Richardson 	while (1) {
97699a2dd95SBruce Richardson 		if (unlikely(vec_id >= BUF_VECTOR_MAX))
97799a2dd95SBruce Richardson 			return -1;
97899a2dd95SBruce Richardson 
97999a2dd95SBruce Richardson 		if (unlikely(*desc_count >= vq->size))
98099a2dd95SBruce Richardson 			return -1;
98199a2dd95SBruce Richardson 
98299a2dd95SBruce Richardson 		*desc_count += 1;
98399a2dd95SBruce Richardson 		*buf_id = descs[avail_idx].id;
98499a2dd95SBruce Richardson 
98599a2dd95SBruce Richardson 		if (descs[avail_idx].flags & VRING_DESC_F_INDIRECT) {
98699a2dd95SBruce Richardson 			if (unlikely(fill_vec_buf_packed_indirect(dev, vq,
98799a2dd95SBruce Richardson 							&descs[avail_idx],
98899a2dd95SBruce Richardson 							&vec_id, buf_vec,
98999a2dd95SBruce Richardson 							len, perm) < 0))
99099a2dd95SBruce Richardson 				return -1;
99199a2dd95SBruce Richardson 		} else {
99299a2dd95SBruce Richardson 			dlen = descs[avail_idx].len;
99399a2dd95SBruce Richardson 			*len += dlen;
99499a2dd95SBruce Richardson 
99599a2dd95SBruce Richardson 			if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id,
99699a2dd95SBruce Richardson 							descs[avail_idx].addr,
99799a2dd95SBruce Richardson 							dlen,
99899a2dd95SBruce Richardson 							perm)))
99999a2dd95SBruce Richardson 				return -1;
100099a2dd95SBruce Richardson 		}
100199a2dd95SBruce Richardson 
100299a2dd95SBruce Richardson 		if ((descs[avail_idx].flags & VRING_DESC_F_NEXT) == 0)
100399a2dd95SBruce Richardson 			break;
100499a2dd95SBruce Richardson 
100599a2dd95SBruce Richardson 		if (++avail_idx >= vq->size) {
100699a2dd95SBruce Richardson 			avail_idx -= vq->size;
100799a2dd95SBruce Richardson 			wrap_counter ^= 1;
100899a2dd95SBruce Richardson 		}
100999a2dd95SBruce Richardson 	}
101099a2dd95SBruce Richardson 
101199a2dd95SBruce Richardson 	*vec_idx = vec_id;
101299a2dd95SBruce Richardson 
101399a2dd95SBruce Richardson 	return 0;
101499a2dd95SBruce Richardson }
101599a2dd95SBruce Richardson 
101699a2dd95SBruce Richardson static __rte_noinline void
101799a2dd95SBruce Richardson copy_vnet_hdr_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
101899a2dd95SBruce Richardson 		struct buf_vector *buf_vec,
101999a2dd95SBruce Richardson 		struct virtio_net_hdr_mrg_rxbuf *hdr)
1020bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
102199a2dd95SBruce Richardson {
102299a2dd95SBruce Richardson 	uint64_t len;
102399a2dd95SBruce Richardson 	uint64_t remain = dev->vhost_hlen;
102499a2dd95SBruce Richardson 	uint64_t src = (uint64_t)(uintptr_t)hdr, dst;
102599a2dd95SBruce Richardson 	uint64_t iova = buf_vec->buf_iova;
102699a2dd95SBruce Richardson 
102799a2dd95SBruce Richardson 	while (remain) {
102899a2dd95SBruce Richardson 		len = RTE_MIN(remain,
102999a2dd95SBruce Richardson 				buf_vec->buf_len);
103099a2dd95SBruce Richardson 		dst = buf_vec->buf_addr;
103199a2dd95SBruce Richardson 		rte_memcpy((void *)(uintptr_t)dst,
103299a2dd95SBruce Richardson 				(void *)(uintptr_t)src,
103399a2dd95SBruce Richardson 				len);
103499a2dd95SBruce Richardson 
103599a2dd95SBruce Richardson 		PRINT_PACKET(dev, (uintptr_t)dst,
103699a2dd95SBruce Richardson 				(uint32_t)len, 0);
103799a2dd95SBruce Richardson 		vhost_log_cache_write_iova(dev, vq,
103899a2dd95SBruce Richardson 				iova, len);
103999a2dd95SBruce Richardson 
104099a2dd95SBruce Richardson 		remain -= len;
104199a2dd95SBruce Richardson 		iova += len;
104299a2dd95SBruce Richardson 		src += len;
104399a2dd95SBruce Richardson 		buf_vec++;
104499a2dd95SBruce Richardson 	}
104599a2dd95SBruce Richardson }
104699a2dd95SBruce Richardson 
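/*
 * Async iovec iterator helpers: async_iter_initialize() starts the iterator
 * for a new packet, async_iter_add_iovec() appends one source/destination
 * segment, async_iter_finalize() commits the current packet's iterator,
 * async_iter_cancel() rolls back the segments added for it, and
 * async_iter_reset() clears all iterators.
 */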
104799a2dd95SBruce Richardson static __rte_always_inline int
104802798b07SMaxime Coquelin async_iter_initialize(struct virtio_net *dev, struct vhost_async *async)
1049dbfa4c0bSMaxime Coquelin {
105053d3f477SJiayu Hu 	struct vhost_iov_iter *iter;
1051dbfa4c0bSMaxime Coquelin 
1052dbfa4c0bSMaxime Coquelin 	if (unlikely(async->iovec_idx >= VHOST_MAX_ASYNC_VEC)) {
10530e21c7c0SDavid Marchand 		VHOST_DATA_LOG(dev->ifname, ERR, "no more async iovec available");
1054dbfa4c0bSMaxime Coquelin 		return -1;
1055dbfa4c0bSMaxime Coquelin 	}
1056dbfa4c0bSMaxime Coquelin 
1057dbfa4c0bSMaxime Coquelin 	iter = async->iov_iter + async->iter_idx;
1058dbfa4c0bSMaxime Coquelin 	iter->iov = async->iovec + async->iovec_idx;
1059dbfa4c0bSMaxime Coquelin 	iter->nr_segs = 0;
1060dbfa4c0bSMaxime Coquelin 
1061dbfa4c0bSMaxime Coquelin 	return 0;
1062dbfa4c0bSMaxime Coquelin }
1063dbfa4c0bSMaxime Coquelin 
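/*
 * Append one source/destination/length segment to the iterator being
 * built. Exhaustion of the iovec array is reported only once via the
 * error log and makes the current packet fail.
 */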
1064dbfa4c0bSMaxime Coquelin static __rte_always_inline int
106502798b07SMaxime Coquelin async_iter_add_iovec(struct virtio_net *dev, struct vhost_async *async,
106602798b07SMaxime Coquelin 		void *src, void *dst, size_t len)
1067dbfa4c0bSMaxime Coquelin {
106853d3f477SJiayu Hu 	struct vhost_iov_iter *iter;
106953d3f477SJiayu Hu 	struct vhost_iovec *iovec;
1070dbfa4c0bSMaxime Coquelin 
1071dbfa4c0bSMaxime Coquelin 	if (unlikely(async->iovec_idx >= VHOST_MAX_ASYNC_VEC)) {
1072dbfa4c0bSMaxime Coquelin 		static bool vhost_max_async_vec_log;
1073dbfa4c0bSMaxime Coquelin 
1074dbfa4c0bSMaxime Coquelin 		if (!vhost_max_async_vec_log) {
10750e21c7c0SDavid Marchand 			VHOST_DATA_LOG(dev->ifname, ERR, "no more async iovec available");
1076dbfa4c0bSMaxime Coquelin 			vhost_max_async_vec_log = true;
1077dbfa4c0bSMaxime Coquelin 		}
1078dbfa4c0bSMaxime Coquelin 
1079dbfa4c0bSMaxime Coquelin 		return -1;
1080dbfa4c0bSMaxime Coquelin 	}
1081dbfa4c0bSMaxime Coquelin 
1082dbfa4c0bSMaxime Coquelin 	iter = async->iov_iter + async->iter_idx;
1083dbfa4c0bSMaxime Coquelin 	iovec = async->iovec + async->iovec_idx;
1084dbfa4c0bSMaxime Coquelin 
1085dbfa4c0bSMaxime Coquelin 	iovec->src_addr = src;
1086dbfa4c0bSMaxime Coquelin 	iovec->dst_addr = dst;
1087dbfa4c0bSMaxime Coquelin 	iovec->len = len;
1088dbfa4c0bSMaxime Coquelin 
1089dbfa4c0bSMaxime Coquelin 	iter->nr_segs++;
1090dbfa4c0bSMaxime Coquelin 	async->iovec_idx++;
1091dbfa4c0bSMaxime Coquelin 
1092dbfa4c0bSMaxime Coquelin 	return 0;
1093dbfa4c0bSMaxime Coquelin }
1094dbfa4c0bSMaxime Coquelin 
1095dbfa4c0bSMaxime Coquelin static __rte_always_inline void
1096dbfa4c0bSMaxime Coquelin async_iter_finalize(struct vhost_async *async)
1097dbfa4c0bSMaxime Coquelin {
1098dbfa4c0bSMaxime Coquelin 	async->iter_idx++;
1099dbfa4c0bSMaxime Coquelin }
1100dbfa4c0bSMaxime Coquelin 
1101dbfa4c0bSMaxime Coquelin static __rte_always_inline void
1102dbfa4c0bSMaxime Coquelin async_iter_cancel(struct vhost_async *async)
1103dbfa4c0bSMaxime Coquelin {
110453d3f477SJiayu Hu 	struct vhost_iov_iter *iter;
1105dbfa4c0bSMaxime Coquelin 
1106dbfa4c0bSMaxime Coquelin 	iter = async->iov_iter + async->iter_idx;
1107dbfa4c0bSMaxime Coquelin 	async->iovec_idx -= iter->nr_segs;
1108dbfa4c0bSMaxime Coquelin 	iter->nr_segs = 0;
1109dbfa4c0bSMaxime Coquelin 	iter->iov = NULL;
1110dbfa4c0bSMaxime Coquelin }
1111dbfa4c0bSMaxime Coquelin 
1112dbfa4c0bSMaxime Coquelin static __rte_always_inline void
1113dbfa4c0bSMaxime Coquelin async_iter_reset(struct vhost_async *async)
1114dbfa4c0bSMaxime Coquelin {
1115dbfa4c0bSMaxime Coquelin 	async->iter_idx = 0;
1116dbfa4c0bSMaxime Coquelin 	async->iovec_idx = 0;
1117dbfa4c0bSMaxime Coquelin }
1118dbfa4c0bSMaxime Coquelin 
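/*
 * Queue one buffer segment on the async iterator: the guest-physical
 * range is translated to host-contiguous chunks with gpa_to_first_hpa()
 * and one iovec is added per chunk. to_desc selects the copy direction
 * (mbuf to descriptor for enqueue, descriptor to mbuf for dequeue).
 */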
1119dbfa4c0bSMaxime Coquelin static __rte_always_inline int
1120d796fee5SXuan Ding async_fill_seg(struct virtio_net *dev, struct vhost_virtqueue *vq,
1121dbfa4c0bSMaxime Coquelin 		struct rte_mbuf *m, uint32_t mbuf_offset,
1122d796fee5SXuan Ding 		uint64_t buf_iova, uint32_t cpy_len, bool to_desc)
112303f77d66SEelco Chaudron 	__rte_shared_locks_required(&vq->access_lock)
1124bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
1125dbfa4c0bSMaxime Coquelin {
1126dbfa4c0bSMaxime Coquelin 	struct vhost_async *async = vq->async;
1127dbfa4c0bSMaxime Coquelin 	uint64_t mapped_len;
1128dbfa4c0bSMaxime Coquelin 	uint32_t buf_offset = 0;
1129d796fee5SXuan Ding 	void *src, *dst;
11302ec35974SXuan Ding 	void *host_iova;
1131dbfa4c0bSMaxime Coquelin 
1132dbfa4c0bSMaxime Coquelin 	while (cpy_len) {
11332ec35974SXuan Ding 		host_iova = (void *)(uintptr_t)gpa_to_first_hpa(dev,
1134dbfa4c0bSMaxime Coquelin 				buf_iova + buf_offset, cpy_len, &mapped_len);
11352ec35974SXuan Ding 		if (unlikely(!host_iova)) {
11360e21c7c0SDavid Marchand 			VHOST_DATA_LOG(dev->ifname, ERR,
11370e21c7c0SDavid Marchand 				"%s: failed to get host iova.",
113836c525a0SDavid Marchand 				__func__);
1139dbfa4c0bSMaxime Coquelin 			return -1;
1140dbfa4c0bSMaxime Coquelin 		}
1141dbfa4c0bSMaxime Coquelin 
1142d796fee5SXuan Ding 		if (to_desc) {
1143d796fee5SXuan Ding 			src = (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, mbuf_offset);
1144d796fee5SXuan Ding 			dst = host_iova;
1145d796fee5SXuan Ding 		} else {
1146d796fee5SXuan Ding 			src = host_iova;
1147d796fee5SXuan Ding 			dst = (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, mbuf_offset);
1148d796fee5SXuan Ding 		}
1149d796fee5SXuan Ding 
1150d796fee5SXuan Ding 		if (unlikely(async_iter_add_iovec(dev, async, src, dst, (size_t)mapped_len)))
1151dbfa4c0bSMaxime Coquelin 			return -1;
1152dbfa4c0bSMaxime Coquelin 
1153dbfa4c0bSMaxime Coquelin 		cpy_len -= (uint32_t)mapped_len;
1154dbfa4c0bSMaxime Coquelin 		mbuf_offset += (uint32_t)mapped_len;
1155dbfa4c0bSMaxime Coquelin 		buf_offset += (uint32_t)mapped_len;
1156dbfa4c0bSMaxime Coquelin 	}
1157dbfa4c0bSMaxime Coquelin 
1158dbfa4c0bSMaxime Coquelin 	return 0;
1159dbfa4c0bSMaxime Coquelin }
1160dbfa4c0bSMaxime Coquelin 
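/*
 * CPU copy of one buffer segment. Copies larger than MAX_BATCH_LEN, or
 * any copy once the batch array is full, are done immediately; smaller
 * ones are deferred into vq->batch_copy_elems and flushed in one pass
 * later. Writes towards the descriptor are logged for live migration.
 */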
1161b84e85e3SMaxime Coquelin static __rte_always_inline void
11626d823bb3SXuan Ding sync_fill_seg(struct virtio_net *dev, struct vhost_virtqueue *vq,
1163b84e85e3SMaxime Coquelin 		struct rte_mbuf *m, uint32_t mbuf_offset,
11646d823bb3SXuan Ding 		uint64_t buf_addr, uint64_t buf_iova, uint32_t cpy_len, bool to_desc)
1165bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
1166b84e85e3SMaxime Coquelin {
1167b84e85e3SMaxime Coquelin 	struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
1168b84e85e3SMaxime Coquelin 
1169b84e85e3SMaxime Coquelin 	if (likely(cpy_len > MAX_BATCH_LEN || vq->batch_copy_nb_elems >= vq->size)) {
11706d823bb3SXuan Ding 		if (to_desc) {
1171b84e85e3SMaxime Coquelin 			rte_memcpy((void *)((uintptr_t)(buf_addr)),
1172b84e85e3SMaxime Coquelin 				rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
1173b84e85e3SMaxime Coquelin 				cpy_len);
1174cd79d1b0SXuan Ding 			vhost_log_cache_write_iova(dev, vq, buf_iova, cpy_len);
1175cd79d1b0SXuan Ding 			PRINT_PACKET(dev, (uintptr_t)(buf_addr), cpy_len, 0);
11766d823bb3SXuan Ding 		} else {
11776d823bb3SXuan Ding 			rte_memcpy(rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
11786d823bb3SXuan Ding 				(void *)((uintptr_t)(buf_addr)),
11796d823bb3SXuan Ding 				cpy_len);
11806d823bb3SXuan Ding 		}
1181b84e85e3SMaxime Coquelin 	} else {
11826d823bb3SXuan Ding 		if (to_desc) {
1183b84e85e3SMaxime Coquelin 			batch_copy[vq->batch_copy_nb_elems].dst =
1184b84e85e3SMaxime Coquelin 				(void *)((uintptr_t)(buf_addr));
1185b84e85e3SMaxime Coquelin 			batch_copy[vq->batch_copy_nb_elems].src =
1186b84e85e3SMaxime Coquelin 				rte_pktmbuf_mtod_offset(m, void *, mbuf_offset);
1187cd79d1b0SXuan Ding 			batch_copy[vq->batch_copy_nb_elems].log_addr = buf_iova;
11886d823bb3SXuan Ding 		} else {
11896d823bb3SXuan Ding 			batch_copy[vq->batch_copy_nb_elems].dst =
11906d823bb3SXuan Ding 				rte_pktmbuf_mtod_offset(m, void *, mbuf_offset);
11916d823bb3SXuan Ding 			batch_copy[vq->batch_copy_nb_elems].src =
11926d823bb3SXuan Ding 				(void *)((uintptr_t)(buf_addr));
11936d823bb3SXuan Ding 		}
119416b29775SMaxime Coquelin 		batch_copy[vq->batch_copy_nb_elems].len = cpy_len;
1195b84e85e3SMaxime Coquelin 		vq->batch_copy_nb_elems++;
1196b84e85e3SMaxime Coquelin 	}
1197b84e85e3SMaxime Coquelin }
1198b84e85e3SMaxime Coquelin 
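/*
 * Copy one mbuf chain into the guest buffers described by buf_vec:
 * build and write the virtio-net header (through copy_vnet_hdr_to_desc()
 * when it spans buffers), then copy the payload segment by segment.
 * With is_async the segments are only queued on the async iterator for
 * a later DMA submission; on failure the iterator is cancelled.
 */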
1199dbfa4c0bSMaxime Coquelin static __rte_always_inline int
1200816a565bSMaxime Coquelin mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
120199a2dd95SBruce Richardson 		struct rte_mbuf *m, struct buf_vector *buf_vec,
1202816a565bSMaxime Coquelin 		uint16_t nr_vec, uint16_t num_buffers, bool is_async)
120303f77d66SEelco Chaudron 	__rte_shared_locks_required(&vq->access_lock)
1204bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
120599a2dd95SBruce Richardson {
120699a2dd95SBruce Richardson 	uint32_t vec_idx = 0;
120799a2dd95SBruce Richardson 	uint32_t mbuf_offset, mbuf_avail;
120899a2dd95SBruce Richardson 	uint32_t buf_offset, buf_avail;
120999a2dd95SBruce Richardson 	uint64_t buf_addr, buf_iova, buf_len;
121099a2dd95SBruce Richardson 	uint32_t cpy_len;
121199a2dd95SBruce Richardson 	uint64_t hdr_addr;
121299a2dd95SBruce Richardson 	struct rte_mbuf *hdr_mbuf;
121399a2dd95SBruce Richardson 	struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL;
1214816a565bSMaxime Coquelin 	struct vhost_async *async = vq->async;
121599a2dd95SBruce Richardson 
1216b84e85e3SMaxime Coquelin 	if (unlikely(m == NULL))
1217b84e85e3SMaxime Coquelin 		return -1;
121899a2dd95SBruce Richardson 
121999a2dd95SBruce Richardson 	buf_addr = buf_vec[vec_idx].buf_addr;
122099a2dd95SBruce Richardson 	buf_iova = buf_vec[vec_idx].buf_iova;
122199a2dd95SBruce Richardson 	buf_len = buf_vec[vec_idx].buf_len;
122299a2dd95SBruce Richardson 
1223b84e85e3SMaxime Coquelin 	if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1))
1224b84e85e3SMaxime Coquelin 		return -1;
122599a2dd95SBruce Richardson 
122699a2dd95SBruce Richardson 	hdr_mbuf = m;
122799a2dd95SBruce Richardson 	hdr_addr = buf_addr;
122899a2dd95SBruce Richardson 	if (unlikely(buf_len < dev->vhost_hlen)) {
122999a2dd95SBruce Richardson 		memset(&tmp_hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf));
123099a2dd95SBruce Richardson 		hdr = &tmp_hdr;
123199a2dd95SBruce Richardson 	} else
123299a2dd95SBruce Richardson 		hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr;
123399a2dd95SBruce Richardson 
12340e21c7c0SDavid Marchand 	VHOST_DATA_LOG(dev->ifname, DEBUG, "RX: num merge buffers %d", num_buffers);
123599a2dd95SBruce Richardson 
123699a2dd95SBruce Richardson 	if (unlikely(buf_len < dev->vhost_hlen)) {
123799a2dd95SBruce Richardson 		buf_offset = dev->vhost_hlen - buf_len;
123899a2dd95SBruce Richardson 		vec_idx++;
123999a2dd95SBruce Richardson 		buf_addr = buf_vec[vec_idx].buf_addr;
124099a2dd95SBruce Richardson 		buf_iova = buf_vec[vec_idx].buf_iova;
124199a2dd95SBruce Richardson 		buf_len = buf_vec[vec_idx].buf_len;
124299a2dd95SBruce Richardson 		buf_avail = buf_len - buf_offset;
124399a2dd95SBruce Richardson 	} else {
124499a2dd95SBruce Richardson 		buf_offset = dev->vhost_hlen;
124599a2dd95SBruce Richardson 		buf_avail = buf_len - dev->vhost_hlen;
124699a2dd95SBruce Richardson 	}
124799a2dd95SBruce Richardson 
124899a2dd95SBruce Richardson 	mbuf_avail  = rte_pktmbuf_data_len(m);
124999a2dd95SBruce Richardson 	mbuf_offset = 0;
1250816a565bSMaxime Coquelin 
1251816a565bSMaxime Coquelin 	if (is_async) {
125202798b07SMaxime Coquelin 		if (async_iter_initialize(dev, async))
1253816a565bSMaxime Coquelin 			return -1;
1254816a565bSMaxime Coquelin 	}
1255816a565bSMaxime Coquelin 
125699a2dd95SBruce Richardson 	while (mbuf_avail != 0 || m->next != NULL) {
125799a2dd95SBruce Richardson 		/* done with current buf, get the next one */
125899a2dd95SBruce Richardson 		if (buf_avail == 0) {
125999a2dd95SBruce Richardson 			vec_idx++;
1260b84e85e3SMaxime Coquelin 			if (unlikely(vec_idx >= nr_vec))
1261b84e85e3SMaxime Coquelin 				goto error;
126299a2dd95SBruce Richardson 
126399a2dd95SBruce Richardson 			buf_addr = buf_vec[vec_idx].buf_addr;
126499a2dd95SBruce Richardson 			buf_iova = buf_vec[vec_idx].buf_iova;
126599a2dd95SBruce Richardson 			buf_len = buf_vec[vec_idx].buf_len;
126699a2dd95SBruce Richardson 
126799a2dd95SBruce Richardson 			buf_offset = 0;
126899a2dd95SBruce Richardson 			buf_avail  = buf_len;
126999a2dd95SBruce Richardson 		}
127099a2dd95SBruce Richardson 
127199a2dd95SBruce Richardson 		/* done with current mbuf, get the next one */
127299a2dd95SBruce Richardson 		if (mbuf_avail == 0) {
127399a2dd95SBruce Richardson 			m = m->next;
127499a2dd95SBruce Richardson 
127599a2dd95SBruce Richardson 			mbuf_offset = 0;
127699a2dd95SBruce Richardson 			mbuf_avail  = rte_pktmbuf_data_len(m);
127799a2dd95SBruce Richardson 		}
127899a2dd95SBruce Richardson 
127999a2dd95SBruce Richardson 		if (hdr_addr) {
128099a2dd95SBruce Richardson 			virtio_enqueue_offload(hdr_mbuf, &hdr->hdr);
128199a2dd95SBruce Richardson 			if (rxvq_is_mergeable(dev))
128299a2dd95SBruce Richardson 				ASSIGN_UNLESS_EQUAL(hdr->num_buffers,
128399a2dd95SBruce Richardson 						num_buffers);
128499a2dd95SBruce Richardson 
128599a2dd95SBruce Richardson 			if (unlikely(hdr == &tmp_hdr)) {
128699a2dd95SBruce Richardson 				copy_vnet_hdr_to_desc(dev, vq, buf_vec, hdr);
128799a2dd95SBruce Richardson 			} else {
128899a2dd95SBruce Richardson 				PRINT_PACKET(dev, (uintptr_t)hdr_addr,
128999a2dd95SBruce Richardson 						dev->vhost_hlen, 0);
129099a2dd95SBruce Richardson 				vhost_log_cache_write_iova(dev, vq,
129199a2dd95SBruce Richardson 						buf_vec[0].buf_iova,
129299a2dd95SBruce Richardson 						dev->vhost_hlen);
129399a2dd95SBruce Richardson 			}
129499a2dd95SBruce Richardson 
129599a2dd95SBruce Richardson 			hdr_addr = 0;
129699a2dd95SBruce Richardson 		}
129799a2dd95SBruce Richardson 
129899a2dd95SBruce Richardson 		cpy_len = RTE_MIN(buf_avail, mbuf_avail);
129999a2dd95SBruce Richardson 
1300816a565bSMaxime Coquelin 		if (is_async) {
1301d796fee5SXuan Ding 			if (async_fill_seg(dev, vq, m, mbuf_offset,
1302d796fee5SXuan Ding 					   buf_iova + buf_offset, cpy_len, true) < 0)
1303816a565bSMaxime Coquelin 				goto error;
1304816a565bSMaxime Coquelin 		} else {
13056d823bb3SXuan Ding 			sync_fill_seg(dev, vq, m, mbuf_offset,
1306b84e85e3SMaxime Coquelin 				      buf_addr + buf_offset,
13076d823bb3SXuan Ding 				      buf_iova + buf_offset, cpy_len, true);
1308abeb8652SJiayu Hu 		}
130999a2dd95SBruce Richardson 
1310dbfa4c0bSMaxime Coquelin 		mbuf_avail  -= cpy_len;
1311dbfa4c0bSMaxime Coquelin 		mbuf_offset += cpy_len;
1312dbfa4c0bSMaxime Coquelin 		buf_avail  -= cpy_len;
1313dbfa4c0bSMaxime Coquelin 		buf_offset += cpy_len;
131499a2dd95SBruce Richardson 	}
131599a2dd95SBruce Richardson 
1316816a565bSMaxime Coquelin 	if (is_async)
1317d5d25cfdSMaxime Coquelin 		async_iter_finalize(async);
1318d5d25cfdSMaxime Coquelin 
1319d5d25cfdSMaxime Coquelin 	return 0;
1320d5d25cfdSMaxime Coquelin error:
1321816a565bSMaxime Coquelin 	if (is_async)
1322d5d25cfdSMaxime Coquelin 		async_iter_cancel(async);
1323d5d25cfdSMaxime Coquelin 
1324d5d25cfdSMaxime Coquelin 	return -1;
132599a2dd95SBruce Richardson }
132699a2dd95SBruce Richardson 
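/*
 * Reserve descriptors on the packed ring for a single packet, allowing
 * multiple buffers only when mergeable RX buffers are negotiated, copy
 * the packet synchronously and record the buffers in the shadow ring.
 */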
132799a2dd95SBruce Richardson static __rte_always_inline int
132899a2dd95SBruce Richardson vhost_enqueue_single_packed(struct virtio_net *dev,
132999a2dd95SBruce Richardson 			    struct vhost_virtqueue *vq,
133099a2dd95SBruce Richardson 			    struct rte_mbuf *pkt,
133199a2dd95SBruce Richardson 			    struct buf_vector *buf_vec,
133299a2dd95SBruce Richardson 			    uint16_t *nr_descs)
133303f77d66SEelco Chaudron 	__rte_shared_locks_required(&vq->access_lock)
1334bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
133599a2dd95SBruce Richardson {
133699a2dd95SBruce Richardson 	uint16_t nr_vec = 0;
133799a2dd95SBruce Richardson 	uint16_t avail_idx = vq->last_avail_idx;
133899a2dd95SBruce Richardson 	uint16_t max_tries, tries = 0;
133999a2dd95SBruce Richardson 	uint16_t buf_id = 0;
134099a2dd95SBruce Richardson 	uint32_t len = 0;
134199a2dd95SBruce Richardson 	uint16_t desc_count;
13424226aa9cSMaxime Coquelin 	uint64_t size = pkt->pkt_len + sizeof(struct virtio_net_hdr_mrg_rxbuf);
134399a2dd95SBruce Richardson 	uint16_t num_buffers = 0;
134499a2dd95SBruce Richardson 	uint32_t buffer_len[vq->size];
134599a2dd95SBruce Richardson 	uint16_t buffer_buf_id[vq->size];
134699a2dd95SBruce Richardson 	uint16_t buffer_desc_count[vq->size];
134799a2dd95SBruce Richardson 
134899a2dd95SBruce Richardson 	if (rxvq_is_mergeable(dev))
134999a2dd95SBruce Richardson 		max_tries = vq->size - 1;
135099a2dd95SBruce Richardson 	else
135199a2dd95SBruce Richardson 		max_tries = 1;
135299a2dd95SBruce Richardson 
135399a2dd95SBruce Richardson 	while (size > 0) {
135499a2dd95SBruce Richardson 		/*
135599a2dd95SBruce Richardson 		 * If we have tried all available ring items and still
135699a2dd95SBruce Richardson 		 * cannot get enough buffers, something abnormal has
135799a2dd95SBruce Richardson 		 * happened.
135899a2dd95SBruce Richardson 		 */
135999a2dd95SBruce Richardson 		if (unlikely(++tries > max_tries))
136099a2dd95SBruce Richardson 			return -1;
136199a2dd95SBruce Richardson 
136299a2dd95SBruce Richardson 		if (unlikely(fill_vec_buf_packed(dev, vq,
136399a2dd95SBruce Richardson 						avail_idx, &desc_count,
136499a2dd95SBruce Richardson 						buf_vec, &nr_vec,
136599a2dd95SBruce Richardson 						&buf_id, &len,
136699a2dd95SBruce Richardson 						VHOST_ACCESS_RW) < 0))
136799a2dd95SBruce Richardson 			return -1;
136899a2dd95SBruce Richardson 
136999a2dd95SBruce Richardson 		len = RTE_MIN(len, size);
137099a2dd95SBruce Richardson 		size -= len;
137199a2dd95SBruce Richardson 
137299a2dd95SBruce Richardson 		buffer_len[num_buffers] = len;
137399a2dd95SBruce Richardson 		buffer_buf_id[num_buffers] = buf_id;
137499a2dd95SBruce Richardson 		buffer_desc_count[num_buffers] = desc_count;
137599a2dd95SBruce Richardson 		num_buffers += 1;
137699a2dd95SBruce Richardson 
137799a2dd95SBruce Richardson 		*nr_descs += desc_count;
137899a2dd95SBruce Richardson 		avail_idx += desc_count;
137999a2dd95SBruce Richardson 		if (avail_idx >= vq->size)
138099a2dd95SBruce Richardson 			avail_idx -= vq->size;
138199a2dd95SBruce Richardson 	}
138299a2dd95SBruce Richardson 
1383816a565bSMaxime Coquelin 	if (mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, num_buffers, false) < 0)
138499a2dd95SBruce Richardson 		return -1;
138599a2dd95SBruce Richardson 
138699a2dd95SBruce Richardson 	vhost_shadow_enqueue_single_packed(dev, vq, buffer_len, buffer_buf_id,
138799a2dd95SBruce Richardson 					   buffer_desc_count, num_buffers);
138899a2dd95SBruce Richardson 
138999a2dd95SBruce Richardson 	return 0;
139099a2dd95SBruce Richardson }
139199a2dd95SBruce Richardson 
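/*
 * Synchronous enqueue path for split rings: read the avail index with
 * acquire ordering, reserve buffers and copy each packet, then flush the
 * batched copies and the shadow used ring and notify the guest.
 */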
139299a2dd95SBruce Richardson static __rte_noinline uint32_t
139399a2dd95SBruce Richardson virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
139499a2dd95SBruce Richardson 	struct rte_mbuf **pkts, uint32_t count)
139503f77d66SEelco Chaudron 	__rte_shared_locks_required(&vq->access_lock)
1396bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
139799a2dd95SBruce Richardson {
139899a2dd95SBruce Richardson 	uint32_t pkt_idx = 0;
139999a2dd95SBruce Richardson 	uint16_t num_buffers;
140099a2dd95SBruce Richardson 	struct buf_vector buf_vec[BUF_VECTOR_MAX];
140199a2dd95SBruce Richardson 	uint16_t avail_head;
140299a2dd95SBruce Richardson 
140399a2dd95SBruce Richardson 	/*
140499a2dd95SBruce Richardson 	 * The ordering between avail index and
140599a2dd95SBruce Richardson 	 * desc reads needs to be enforced.
140699a2dd95SBruce Richardson 	 */
14075147b641STyler Retzlaff 	avail_head = rte_atomic_load_explicit((unsigned short __rte_atomic *)&vq->avail->idx,
14085147b641STyler Retzlaff 		rte_memory_order_acquire);
140999a2dd95SBruce Richardson 
141099a2dd95SBruce Richardson 	rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
141199a2dd95SBruce Richardson 
141299a2dd95SBruce Richardson 	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
14134226aa9cSMaxime Coquelin 		uint64_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
141499a2dd95SBruce Richardson 		uint16_t nr_vec = 0;
141599a2dd95SBruce Richardson 
141699a2dd95SBruce Richardson 		if (unlikely(reserve_avail_buf_split(dev, vq,
141799a2dd95SBruce Richardson 						pkt_len, buf_vec, &num_buffers,
141899a2dd95SBruce Richardson 						avail_head, &nr_vec) < 0)) {
14190e21c7c0SDavid Marchand 			VHOST_DATA_LOG(dev->ifname, DEBUG,
14200e21c7c0SDavid Marchand 				"failed to get enough desc from vring");
142199a2dd95SBruce Richardson 			vq->shadow_used_idx -= num_buffers;
142299a2dd95SBruce Richardson 			break;
142399a2dd95SBruce Richardson 		}
142499a2dd95SBruce Richardson 
14250e21c7c0SDavid Marchand 		VHOST_DATA_LOG(dev->ifname, DEBUG,
14260e21c7c0SDavid Marchand 			"current index %d | end index %d",
142736c525a0SDavid Marchand 			vq->last_avail_idx, vq->last_avail_idx + num_buffers);
142899a2dd95SBruce Richardson 
1429816a565bSMaxime Coquelin 		if (mbuf_to_desc(dev, vq, pkts[pkt_idx], buf_vec, nr_vec,
1430816a565bSMaxime Coquelin 					num_buffers, false) < 0) {
143199a2dd95SBruce Richardson 			vq->shadow_used_idx -= num_buffers;
143299a2dd95SBruce Richardson 			break;
143399a2dd95SBruce Richardson 		}
143499a2dd95SBruce Richardson 
143599a2dd95SBruce Richardson 		vq->last_avail_idx += num_buffers;
143615677ca2SMaxime Coquelin 		vhost_virtqueue_reconnect_log_split(vq);
143799a2dd95SBruce Richardson 	}
143899a2dd95SBruce Richardson 
143999a2dd95SBruce Richardson 	do_data_copy_enqueue(dev, vq);
144099a2dd95SBruce Richardson 
144199a2dd95SBruce Richardson 	if (likely(vq->shadow_used_idx)) {
144299a2dd95SBruce Richardson 		flush_shadow_used_ring_split(dev, vq);
144399a2dd95SBruce Richardson 		vhost_vring_call_split(dev, vq);
144499a2dd95SBruce Richardson 	}
144599a2dd95SBruce Richardson 
144699a2dd95SBruce Richardson 	return pkt_idx;
144799a2dd95SBruce Richardson }
144899a2dd95SBruce Richardson 
144999a2dd95SBruce Richardson static __rte_always_inline int
14502e3f1ab0SCheng Jiang virtio_dev_rx_sync_batch_check(struct virtio_net *dev,
145199a2dd95SBruce Richardson 			   struct vhost_virtqueue *vq,
14522e3f1ab0SCheng Jiang 			   struct rte_mbuf **pkts,
14532e3f1ab0SCheng Jiang 			   uint64_t *desc_addrs,
14542e3f1ab0SCheng Jiang 			   uint64_t *lens)
1455bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
145699a2dd95SBruce Richardson {
145799a2dd95SBruce Richardson 	bool wrap_counter = vq->avail_wrap_counter;
145899a2dd95SBruce Richardson 	struct vring_packed_desc *descs = vq->desc_packed;
145999a2dd95SBruce Richardson 	uint16_t avail_idx = vq->last_avail_idx;
146099a2dd95SBruce Richardson 	uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
146199a2dd95SBruce Richardson 	uint16_t i;
146299a2dd95SBruce Richardson 
146399a2dd95SBruce Richardson 	if (unlikely(avail_idx & PACKED_BATCH_MASK))
146499a2dd95SBruce Richardson 		return -1;
146599a2dd95SBruce Richardson 
146699a2dd95SBruce Richardson 	if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size))
146799a2dd95SBruce Richardson 		return -1;
146899a2dd95SBruce Richardson 
146999a2dd95SBruce Richardson 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
147099a2dd95SBruce Richardson 		if (unlikely(pkts[i]->next != NULL))
147199a2dd95SBruce Richardson 			return -1;
147299a2dd95SBruce Richardson 		if (unlikely(!desc_is_avail(&descs[avail_idx + i],
147399a2dd95SBruce Richardson 					    wrap_counter)))
147499a2dd95SBruce Richardson 			return -1;
147599a2dd95SBruce Richardson 	}
147699a2dd95SBruce Richardson 
147799a2dd95SBruce Richardson 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
147899a2dd95SBruce Richardson 		lens[i] = descs[avail_idx + i].len;
147999a2dd95SBruce Richardson 
148099a2dd95SBruce Richardson 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
148199a2dd95SBruce Richardson 		if (unlikely(pkts[i]->pkt_len > (lens[i] - buf_offset)))
148299a2dd95SBruce Richardson 			return -1;
148399a2dd95SBruce Richardson 	}
148499a2dd95SBruce Richardson 
148599a2dd95SBruce Richardson 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
148699a2dd95SBruce Richardson 		desc_addrs[i] = vhost_iova_to_vva(dev, vq,
148799a2dd95SBruce Richardson 						  descs[avail_idx + i].addr,
148899a2dd95SBruce Richardson 						  &lens[i],
148999a2dd95SBruce Richardson 						  VHOST_ACCESS_RW);
149099a2dd95SBruce Richardson 
149199a2dd95SBruce Richardson 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
149299a2dd95SBruce Richardson 		if (unlikely(!desc_addrs[i]))
149399a2dd95SBruce Richardson 			return -1;
149499a2dd95SBruce Richardson 		if (unlikely(lens[i] != descs[avail_idx + i].len))
149599a2dd95SBruce Richardson 			return -1;
149699a2dd95SBruce Richardson 	}
149799a2dd95SBruce Richardson 
14982e3f1ab0SCheng Jiang 	return 0;
14992e3f1ab0SCheng Jiang }
15002e3f1ab0SCheng Jiang 
1501f9ebb02bSCheng Jiang static __rte_always_inline int
1502f9ebb02bSCheng Jiang virtio_dev_rx_async_batch_check(struct vhost_virtqueue *vq,
1503f9ebb02bSCheng Jiang 			   struct rte_mbuf **pkts,
1504f9ebb02bSCheng Jiang 			   uint64_t *desc_addrs,
1505f9ebb02bSCheng Jiang 			   uint64_t *lens,
1506f9ebb02bSCheng Jiang 			   int16_t dma_id,
1507f9ebb02bSCheng Jiang 			   uint16_t vchan_id)
1508f9ebb02bSCheng Jiang {
1509f9ebb02bSCheng Jiang 	bool wrap_counter = vq->avail_wrap_counter;
1510f9ebb02bSCheng Jiang 	struct vring_packed_desc *descs = vq->desc_packed;
1511f9ebb02bSCheng Jiang 	uint16_t avail_idx = vq->last_avail_idx;
1512f9ebb02bSCheng Jiang 	uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
1513f9ebb02bSCheng Jiang 	uint16_t i;
1514f9ebb02bSCheng Jiang 
1515f9ebb02bSCheng Jiang 	if (unlikely(avail_idx & PACKED_BATCH_MASK))
1516f9ebb02bSCheng Jiang 		return -1;
1517f9ebb02bSCheng Jiang 
1518f9ebb02bSCheng Jiang 	if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size))
1519f9ebb02bSCheng Jiang 		return -1;
1520f9ebb02bSCheng Jiang 
1521f9ebb02bSCheng Jiang 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
1522f9ebb02bSCheng Jiang 		if (unlikely(pkts[i]->next != NULL))
1523f9ebb02bSCheng Jiang 			return -1;
1524f9ebb02bSCheng Jiang 		if (unlikely(!desc_is_avail(&descs[avail_idx + i],
1525f9ebb02bSCheng Jiang 					    wrap_counter)))
1526f9ebb02bSCheng Jiang 			return -1;
1527f9ebb02bSCheng Jiang 	}
1528f9ebb02bSCheng Jiang 
1529f9ebb02bSCheng Jiang 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
1530f9ebb02bSCheng Jiang 		lens[i] = descs[avail_idx + i].len;
1531f9ebb02bSCheng Jiang 
1532f9ebb02bSCheng Jiang 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
1533f9ebb02bSCheng Jiang 		if (unlikely(pkts[i]->pkt_len > (lens[i] - buf_offset)))
1534f9ebb02bSCheng Jiang 			return -1;
1535f9ebb02bSCheng Jiang 	}
1536f9ebb02bSCheng Jiang 
1537f9ebb02bSCheng Jiang 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
1538f9ebb02bSCheng Jiang 		desc_addrs[i] =  descs[avail_idx + i].addr;
1539f9ebb02bSCheng Jiang 
1540f9ebb02bSCheng Jiang 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
1541f9ebb02bSCheng Jiang 		if (unlikely(!desc_addrs[i]))
1542f9ebb02bSCheng Jiang 			return -1;
1543f9ebb02bSCheng Jiang 		if (unlikely(lens[i] != descs[avail_idx + i].len))
1544f9ebb02bSCheng Jiang 			return -1;
1545f9ebb02bSCheng Jiang 	}
1546f9ebb02bSCheng Jiang 
1547f9ebb02bSCheng Jiang 	if (rte_dma_burst_capacity(dma_id, vchan_id) < PACKED_BATCH_SIZE)
1548f9ebb02bSCheng Jiang 		return -1;
1549f9ebb02bSCheng Jiang 
1550f9ebb02bSCheng Jiang 	return 0;
1551f9ebb02bSCheng Jiang }
1552f9ebb02bSCheng Jiang 
15532e3f1ab0SCheng Jiang static __rte_always_inline void
15542e3f1ab0SCheng Jiang virtio_dev_rx_batch_packed_copy(struct virtio_net *dev,
15552e3f1ab0SCheng Jiang 			   struct vhost_virtqueue *vq,
15562e3f1ab0SCheng Jiang 			   struct rte_mbuf **pkts,
15572e3f1ab0SCheng Jiang 			   uint64_t *desc_addrs,
15582e3f1ab0SCheng Jiang 			   uint64_t *lens)
1559bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
15602e3f1ab0SCheng Jiang {
15612e3f1ab0SCheng Jiang 	uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
15622e3f1ab0SCheng Jiang 	struct virtio_net_hdr_mrg_rxbuf *hdrs[PACKED_BATCH_SIZE];
15632e3f1ab0SCheng Jiang 	struct vring_packed_desc *descs = vq->desc_packed;
15642e3f1ab0SCheng Jiang 	uint16_t avail_idx = vq->last_avail_idx;
15652e3f1ab0SCheng Jiang 	uint16_t ids[PACKED_BATCH_SIZE];
15662e3f1ab0SCheng Jiang 	uint16_t i;
15672e3f1ab0SCheng Jiang 
156899a2dd95SBruce Richardson 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
156999a2dd95SBruce Richardson 		rte_prefetch0((void *)(uintptr_t)desc_addrs[i]);
157099a2dd95SBruce Richardson 		hdrs[i] = (struct virtio_net_hdr_mrg_rxbuf *)
157199a2dd95SBruce Richardson 					(uintptr_t)desc_addrs[i];
157299a2dd95SBruce Richardson 		lens[i] = pkts[i]->pkt_len +
157399a2dd95SBruce Richardson 			sizeof(struct virtio_net_hdr_mrg_rxbuf);
157499a2dd95SBruce Richardson 	}
157599a2dd95SBruce Richardson 
1576b9c1ec85SWenwu Ma 	if (rxvq_is_mergeable(dev)) {
1577b9c1ec85SWenwu Ma 		vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
1578b9c1ec85SWenwu Ma 			ASSIGN_UNLESS_EQUAL(hdrs[i]->num_buffers, 1);
1579b9c1ec85SWenwu Ma 		}
1580b9c1ec85SWenwu Ma 	}
1581b9c1ec85SWenwu Ma 
158299a2dd95SBruce Richardson 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
158399a2dd95SBruce Richardson 		virtio_enqueue_offload(pkts[i], &hdrs[i]->hdr);
158499a2dd95SBruce Richardson 
158599a2dd95SBruce Richardson 	vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE);
158699a2dd95SBruce Richardson 
158799a2dd95SBruce Richardson 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
158899a2dd95SBruce Richardson 		rte_memcpy((void *)(uintptr_t)(desc_addrs[i] + buf_offset),
158999a2dd95SBruce Richardson 			   rte_pktmbuf_mtod_offset(pkts[i], void *, 0),
159099a2dd95SBruce Richardson 			   pkts[i]->pkt_len);
159199a2dd95SBruce Richardson 	}
159299a2dd95SBruce Richardson 
159399a2dd95SBruce Richardson 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
159499a2dd95SBruce Richardson 		vhost_log_cache_write_iova(dev, vq, descs[avail_idx + i].addr,
159599a2dd95SBruce Richardson 					   lens[i]);
159699a2dd95SBruce Richardson 
159799a2dd95SBruce Richardson 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
159899a2dd95SBruce Richardson 		ids[i] = descs[avail_idx + i].id;
159999a2dd95SBruce Richardson 
160099a2dd95SBruce Richardson 	vhost_flush_enqueue_batch_packed(dev, vq, lens, ids);
16012e3f1ab0SCheng Jiang }
16022e3f1ab0SCheng Jiang 
16032e3f1ab0SCheng Jiang static __rte_always_inline int
16042e3f1ab0SCheng Jiang virtio_dev_rx_sync_batch_packed(struct virtio_net *dev,
16052e3f1ab0SCheng Jiang 			   struct vhost_virtqueue *vq,
16062e3f1ab0SCheng Jiang 			   struct rte_mbuf **pkts)
1607bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
16082e3f1ab0SCheng Jiang {
16092e3f1ab0SCheng Jiang 	uint64_t desc_addrs[PACKED_BATCH_SIZE];
16102e3f1ab0SCheng Jiang 	uint64_t lens[PACKED_BATCH_SIZE];
16112e3f1ab0SCheng Jiang 
16122e3f1ab0SCheng Jiang 	if (virtio_dev_rx_sync_batch_check(dev, vq, pkts, desc_addrs, lens) == -1)
16132e3f1ab0SCheng Jiang 		return -1;
16142e3f1ab0SCheng Jiang 
16152e3f1ab0SCheng Jiang 	if (vq->shadow_used_idx) {
16162e3f1ab0SCheng Jiang 		do_data_copy_enqueue(dev, vq);
16172e3f1ab0SCheng Jiang 		vhost_flush_enqueue_shadow_packed(dev, vq);
16182e3f1ab0SCheng Jiang 	}
16192e3f1ab0SCheng Jiang 
16202e3f1ab0SCheng Jiang 	virtio_dev_rx_batch_packed_copy(dev, vq, pkts, desc_addrs, lens);
16212e3f1ab0SCheng Jiang 
16222e3f1ab0SCheng Jiang 	return 0;
16232e3f1ab0SCheng Jiang }
16242e3f1ab0SCheng Jiang 
162599a2dd95SBruce Richardson static __rte_always_inline int16_t
162699a2dd95SBruce Richardson virtio_dev_rx_single_packed(struct virtio_net *dev,
162799a2dd95SBruce Richardson 			    struct vhost_virtqueue *vq,
162899a2dd95SBruce Richardson 			    struct rte_mbuf *pkt)
162903f77d66SEelco Chaudron 	__rte_shared_locks_required(&vq->access_lock)
1630bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
163199a2dd95SBruce Richardson {
163299a2dd95SBruce Richardson 	struct buf_vector buf_vec[BUF_VECTOR_MAX];
163399a2dd95SBruce Richardson 	uint16_t nr_descs = 0;
163499a2dd95SBruce Richardson 
163599a2dd95SBruce Richardson 	if (unlikely(vhost_enqueue_single_packed(dev, vq, pkt, buf_vec,
163699a2dd95SBruce Richardson 						 &nr_descs) < 0)) {
16370e21c7c0SDavid Marchand 		VHOST_DATA_LOG(dev->ifname, DEBUG, "failed to get enough desc from vring");
163899a2dd95SBruce Richardson 		return -1;
163999a2dd95SBruce Richardson 	}
164099a2dd95SBruce Richardson 
16410e21c7c0SDavid Marchand 	VHOST_DATA_LOG(dev->ifname, DEBUG,
16420e21c7c0SDavid Marchand 		"current index %d | end index %d",
164336c525a0SDavid Marchand 		vq->last_avail_idx, vq->last_avail_idx + nr_descs);
164499a2dd95SBruce Richardson 
164599a2dd95SBruce Richardson 	vq_inc_last_avail_packed(vq, nr_descs);
164699a2dd95SBruce Richardson 
164799a2dd95SBruce Richardson 	return 0;
164899a2dd95SBruce Richardson }
164999a2dd95SBruce Richardson 
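/*
 * Synchronous enqueue path for packed rings: try the PACKED_BATCH_SIZE
 * fast path first and fall back to single-packet enqueue, then flush
 * shadowed entries and notify the guest if anything was enqueued.
 */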
165099a2dd95SBruce Richardson static __rte_noinline uint32_t
165199a2dd95SBruce Richardson virtio_dev_rx_packed(struct virtio_net *dev,
165299a2dd95SBruce Richardson 		     struct vhost_virtqueue *__rte_restrict vq,
165399a2dd95SBruce Richardson 		     struct rte_mbuf **__rte_restrict pkts,
165499a2dd95SBruce Richardson 		     uint32_t count)
165503f77d66SEelco Chaudron 	__rte_shared_locks_required(&vq->access_lock)
1656bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
165799a2dd95SBruce Richardson {
165899a2dd95SBruce Richardson 	uint32_t pkt_idx = 0;
165999a2dd95SBruce Richardson 
166099a2dd95SBruce Richardson 	do {
166199a2dd95SBruce Richardson 		rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
166299a2dd95SBruce Richardson 
166356fa2791SBalazs Nemeth 		if (count - pkt_idx >= PACKED_BATCH_SIZE) {
16642e3f1ab0SCheng Jiang 			if (!virtio_dev_rx_sync_batch_packed(dev, vq,
166599a2dd95SBruce Richardson 							&pkts[pkt_idx])) {
166699a2dd95SBruce Richardson 				pkt_idx += PACKED_BATCH_SIZE;
166799a2dd95SBruce Richardson 				continue;
166899a2dd95SBruce Richardson 			}
166999a2dd95SBruce Richardson 		}
167099a2dd95SBruce Richardson 
167199a2dd95SBruce Richardson 		if (virtio_dev_rx_single_packed(dev, vq, pkts[pkt_idx]))
167299a2dd95SBruce Richardson 			break;
167399a2dd95SBruce Richardson 		pkt_idx++;
167499a2dd95SBruce Richardson 
167599a2dd95SBruce Richardson 	} while (pkt_idx < count);
167699a2dd95SBruce Richardson 
167799a2dd95SBruce Richardson 	if (vq->shadow_used_idx) {
167899a2dd95SBruce Richardson 		do_data_copy_enqueue(dev, vq);
167999a2dd95SBruce Richardson 		vhost_flush_enqueue_shadow_packed(dev, vq);
168099a2dd95SBruce Richardson 	}
168199a2dd95SBruce Richardson 
168299a2dd95SBruce Richardson 	if (pkt_idx)
168399a2dd95SBruce Richardson 		vhost_vring_call_packed(dev, vq);
168499a2dd95SBruce Richardson 
168599a2dd95SBruce Richardson 	return pkt_idx;
168699a2dd95SBruce Richardson }
168799a2dd95SBruce Richardson 
16889fc93a1eSDavid Marchand static void
16899fc93a1eSDavid Marchand virtio_dev_vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
16909fc93a1eSDavid Marchand {
16919fc93a1eSDavid Marchand 	rte_rwlock_write_lock(&vq->access_lock);
16929fc93a1eSDavid Marchand 	vhost_user_iotlb_rd_lock(vq);
16939fc93a1eSDavid Marchand 	if (!vq->access_ok)
16949fc93a1eSDavid Marchand 		vring_translate(dev, vq);
16959fc93a1eSDavid Marchand 	vhost_user_iotlb_rd_unlock(vq);
16969fc93a1eSDavid Marchand 	rte_rwlock_write_unlock(&vq->access_lock);
16979fc93a1eSDavid Marchand }
16989fc93a1eSDavid Marchand 
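/*
 * Common enqueue entry: take the access lock shared, re-translate the
 * vring if the IOTLB invalidated it, clamp the burst to MAX_PKT_BURST
 * and dispatch to the packed or split implementation before updating
 * the per-queue statistics.
 */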
169999a2dd95SBruce Richardson static __rte_always_inline uint32_t
170057e414e3SDavid Marchand virtio_dev_rx(struct virtio_net *dev, struct vhost_virtqueue *vq,
170199a2dd95SBruce Richardson 	struct rte_mbuf **pkts, uint32_t count)
170299a2dd95SBruce Richardson {
170399a2dd95SBruce Richardson 	uint32_t nb_tx = 0;
170499a2dd95SBruce Richardson 
17050e21c7c0SDavid Marchand 	VHOST_DATA_LOG(dev->ifname, DEBUG, "%s", __func__);
170603f77d66SEelco Chaudron 	rte_rwlock_read_lock(&vq->access_lock);
170799a2dd95SBruce Richardson 
170899a2dd95SBruce Richardson 	if (unlikely(!vq->enabled))
170999a2dd95SBruce Richardson 		goto out_access_unlock;
171099a2dd95SBruce Richardson 
171199a2dd95SBruce Richardson 	vhost_user_iotlb_rd_lock(vq);
171299a2dd95SBruce Richardson 
17139fc93a1eSDavid Marchand 	if (unlikely(!vq->access_ok)) {
17149fc93a1eSDavid Marchand 		vhost_user_iotlb_rd_unlock(vq);
17159fc93a1eSDavid Marchand 		rte_rwlock_read_unlock(&vq->access_lock);
17169fc93a1eSDavid Marchand 
17179fc93a1eSDavid Marchand 		virtio_dev_vring_translate(dev, vq);
17189fc93a1eSDavid Marchand 		goto out_no_unlock;
17199fc93a1eSDavid Marchand 	}
172099a2dd95SBruce Richardson 
172199a2dd95SBruce Richardson 	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
172299a2dd95SBruce Richardson 	if (count == 0)
172399a2dd95SBruce Richardson 		goto out;
172499a2dd95SBruce Richardson 
172599a2dd95SBruce Richardson 	if (vq_is_packed(dev))
172699a2dd95SBruce Richardson 		nb_tx = virtio_dev_rx_packed(dev, vq, pkts, count);
172799a2dd95SBruce Richardson 	else
172899a2dd95SBruce Richardson 		nb_tx = virtio_dev_rx_split(dev, vq, pkts, count);
172999a2dd95SBruce Richardson 
1730be75dc99SMaxime Coquelin 	vhost_queue_stats_update(dev, vq, pkts, nb_tx);
1731be75dc99SMaxime Coquelin 
173299a2dd95SBruce Richardson out:
173399a2dd95SBruce Richardson 	vhost_user_iotlb_rd_unlock(vq);
173499a2dd95SBruce Richardson 
173599a2dd95SBruce Richardson out_access_unlock:
173603f77d66SEelco Chaudron 	rte_rwlock_read_unlock(&vq->access_lock);
173799a2dd95SBruce Richardson 
17389fc93a1eSDavid Marchand out_no_unlock:
173999a2dd95SBruce Richardson 	return nb_tx;
174099a2dd95SBruce Richardson }
174199a2dd95SBruce Richardson 
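/*
 * Public enqueue entry point: validates the device and the RX virtqueue
 * index, then hands the burst to virtio_dev_rx().
 *
 * Minimal usage sketch (illustrative only; port_id and vid are assumed
 * to be an initialized ethdev port and an already registered vhost
 * device, and queue 0 is the first RX virtqueue):
 *
 *	struct rte_mbuf *pkts[32];
 *	uint16_t nb_rx, nb_enq;
 *
 *	nb_rx = rte_eth_rx_burst(port_id, 0, pkts, 32);
 *	nb_enq = rte_vhost_enqueue_burst(vid, 0, pkts, nb_rx);
 *	while (nb_enq < nb_rx)
 *		rte_pktmbuf_free(pkts[nb_enq++]);
 */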
174299a2dd95SBruce Richardson uint16_t
174399a2dd95SBruce Richardson rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
174499a2dd95SBruce Richardson 	struct rte_mbuf **__rte_restrict pkts, uint16_t count)
174599a2dd95SBruce Richardson {
174699a2dd95SBruce Richardson 	struct virtio_net *dev = get_device(vid);
174799a2dd95SBruce Richardson 
174899a2dd95SBruce Richardson 	if (!dev)
174999a2dd95SBruce Richardson 		return 0;
175099a2dd95SBruce Richardson 
175199a2dd95SBruce Richardson 	if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
17520e21c7c0SDavid Marchand 		VHOST_DATA_LOG(dev->ifname, ERR,
17530e21c7c0SDavid Marchand 			"%s: built-in vhost net backend is disabled.",
175436c525a0SDavid Marchand 			__func__);
175599a2dd95SBruce Richardson 		return 0;
175699a2dd95SBruce Richardson 	}
175799a2dd95SBruce Richardson 
175857e414e3SDavid Marchand 	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
17590e21c7c0SDavid Marchand 		VHOST_DATA_LOG(dev->ifname, ERR,
17600e21c7c0SDavid Marchand 			"%s: invalid virtqueue idx %d.",
176157e414e3SDavid Marchand 			__func__, queue_id);
176257e414e3SDavid Marchand 		return 0;
176357e414e3SDavid Marchand 	}
176457e414e3SDavid Marchand 
176557e414e3SDavid Marchand 	return virtio_dev_rx(dev, dev->virtqueue[queue_id], pkts, count);
176699a2dd95SBruce Richardson }
176799a2dd95SBruce Richardson 
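/*
 * Return the async ring slot of the oldest packet still in flight,
 * i.e. pkts_idx minus pkts_inflight_n with wrap-around at vq->size.
 */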
176899a2dd95SBruce Richardson static __rte_always_inline uint16_t
1769c7598748SMaxime Coquelin async_get_first_inflight_pkt_idx(struct vhost_virtqueue *vq)
177003f77d66SEelco Chaudron 	__rte_shared_locks_required(&vq->access_lock)
177199a2dd95SBruce Richardson {
1772c7598748SMaxime Coquelin 	struct vhost_async *async = vq->async;
1773c7598748SMaxime Coquelin 
1774c7598748SMaxime Coquelin 	if (async->pkts_idx >= async->pkts_inflight_n)
1775c7598748SMaxime Coquelin 		return async->pkts_idx - async->pkts_inflight_n;
1776c7598748SMaxime Coquelin 	else
1777c7598748SMaxime Coquelin 		return vq->size - async->pkts_inflight_n + async->pkts_idx;
177899a2dd95SBruce Richardson }
177999a2dd95SBruce Richardson 
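/*
 * Copy 'count' used-ring elements from the shadow ring into the async
 * descriptor ring starting at d_idx, splitting the copy when the
 * destination wraps around the ring.
 */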
17803d6cb86bSCheng Jiang static __rte_always_inline void
17813d6cb86bSCheng Jiang store_dma_desc_info_split(struct vring_used_elem *s_ring, struct vring_used_elem *d_ring,
17823d6cb86bSCheng Jiang 		uint16_t ring_size, uint16_t s_idx, uint16_t d_idx, uint16_t count)
17833d6cb86bSCheng Jiang {
178411a7cd8cSCheng Jiang 	size_t elem_size = sizeof(struct vring_used_elem);
17853d6cb86bSCheng Jiang 
17863d6cb86bSCheng Jiang 	if (d_idx + count <= ring_size) {
17873d6cb86bSCheng Jiang 		rte_memcpy(d_ring + d_idx, s_ring + s_idx, count * elem_size);
17883d6cb86bSCheng Jiang 	} else {
17893d6cb86bSCheng Jiang 		uint16_t size = ring_size - d_idx;
17903d6cb86bSCheng Jiang 
17913d6cb86bSCheng Jiang 		rte_memcpy(d_ring + d_idx, s_ring + s_idx, size * elem_size);
17923d6cb86bSCheng Jiang 		rte_memcpy(d_ring, s_ring + s_idx + size, (count - size) * elem_size);
17933d6cb86bSCheng Jiang 	}
17943d6cb86bSCheng Jiang }
17953d6cb86bSCheng Jiang 
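/*
 * DMA-assisted enqueue for split rings: reserve guest buffers and build
 * one iov iterator per packet, submit them all to the DMA vchannel, then
 * roll back the avail index and shadow ring for any tail of packets the
 * DMA engine did not accept. Shadowed used entries are kept aside until
 * the DMA completions are polled.
 */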
179699a2dd95SBruce Richardson static __rte_noinline uint32_t
179753d3f477SJiayu Hu virtio_dev_rx_async_submit_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
179857e414e3SDavid Marchand 	struct rte_mbuf **pkts, uint32_t count, int16_t dma_id, uint16_t vchan_id)
17994b02c267SDavid Marchand 	__rte_exclusive_locks_required(&vq->access_lock)
1800bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
180199a2dd95SBruce Richardson {
1802abeb8652SJiayu Hu 	struct buf_vector buf_vec[BUF_VECTOR_MAX];
1803d5d25cfdSMaxime Coquelin 	uint32_t pkt_idx = 0;
180499a2dd95SBruce Richardson 	uint16_t num_buffers;
180599a2dd95SBruce Richardson 	uint16_t avail_head;
180699a2dd95SBruce Richardson 
1807ee8024b3SMaxime Coquelin 	struct vhost_async *async = vq->async;
1808ee8024b3SMaxime Coquelin 	struct async_inflight_info *pkts_info = async->pkts_info;
1809d5d25cfdSMaxime Coquelin 	uint32_t pkt_err = 0;
181053d3f477SJiayu Hu 	uint16_t n_xfer;
1811d5d25cfdSMaxime Coquelin 	uint16_t slot_idx = 0;
181299a2dd95SBruce Richardson 
181399a2dd95SBruce Richardson 	/*
181499a2dd95SBruce Richardson 	 * The ordering between avail index and desc reads needs to be enforced.
181599a2dd95SBruce Richardson 	 */
18165147b641STyler Retzlaff 	avail_head = rte_atomic_load_explicit((unsigned short __rte_atomic *)&vq->avail->idx,
18175147b641STyler Retzlaff 		rte_memory_order_acquire);
181899a2dd95SBruce Richardson 
181999a2dd95SBruce Richardson 	rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
182099a2dd95SBruce Richardson 
1821d5d25cfdSMaxime Coquelin 	async_iter_reset(async);
1822d5d25cfdSMaxime Coquelin 
182399a2dd95SBruce Richardson 	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
18244226aa9cSMaxime Coquelin 		uint64_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
182599a2dd95SBruce Richardson 		uint16_t nr_vec = 0;
182699a2dd95SBruce Richardson 
1827d5d25cfdSMaxime Coquelin 		if (unlikely(reserve_avail_buf_split(dev, vq, pkt_len, buf_vec,
1828d5d25cfdSMaxime Coquelin 						&num_buffers, avail_head, &nr_vec) < 0)) {
18290e21c7c0SDavid Marchand 			VHOST_DATA_LOG(dev->ifname, DEBUG,
18300e21c7c0SDavid Marchand 				"failed to get enough desc from vring");
183199a2dd95SBruce Richardson 			vq->shadow_used_idx -= num_buffers;
183299a2dd95SBruce Richardson 			break;
183399a2dd95SBruce Richardson 		}
183499a2dd95SBruce Richardson 
18350e21c7c0SDavid Marchand 		VHOST_DATA_LOG(dev->ifname, DEBUG,
18360e21c7c0SDavid Marchand 			"current index %d | end index %d",
183736c525a0SDavid Marchand 			vq->last_avail_idx, vq->last_avail_idx + num_buffers);
183899a2dd95SBruce Richardson 
1839816a565bSMaxime Coquelin 		if (mbuf_to_desc(dev, vq, pkts[pkt_idx], buf_vec, nr_vec, num_buffers, true) < 0) {
184099a2dd95SBruce Richardson 			vq->shadow_used_idx -= num_buffers;
184199a2dd95SBruce Richardson 			break;
184299a2dd95SBruce Richardson 		}
184399a2dd95SBruce Richardson 
1844ee8024b3SMaxime Coquelin 		slot_idx = (async->pkts_idx + pkt_idx) & (vq->size - 1);
184599a2dd95SBruce Richardson 		pkts_info[slot_idx].descs = num_buffers;
184699a2dd95SBruce Richardson 		pkts_info[slot_idx].mbuf = pkts[pkt_idx];
18473d6cb86bSCheng Jiang 
184899a2dd95SBruce Richardson 		vq->last_avail_idx += num_buffers;
184915677ca2SMaxime Coquelin 		vhost_virtqueue_reconnect_log_split(vq);
18503f63c19bSCheng Jiang 	}
18513f63c19bSCheng Jiang 
1852d5d25cfdSMaxime Coquelin 	if (unlikely(pkt_idx == 0))
1853d5d25cfdSMaxime Coquelin 		return 0;
185499a2dd95SBruce Richardson 
185553d3f477SJiayu Hu 	n_xfer = vhost_async_dma_transfer(dev, vq, dma_id, vchan_id, async->pkts_idx,
185653d3f477SJiayu Hu 			async->iov_iter, pkt_idx);
18573f63c19bSCheng Jiang 
1858d5d25cfdSMaxime Coquelin 	pkt_err = pkt_idx - n_xfer;
185999a2dd95SBruce Richardson 	if (unlikely(pkt_err)) {
186099a2dd95SBruce Richardson 		uint16_t num_descs = 0;
186199a2dd95SBruce Richardson 
18620e21c7c0SDavid Marchand 		VHOST_DATA_LOG(dev->ifname, DEBUG,
18630e21c7c0SDavid Marchand 			"%s: failed to transfer %u packets for queue %u.",
186457e414e3SDavid Marchand 			__func__, pkt_err, vq->index);
186553d3f477SJiayu Hu 
1866abeb8652SJiayu Hu 		/* update number of completed packets */
1867d5d25cfdSMaxime Coquelin 		pkt_idx = n_xfer;
1868abeb8652SJiayu Hu 
1869abeb8652SJiayu Hu 		/* calculate the sum of descriptors to revert */
187099a2dd95SBruce Richardson 		while (pkt_err-- > 0) {
187199a2dd95SBruce Richardson 			num_descs += pkts_info[slot_idx & (vq->size - 1)].descs;
187299a2dd95SBruce Richardson 			slot_idx--;
187399a2dd95SBruce Richardson 		}
1874abeb8652SJiayu Hu 
187599a2dd95SBruce Richardson 		/* recover shadow used ring and available ring */
1876abeb8652SJiayu Hu 		vq->shadow_used_idx -= num_descs;
1877abeb8652SJiayu Hu 		vq->last_avail_idx -= num_descs;
187815677ca2SMaxime Coquelin 		vhost_virtqueue_reconnect_log_split(vq);
187999a2dd95SBruce Richardson 	}
188099a2dd95SBruce Richardson 
1881abeb8652SJiayu Hu 	/* keep used descriptors */
188299a2dd95SBruce Richardson 	if (likely(vq->shadow_used_idx)) {
1883ee8024b3SMaxime Coquelin 		uint16_t to = async->desc_idx_split & (vq->size - 1);
1884abeb8652SJiayu Hu 
1885abeb8652SJiayu Hu 		store_dma_desc_info_split(vq->shadow_used_split,
1886ee8024b3SMaxime Coquelin 				async->descs_split, vq->size, 0, to,
1887abeb8652SJiayu Hu 				vq->shadow_used_idx);
1888abeb8652SJiayu Hu 
1889ee8024b3SMaxime Coquelin 		async->desc_idx_split += vq->shadow_used_idx;
1890a3cfa808SMaxime Coquelin 
1891ee8024b3SMaxime Coquelin 		async->pkts_idx += pkt_idx;
1892a3cfa808SMaxime Coquelin 		if (async->pkts_idx >= vq->size)
1893a3cfa808SMaxime Coquelin 			async->pkts_idx -= vq->size;
1894a3cfa808SMaxime Coquelin 
1895ee8024b3SMaxime Coquelin 		async->pkts_inflight_n += pkt_idx;
1896abeb8652SJiayu Hu 		vq->shadow_used_idx = 0;
189799a2dd95SBruce Richardson 	}
189899a2dd95SBruce Richardson 
189999a2dd95SBruce Richardson 	return pkt_idx;
190099a2dd95SBruce Richardson }
190199a2dd95SBruce Richardson 
1902873e8dadSCheng Jiang 
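/*
 * Packed-ring buffer reservation for the async path: collect descriptors
 * until the packet plus virtio-net header fits, queue the copies on the
 * async iterator and record the buffers in the async shadow ring.
 */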
1903873e8dadSCheng Jiang static __rte_always_inline int
1904abeb8652SJiayu Hu vhost_enqueue_async_packed(struct virtio_net *dev,
1905873e8dadSCheng Jiang 			    struct vhost_virtqueue *vq,
1906873e8dadSCheng Jiang 			    struct rte_mbuf *pkt,
1907873e8dadSCheng Jiang 			    struct buf_vector *buf_vec,
1908873e8dadSCheng Jiang 			    uint16_t *nr_descs,
1909d5d25cfdSMaxime Coquelin 			    uint16_t *nr_buffers)
19104b02c267SDavid Marchand 	__rte_exclusive_locks_required(&vq->access_lock)
1911bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
1912873e8dadSCheng Jiang {
1913873e8dadSCheng Jiang 	uint16_t nr_vec = 0;
1914873e8dadSCheng Jiang 	uint16_t avail_idx = vq->last_avail_idx;
1915873e8dadSCheng Jiang 	uint16_t max_tries, tries = 0;
1916873e8dadSCheng Jiang 	uint16_t buf_id = 0;
1917873e8dadSCheng Jiang 	uint32_t len = 0;
1918873e8dadSCheng Jiang 	uint16_t desc_count = 0;
19194226aa9cSMaxime Coquelin 	uint64_t size = pkt->pkt_len + sizeof(struct virtio_net_hdr_mrg_rxbuf);
1920873e8dadSCheng Jiang 	uint32_t buffer_len[vq->size];
1921873e8dadSCheng Jiang 	uint16_t buffer_buf_id[vq->size];
1922873e8dadSCheng Jiang 	uint16_t buffer_desc_count[vq->size];
1923873e8dadSCheng Jiang 
1924873e8dadSCheng Jiang 	if (rxvq_is_mergeable(dev))
1925873e8dadSCheng Jiang 		max_tries = vq->size - 1;
1926873e8dadSCheng Jiang 	else
1927873e8dadSCheng Jiang 		max_tries = 1;
1928873e8dadSCheng Jiang 
19298db1acabSLuca Vizzarro 	do {
1930873e8dadSCheng Jiang 		/*
1931873e8dadSCheng Jiang 		 * If we have tried all available ring items and still
1932873e8dadSCheng Jiang 		 * cannot get enough buffers, something abnormal has
1933873e8dadSCheng Jiang 		 * happened.
1934873e8dadSCheng Jiang 		 */
1935873e8dadSCheng Jiang 		if (unlikely(++tries > max_tries))
1936873e8dadSCheng Jiang 			return -1;
1937873e8dadSCheng Jiang 
193897064162SMaxime Coquelin 		if (unlikely(fill_vec_buf_packed(dev, vq,
193997064162SMaxime Coquelin 						avail_idx, &desc_count,
194097064162SMaxime Coquelin 						buf_vec, &nr_vec,
194197064162SMaxime Coquelin 						&buf_id, &len,
194297064162SMaxime Coquelin 						VHOST_ACCESS_RW) < 0))
1943873e8dadSCheng Jiang 			return -1;
1944873e8dadSCheng Jiang 
1945873e8dadSCheng Jiang 		len = RTE_MIN(len, size);
1946873e8dadSCheng Jiang 		size -= len;
1947873e8dadSCheng Jiang 
1948873e8dadSCheng Jiang 		buffer_len[*nr_buffers] = len;
1949873e8dadSCheng Jiang 		buffer_buf_id[*nr_buffers] = buf_id;
1950873e8dadSCheng Jiang 		buffer_desc_count[*nr_buffers] = desc_count;
1951873e8dadSCheng Jiang 		*nr_buffers += 1;
1952873e8dadSCheng Jiang 		*nr_descs += desc_count;
1953873e8dadSCheng Jiang 		avail_idx += desc_count;
1954873e8dadSCheng Jiang 		if (avail_idx >= vq->size)
1955873e8dadSCheng Jiang 			avail_idx -= vq->size;
19568db1acabSLuca Vizzarro 	} while (size > 0);
1957873e8dadSCheng Jiang 
1958816a565bSMaxime Coquelin 	if (unlikely(mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, *nr_buffers, true) < 0))
1959873e8dadSCheng Jiang 		return -1;
1960873e8dadSCheng Jiang 
1961637711f0SCheng Jiang 	vhost_async_shadow_enqueue_packed(vq, buffer_len, buffer_buf_id,
1962637711f0SCheng Jiang 					buffer_desc_count, *nr_buffers);
1963873e8dadSCheng Jiang 
1964873e8dadSCheng Jiang 	return 0;
1965873e8dadSCheng Jiang }
1966873e8dadSCheng Jiang 
1967873e8dadSCheng Jiang static __rte_always_inline int16_t
1968abeb8652SJiayu Hu virtio_dev_rx_async_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
1969d5d25cfdSMaxime Coquelin 			    struct rte_mbuf *pkt, uint16_t *nr_descs, uint16_t *nr_buffers)
19704b02c267SDavid Marchand 	__rte_exclusive_locks_required(&vq->access_lock)
1971bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
1972873e8dadSCheng Jiang {
1973873e8dadSCheng Jiang 	struct buf_vector buf_vec[BUF_VECTOR_MAX];
1974873e8dadSCheng Jiang 
1975d5d25cfdSMaxime Coquelin 	if (unlikely(vhost_enqueue_async_packed(dev, vq, pkt, buf_vec,
1976d5d25cfdSMaxime Coquelin 					nr_descs, nr_buffers) < 0)) {
19770e21c7c0SDavid Marchand 		VHOST_DATA_LOG(dev->ifname, DEBUG, "failed to get enough desc from vring");
1978873e8dadSCheng Jiang 		return -1;
1979873e8dadSCheng Jiang 	}
1980873e8dadSCheng Jiang 
19810e21c7c0SDavid Marchand 	VHOST_DATA_LOG(dev->ifname, DEBUG,
19820e21c7c0SDavid Marchand 		"current index %d | end index %d",
198336c525a0SDavid Marchand 		vq->last_avail_idx, vq->last_avail_idx + *nr_descs);
1984873e8dadSCheng Jiang 
1985873e8dadSCheng Jiang 	return 0;
1986873e8dadSCheng Jiang }
1987873e8dadSCheng Jiang 
1988873e8dadSCheng Jiang static __rte_always_inline void
1989f9ebb02bSCheng Jiang virtio_dev_rx_async_packed_batch_enqueue(struct virtio_net *dev,
1990f9ebb02bSCheng Jiang 			   struct vhost_virtqueue *vq,
1991f9ebb02bSCheng Jiang 			   struct rte_mbuf **pkts,
1992f9ebb02bSCheng Jiang 			   uint64_t *desc_addrs,
1993f9ebb02bSCheng Jiang 			   uint64_t *lens)
19944b02c267SDavid Marchand 	__rte_exclusive_locks_required(&vq->access_lock)
1995bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
1996f9ebb02bSCheng Jiang {
1997f9ebb02bSCheng Jiang 	uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
1998f9ebb02bSCheng Jiang 	struct virtio_net_hdr_mrg_rxbuf *hdrs[PACKED_BATCH_SIZE];
1999f9ebb02bSCheng Jiang 	struct vring_packed_desc *descs = vq->desc_packed;
2000f9ebb02bSCheng Jiang 	struct vhost_async *async = vq->async;
2001f9ebb02bSCheng Jiang 	uint16_t avail_idx = vq->last_avail_idx;
2002f9ebb02bSCheng Jiang 	uint32_t mbuf_offset = 0;
2003f9ebb02bSCheng Jiang 	uint16_t ids[PACKED_BATCH_SIZE];
2004f9ebb02bSCheng Jiang 	uint64_t mapped_len[PACKED_BATCH_SIZE];
2005f9ebb02bSCheng Jiang 	void *host_iova[PACKED_BATCH_SIZE];
2006f9ebb02bSCheng Jiang 	uintptr_t desc;
2007f9ebb02bSCheng Jiang 	uint16_t i;
2008f9ebb02bSCheng Jiang 
2009f9ebb02bSCheng Jiang 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
2010f9ebb02bSCheng Jiang 		rte_prefetch0((void *)(uintptr_t)desc_addrs[i]);
2011f9ebb02bSCheng Jiang 		desc = vhost_iova_to_vva(dev, vq, desc_addrs[i], &lens[i], VHOST_ACCESS_RW);
2012f9ebb02bSCheng Jiang 		hdrs[i] = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc;
2013f9ebb02bSCheng Jiang 		lens[i] = pkts[i]->pkt_len +
2014f9ebb02bSCheng Jiang 			sizeof(struct virtio_net_hdr_mrg_rxbuf);
2015f9ebb02bSCheng Jiang 	}
2016f9ebb02bSCheng Jiang 
2017f9ebb02bSCheng Jiang 	if (rxvq_is_mergeable(dev)) {
2018f9ebb02bSCheng Jiang 		vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
2019f9ebb02bSCheng Jiang 			ASSIGN_UNLESS_EQUAL(hdrs[i]->num_buffers, 1);
2020f9ebb02bSCheng Jiang 		}
2021f9ebb02bSCheng Jiang 	}
2022f9ebb02bSCheng Jiang 
2023f9ebb02bSCheng Jiang 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
2024f9ebb02bSCheng Jiang 		virtio_enqueue_offload(pkts[i], &hdrs[i]->hdr);
2025f9ebb02bSCheng Jiang 
2026f9ebb02bSCheng Jiang 	vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE);
2027f9ebb02bSCheng Jiang 
2028f9ebb02bSCheng Jiang 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
2029f9ebb02bSCheng Jiang 		host_iova[i] = (void *)(uintptr_t)gpa_to_first_hpa(dev,
2030f9ebb02bSCheng Jiang 			desc_addrs[i] + buf_offset, lens[i], &mapped_len[i]);
2031f9ebb02bSCheng Jiang 	}
2032f9ebb02bSCheng Jiang 
2033f9ebb02bSCheng Jiang 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
2034f9ebb02bSCheng Jiang 		async_iter_initialize(dev, async);
2035f9ebb02bSCheng Jiang 		async_iter_add_iovec(dev, async,
2036f9ebb02bSCheng Jiang 				(void *)(uintptr_t)rte_pktmbuf_iova_offset(pkts[i], mbuf_offset),
2037f9ebb02bSCheng Jiang 				host_iova[i],
2038f9ebb02bSCheng Jiang 				mapped_len[i]);
2039f9ebb02bSCheng Jiang 		async->iter_idx++;
2040f9ebb02bSCheng Jiang 	}
2041f9ebb02bSCheng Jiang 
2042f9ebb02bSCheng Jiang 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
2043f9ebb02bSCheng Jiang 		vhost_log_cache_write_iova(dev, vq, descs[avail_idx + i].addr, lens[i]);
2044f9ebb02bSCheng Jiang 
2045f9ebb02bSCheng Jiang 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
2046f9ebb02bSCheng Jiang 		ids[i] = descs[avail_idx + i].id;
2047f9ebb02bSCheng Jiang 
2048f9ebb02bSCheng Jiang 	vhost_async_shadow_enqueue_packed_batch(vq, lens, ids);
2049f9ebb02bSCheng Jiang }
2050f9ebb02bSCheng Jiang 
2051f9ebb02bSCheng Jiang static __rte_always_inline int
2052f9ebb02bSCheng Jiang virtio_dev_rx_async_packed_batch(struct virtio_net *dev,
2053f9ebb02bSCheng Jiang 			   struct vhost_virtqueue *vq,
2054f9ebb02bSCheng Jiang 			   struct rte_mbuf **pkts,
2055f9ebb02bSCheng Jiang 			   int16_t dma_id, uint16_t vchan_id)
20564b02c267SDavid Marchand 	__rte_exclusive_locks_required(&vq->access_lock)
2057bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
2058f9ebb02bSCheng Jiang {
2059f9ebb02bSCheng Jiang 	uint64_t desc_addrs[PACKED_BATCH_SIZE];
2060f9ebb02bSCheng Jiang 	uint64_t lens[PACKED_BATCH_SIZE];
2061f9ebb02bSCheng Jiang 
2062f9ebb02bSCheng Jiang 	if (virtio_dev_rx_async_batch_check(vq, pkts, desc_addrs, lens, dma_id, vchan_id) == -1)
2063f9ebb02bSCheng Jiang 		return -1;
2064f9ebb02bSCheng Jiang 
2065f9ebb02bSCheng Jiang 	virtio_dev_rx_async_packed_batch_enqueue(dev, vq, pkts, desc_addrs, lens);
2066f9ebb02bSCheng Jiang 
2067f9ebb02bSCheng Jiang 	return 0;
2068f9ebb02bSCheng Jiang }
2069f9ebb02bSCheng Jiang 
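/*
 * Roll back packed-ring state (last_avail_idx, wrap counter and the
 * async shadow buffer index) for the tail of packets whose DMA transfer
 * could not be submitted.
 */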
2070f9ebb02bSCheng Jiang static __rte_always_inline void
2071abeb8652SJiayu Hu dma_error_handler_packed(struct vhost_virtqueue *vq, uint16_t slot_idx,
2072abeb8652SJiayu Hu 			uint32_t nr_err, uint32_t *pkt_idx)
20734b02c267SDavid Marchand 	__rte_exclusive_locks_required(&vq->access_lock)
2074873e8dadSCheng Jiang {
2075873e8dadSCheng Jiang 	uint16_t descs_err = 0;
2076873e8dadSCheng Jiang 	uint16_t buffers_err = 0;
2077637711f0SCheng Jiang 	struct vhost_async *async = vq->async;
2078ee8024b3SMaxime Coquelin 	struct async_inflight_info *pkts_info = vq->async->pkts_info;
2079873e8dadSCheng Jiang 
2080873e8dadSCheng Jiang 	*pkt_idx -= nr_err;
2081873e8dadSCheng Jiang 	/* calculate the sum of buffers and descs of DMA-error packets. */
2082873e8dadSCheng Jiang 	while (nr_err-- > 0) {
2083873e8dadSCheng Jiang 		descs_err += pkts_info[slot_idx % vq->size].descs;
2084873e8dadSCheng Jiang 		buffers_err += pkts_info[slot_idx % vq->size].nr_buffers;
2085873e8dadSCheng Jiang 		slot_idx--;
2086873e8dadSCheng Jiang 	}
2087873e8dadSCheng Jiang 
2088873e8dadSCheng Jiang 	if (vq->last_avail_idx >= descs_err) {
2089873e8dadSCheng Jiang 		vq->last_avail_idx -= descs_err;
2090873e8dadSCheng Jiang 	} else {
2091873e8dadSCheng Jiang 		vq->last_avail_idx = vq->last_avail_idx + vq->size - descs_err;
2092873e8dadSCheng Jiang 		vq->avail_wrap_counter ^= 1;
2093873e8dadSCheng Jiang 	}
209415677ca2SMaxime Coquelin 	vhost_virtqueue_reconnect_log_packed(vq);
2095873e8dadSCheng Jiang 
2096637711f0SCheng Jiang 	if (async->buffer_idx_packed >= buffers_err)
2097637711f0SCheng Jiang 		async->buffer_idx_packed -= buffers_err;
2098637711f0SCheng Jiang 	else
2099637711f0SCheng Jiang 		async->buffer_idx_packed = async->buffer_idx_packed + vq->size - buffers_err;
2100873e8dadSCheng Jiang }
2101873e8dadSCheng Jiang 
2102873e8dadSCheng Jiang static __rte_noinline uint32_t
210353d3f477SJiayu Hu virtio_dev_rx_async_submit_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
210457e414e3SDavid Marchand 	struct rte_mbuf **pkts, uint32_t count, int16_t dma_id, uint16_t vchan_id)
21054b02c267SDavid Marchand 	__rte_exclusive_locks_required(&vq->access_lock)
2106bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
2107873e8dadSCheng Jiang {
2108d5d25cfdSMaxime Coquelin 	uint32_t pkt_idx = 0;
210953d3f477SJiayu Hu 	uint16_t n_xfer;
2110873e8dadSCheng Jiang 	uint16_t num_buffers;
2111873e8dadSCheng Jiang 	uint16_t num_descs;
2112873e8dadSCheng Jiang 
2113ee8024b3SMaxime Coquelin 	struct vhost_async *async = vq->async;
2114ee8024b3SMaxime Coquelin 	struct async_inflight_info *pkts_info = async->pkts_info;
2115d5d25cfdSMaxime Coquelin 	uint32_t pkt_err = 0;
2116873e8dadSCheng Jiang 	uint16_t slot_idx = 0;
2117f9ebb02bSCheng Jiang 	uint16_t i;
2118873e8dadSCheng Jiang 
2119eb365204SCheng Jiang 	do {
2120873e8dadSCheng Jiang 		rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
2121873e8dadSCheng Jiang 
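		/*
		 * Fast path: try to enqueue a full batch of single-descriptor
		 * packets at once; fall back to the per-packet path below when
		 * the batch check fails.
		 */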
2122f9ebb02bSCheng Jiang 		if (count - pkt_idx >= PACKED_BATCH_SIZE) {
2123f9ebb02bSCheng Jiang 			if (!virtio_dev_rx_async_packed_batch(dev, vq, &pkts[pkt_idx],
2124f9ebb02bSCheng Jiang 					dma_id, vchan_id)) {
2125f9ebb02bSCheng Jiang 				for (i = 0; i < PACKED_BATCH_SIZE; i++) {
2126f9ebb02bSCheng Jiang 					slot_idx = (async->pkts_idx + pkt_idx) % vq->size;
2127f9ebb02bSCheng Jiang 					pkts_info[slot_idx].descs = 1;
2128f9ebb02bSCheng Jiang 					pkts_info[slot_idx].nr_buffers = 1;
2129f9ebb02bSCheng Jiang 					pkts_info[slot_idx].mbuf = pkts[pkt_idx];
2130f9ebb02bSCheng Jiang 					pkt_idx++;
2131f9ebb02bSCheng Jiang 				}
2132f9ebb02bSCheng Jiang 				continue;
2133f9ebb02bSCheng Jiang 			}
2134f9ebb02bSCheng Jiang 		}
2135f9ebb02bSCheng Jiang 
2136873e8dadSCheng Jiang 		num_buffers = 0;
2137873e8dadSCheng Jiang 		num_descs = 0;
2138abeb8652SJiayu Hu 		if (unlikely(virtio_dev_rx_async_packed(dev, vq, pkts[pkt_idx],
2139d5d25cfdSMaxime Coquelin 						&num_descs, &num_buffers) < 0))
2140873e8dadSCheng Jiang 			break;
2141873e8dadSCheng Jiang 
2142ee8024b3SMaxime Coquelin 		slot_idx = (async->pkts_idx + pkt_idx) % vq->size;
2143873e8dadSCheng Jiang 
2144873e8dadSCheng Jiang 		pkts_info[slot_idx].descs = num_descs;
2145873e8dadSCheng Jiang 		pkts_info[slot_idx].nr_buffers = num_buffers;
2146873e8dadSCheng Jiang 		pkts_info[slot_idx].mbuf = pkts[pkt_idx];
2147873e8dadSCheng Jiang 
2148eb365204SCheng Jiang 		pkt_idx++;
2149873e8dadSCheng Jiang 		vq_inc_last_avail_packed(vq, num_descs);
2150eb365204SCheng Jiang 	} while (pkt_idx < count);
2151873e8dadSCheng Jiang 
2152d5d25cfdSMaxime Coquelin 	if (unlikely(pkt_idx == 0))
2153d5d25cfdSMaxime Coquelin 		return 0;
2154d5d25cfdSMaxime Coquelin 
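	/*
	 * Hand all prepared IOV iterators to the DMA vChannel in one call;
	 * n_xfer is the number of packets whose copies were successfully
	 * enqueued.
	 */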
215553d3f477SJiayu Hu 	n_xfer = vhost_async_dma_transfer(dev, vq, dma_id, vchan_id, async->pkts_idx,
215653d3f477SJiayu Hu 			async->iov_iter, pkt_idx);
2157d5d25cfdSMaxime Coquelin 
2158d5d25cfdSMaxime Coquelin 	async_iter_reset(async);
2159873e8dadSCheng Jiang 
216053d3f477SJiayu Hu 	pkt_err = pkt_idx - n_xfer;
216153d3f477SJiayu Hu 	if (unlikely(pkt_err)) {
21620e21c7c0SDavid Marchand 		VHOST_DATA_LOG(dev->ifname, DEBUG,
21630e21c7c0SDavid Marchand 			"%s: failed to transfer %u packets for queue %u.",
216457e414e3SDavid Marchand 			__func__, pkt_err, vq->index);
2165abeb8652SJiayu Hu 		dma_error_handler_packed(vq, slot_idx, pkt_err, &pkt_idx);
216653d3f477SJiayu Hu 	}
2167873e8dadSCheng Jiang 
2168ee8024b3SMaxime Coquelin 	async->pkts_idx += pkt_idx;
2169ee8024b3SMaxime Coquelin 	if (async->pkts_idx >= vq->size)
2170ee8024b3SMaxime Coquelin 		async->pkts_idx -= vq->size;
2171abeb8652SJiayu Hu 
2172ee8024b3SMaxime Coquelin 	async->pkts_inflight_n += pkt_idx;
2173873e8dadSCheng Jiang 
2174873e8dadSCheng Jiang 	return pkt_idx;
2175873e8dadSCheng Jiang }
2176873e8dadSCheng Jiang 
2177873e8dadSCheng Jiang static __rte_always_inline void
21783d6cb86bSCheng Jiang write_back_completed_descs_split(struct vhost_virtqueue *vq, uint16_t n_descs)
217903f77d66SEelco Chaudron 	__rte_shared_locks_required(&vq->access_lock)
21803d6cb86bSCheng Jiang {
2181ee8024b3SMaxime Coquelin 	struct vhost_async *async = vq->async;
21823d6cb86bSCheng Jiang 	uint16_t nr_left = n_descs;
21833d6cb86bSCheng Jiang 	uint16_t nr_copy;
21843d6cb86bSCheng Jiang 	uint16_t to, from;
21853d6cb86bSCheng Jiang 
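	/*
	 * Copy the completed entries from the async shadow used ring to the
	 * guest's used ring; either ring may wrap, so each iteration uses at
	 * most two rte_memcpy() calls.
	 */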
21863d6cb86bSCheng Jiang 	do {
2187ee8024b3SMaxime Coquelin 		from = async->last_desc_idx_split & (vq->size - 1);
21883d6cb86bSCheng Jiang 		nr_copy = nr_left + from <= vq->size ? nr_left : vq->size - from;
21893d6cb86bSCheng Jiang 		to = vq->last_used_idx & (vq->size - 1);
21903d6cb86bSCheng Jiang 
21913d6cb86bSCheng Jiang 		if (to + nr_copy <= vq->size) {
2192ee8024b3SMaxime Coquelin 			rte_memcpy(&vq->used->ring[to], &async->descs_split[from],
21933d6cb86bSCheng Jiang 					nr_copy * sizeof(struct vring_used_elem));
21943d6cb86bSCheng Jiang 		} else {
21953d6cb86bSCheng Jiang 			uint16_t size = vq->size - to;
21963d6cb86bSCheng Jiang 
2197ee8024b3SMaxime Coquelin 			rte_memcpy(&vq->used->ring[to], &async->descs_split[from],
21983d6cb86bSCheng Jiang 					size * sizeof(struct vring_used_elem));
2199ee8024b3SMaxime Coquelin 			rte_memcpy(&vq->used->ring[0], &async->descs_split[from + size],
22003d6cb86bSCheng Jiang 					(nr_copy - size) * sizeof(struct vring_used_elem));
22013d6cb86bSCheng Jiang 		}
22023d6cb86bSCheng Jiang 
2203ee8024b3SMaxime Coquelin 		async->last_desc_idx_split += nr_copy;
22043d6cb86bSCheng Jiang 		vq->last_used_idx += nr_copy;
22053d6cb86bSCheng Jiang 		nr_left -= nr_copy;
22063d6cb86bSCheng Jiang 	} while (nr_left > 0);
22073d6cb86bSCheng Jiang }
22083d6cb86bSCheng Jiang 
2209873e8dadSCheng Jiang static __rte_always_inline void
2210873e8dadSCheng Jiang write_back_completed_descs_packed(struct vhost_virtqueue *vq,
2211873e8dadSCheng Jiang 				uint16_t n_buffers)
221203f77d66SEelco Chaudron 	__rte_shared_locks_required(&vq->access_lock)
2213873e8dadSCheng Jiang {
2214ee8024b3SMaxime Coquelin 	struct vhost_async *async = vq->async;
2215af4d7ad5SJiayu Hu 	uint16_t from = async->last_buffer_idx_packed;
2216af4d7ad5SJiayu Hu 	uint16_t used_idx = vq->last_used_idx;
2217af4d7ad5SJiayu Hu 	uint16_t head_idx = vq->last_used_idx;
2218af4d7ad5SJiayu Hu 	uint16_t head_flags = 0;
2219af4d7ad5SJiayu Hu 	uint16_t i;
2220873e8dadSCheng Jiang 
2221af4d7ad5SJiayu Hu 	/* Split the loop in two so a single release fence orders all id/len stores before the flag stores */
2222af4d7ad5SJiayu Hu 	for (i = 0; i < n_buffers; i++) {
2223af4d7ad5SJiayu Hu 		vq->desc_packed[used_idx].id = async->buffers_packed[from].id;
2224af4d7ad5SJiayu Hu 		vq->desc_packed[used_idx].len = async->buffers_packed[from].len;
2225af4d7ad5SJiayu Hu 
2226af4d7ad5SJiayu Hu 		used_idx += async->buffers_packed[from].count;
2227af4d7ad5SJiayu Hu 		if (used_idx >= vq->size)
2228af4d7ad5SJiayu Hu 			used_idx -= vq->size;
2229af4d7ad5SJiayu Hu 
2230af4d7ad5SJiayu Hu 		from++;
2231af4d7ad5SJiayu Hu 		if (from >= vq->size)
2232af4d7ad5SJiayu Hu 			from = 0;
2233873e8dadSCheng Jiang 	}
2234af4d7ad5SJiayu Hu 
2235af4d7ad5SJiayu Hu 	/* The ordering for storing desc flags needs to be enforced. */
22365147b641STyler Retzlaff 	rte_atomic_thread_fence(rte_memory_order_release);
2237af4d7ad5SJiayu Hu 
2238af4d7ad5SJiayu Hu 	from = async->last_buffer_idx_packed;
2239af4d7ad5SJiayu Hu 
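	/*
	 * Second pass: write the descriptor flags. The first descriptor's
	 * flags are saved and written only after the loop, so the guest never
	 * sees a partially updated chain.
	 */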
2240af4d7ad5SJiayu Hu 	for (i = 0; i < n_buffers; i++) {
2241af4d7ad5SJiayu Hu 		uint16_t flags;
2242af4d7ad5SJiayu Hu 
2243af4d7ad5SJiayu Hu 		if (async->buffers_packed[from].len)
2244af4d7ad5SJiayu Hu 			flags = VRING_DESC_F_WRITE;
2245af4d7ad5SJiayu Hu 		else
2246af4d7ad5SJiayu Hu 			flags = 0;
2247af4d7ad5SJiayu Hu 
2248af4d7ad5SJiayu Hu 		if (vq->used_wrap_counter) {
2249af4d7ad5SJiayu Hu 			flags |= VRING_DESC_F_USED;
2250af4d7ad5SJiayu Hu 			flags |= VRING_DESC_F_AVAIL;
2251af4d7ad5SJiayu Hu 		} else {
2252af4d7ad5SJiayu Hu 			flags &= ~VRING_DESC_F_USED;
2253af4d7ad5SJiayu Hu 			flags &= ~VRING_DESC_F_AVAIL;
2254af4d7ad5SJiayu Hu 		}
2255af4d7ad5SJiayu Hu 
2256af4d7ad5SJiayu Hu 		if (i > 0) {
2257af4d7ad5SJiayu Hu 			vq->desc_packed[vq->last_used_idx].flags = flags;
2258af4d7ad5SJiayu Hu 		} else {
2259af4d7ad5SJiayu Hu 			head_idx = vq->last_used_idx;
2260af4d7ad5SJiayu Hu 			head_flags = flags;
2261af4d7ad5SJiayu Hu 		}
2262af4d7ad5SJiayu Hu 
2263af4d7ad5SJiayu Hu 		vq_inc_last_used_packed(vq, async->buffers_packed[from].count);
2264af4d7ad5SJiayu Hu 
2265af4d7ad5SJiayu Hu 		from++;
2266af4d7ad5SJiayu Hu 		if (from == vq->size)
2267af4d7ad5SJiayu Hu 			from = 0;
2268af4d7ad5SJiayu Hu 	}
2269af4d7ad5SJiayu Hu 
2270af4d7ad5SJiayu Hu 	vq->desc_packed[head_idx].flags = head_flags;
2271af4d7ad5SJiayu Hu 	async->last_buffer_idx_packed = from;
2272873e8dadSCheng Jiang }
2273873e8dadSCheng Jiang 
2274b737fd61SCheng Jiang static __rte_always_inline uint16_t
227557e414e3SDavid Marchand vhost_poll_enqueue_completed(struct virtio_net *dev, struct vhost_virtqueue *vq,
227657e414e3SDavid Marchand 	struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, uint16_t vchan_id)
227703f77d66SEelco Chaudron 	__rte_shared_locks_required(&vq->access_lock)
227899a2dd95SBruce Richardson {
2279a3cfa808SMaxime Coquelin 	struct vhost_async *async = vq->async;
2280a3cfa808SMaxime Coquelin 	struct async_inflight_info *pkts_info = async->pkts_info;
228153d3f477SJiayu Hu 	uint16_t nr_cpl_pkts = 0;
2282a3cfa808SMaxime Coquelin 	uint16_t n_descs = 0, n_buffers = 0;
2283a3cfa808SMaxime Coquelin 	uint16_t start_idx, from, i;
228499a2dd95SBruce Richardson 
228553d3f477SJiayu Hu 	/* Check completed copies for the given DMA vChannel */
228653d3f477SJiayu Hu 	vhost_async_dma_check_completed(dev, dma_id, vchan_id, VHOST_DMA_MAX_COPY_COMPLETE);
2287a3cfa808SMaxime Coquelin 
2288c7598748SMaxime Coquelin 	start_idx = async_get_first_inflight_pkt_idx(vq);
228953d3f477SJiayu Hu 	/**
229053d3f477SJiayu Hu 	 * Calculate the number of packets whose copies have completed.
229153d3f477SJiayu Hu 	 * Note that packets may have completed even if the given DMA
229253d3f477SJiayu Hu 	 * vChannel reports no copies done, since a virtqueue may use
229353d3f477SJiayu Hu 	 * multiple DMA vChannels.
229453d3f477SJiayu Hu 	 */
229653d3f477SJiayu Hu 	from = start_idx;
229753d3f477SJiayu Hu 	while (vq->async->pkts_cmpl_flag[from] && count--) {
229853d3f477SJiayu Hu 		vq->async->pkts_cmpl_flag[from] = false;
229953d3f477SJiayu Hu 		from++;
230053d3f477SJiayu Hu 		if (from >= vq->size)
230153d3f477SJiayu Hu 			from -= vq->size;
230253d3f477SJiayu Hu 		nr_cpl_pkts++;
230353d3f477SJiayu Hu 	}
2304c7598748SMaxime Coquelin 
230553d3f477SJiayu Hu 	if (nr_cpl_pkts == 0)
230653d3f477SJiayu Hu 		return 0;
230753d3f477SJiayu Hu 
230853d3f477SJiayu Hu 	for (i = 0; i < nr_cpl_pkts; i++) {
2309a3cfa808SMaxime Coquelin 		from = (start_idx + i) % vq->size;
2310a3cfa808SMaxime Coquelin 		/* Only used with packed ring */
2311873e8dadSCheng Jiang 		n_buffers += pkts_info[from].nr_buffers;
2312a3cfa808SMaxime Coquelin 		/* Only used with split ring */
231399a2dd95SBruce Richardson 		n_descs += pkts_info[from].descs;
231499a2dd95SBruce Richardson 		pkts[i] = pkts_info[from].mbuf;
231599a2dd95SBruce Richardson 	}
2316a3cfa808SMaxime Coquelin 
231753d3f477SJiayu Hu 	async->pkts_inflight_n -= nr_cpl_pkts;
231899a2dd95SBruce Richardson 
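	/*
	 * If the vring is accessible, write the completed descriptors back to
	 * the guest and notify it; otherwise only advance the internal async
	 * indexes.
	 */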
231999a2dd95SBruce Richardson 	if (likely(vq->enabled && vq->access_ok)) {
2320873e8dadSCheng Jiang 		if (vq_is_packed(dev)) {
2321873e8dadSCheng Jiang 			write_back_completed_descs_packed(vq, n_buffers);
2322873e8dadSCheng Jiang 			vhost_vring_call_packed(dev, vq);
2323873e8dadSCheng Jiang 		} else {
23243d6cb86bSCheng Jiang 			write_back_completed_descs_split(vq, n_descs);
23255147b641STyler Retzlaff 			rte_atomic_fetch_add_explicit(
23265147b641STyler Retzlaff 				(unsigned short __rte_atomic *)&vq->used->idx,
23275147b641STyler Retzlaff 				n_descs, rte_memory_order_release);
232899a2dd95SBruce Richardson 			vhost_vring_call_split(dev, vq);
2329873e8dadSCheng Jiang 		}
2330873e8dadSCheng Jiang 	} else {
23318d2c1260SCheng Jiang 		if (vq_is_packed(dev)) {
2332ee8024b3SMaxime Coquelin 			async->last_buffer_idx_packed += n_buffers;
2333ee8024b3SMaxime Coquelin 			if (async->last_buffer_idx_packed >= vq->size)
2334ee8024b3SMaxime Coquelin 				async->last_buffer_idx_packed -= vq->size;
23358d2c1260SCheng Jiang 		} else {
2336ee8024b3SMaxime Coquelin 			async->last_desc_idx_split += n_descs;
2337873e8dadSCheng Jiang 		}
23388d2c1260SCheng Jiang 	}
233999a2dd95SBruce Richardson 
234053d3f477SJiayu Hu 	return nr_cpl_pkts;
2341b737fd61SCheng Jiang }
2342b737fd61SCheng Jiang 
2343b737fd61SCheng Jiang uint16_t
2344b737fd61SCheng Jiang rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
234553d3f477SJiayu Hu 		struct rte_mbuf **pkts, uint16_t count, int16_t dma_id,
234653d3f477SJiayu Hu 		uint16_t vchan_id)
2347b737fd61SCheng Jiang {
2348b737fd61SCheng Jiang 	struct virtio_net *dev = get_device(vid);
2349b737fd61SCheng Jiang 	struct vhost_virtqueue *vq;
2350b737fd61SCheng Jiang 	uint16_t n_pkts_cpl = 0;
2351b737fd61SCheng Jiang 
2352abeb8652SJiayu Hu 	if (unlikely(!dev))
2353b737fd61SCheng Jiang 		return 0;
2354b737fd61SCheng Jiang 
23550e21c7c0SDavid Marchand 	VHOST_DATA_LOG(dev->ifname, DEBUG, "%s", __func__);
2356b737fd61SCheng Jiang 	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
23570e21c7c0SDavid Marchand 		VHOST_DATA_LOG(dev->ifname, ERR,
23580e21c7c0SDavid Marchand 			"%s: invalid virtqueue idx %d.",
235936c525a0SDavid Marchand 			__func__, queue_id);
2360b737fd61SCheng Jiang 		return 0;
2361b737fd61SCheng Jiang 	}
2362b737fd61SCheng Jiang 
236353d3f477SJiayu Hu 	if (unlikely(!dma_copy_track[dma_id].vchans ||
236453d3f477SJiayu Hu 				!dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) {
23650e21c7c0SDavid Marchand 		VHOST_DATA_LOG(dev->ifname, ERR,
23660e21c7c0SDavid Marchand 			"%s: invalid channel %d:%u.",
236736c525a0SDavid Marchand 			__func__, dma_id, vchan_id);
2368b737fd61SCheng Jiang 		return 0;
2369b737fd61SCheng Jiang 	}
2370b737fd61SCheng Jiang 
237153d3f477SJiayu Hu 	vq = dev->virtqueue[queue_id];
2372b737fd61SCheng Jiang 
237303f77d66SEelco Chaudron 	if (rte_rwlock_read_trylock(&vq->access_lock)) {
23740e21c7c0SDavid Marchand 		VHOST_DATA_LOG(dev->ifname, DEBUG,
23750e21c7c0SDavid Marchand 			"%s: virtqueue %u is busy.",
237636c525a0SDavid Marchand 			__func__, queue_id);
237753d3f477SJiayu Hu 		return 0;
237853d3f477SJiayu Hu 	}
2379b737fd61SCheng Jiang 
238053d3f477SJiayu Hu 	if (unlikely(!vq->async)) {
23810e21c7c0SDavid Marchand 		VHOST_DATA_LOG(dev->ifname, ERR,
23820e21c7c0SDavid Marchand 			"%s: async not registered for virtqueue %d.",
238336c525a0SDavid Marchand 			__func__, queue_id);
238453d3f477SJiayu Hu 		goto out;
238553d3f477SJiayu Hu 	}
238653d3f477SJiayu Hu 
238757e414e3SDavid Marchand 	n_pkts_cpl = vhost_poll_enqueue_completed(dev, vq, pkts, count, dma_id, vchan_id);
238853d3f477SJiayu Hu 
2389be75dc99SMaxime Coquelin 	vhost_queue_stats_update(dev, vq, pkts, n_pkts_cpl);
239069c94e35SMaxime Coquelin 	vq->stats.inflight_completed += n_pkts_cpl;
2391be75dc99SMaxime Coquelin 
239253d3f477SJiayu Hu out:
239303f77d66SEelco Chaudron 	rte_rwlock_read_unlock(&vq->access_lock);
239499a2dd95SBruce Richardson 
2395b737fd61SCheng Jiang 	return n_pkts_cpl;
2396b737fd61SCheng Jiang }
2397b737fd61SCheng Jiang 
2398b737fd61SCheng Jiang uint16_t
2399b737fd61SCheng Jiang rte_vhost_clear_queue_thread_unsafe(int vid, uint16_t queue_id,
240053d3f477SJiayu Hu 		struct rte_mbuf **pkts, uint16_t count, int16_t dma_id,
240153d3f477SJiayu Hu 		uint16_t vchan_id)
2402b737fd61SCheng Jiang {
2403b737fd61SCheng Jiang 	struct virtio_net *dev = get_device(vid);
2404b737fd61SCheng Jiang 	struct vhost_virtqueue *vq;
2405b737fd61SCheng Jiang 	uint16_t n_pkts_cpl = 0;
2406b737fd61SCheng Jiang 
2407b737fd61SCheng Jiang 	if (!dev)
2408b737fd61SCheng Jiang 		return 0;
2409b737fd61SCheng Jiang 
24100e21c7c0SDavid Marchand 	VHOST_DATA_LOG(dev->ifname, DEBUG, "%s", __func__);
24113753ebf0SYuan Wang 	if (unlikely(queue_id >= dev->nr_vring)) {
24120e21c7c0SDavid Marchand 		VHOST_DATA_LOG(dev->ifname, ERR, "%s: invalid virtqueue idx %d.",
241336c525a0SDavid Marchand 			__func__, queue_id);
2414b737fd61SCheng Jiang 		return 0;
2415b737fd61SCheng Jiang 	}
2416b737fd61SCheng Jiang 
24173753ebf0SYuan Wang 	if (unlikely(dma_id < 0 || dma_id >= RTE_DMADEV_DEFAULT_MAX)) {
24180e21c7c0SDavid Marchand 		VHOST_DATA_LOG(dev->ifname, ERR, "%s: invalid dma id %d.",
241936c525a0SDavid Marchand 			__func__, dma_id);
24203753ebf0SYuan Wang 		return 0;
24213753ebf0SYuan Wang 	}
24223753ebf0SYuan Wang 
2423b737fd61SCheng Jiang 	vq = dev->virtqueue[queue_id];
2424b737fd61SCheng Jiang 
242590d6e52bSDavid Marchand 	vq_assert_lock(dev, vq);
24260a8363efSMaxime Coquelin 
2427ee8024b3SMaxime Coquelin 	if (unlikely(!vq->async)) {
24280e21c7c0SDavid Marchand 		VHOST_DATA_LOG(dev->ifname, ERR,
24290e21c7c0SDavid Marchand 			"%s: async not registered for virtqueue %d.",
243036c525a0SDavid Marchand 			__func__, queue_id);
2431b737fd61SCheng Jiang 		return 0;
2432b737fd61SCheng Jiang 	}
2433b737fd61SCheng Jiang 
243453d3f477SJiayu Hu 	if (unlikely(!dma_copy_track[dma_id].vchans ||
243553d3f477SJiayu Hu 				!dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) {
24360e21c7c0SDavid Marchand 		VHOST_DATA_LOG(dev->ifname, ERR,
24370e21c7c0SDavid Marchand 			"%s: invalid channel %d:%u.",
243836c525a0SDavid Marchand 			__func__, dma_id, vchan_id);
243953d3f477SJiayu Hu 		return 0;
244053d3f477SJiayu Hu 	}
244153d3f477SJiayu Hu 
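	/*
	 * Even queue index: virtio RX queue, poll enqueue completions;
	 * odd queue index: virtio TX queue, poll dequeue completions.
	 */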
24423753ebf0SYuan Wang 	if ((queue_id & 1) == 0)
244357e414e3SDavid Marchand 		n_pkts_cpl = vhost_poll_enqueue_completed(dev, vq, pkts, count,
244457e414e3SDavid Marchand 			dma_id, vchan_id);
244557e414e3SDavid Marchand 	else
2446fe8477ebSCheng Jiang 		n_pkts_cpl = async_poll_dequeue_completed(dev, vq, pkts, count,
24473753ebf0SYuan Wang 			dma_id, vchan_id, dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS);
2448b737fd61SCheng Jiang 
244969c94e35SMaxime Coquelin 	vhost_queue_stats_update(dev, vq, pkts, n_pkts_cpl);
245069c94e35SMaxime Coquelin 	vq->stats.inflight_completed += n_pkts_cpl;
245169c94e35SMaxime Coquelin 
2452b737fd61SCheng Jiang 	return n_pkts_cpl;
245399a2dd95SBruce Richardson }
245499a2dd95SBruce Richardson 
24553753ebf0SYuan Wang uint16_t
24563753ebf0SYuan Wang rte_vhost_clear_queue(int vid, uint16_t queue_id, struct rte_mbuf **pkts,
24573753ebf0SYuan Wang 		uint16_t count, int16_t dma_id, uint16_t vchan_id)
24583753ebf0SYuan Wang {
24593753ebf0SYuan Wang 	struct virtio_net *dev = get_device(vid);
24603753ebf0SYuan Wang 	struct vhost_virtqueue *vq;
24613753ebf0SYuan Wang 	uint16_t n_pkts_cpl = 0;
24623753ebf0SYuan Wang 
24633753ebf0SYuan Wang 	if (!dev)
24643753ebf0SYuan Wang 		return 0;
24653753ebf0SYuan Wang 
24660e21c7c0SDavid Marchand 	VHOST_DATA_LOG(dev->ifname, DEBUG, "%s", __func__);
24673753ebf0SYuan Wang 	if (unlikely(queue_id >= dev->nr_vring)) {
24680e21c7c0SDavid Marchand 		VHOST_DATA_LOG(dev->ifname, ERR, "%s: invalid virtqueue idx %u.",
246936c525a0SDavid Marchand 			__func__, queue_id);
24703753ebf0SYuan Wang 		return 0;
24713753ebf0SYuan Wang 	}
24723753ebf0SYuan Wang 
24733753ebf0SYuan Wang 	if (unlikely(dma_id < 0 || dma_id >= RTE_DMADEV_DEFAULT_MAX)) {
24740e21c7c0SDavid Marchand 		VHOST_DATA_LOG(dev->ifname, ERR, "%s: invalid dma id %d.",
247536c525a0SDavid Marchand 			__func__, dma_id);
24763753ebf0SYuan Wang 		return 0;
24773753ebf0SYuan Wang 	}
24783753ebf0SYuan Wang 
24793753ebf0SYuan Wang 	vq = dev->virtqueue[queue_id];
24803753ebf0SYuan Wang 
248103f77d66SEelco Chaudron 	if (rte_rwlock_read_trylock(&vq->access_lock)) {
24820e21c7c0SDavid Marchand 		VHOST_DATA_LOG(dev->ifname, DEBUG, "%s: virtqueue %u is busy.",
248336c525a0SDavid Marchand 			__func__, queue_id);
24843753ebf0SYuan Wang 		return 0;
24853753ebf0SYuan Wang 	}
24863753ebf0SYuan Wang 
24873753ebf0SYuan Wang 	if (unlikely(!vq->async)) {
24880e21c7c0SDavid Marchand 		VHOST_DATA_LOG(dev->ifname, ERR, "%s: async not registered for queue id %u.",
248936c525a0SDavid Marchand 			__func__, queue_id);
24903753ebf0SYuan Wang 		goto out_access_unlock;
24913753ebf0SYuan Wang 	}
24923753ebf0SYuan Wang 
24933753ebf0SYuan Wang 	if (unlikely(!dma_copy_track[dma_id].vchans ||
24943753ebf0SYuan Wang 				!dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) {
24950e21c7c0SDavid Marchand 		VHOST_DATA_LOG(dev->ifname, ERR, "%s: invalid channel %d:%u.",
249636c525a0SDavid Marchand 			__func__, dma_id, vchan_id);
24973753ebf0SYuan Wang 		goto out_access_unlock;
24983753ebf0SYuan Wang 	}
24993753ebf0SYuan Wang 
25003753ebf0SYuan Wang 	if ((queue_id & 1) == 0)
250157e414e3SDavid Marchand 		n_pkts_cpl = vhost_poll_enqueue_completed(dev, vq, pkts, count,
250257e414e3SDavid Marchand 			dma_id, vchan_id);
250357e414e3SDavid Marchand 	else
2504fe8477ebSCheng Jiang 		n_pkts_cpl = async_poll_dequeue_completed(dev, vq, pkts, count,
25053753ebf0SYuan Wang 			dma_id, vchan_id, dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS);
25063753ebf0SYuan Wang 
25073753ebf0SYuan Wang 	vhost_queue_stats_update(dev, vq, pkts, n_pkts_cpl);
25083753ebf0SYuan Wang 	vq->stats.inflight_completed += n_pkts_cpl;
25093753ebf0SYuan Wang 
25103753ebf0SYuan Wang out_access_unlock:
251103f77d66SEelco Chaudron 	rte_rwlock_read_unlock(&vq->access_lock);
25123753ebf0SYuan Wang 
25133753ebf0SYuan Wang 	return n_pkts_cpl;
25143753ebf0SYuan Wang }
25153753ebf0SYuan Wang 
251699a2dd95SBruce Richardson static __rte_always_inline uint32_t
251757e414e3SDavid Marchand virtio_dev_rx_async_submit(struct virtio_net *dev, struct vhost_virtqueue *vq,
251853d3f477SJiayu Hu 	struct rte_mbuf **pkts, uint32_t count, int16_t dma_id, uint16_t vchan_id)
251999a2dd95SBruce Richardson {
252099a2dd95SBruce Richardson 	uint32_t nb_tx = 0;
252199a2dd95SBruce Richardson 
25220e21c7c0SDavid Marchand 	VHOST_DATA_LOG(dev->ifname, DEBUG, "%s", __func__);
252399a2dd95SBruce Richardson 
252453d3f477SJiayu Hu 	if (unlikely(!dma_copy_track[dma_id].vchans ||
252553d3f477SJiayu Hu 				!dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) {
25260e21c7c0SDavid Marchand 		VHOST_DATA_LOG(dev->ifname, ERR,
25270e21c7c0SDavid Marchand 			"%s: invalid channel %d:%u.",
252836c525a0SDavid Marchand 			 __func__, dma_id, vchan_id);
252953d3f477SJiayu Hu 		return 0;
253053d3f477SJiayu Hu 	}
253153d3f477SJiayu Hu 
253203f77d66SEelco Chaudron 	rte_rwlock_write_lock(&vq->access_lock);
253399a2dd95SBruce Richardson 
2534ee8024b3SMaxime Coquelin 	if (unlikely(!vq->enabled || !vq->async))
253599a2dd95SBruce Richardson 		goto out_access_unlock;
253699a2dd95SBruce Richardson 
253799a2dd95SBruce Richardson 	vhost_user_iotlb_rd_lock(vq);
253899a2dd95SBruce Richardson 
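	/*
	 * Ring addresses not translated yet: drop the locks, trigger the
	 * translation and return without enqueueing anything.
	 */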
25399fc93a1eSDavid Marchand 	if (unlikely(!vq->access_ok)) {
25409fc93a1eSDavid Marchand 		vhost_user_iotlb_rd_unlock(vq);
254122aa9a9cSStephen Hemminger 		rte_rwlock_write_unlock(&vq->access_lock);
25429fc93a1eSDavid Marchand 
25439fc93a1eSDavid Marchand 		virtio_dev_vring_translate(dev, vq);
25449fc93a1eSDavid Marchand 		goto out_no_unlock;
25459fc93a1eSDavid Marchand 	}
254699a2dd95SBruce Richardson 
254799a2dd95SBruce Richardson 	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
254899a2dd95SBruce Richardson 	if (count == 0)
254999a2dd95SBruce Richardson 		goto out;
255099a2dd95SBruce Richardson 
255199a2dd95SBruce Richardson 	if (vq_is_packed(dev))
255257e414e3SDavid Marchand 		nb_tx = virtio_dev_rx_async_submit_packed(dev, vq, pkts, count,
255357e414e3SDavid Marchand 			dma_id, vchan_id);
255499a2dd95SBruce Richardson 	else
255557e414e3SDavid Marchand 		nb_tx = virtio_dev_rx_async_submit_split(dev, vq, pkts, count,
255657e414e3SDavid Marchand 			dma_id, vchan_id);
255799a2dd95SBruce Richardson 
255869c94e35SMaxime Coquelin 	vq->stats.inflight_submitted += nb_tx;
255969c94e35SMaxime Coquelin 
256099a2dd95SBruce Richardson out:
256199a2dd95SBruce Richardson 	vhost_user_iotlb_rd_unlock(vq);
256299a2dd95SBruce Richardson 
256399a2dd95SBruce Richardson out_access_unlock:
256403f77d66SEelco Chaudron 	rte_rwlock_write_unlock(&vq->access_lock);
256599a2dd95SBruce Richardson 
25669fc93a1eSDavid Marchand out_no_unlock:
256799a2dd95SBruce Richardson 	return nb_tx;
256899a2dd95SBruce Richardson }
256999a2dd95SBruce Richardson 
257099a2dd95SBruce Richardson uint16_t
257199a2dd95SBruce Richardson rte_vhost_submit_enqueue_burst(int vid, uint16_t queue_id,
257253d3f477SJiayu Hu 		struct rte_mbuf **pkts, uint16_t count, int16_t dma_id,
257353d3f477SJiayu Hu 		uint16_t vchan_id)
257499a2dd95SBruce Richardson {
257599a2dd95SBruce Richardson 	struct virtio_net *dev = get_device(vid);
257699a2dd95SBruce Richardson 
257799a2dd95SBruce Richardson 	if (!dev)
257899a2dd95SBruce Richardson 		return 0;
257999a2dd95SBruce Richardson 
258099a2dd95SBruce Richardson 	if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
25810e21c7c0SDavid Marchand 		VHOST_DATA_LOG(dev->ifname, ERR,
25820e21c7c0SDavid Marchand 			"%s: built-in vhost net backend is disabled.",
258336c525a0SDavid Marchand 			__func__);
258499a2dd95SBruce Richardson 		return 0;
258599a2dd95SBruce Richardson 	}
258699a2dd95SBruce Richardson 
258757e414e3SDavid Marchand 	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
25880e21c7c0SDavid Marchand 		VHOST_DATA_LOG(dev->ifname, ERR,
25890e21c7c0SDavid Marchand 			"%s: invalid virtqueue idx %d.",
259057e414e3SDavid Marchand 			__func__, queue_id);
259157e414e3SDavid Marchand 		return 0;
259257e414e3SDavid Marchand 	}
259357e414e3SDavid Marchand 
259457e414e3SDavid Marchand 	return virtio_dev_rx_async_submit(dev, dev->virtqueue[queue_id], pkts, count,
259557e414e3SDavid Marchand 		dma_id, vchan_id);
259699a2dd95SBruce Richardson }
259799a2dd95SBruce Richardson 
259899a2dd95SBruce Richardson static inline bool
259999a2dd95SBruce Richardson virtio_net_with_host_offload(struct virtio_net *dev)
260099a2dd95SBruce Richardson {
260199a2dd95SBruce Richardson 	if (dev->features &
260299a2dd95SBruce Richardson 			((1ULL << VIRTIO_NET_F_CSUM) |
260399a2dd95SBruce Richardson 			 (1ULL << VIRTIO_NET_F_HOST_ECN) |
260499a2dd95SBruce Richardson 			 (1ULL << VIRTIO_NET_F_HOST_TSO4) |
260599a2dd95SBruce Richardson 			 (1ULL << VIRTIO_NET_F_HOST_TSO6) |
260699a2dd95SBruce Richardson 			 (1ULL << VIRTIO_NET_F_HOST_UFO)))
260799a2dd95SBruce Richardson 		return true;
260899a2dd95SBruce Richardson 
260999a2dd95SBruce Richardson 	return false;
261099a2dd95SBruce Richardson }
261199a2dd95SBruce Richardson 
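/*
 * Parse the Ethernet/VLAN and L3 headers of a packet coming from the guest,
 * fill m->l2_len, m->l3_len and the RTE_MBUF_F_TX_IPV4/IPV6 flags, and check
 * that the L4 header reported in *l4_proto fits in the first mbuf segment.
 * On failure the offload fields are cleared and -EINVAL is returned.
 */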
2612706ba486SXiao Wang static int
2613706ba486SXiao Wang parse_headers(struct rte_mbuf *m, uint8_t *l4_proto)
261499a2dd95SBruce Richardson {
261599a2dd95SBruce Richardson 	struct rte_ipv4_hdr *ipv4_hdr;
261699a2dd95SBruce Richardson 	struct rte_ipv6_hdr *ipv6_hdr;
261799a2dd95SBruce Richardson 	struct rte_ether_hdr *eth_hdr;
261899a2dd95SBruce Richardson 	uint16_t ethertype;
2619706ba486SXiao Wang 	uint16_t data_len = rte_pktmbuf_data_len(m);
2620706ba486SXiao Wang 
2621706ba486SXiao Wang 	if (data_len < sizeof(struct rte_ether_hdr))
2622706ba486SXiao Wang 		return -EINVAL;
262399a2dd95SBruce Richardson 
262499a2dd95SBruce Richardson 	eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
262599a2dd95SBruce Richardson 
262699a2dd95SBruce Richardson 	m->l2_len = sizeof(struct rte_ether_hdr);
262799a2dd95SBruce Richardson 	ethertype = rte_be_to_cpu_16(eth_hdr->ether_type);
262899a2dd95SBruce Richardson 
262999a2dd95SBruce Richardson 	if (ethertype == RTE_ETHER_TYPE_VLAN) {
2630706ba486SXiao Wang 		if (data_len < sizeof(struct rte_ether_hdr) +
2631706ba486SXiao Wang 				sizeof(struct rte_vlan_hdr))
2632706ba486SXiao Wang 			goto error;
2633706ba486SXiao Wang 
263499a2dd95SBruce Richardson 		struct rte_vlan_hdr *vlan_hdr =
263599a2dd95SBruce Richardson 			(struct rte_vlan_hdr *)(eth_hdr + 1);
263699a2dd95SBruce Richardson 
263799a2dd95SBruce Richardson 		m->l2_len += sizeof(struct rte_vlan_hdr);
263899a2dd95SBruce Richardson 		ethertype = rte_be_to_cpu_16(vlan_hdr->eth_proto);
263999a2dd95SBruce Richardson 	}
264099a2dd95SBruce Richardson 
264199a2dd95SBruce Richardson 	switch (ethertype) {
264299a2dd95SBruce Richardson 	case RTE_ETHER_TYPE_IPV4:
2643706ba486SXiao Wang 		if (data_len < m->l2_len + sizeof(struct rte_ipv4_hdr))
2644706ba486SXiao Wang 			goto error;
2645706ba486SXiao Wang 		ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *,
2646706ba486SXiao Wang 				m->l2_len);
264799a2dd95SBruce Richardson 		m->l3_len = rte_ipv4_hdr_len(ipv4_hdr);
2648706ba486SXiao Wang 		if (data_len < m->l2_len + m->l3_len)
2649706ba486SXiao Wang 			goto error;
2650daa02b5cSOlivier Matz 		m->ol_flags |= RTE_MBUF_F_TX_IPV4;
2651706ba486SXiao Wang 		*l4_proto = ipv4_hdr->next_proto_id;
265299a2dd95SBruce Richardson 		break;
265399a2dd95SBruce Richardson 	case RTE_ETHER_TYPE_IPV6:
2654706ba486SXiao Wang 		if (data_len < m->l2_len + sizeof(struct rte_ipv6_hdr))
2655706ba486SXiao Wang 			goto error;
2656706ba486SXiao Wang 		ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *,
2657706ba486SXiao Wang 				m->l2_len);
265899a2dd95SBruce Richardson 		m->l3_len = sizeof(struct rte_ipv6_hdr);
2659daa02b5cSOlivier Matz 		m->ol_flags |= RTE_MBUF_F_TX_IPV6;
2660706ba486SXiao Wang 		*l4_proto = ipv6_hdr->proto;
266199a2dd95SBruce Richardson 		break;
266299a2dd95SBruce Richardson 	default:
2663706ba486SXiao Wang 		/* a valid L3 header is needed for further L4 parsing */
2664706ba486SXiao Wang 		goto error;
266599a2dd95SBruce Richardson 	}
2666706ba486SXiao Wang 
2667706ba486SXiao Wang 	/* both CSUM and GSO need a valid L4 header */
2668706ba486SXiao Wang 	switch (*l4_proto) {
2669706ba486SXiao Wang 	case IPPROTO_TCP:
2670706ba486SXiao Wang 		if (data_len < m->l2_len + m->l3_len +
2671706ba486SXiao Wang 				sizeof(struct rte_tcp_hdr))
2672706ba486SXiao Wang 			goto error;
2673706ba486SXiao Wang 		break;
2674706ba486SXiao Wang 	case IPPROTO_UDP:
2675706ba486SXiao Wang 		if (data_len < m->l2_len + m->l3_len +
2676706ba486SXiao Wang 				sizeof(struct rte_udp_hdr))
2677706ba486SXiao Wang 			goto error;
2678706ba486SXiao Wang 		break;
2679706ba486SXiao Wang 	case IPPROTO_SCTP:
2680706ba486SXiao Wang 		if (data_len < m->l2_len + m->l3_len +
2681706ba486SXiao Wang 				sizeof(struct rte_sctp_hdr))
2682706ba486SXiao Wang 			goto error;
2683706ba486SXiao Wang 		break;
2684706ba486SXiao Wang 	default:
2685706ba486SXiao Wang 		goto error;
2686706ba486SXiao Wang 	}
2687706ba486SXiao Wang 
2688706ba486SXiao Wang 	return 0;
2689706ba486SXiao Wang 
2690706ba486SXiao Wang error:
2691706ba486SXiao Wang 	m->l2_len = 0;
2692706ba486SXiao Wang 	m->l3_len = 0;
2693706ba486SXiao Wang 	m->ol_flags = 0;
2694706ba486SXiao Wang 	return -EINVAL;
269599a2dd95SBruce Richardson }
269699a2dd95SBruce Richardson 
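/*
 * Legacy offload path: translate the guest's virtio-net header into Tx
 * offload flags (RTE_MBUF_F_TX_*). A malformed request clears the offload
 * fields rather than dropping the packet.
 */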
269799a2dd95SBruce Richardson static __rte_always_inline void
269802798b07SMaxime Coquelin vhost_dequeue_offload_legacy(struct virtio_net *dev, struct virtio_net_hdr *hdr,
269902798b07SMaxime Coquelin 		struct rte_mbuf *m)
270099a2dd95SBruce Richardson {
2701706ba486SXiao Wang 	uint8_t l4_proto = 0;
270299a2dd95SBruce Richardson 	struct rte_tcp_hdr *tcp_hdr = NULL;
2703706ba486SXiao Wang 	uint16_t tcp_len;
2704706ba486SXiao Wang 	uint16_t data_len = rte_pktmbuf_data_len(m);
270599a2dd95SBruce Richardson 
2706706ba486SXiao Wang 	if (parse_headers(m, &l4_proto) < 0)
2707706ba486SXiao Wang 		return;
2708706ba486SXiao Wang 
270999a2dd95SBruce Richardson 	if (hdr->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) {
271099a2dd95SBruce Richardson 		if (hdr->csum_start == (m->l2_len + m->l3_len)) {
271199a2dd95SBruce Richardson 			switch (hdr->csum_offset) {
271299a2dd95SBruce Richardson 			case (offsetof(struct rte_tcp_hdr, cksum)):
2713706ba486SXiao Wang 				if (l4_proto != IPPROTO_TCP)
2714706ba486SXiao Wang 					goto error;
2715daa02b5cSOlivier Matz 				m->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM;
271699a2dd95SBruce Richardson 				break;
271799a2dd95SBruce Richardson 			case (offsetof(struct rte_udp_hdr, dgram_cksum)):
2718706ba486SXiao Wang 				if (l4_proto != IPPROTO_UDP)
2719706ba486SXiao Wang 					goto error;
2720daa02b5cSOlivier Matz 				m->ol_flags |= RTE_MBUF_F_TX_UDP_CKSUM;
272199a2dd95SBruce Richardson 				break;
272299a2dd95SBruce Richardson 			case (offsetof(struct rte_sctp_hdr, cksum)):
2723706ba486SXiao Wang 				if (l4_proto != IPPROTO_SCTP)
2724706ba486SXiao Wang 					goto error;
2725daa02b5cSOlivier Matz 				m->ol_flags |= RTE_MBUF_F_TX_SCTP_CKSUM;
272699a2dd95SBruce Richardson 				break;
272799a2dd95SBruce Richardson 			default:
2728706ba486SXiao Wang 				goto error;
272999a2dd95SBruce Richardson 			}
2730706ba486SXiao Wang 		} else {
2731706ba486SXiao Wang 			goto error;
273299a2dd95SBruce Richardson 		}
273399a2dd95SBruce Richardson 	}
273499a2dd95SBruce Richardson 
2735706ba486SXiao Wang 	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
27367023f3e5SYunjian Wang 		if (hdr->gso_size == 0)
27377023f3e5SYunjian Wang 			goto error;
27387023f3e5SYunjian Wang 
273999a2dd95SBruce Richardson 		switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
274099a2dd95SBruce Richardson 		case VIRTIO_NET_HDR_GSO_TCPV4:
274199a2dd95SBruce Richardson 		case VIRTIO_NET_HDR_GSO_TCPV6:
2742706ba486SXiao Wang 			if (l4_proto != IPPROTO_TCP)
2743706ba486SXiao Wang 				goto error;
2744706ba486SXiao Wang 			tcp_hdr = rte_pktmbuf_mtod_offset(m,
2745706ba486SXiao Wang 					struct rte_tcp_hdr *,
2746706ba486SXiao Wang 					m->l2_len + m->l3_len);
2747706ba486SXiao Wang 			tcp_len = (tcp_hdr->data_off & 0xf0) >> 2;
2748706ba486SXiao Wang 			if (data_len < m->l2_len + m->l3_len + tcp_len)
2749706ba486SXiao Wang 				goto error;
2750daa02b5cSOlivier Matz 			m->ol_flags |= RTE_MBUF_F_TX_TCP_SEG;
275199a2dd95SBruce Richardson 			m->tso_segsz = hdr->gso_size;
2752706ba486SXiao Wang 			m->l4_len = tcp_len;
275399a2dd95SBruce Richardson 			break;
275499a2dd95SBruce Richardson 		case VIRTIO_NET_HDR_GSO_UDP:
2755706ba486SXiao Wang 			if (l4_proto != IPPROTO_UDP)
2756706ba486SXiao Wang 				goto error;
2757daa02b5cSOlivier Matz 			m->ol_flags |= RTE_MBUF_F_TX_UDP_SEG;
275899a2dd95SBruce Richardson 			m->tso_segsz = hdr->gso_size;
275999a2dd95SBruce Richardson 			m->l4_len = sizeof(struct rte_udp_hdr);
276099a2dd95SBruce Richardson 			break;
276199a2dd95SBruce Richardson 		default:
27620e21c7c0SDavid Marchand 			VHOST_DATA_LOG(dev->ifname, WARNING,
27630e21c7c0SDavid Marchand 				"unsupported gso type %u.",
276436c525a0SDavid Marchand 				hdr->gso_type);
2765706ba486SXiao Wang 			goto error;
276699a2dd95SBruce Richardson 		}
276799a2dd95SBruce Richardson 	}
2768706ba486SXiao Wang 	return;
2769706ba486SXiao Wang 
2770706ba486SXiao Wang error:
2771706ba486SXiao Wang 	m->l2_len = 0;
2772706ba486SXiao Wang 	m->l3_len = 0;
2773706ba486SXiao Wang 	m->ol_flags = 0;
277499a2dd95SBruce Richardson }
277599a2dd95SBruce Richardson 
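/*
 * Standard offload path: report the guest's checksum/GSO requests with Rx
 * offload flags (RTE_MBUF_F_RX_*), falling back to a software checksum when
 * the L4 protocol is not recognized or the packet is tunnelled.
 */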
2776ca7036b4SDavid Marchand static __rte_always_inline void
277702798b07SMaxime Coquelin vhost_dequeue_offload(struct virtio_net *dev, struct virtio_net_hdr *hdr,
277802798b07SMaxime Coquelin 		struct rte_mbuf *m, bool legacy_ol_flags)
2779ca7036b4SDavid Marchand {
2780ca7036b4SDavid Marchand 	struct rte_net_hdr_lens hdr_lens;
2781ca7036b4SDavid Marchand 	int l4_supported = 0;
2782ca7036b4SDavid Marchand 	uint32_t ptype;
2783ca7036b4SDavid Marchand 
2784ca7036b4SDavid Marchand 	if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
2785ca7036b4SDavid Marchand 		return;
2786ca7036b4SDavid Marchand 
2787ca7036b4SDavid Marchand 	if (legacy_ol_flags) {
278802798b07SMaxime Coquelin 		vhost_dequeue_offload_legacy(dev, hdr, m);
2789ca7036b4SDavid Marchand 		return;
2790ca7036b4SDavid Marchand 	}
2791ca7036b4SDavid Marchand 
2792daa02b5cSOlivier Matz 	m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN;
2793ca7036b4SDavid Marchand 
2794ca7036b4SDavid Marchand 	ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
2795ca7036b4SDavid Marchand 	m->packet_type = ptype;
2796ca7036b4SDavid Marchand 	if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
2797ca7036b4SDavid Marchand 	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
2798ca7036b4SDavid Marchand 	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
2799ca7036b4SDavid Marchand 		l4_supported = 1;
2800ca7036b4SDavid Marchand 
2801ca7036b4SDavid Marchand 	/* According to Virtio 1.1 spec, the device only needs to look at
2802ca7036b4SDavid Marchand 	 * VIRTIO_NET_HDR_F_NEEDS_CSUM in the packet transmission path.
2803ca7036b4SDavid Marchand 	 * This differs from the processing incoming packets path where the
2804ca7036b4SDavid Marchand 	 * driver could rely on VIRTIO_NET_HDR_F_DATA_VALID flag set by the
2805ca7036b4SDavid Marchand 	 * device.
2806ca7036b4SDavid Marchand 	 *
2807ca7036b4SDavid Marchand 	 * 5.1.6.2.1 Driver Requirements: Packet Transmission
2808ca7036b4SDavid Marchand 	 * The driver MUST NOT set the VIRTIO_NET_HDR_F_DATA_VALID and
2809ca7036b4SDavid Marchand 	 * VIRTIO_NET_HDR_F_RSC_INFO bits in flags.
2810ca7036b4SDavid Marchand 	 *
2811ca7036b4SDavid Marchand 	 * 5.1.6.2.2 Device Requirements: Packet Transmission
2812ca7036b4SDavid Marchand 	 * The device MUST ignore flag bits that it does not recognize.
2813ca7036b4SDavid Marchand 	 */
2814ca7036b4SDavid Marchand 	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
2815ca7036b4SDavid Marchand 		uint32_t hdrlen;
2816ca7036b4SDavid Marchand 
2817ca7036b4SDavid Marchand 		hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
2818ca7036b4SDavid Marchand 		if (hdr->csum_start <= hdrlen && l4_supported != 0) {
2819daa02b5cSOlivier Matz 			m->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_NONE;
2820ca7036b4SDavid Marchand 		} else {
2821ca7036b4SDavid Marchand 			/* Unknown proto or tunnel, do sw cksum. We can assume
2822ca7036b4SDavid Marchand 			 * the cksum field is in the first segment since the
2823ca7036b4SDavid Marchand 			 * buffers we provided to the host are large enough.
2824ca7036b4SDavid Marchand 			 * In case of SCTP, this will be wrong since it's a CRC
2825ca7036b4SDavid Marchand 			 * but there's nothing we can do.
2826ca7036b4SDavid Marchand 			 */
2827ca7036b4SDavid Marchand 			uint16_t csum = 0, off;
2828ca7036b4SDavid Marchand 
28294dc4e33fSOlivier Matz 			if (hdr->csum_start >= rte_pktmbuf_pkt_len(m))
28304dc4e33fSOlivier Matz 				return;
28314dc4e33fSOlivier Matz 
2832ca7036b4SDavid Marchand 			if (rte_raw_cksum_mbuf(m, hdr->csum_start,
2833ca7036b4SDavid Marchand 					rte_pktmbuf_pkt_len(m) - hdr->csum_start, &csum) < 0)
2834ca7036b4SDavid Marchand 				return;
2835ca7036b4SDavid Marchand 			if (likely(csum != 0xffff))
2836ca7036b4SDavid Marchand 				csum = ~csum;
2837ca7036b4SDavid Marchand 			off = hdr->csum_offset + hdr->csum_start;
2838ca7036b4SDavid Marchand 			if (rte_pktmbuf_data_len(m) >= off + 1)
2839ca7036b4SDavid Marchand 				*rte_pktmbuf_mtod_offset(m, uint16_t *, off) = csum;
2840ca7036b4SDavid Marchand 		}
2841ca7036b4SDavid Marchand 	}
2842ca7036b4SDavid Marchand 
2843ca7036b4SDavid Marchand 	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
2844ca7036b4SDavid Marchand 		if (hdr->gso_size == 0)
2845ca7036b4SDavid Marchand 			return;
2846ca7036b4SDavid Marchand 
2847ca7036b4SDavid Marchand 		switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
2848ca7036b4SDavid Marchand 		case VIRTIO_NET_HDR_GSO_TCPV4:
2849ca7036b4SDavid Marchand 		case VIRTIO_NET_HDR_GSO_TCPV6:
2850ca7036b4SDavid Marchand 			if ((ptype & RTE_PTYPE_L4_MASK) != RTE_PTYPE_L4_TCP)
2851ca7036b4SDavid Marchand 				break;
2852daa02b5cSOlivier Matz 			m->ol_flags |= RTE_MBUF_F_RX_LRO | RTE_MBUF_F_RX_L4_CKSUM_NONE;
2853ca7036b4SDavid Marchand 			m->tso_segsz = hdr->gso_size;
2854ca7036b4SDavid Marchand 			break;
2855ca7036b4SDavid Marchand 		case VIRTIO_NET_HDR_GSO_UDP:
2856ca7036b4SDavid Marchand 			if ((ptype & RTE_PTYPE_L4_MASK) != RTE_PTYPE_L4_UDP)
2857ca7036b4SDavid Marchand 				break;
2858daa02b5cSOlivier Matz 			m->ol_flags |= RTE_MBUF_F_RX_LRO | RTE_MBUF_F_RX_L4_CKSUM_NONE;
2859ca7036b4SDavid Marchand 			m->tso_segsz = hdr->gso_size;
2860ca7036b4SDavid Marchand 			break;
2861ca7036b4SDavid Marchand 		default:
2862ca7036b4SDavid Marchand 			break;
2863ca7036b4SDavid Marchand 		}
2864ca7036b4SDavid Marchand 	}
2865ca7036b4SDavid Marchand }
2866ca7036b4SDavid Marchand 
286799a2dd95SBruce Richardson static __rte_noinline void
286899a2dd95SBruce Richardson copy_vnet_hdr_from_desc(struct virtio_net_hdr *hdr,
286999a2dd95SBruce Richardson 		struct buf_vector *buf_vec)
287099a2dd95SBruce Richardson {
287199a2dd95SBruce Richardson 	uint64_t len;
287299a2dd95SBruce Richardson 	uint64_t remain = sizeof(struct virtio_net_hdr);
287399a2dd95SBruce Richardson 	uint64_t src;
287499a2dd95SBruce Richardson 	uint64_t dst = (uint64_t)(uintptr_t)hdr;
287599a2dd95SBruce Richardson 
287699a2dd95SBruce Richardson 	while (remain) {
287799a2dd95SBruce Richardson 		len = RTE_MIN(remain, buf_vec->buf_len);
287899a2dd95SBruce Richardson 		src = buf_vec->buf_addr;
287999a2dd95SBruce Richardson 		rte_memcpy((void *)(uintptr_t)dst,
288099a2dd95SBruce Richardson 				(void *)(uintptr_t)src, len);
288199a2dd95SBruce Richardson 
288299a2dd95SBruce Richardson 		remain -= len;
288399a2dd95SBruce Richardson 		dst += len;
288499a2dd95SBruce Richardson 		buf_vec++;
288599a2dd95SBruce Richardson 	}
288699a2dd95SBruce Richardson }
288799a2dd95SBruce Richardson 
288899a2dd95SBruce Richardson static __rte_always_inline int
2889844e113aSXuan Ding desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
289099a2dd95SBruce Richardson 		  struct buf_vector *buf_vec, uint16_t nr_vec,
2891ca7036b4SDavid Marchand 		  struct rte_mbuf *m, struct rte_mempool *mbuf_pool,
2892844e113aSXuan Ding 		  bool legacy_ol_flags, uint16_t slot_idx, bool is_async)
289303f77d66SEelco Chaudron 	__rte_shared_locks_required(&vq->access_lock)
2894bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
289599a2dd95SBruce Richardson {
28966d823bb3SXuan Ding 	uint32_t buf_avail, buf_offset, buf_len;
28976d823bb3SXuan Ding 	uint64_t buf_addr, buf_iova;
289899a2dd95SBruce Richardson 	uint32_t mbuf_avail, mbuf_offset;
2899dc1516e2SMaxime Coquelin 	uint32_t hdr_remain = dev->vhost_hlen;
290099a2dd95SBruce Richardson 	uint32_t cpy_len;
290199a2dd95SBruce Richardson 	struct rte_mbuf *cur = m, *prev = m;
290299a2dd95SBruce Richardson 	struct virtio_net_hdr tmp_hdr;
290399a2dd95SBruce Richardson 	struct virtio_net_hdr *hdr = NULL;
2904dc1516e2SMaxime Coquelin 	uint16_t vec_idx;
2905844e113aSXuan Ding 	struct vhost_async *async = vq->async;
2906844e113aSXuan Ding 	struct async_inflight_info *pkts_info;
290799a2dd95SBruce Richardson 
290871bd0cc5SMaxime Coquelin 	/*
290971bd0cc5SMaxime Coquelin 	 * The caller has checked that the descriptor chain is larger than
291071bd0cc5SMaxime Coquelin 	 * the header size.
291171bd0cc5SMaxime Coquelin 	 */
291299a2dd95SBruce Richardson 
291399a2dd95SBruce Richardson 	if (virtio_net_with_host_offload(dev)) {
2914dc1516e2SMaxime Coquelin 		if (unlikely(buf_vec[0].buf_len < sizeof(struct virtio_net_hdr))) {
291599a2dd95SBruce Richardson 			/*
291699a2dd95SBruce Richardson 			 * No luck, the virtio-net header doesn't fit
291799a2dd95SBruce Richardson 			 * in a contiguous virtual area.
291899a2dd95SBruce Richardson 			 */
291999a2dd95SBruce Richardson 			copy_vnet_hdr_from_desc(&tmp_hdr, buf_vec);
292099a2dd95SBruce Richardson 			hdr = &tmp_hdr;
292199a2dd95SBruce Richardson 		} else {
2922dc1516e2SMaxime Coquelin 			hdr = (struct virtio_net_hdr *)((uintptr_t)buf_vec[0].buf_addr);
292399a2dd95SBruce Richardson 		}
292499a2dd95SBruce Richardson 	}
292599a2dd95SBruce Richardson 
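	/*
	 * Skip the virtio-net header: find the buffer vector entry holding the
	 * first byte of packet data and start reading from that offset.
	 */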
2926dc1516e2SMaxime Coquelin 	for (vec_idx = 0; vec_idx < nr_vec; vec_idx++) {
2927dc1516e2SMaxime Coquelin 		if (buf_vec[vec_idx].buf_len > hdr_remain)
2928dc1516e2SMaxime Coquelin 			break;
292999a2dd95SBruce Richardson 
2930dc1516e2SMaxime Coquelin 		hdr_remain -= buf_vec[vec_idx].buf_len;
293199a2dd95SBruce Richardson 	}
293299a2dd95SBruce Richardson 
2933dc1516e2SMaxime Coquelin 	buf_addr = buf_vec[vec_idx].buf_addr;
2934dc1516e2SMaxime Coquelin 	buf_iova = buf_vec[vec_idx].buf_iova;
2935dc1516e2SMaxime Coquelin 	buf_len = buf_vec[vec_idx].buf_len;
2936dc1516e2SMaxime Coquelin 	buf_offset = hdr_remain;
2937dc1516e2SMaxime Coquelin 	buf_avail = buf_vec[vec_idx].buf_len - hdr_remain;
2938dc1516e2SMaxime Coquelin 
293999a2dd95SBruce Richardson 	PRINT_PACKET(dev,
294099a2dd95SBruce Richardson 			(uintptr_t)(buf_addr + buf_offset),
294199a2dd95SBruce Richardson 			(uint32_t)buf_avail, 0);
294299a2dd95SBruce Richardson 
294399a2dd95SBruce Richardson 	mbuf_offset = 0;
294499a2dd95SBruce Richardson 	mbuf_avail  = m->buf_len - RTE_PKTMBUF_HEADROOM;
2945844e113aSXuan Ding 
2946844e113aSXuan Ding 	if (is_async) {
2947844e113aSXuan Ding 		pkts_info = async->pkts_info;
2948844e113aSXuan Ding 		if (async_iter_initialize(dev, async))
2949844e113aSXuan Ding 			return -1;
2950844e113aSXuan Ding 	}
2951844e113aSXuan Ding 
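	/*
	 * Copy loop: each round copies min(descriptor bytes left, mbuf room
	 * left). Async mode queues the segment for DMA, sync mode copies it
	 * inline.
	 */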
295299a2dd95SBruce Richardson 	while (1) {
295399a2dd95SBruce Richardson 		cpy_len = RTE_MIN(buf_avail, mbuf_avail);
295499a2dd95SBruce Richardson 
2955844e113aSXuan Ding 		if (is_async) {
2956844e113aSXuan Ding 			if (async_fill_seg(dev, vq, cur, mbuf_offset,
2957844e113aSXuan Ding 					   buf_iova + buf_offset, cpy_len, false) < 0)
2958844e113aSXuan Ding 				goto error;
2959cd79d1b0SXuan Ding 		} else if (likely(hdr && cur == m)) {
2960b6eee3e8SXuan Ding 			rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, mbuf_offset),
2961b6eee3e8SXuan Ding 				(void *)((uintptr_t)(buf_addr + buf_offset)),
2962b6eee3e8SXuan Ding 				cpy_len);
2963b6eee3e8SXuan Ding 		} else {
29646d823bb3SXuan Ding 			sync_fill_seg(dev, vq, cur, mbuf_offset,
29656d823bb3SXuan Ding 				      buf_addr + buf_offset,
29666d823bb3SXuan Ding 				      buf_iova + buf_offset, cpy_len, false);
2967844e113aSXuan Ding 		}
296899a2dd95SBruce Richardson 
296999a2dd95SBruce Richardson 		mbuf_avail  -= cpy_len;
297099a2dd95SBruce Richardson 		mbuf_offset += cpy_len;
297199a2dd95SBruce Richardson 		buf_avail -= cpy_len;
297299a2dd95SBruce Richardson 		buf_offset += cpy_len;
297399a2dd95SBruce Richardson 
297499a2dd95SBruce Richardson 		/* This buffer has reached its end, get the next one */
297599a2dd95SBruce Richardson 		if (buf_avail == 0) {
297699a2dd95SBruce Richardson 			if (++vec_idx >= nr_vec)
297799a2dd95SBruce Richardson 				break;
297899a2dd95SBruce Richardson 
297999a2dd95SBruce Richardson 			buf_addr = buf_vec[vec_idx].buf_addr;
29806d823bb3SXuan Ding 			buf_iova = buf_vec[vec_idx].buf_iova;
298199a2dd95SBruce Richardson 			buf_len = buf_vec[vec_idx].buf_len;
298299a2dd95SBruce Richardson 
298399a2dd95SBruce Richardson 			buf_offset = 0;
298499a2dd95SBruce Richardson 			buf_avail  = buf_len;
298599a2dd95SBruce Richardson 
298699a2dd95SBruce Richardson 			PRINT_PACKET(dev, (uintptr_t)buf_addr,
298799a2dd95SBruce Richardson 					(uint32_t)buf_avail, 0);
298899a2dd95SBruce Richardson 		}
298999a2dd95SBruce Richardson 
299099a2dd95SBruce Richardson 		/*
299199a2dd95SBruce Richardson 		 * This mbuf has reached its end, get a new one
299299a2dd95SBruce Richardson 		 * to hold more data.
299399a2dd95SBruce Richardson 		 */
299499a2dd95SBruce Richardson 		if (mbuf_avail == 0) {
299599a2dd95SBruce Richardson 			cur = rte_pktmbuf_alloc(mbuf_pool);
299699a2dd95SBruce Richardson 			if (unlikely(cur == NULL)) {
2997458dc624SMaxime Coquelin 				vq->stats.mbuf_alloc_failed++;
29980e21c7c0SDavid Marchand 				VHOST_DATA_LOG(dev->ifname, ERR,
29990e21c7c0SDavid Marchand 					"failed to allocate memory for mbuf.");
30006d823bb3SXuan Ding 				goto error;
300199a2dd95SBruce Richardson 			}
300299a2dd95SBruce Richardson 
300399a2dd95SBruce Richardson 			prev->next = cur;
300499a2dd95SBruce Richardson 			prev->data_len = mbuf_offset;
300599a2dd95SBruce Richardson 			m->nb_segs += 1;
300699a2dd95SBruce Richardson 			m->pkt_len += mbuf_offset;
300799a2dd95SBruce Richardson 			prev = cur;
300899a2dd95SBruce Richardson 
300999a2dd95SBruce Richardson 			mbuf_offset = 0;
301099a2dd95SBruce Richardson 			mbuf_avail  = cur->buf_len - RTE_PKTMBUF_HEADROOM;
301199a2dd95SBruce Richardson 		}
301299a2dd95SBruce Richardson 	}
301399a2dd95SBruce Richardson 
301499a2dd95SBruce Richardson 	prev->data_len = mbuf_offset;
301599a2dd95SBruce Richardson 	m->pkt_len    += mbuf_offset;
301699a2dd95SBruce Richardson 
3017844e113aSXuan Ding 	if (is_async) {
3018844e113aSXuan Ding 		async_iter_finalize(async);
3019844e113aSXuan Ding 		if (hdr)
3020844e113aSXuan Ding 			pkts_info[slot_idx].nethdr = *hdr;
3021cd79d1b0SXuan Ding 	} else if (hdr) {
302202798b07SMaxime Coquelin 		vhost_dequeue_offload(dev, hdr, m, legacy_ol_flags);
3023844e113aSXuan Ding 	}
302499a2dd95SBruce Richardson 
30256d823bb3SXuan Ding 	return 0;
30266d823bb3SXuan Ding error:
3027844e113aSXuan Ding 	if (is_async)
3028844e113aSXuan Ding 		async_iter_cancel(async);
3029844e113aSXuan Ding 
30306d823bb3SXuan Ding 	return -1;
303199a2dd95SBruce Richardson }
303299a2dd95SBruce Richardson 
303399a2dd95SBruce Richardson static void
303499a2dd95SBruce Richardson virtio_dev_extbuf_free(void *addr __rte_unused, void *opaque)
303599a2dd95SBruce Richardson {
303699a2dd95SBruce Richardson 	rte_free(opaque);
303799a2dd95SBruce Richardson }
303899a2dd95SBruce Richardson 
303999a2dd95SBruce Richardson static int
304002798b07SMaxime Coquelin virtio_dev_extbuf_alloc(struct virtio_net *dev, struct rte_mbuf *pkt, uint32_t size)
304199a2dd95SBruce Richardson {
304299a2dd95SBruce Richardson 	struct rte_mbuf_ext_shared_info *shinfo = NULL;
304399a2dd95SBruce Richardson 	uint32_t total_len = RTE_PKTMBUF_HEADROOM + size;
304499a2dd95SBruce Richardson 	uint16_t buf_len;
304599a2dd95SBruce Richardson 	rte_iova_t iova;
304699a2dd95SBruce Richardson 	void *buf;
304799a2dd95SBruce Richardson 
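	/*
	 * The external buffer holds the mbuf headroom and packet data followed
	 * by the shared info structure; reserve room for shinfo plus alignment.
	 */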
304899a2dd95SBruce Richardson 	total_len += sizeof(*shinfo) + sizeof(uintptr_t);
304999a2dd95SBruce Richardson 	total_len = RTE_ALIGN_CEIL(total_len, sizeof(uintptr_t));
305099a2dd95SBruce Richardson 
305199a2dd95SBruce Richardson 	if (unlikely(total_len > UINT16_MAX))
305299a2dd95SBruce Richardson 		return -ENOSPC;
305399a2dd95SBruce Richardson 
305499a2dd95SBruce Richardson 	buf_len = total_len;
305599a2dd95SBruce Richardson 	buf = rte_malloc(NULL, buf_len, RTE_CACHE_LINE_SIZE);
305699a2dd95SBruce Richardson 	if (unlikely(buf == NULL))
305799a2dd95SBruce Richardson 		return -ENOMEM;
305899a2dd95SBruce Richardson 
305999a2dd95SBruce Richardson 	/* Initialize shinfo */
306099a2dd95SBruce Richardson 	shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len,
306199a2dd95SBruce Richardson 						virtio_dev_extbuf_free, buf);
306299a2dd95SBruce Richardson 	if (unlikely(shinfo == NULL)) {
306399a2dd95SBruce Richardson 		rte_free(buf);
30640e21c7c0SDavid Marchand 		VHOST_DATA_LOG(dev->ifname, ERR, "failed to init shinfo");
306599a2dd95SBruce Richardson 		return -1;
306699a2dd95SBruce Richardson 	}
306799a2dd95SBruce Richardson 
306899a2dd95SBruce Richardson 	iova = rte_malloc_virt2iova(buf);
306999a2dd95SBruce Richardson 	rte_pktmbuf_attach_extbuf(pkt, buf, iova, buf_len, shinfo);
307099a2dd95SBruce Richardson 	rte_pktmbuf_reset_headroom(pkt);
307199a2dd95SBruce Richardson 
307299a2dd95SBruce Richardson 	return 0;
307399a2dd95SBruce Richardson }
307499a2dd95SBruce Richardson 
3075242695f6SBalazs Nemeth /*
3076242695f6SBalazs Nemeth  * Prepare a pktmbuf able to hold data_len bytes: use the tailroom if large
 * enough, else attach an external buffer when allowed, else accept a chained
 * mbuf unless linear buffers are required.
3077242695f6SBalazs Nemeth  */
3078a287ac28SBalazs Nemeth static __rte_always_inline int
3079a287ac28SBalazs Nemeth virtio_dev_pktmbuf_prep(struct virtio_net *dev, struct rte_mbuf *pkt,
3080a287ac28SBalazs Nemeth 			 uint32_t data_len)
3081a287ac28SBalazs Nemeth {
3082a287ac28SBalazs Nemeth 	if (rte_pktmbuf_tailroom(pkt) >= data_len)
3083a287ac28SBalazs Nemeth 		return 0;
3084a287ac28SBalazs Nemeth 
3085a287ac28SBalazs Nemeth 	/* attach an external buffer if supported */
308602798b07SMaxime Coquelin 	if (dev->extbuf && !virtio_dev_extbuf_alloc(dev, pkt, data_len))
3087a287ac28SBalazs Nemeth 		return 0;
3088a287ac28SBalazs Nemeth 
3089a287ac28SBalazs Nemeth 	/* check if chained buffers are allowed */
3090a287ac28SBalazs Nemeth 	if (!dev->linearbuf)
3091a287ac28SBalazs Nemeth 		return 0;
3092a287ac28SBalazs Nemeth 
3093a287ac28SBalazs Nemeth 	return -1;
3094a287ac28SBalazs Nemeth }
3095a287ac28SBalazs Nemeth 
3096ca7036b4SDavid Marchand __rte_always_inline
3097ca7036b4SDavid Marchand static uint16_t
309899a2dd95SBruce Richardson virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
3099ca7036b4SDavid Marchand 	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count,
3100ca7036b4SDavid Marchand 	bool legacy_ol_flags)
310103f77d66SEelco Chaudron 	__rte_shared_locks_required(&vq->access_lock)
3102bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
310399a2dd95SBruce Richardson {
310499a2dd95SBruce Richardson 	uint16_t i;
3105f4284e2dSMaxime Coquelin 	uint16_t avail_entries;
310699a2dd95SBruce Richardson 	static bool allocerr_warned;
310799a2dd95SBruce Richardson 
310899a2dd95SBruce Richardson 	/*
310999a2dd95SBruce Richardson 	 * The ordering between avail index and
311099a2dd95SBruce Richardson 	 * desc reads needs to be enforced.
311199a2dd95SBruce Richardson 	 */
31125147b641STyler Retzlaff 	avail_entries = rte_atomic_load_explicit((unsigned short __rte_atomic *)&vq->avail->idx,
31135147b641STyler Retzlaff 		rte_memory_order_acquire) - vq->last_avail_idx;
3114f4284e2dSMaxime Coquelin 	if (avail_entries == 0)
311599a2dd95SBruce Richardson 		return 0;
311699a2dd95SBruce Richardson 
311799a2dd95SBruce Richardson 	rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
311899a2dd95SBruce Richardson 
31190e21c7c0SDavid Marchand 	VHOST_DATA_LOG(dev->ifname, DEBUG, "%s", __func__);
312099a2dd95SBruce Richardson 
312199a2dd95SBruce Richardson 	count = RTE_MIN(count, MAX_PKT_BURST);
3122f4284e2dSMaxime Coquelin 	count = RTE_MIN(count, avail_entries);
31230e21c7c0SDavid Marchand 	VHOST_DATA_LOG(dev->ifname, DEBUG, "about to dequeue %u buffers", count);
312499a2dd95SBruce Richardson 
3125458dc624SMaxime Coquelin 	if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, count)) {
3126458dc624SMaxime Coquelin 		vq->stats.mbuf_alloc_failed += count;
3127242695f6SBalazs Nemeth 		return 0;
3128458dc624SMaxime Coquelin 	}
3129242695f6SBalazs Nemeth 
313099a2dd95SBruce Richardson 	for (i = 0; i < count; i++) {
313199a2dd95SBruce Richardson 		struct buf_vector buf_vec[BUF_VECTOR_MAX];
313299a2dd95SBruce Richardson 		uint16_t head_idx;
313399a2dd95SBruce Richardson 		uint32_t buf_len;
313499a2dd95SBruce Richardson 		uint16_t nr_vec = 0;
313599a2dd95SBruce Richardson 		int err;
313699a2dd95SBruce Richardson 
313799a2dd95SBruce Richardson 		if (unlikely(fill_vec_buf_split(dev, vq,
313899a2dd95SBruce Richardson 						vq->last_avail_idx + i,
313999a2dd95SBruce Richardson 						&nr_vec, buf_vec,
314099a2dd95SBruce Richardson 						&head_idx, &buf_len,
314199a2dd95SBruce Richardson 						VHOST_ACCESS_RO) < 0))
314299a2dd95SBruce Richardson 			break;
314399a2dd95SBruce Richardson 
314499a2dd95SBruce Richardson 		update_shadow_used_ring_split(vq, head_idx, 0);
314599a2dd95SBruce Richardson 
31467746fba8SMaxime Coquelin 		if (unlikely(buf_len <= dev->vhost_hlen))
314771bd0cc5SMaxime Coquelin 			break;
314871bd0cc5SMaxime Coquelin 
314971bd0cc5SMaxime Coquelin 		buf_len -= dev->vhost_hlen;
315071bd0cc5SMaxime Coquelin 
3151242695f6SBalazs Nemeth 		err = virtio_dev_pktmbuf_prep(dev, pkts[i], buf_len);
3152242695f6SBalazs Nemeth 		if (unlikely(err)) {
315399a2dd95SBruce Richardson 			/*
315499a2dd95SBruce Richardson 			 * mbuf preparation fails for jumbo packets when external
315599a2dd95SBruce Richardson 			 * buffer allocation is not allowed and a linear buffer
315699a2dd95SBruce Richardson 			 * is required. Drop this packet.
315799a2dd95SBruce Richardson 			 */
315899a2dd95SBruce Richardson 			if (!allocerr_warned) {
31590e21c7c0SDavid Marchand 				VHOST_DATA_LOG(dev->ifname, ERR,
31600e21c7c0SDavid Marchand 					"failed mbuf alloc of size %d from %s.",
316136c525a0SDavid Marchand 					buf_len, mbuf_pool->name);
316299a2dd95SBruce Richardson 				allocerr_warned = true;
316399a2dd95SBruce Richardson 			}
316499a2dd95SBruce Richardson 			break;
316599a2dd95SBruce Richardson 		}
316699a2dd95SBruce Richardson 
3167844e113aSXuan Ding 		err = desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i],
3168844e113aSXuan Ding 				   mbuf_pool, legacy_ol_flags, 0, false);
316999a2dd95SBruce Richardson 		if (unlikely(err)) {
317099a2dd95SBruce Richardson 			if (!allocerr_warned) {
31710e21c7c0SDavid Marchand 				VHOST_DATA_LOG(dev->ifname, ERR, "failed to copy desc to mbuf.");
317299a2dd95SBruce Richardson 				allocerr_warned = true;
317399a2dd95SBruce Richardson 			}
317499a2dd95SBruce Richardson 			break;
317599a2dd95SBruce Richardson 		}
317699a2dd95SBruce Richardson 	}
317799a2dd95SBruce Richardson 
31787746fba8SMaxime Coquelin 	if (unlikely(count != i))
31797746fba8SMaxime Coquelin 		rte_pktmbuf_free_bulk(&pkts[i], count - i);
3180242695f6SBalazs Nemeth 
318199a2dd95SBruce Richardson 	if (likely(vq->shadow_used_idx)) {
31827746fba8SMaxime Coquelin 		vq->last_avail_idx += vq->shadow_used_idx;
318315677ca2SMaxime Coquelin 		vhost_virtqueue_reconnect_log_split(vq);
31847746fba8SMaxime Coquelin 		do_data_copy_dequeue(vq);
318599a2dd95SBruce Richardson 		flush_shadow_used_ring_split(dev, vq);
318699a2dd95SBruce Richardson 		vhost_vring_call_split(dev, vq);
318799a2dd95SBruce Richardson 	}
318899a2dd95SBruce Richardson 
31897746fba8SMaxime Coquelin 	return i;
319099a2dd95SBruce Richardson }
319199a2dd95SBruce Richardson 
3192ca7036b4SDavid Marchand __rte_noinline
3193ca7036b4SDavid Marchand static uint16_t
3194ca7036b4SDavid Marchand virtio_dev_tx_split_legacy(struct virtio_net *dev,
3195ca7036b4SDavid Marchand 	struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool,
3196ca7036b4SDavid Marchand 	struct rte_mbuf **pkts, uint16_t count)
319703f77d66SEelco Chaudron 	__rte_shared_locks_required(&vq->access_lock)
3198bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
3199ca7036b4SDavid Marchand {
3200ca7036b4SDavid Marchand 	return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, true);
3201ca7036b4SDavid Marchand }
3202ca7036b4SDavid Marchand 
3203ca7036b4SDavid Marchand __rte_noinline
3204ca7036b4SDavid Marchand static uint16_t
3205ca7036b4SDavid Marchand virtio_dev_tx_split_compliant(struct virtio_net *dev,
3206ca7036b4SDavid Marchand 	struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool,
3207ca7036b4SDavid Marchand 	struct rte_mbuf **pkts, uint16_t count)
320803f77d66SEelco Chaudron 	__rte_shared_locks_required(&vq->access_lock)
3209bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
3210ca7036b4SDavid Marchand {
3211ca7036b4SDavid Marchand 	return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, false);
3212ca7036b4SDavid Marchand }
3213ca7036b4SDavid Marchand 
321499a2dd95SBruce Richardson static __rte_always_inline int
321599a2dd95SBruce Richardson vhost_reserve_avail_batch_packed(struct virtio_net *dev,
321699a2dd95SBruce Richardson 				 struct vhost_virtqueue *vq,
321799a2dd95SBruce Richardson 				 struct rte_mbuf **pkts,
321899a2dd95SBruce Richardson 				 uint16_t avail_idx,
321999a2dd95SBruce Richardson 				 uintptr_t *desc_addrs,
322099a2dd95SBruce Richardson 				 uint16_t *ids)
3221bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
322299a2dd95SBruce Richardson {
322399a2dd95SBruce Richardson 	bool wrap = vq->avail_wrap_counter;
322499a2dd95SBruce Richardson 	struct vring_packed_desc *descs = vq->desc_packed;
322599a2dd95SBruce Richardson 	uint64_t lens[PACKED_BATCH_SIZE];
322699a2dd95SBruce Richardson 	uint64_t buf_lens[PACKED_BATCH_SIZE];
322799a2dd95SBruce Richardson 	uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
322899a2dd95SBruce Richardson 	uint16_t flags, i;
322999a2dd95SBruce Richardson 
323099a2dd95SBruce Richardson 	if (unlikely(avail_idx & PACKED_BATCH_MASK))
323199a2dd95SBruce Richardson 		return -1;
323299a2dd95SBruce Richardson 	if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size))
323399a2dd95SBruce Richardson 		return -1;
323499a2dd95SBruce Richardson 
323599a2dd95SBruce Richardson 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
323699a2dd95SBruce Richardson 		flags = descs[avail_idx + i].flags;
323799a2dd95SBruce Richardson 		if (unlikely((wrap != !!(flags & VRING_DESC_F_AVAIL)) ||
323899a2dd95SBruce Richardson 			     (wrap == !!(flags & VRING_DESC_F_USED))  ||
323999a2dd95SBruce Richardson 			     (flags & PACKED_DESC_SINGLE_DEQUEUE_FLAG)))
324099a2dd95SBruce Richardson 			return -1;
324199a2dd95SBruce Richardson 	}
324299a2dd95SBruce Richardson 
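	/* The acquire fence orders the descriptor flag checks above before the length and address reads below. */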
32435147b641STyler Retzlaff 	rte_atomic_thread_fence(rte_memory_order_acquire);
324499a2dd95SBruce Richardson 
324599a2dd95SBruce Richardson 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
324699a2dd95SBruce Richardson 		lens[i] = descs[avail_idx + i].len;
324799a2dd95SBruce Richardson 
324899a2dd95SBruce Richardson 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
324999a2dd95SBruce Richardson 		desc_addrs[i] = vhost_iova_to_vva(dev, vq,
325099a2dd95SBruce Richardson 						  descs[avail_idx + i].addr,
325199a2dd95SBruce Richardson 						  &lens[i], VHOST_ACCESS_RW);
325299a2dd95SBruce Richardson 	}
325399a2dd95SBruce Richardson 
325499a2dd95SBruce Richardson 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
325599a2dd95SBruce Richardson 		if (unlikely(!desc_addrs[i]))
325699a2dd95SBruce Richardson 			return -1;
325799a2dd95SBruce Richardson 		if (unlikely((lens[i] != descs[avail_idx + i].len)))
325899a2dd95SBruce Richardson 			return -1;
325999a2dd95SBruce Richardson 	}
326099a2dd95SBruce Richardson 
326199a2dd95SBruce Richardson 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
3262a287ac28SBalazs Nemeth 		if (virtio_dev_pktmbuf_prep(dev, pkts[i], lens[i]))
3263a287ac28SBalazs Nemeth 			goto err;
326499a2dd95SBruce Richardson 	}
326599a2dd95SBruce Richardson 
326699a2dd95SBruce Richardson 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
326799a2dd95SBruce Richardson 		buf_lens[i] = pkts[i]->buf_len - pkts[i]->data_off;
326899a2dd95SBruce Richardson 
326999a2dd95SBruce Richardson 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
327099a2dd95SBruce Richardson 		if (unlikely(buf_lens[i] < (lens[i] - buf_offset)))
3271a287ac28SBalazs Nemeth 			goto err;
327299a2dd95SBruce Richardson 	}
327399a2dd95SBruce Richardson 
327499a2dd95SBruce Richardson 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
327599a2dd95SBruce Richardson 		pkts[i]->pkt_len = lens[i] - buf_offset;
327699a2dd95SBruce Richardson 		pkts[i]->data_len = pkts[i]->pkt_len;
327799a2dd95SBruce Richardson 		ids[i] = descs[avail_idx + i].id;
327899a2dd95SBruce Richardson 	}
327999a2dd95SBruce Richardson 
328099a2dd95SBruce Richardson 	return 0;
328199a2dd95SBruce Richardson 
3282a287ac28SBalazs Nemeth err:
328399a2dd95SBruce Richardson 	return -1;
328499a2dd95SBruce Richardson }
328599a2dd95SBruce Richardson 
328699a2dd95SBruce Richardson static __rte_always_inline int
3287c2fa52bfSCheng Jiang vhost_async_tx_batch_packed_check(struct virtio_net *dev,
3288c2fa52bfSCheng Jiang 				 struct vhost_virtqueue *vq,
3289c2fa52bfSCheng Jiang 				 struct rte_mbuf **pkts,
3290c2fa52bfSCheng Jiang 				 uint16_t avail_idx,
3291c2fa52bfSCheng Jiang 				 uintptr_t *desc_addrs,
3292c2fa52bfSCheng Jiang 				 uint64_t *lens,
3293c2fa52bfSCheng Jiang 				 uint16_t *ids,
3294c2fa52bfSCheng Jiang 				 int16_t dma_id,
3295c2fa52bfSCheng Jiang 				 uint16_t vchan_id)
3296c2fa52bfSCheng Jiang {
3297c2fa52bfSCheng Jiang 	bool wrap = vq->avail_wrap_counter;
3298c2fa52bfSCheng Jiang 	struct vring_packed_desc *descs = vq->desc_packed;
3299c2fa52bfSCheng Jiang 	uint64_t buf_lens[PACKED_BATCH_SIZE];
3300c2fa52bfSCheng Jiang 	uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
3301c2fa52bfSCheng Jiang 	uint16_t flags, i;
3302c2fa52bfSCheng Jiang 
3303c2fa52bfSCheng Jiang 	if (unlikely(avail_idx & PACKED_BATCH_MASK))
3304c2fa52bfSCheng Jiang 		return -1;
3305c2fa52bfSCheng Jiang 	if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size))
3306c2fa52bfSCheng Jiang 		return -1;
3307c2fa52bfSCheng Jiang 
3308c2fa52bfSCheng Jiang 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
3309c2fa52bfSCheng Jiang 		flags = descs[avail_idx + i].flags;
3310c2fa52bfSCheng Jiang 		if (unlikely((wrap != !!(flags & VRING_DESC_F_AVAIL)) ||
3311c2fa52bfSCheng Jiang 			     (wrap == !!(flags & VRING_DESC_F_USED))  ||
3312c2fa52bfSCheng Jiang 			     (flags & PACKED_DESC_SINGLE_DEQUEUE_FLAG)))
3313c2fa52bfSCheng Jiang 			return -1;
3314c2fa52bfSCheng Jiang 	}
3315c2fa52bfSCheng Jiang 
33165147b641STyler Retzlaff 	rte_atomic_thread_fence(rte_memory_order_acquire);
3317c2fa52bfSCheng Jiang 
3318c2fa52bfSCheng Jiang 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
3319c2fa52bfSCheng Jiang 		lens[i] = descs[avail_idx + i].len;
3320c2fa52bfSCheng Jiang 
3321c2fa52bfSCheng Jiang 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
3322c2fa52bfSCheng Jiang 		desc_addrs[i] = descs[avail_idx + i].addr;
3323c2fa52bfSCheng Jiang 	}
3324c2fa52bfSCheng Jiang 
3325c2fa52bfSCheng Jiang 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
3326c2fa52bfSCheng Jiang 		if (unlikely(!desc_addrs[i]))
3327c2fa52bfSCheng Jiang 			return -1;
3328c2fa52bfSCheng Jiang 		if (unlikely((lens[i] != descs[avail_idx + i].len)))
3329c2fa52bfSCheng Jiang 			return -1;
3330c2fa52bfSCheng Jiang 	}
3331c2fa52bfSCheng Jiang 
3332c2fa52bfSCheng Jiang 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
3333c2fa52bfSCheng Jiang 		if (virtio_dev_pktmbuf_prep(dev, pkts[i], lens[i]))
3334c2fa52bfSCheng Jiang 			goto err;
3335c2fa52bfSCheng Jiang 	}
3336c2fa52bfSCheng Jiang 
3337c2fa52bfSCheng Jiang 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
3338c2fa52bfSCheng Jiang 		buf_lens[i] = pkts[i]->buf_len - pkts[i]->data_off;
3339c2fa52bfSCheng Jiang 
3340c2fa52bfSCheng Jiang 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
3341c2fa52bfSCheng Jiang 		if (unlikely(buf_lens[i] < (lens[i] - buf_offset)))
3342c2fa52bfSCheng Jiang 			goto err;
3343c2fa52bfSCheng Jiang 	}
3344c2fa52bfSCheng Jiang 
3345c2fa52bfSCheng Jiang 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
3346c2fa52bfSCheng Jiang 		pkts[i]->pkt_len = lens[i] - buf_offset;
3347c2fa52bfSCheng Jiang 		pkts[i]->data_len = pkts[i]->pkt_len;
3348c2fa52bfSCheng Jiang 		ids[i] = descs[avail_idx + i].id;
3349c2fa52bfSCheng Jiang 	}
3350c2fa52bfSCheng Jiang 
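	/* Give up if the DMA virtual channel cannot accept a full batch of copies. */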
3351c2fa52bfSCheng Jiang 	if (rte_dma_burst_capacity(dma_id, vchan_id) < PACKED_BATCH_SIZE)
3352c2fa52bfSCheng Jiang 		return -1;
3353c2fa52bfSCheng Jiang 
3354c2fa52bfSCheng Jiang 	return 0;
3355c2fa52bfSCheng Jiang 
3356c2fa52bfSCheng Jiang err:
3357c2fa52bfSCheng Jiang 	return -1;
3358c2fa52bfSCheng Jiang }
3359c2fa52bfSCheng Jiang 
3360c2fa52bfSCheng Jiang static __rte_always_inline int
336199a2dd95SBruce Richardson virtio_dev_tx_batch_packed(struct virtio_net *dev,
336299a2dd95SBruce Richardson 			   struct vhost_virtqueue *vq,
3363ca7036b4SDavid Marchand 			   struct rte_mbuf **pkts,
3364ca7036b4SDavid Marchand 			   bool legacy_ol_flags)
3365bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
336699a2dd95SBruce Richardson {
336799a2dd95SBruce Richardson 	uint16_t avail_idx = vq->last_avail_idx;
336899a2dd95SBruce Richardson 	uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
336999a2dd95SBruce Richardson 	struct virtio_net_hdr *hdr;
337099a2dd95SBruce Richardson 	uintptr_t desc_addrs[PACKED_BATCH_SIZE];
337199a2dd95SBruce Richardson 	uint16_t ids[PACKED_BATCH_SIZE];
337299a2dd95SBruce Richardson 	uint16_t i;
337399a2dd95SBruce Richardson 
3374a287ac28SBalazs Nemeth 	if (vhost_reserve_avail_batch_packed(dev, vq, pkts, avail_idx,
3375a287ac28SBalazs Nemeth 					     desc_addrs, ids))
337699a2dd95SBruce Richardson 		return -1;
337799a2dd95SBruce Richardson 
337899a2dd95SBruce Richardson 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
337999a2dd95SBruce Richardson 		rte_prefetch0((void *)(uintptr_t)desc_addrs[i]);
338099a2dd95SBruce Richardson 
338199a2dd95SBruce Richardson 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
338299a2dd95SBruce Richardson 		rte_memcpy(rte_pktmbuf_mtod_offset(pkts[i], void *, 0),
338399a2dd95SBruce Richardson 			   (void *)(uintptr_t)(desc_addrs[i] + buf_offset),
338499a2dd95SBruce Richardson 			   pkts[i]->pkt_len);
338599a2dd95SBruce Richardson 
338699a2dd95SBruce Richardson 	if (virtio_net_with_host_offload(dev)) {
338799a2dd95SBruce Richardson 		vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
338899a2dd95SBruce Richardson 			hdr = (struct virtio_net_hdr *)(desc_addrs[i]);
338902798b07SMaxime Coquelin 			vhost_dequeue_offload(dev, hdr, pkts[i], legacy_ol_flags);
339099a2dd95SBruce Richardson 		}
339199a2dd95SBruce Richardson 	}
339299a2dd95SBruce Richardson 
339399a2dd95SBruce Richardson 	if (virtio_net_is_inorder(dev))
339499a2dd95SBruce Richardson 		vhost_shadow_dequeue_batch_packed_inorder(vq,
339599a2dd95SBruce Richardson 			ids[PACKED_BATCH_SIZE - 1]);
339699a2dd95SBruce Richardson 	else
339799a2dd95SBruce Richardson 		vhost_shadow_dequeue_batch_packed(dev, vq, ids);
339899a2dd95SBruce Richardson 
339999a2dd95SBruce Richardson 	vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE);
340099a2dd95SBruce Richardson 
340199a2dd95SBruce Richardson 	return 0;
340299a2dd95SBruce Richardson }
340399a2dd95SBruce Richardson 
340499a2dd95SBruce Richardson static __rte_always_inline int
340599a2dd95SBruce Richardson vhost_dequeue_single_packed(struct virtio_net *dev,
340699a2dd95SBruce Richardson 			    struct vhost_virtqueue *vq,
340799a2dd95SBruce Richardson 			    struct rte_mempool *mbuf_pool,
3408a287ac28SBalazs Nemeth 			    struct rte_mbuf *pkts,
340999a2dd95SBruce Richardson 			    uint16_t *buf_id,
3410ca7036b4SDavid Marchand 			    uint16_t *desc_count,
3411ca7036b4SDavid Marchand 			    bool legacy_ol_flags)
341203f77d66SEelco Chaudron 	__rte_shared_locks_required(&vq->access_lock)
3413bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
341499a2dd95SBruce Richardson {
341599a2dd95SBruce Richardson 	struct buf_vector buf_vec[BUF_VECTOR_MAX];
341699a2dd95SBruce Richardson 	uint32_t buf_len;
341799a2dd95SBruce Richardson 	uint16_t nr_vec = 0;
341899a2dd95SBruce Richardson 	int err;
341999a2dd95SBruce Richardson 	static bool allocerr_warned;
342099a2dd95SBruce Richardson 
342199a2dd95SBruce Richardson 	if (unlikely(fill_vec_buf_packed(dev, vq,
342299a2dd95SBruce Richardson 					 vq->last_avail_idx, desc_count,
342399a2dd95SBruce Richardson 					 buf_vec, &nr_vec,
342499a2dd95SBruce Richardson 					 buf_id, &buf_len,
342599a2dd95SBruce Richardson 					 VHOST_ACCESS_RO) < 0))
342699a2dd95SBruce Richardson 		return -1;
342799a2dd95SBruce Richardson 
342871bd0cc5SMaxime Coquelin 	if (unlikely(buf_len <= dev->vhost_hlen))
342971bd0cc5SMaxime Coquelin 		return -1;
343071bd0cc5SMaxime Coquelin 
343171bd0cc5SMaxime Coquelin 	buf_len -= dev->vhost_hlen;
343271bd0cc5SMaxime Coquelin 
3433a287ac28SBalazs Nemeth 	if (unlikely(virtio_dev_pktmbuf_prep(dev, pkts, buf_len))) {
343499a2dd95SBruce Richardson 		if (!allocerr_warned) {
34350e21c7c0SDavid Marchand 			VHOST_DATA_LOG(dev->ifname, ERR,
34360e21c7c0SDavid Marchand 				"failed mbuf alloc of size %d from %s.",
343736c525a0SDavid Marchand 				buf_len, mbuf_pool->name);
343899a2dd95SBruce Richardson 			allocerr_warned = true;
343999a2dd95SBruce Richardson 		}
344099a2dd95SBruce Richardson 		return -1;
344199a2dd95SBruce Richardson 	}
344299a2dd95SBruce Richardson 
3443844e113aSXuan Ding 	err = desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts,
3444844e113aSXuan Ding 			   mbuf_pool, legacy_ol_flags, 0, false);
344599a2dd95SBruce Richardson 	if (unlikely(err)) {
344699a2dd95SBruce Richardson 		if (!allocerr_warned) {
34470e21c7c0SDavid Marchand 			VHOST_DATA_LOG(dev->ifname, ERR, "failed to copy desc to mbuf.");
344899a2dd95SBruce Richardson 			allocerr_warned = true;
344999a2dd95SBruce Richardson 		}
345099a2dd95SBruce Richardson 		return -1;
345199a2dd95SBruce Richardson 	}
345299a2dd95SBruce Richardson 
345399a2dd95SBruce Richardson 	return 0;
345499a2dd95SBruce Richardson }
345599a2dd95SBruce Richardson 
345699a2dd95SBruce Richardson static __rte_always_inline int
345799a2dd95SBruce Richardson virtio_dev_tx_single_packed(struct virtio_net *dev,
345899a2dd95SBruce Richardson 			    struct vhost_virtqueue *vq,
345999a2dd95SBruce Richardson 			    struct rte_mempool *mbuf_pool,
3460ca7036b4SDavid Marchand 			    struct rte_mbuf *pkts,
3461ca7036b4SDavid Marchand 			    bool legacy_ol_flags)
346203f77d66SEelco Chaudron 	__rte_shared_locks_required(&vq->access_lock)
3463bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
346499a2dd95SBruce Richardson {
346599a2dd95SBruce Richardson 
346699a2dd95SBruce Richardson 	uint16_t buf_id, desc_count = 0;
346799a2dd95SBruce Richardson 	int ret;
346899a2dd95SBruce Richardson 
346999a2dd95SBruce Richardson 	ret = vhost_dequeue_single_packed(dev, vq, mbuf_pool, pkts, &buf_id,
3470ca7036b4SDavid Marchand 					&desc_count, legacy_ol_flags);
347199a2dd95SBruce Richardson 
347299a2dd95SBruce Richardson 	if (likely(desc_count > 0)) {
347399a2dd95SBruce Richardson 		if (virtio_net_is_inorder(dev))
347499a2dd95SBruce Richardson 			vhost_shadow_dequeue_single_packed_inorder(vq, buf_id,
347599a2dd95SBruce Richardson 								   desc_count);
347699a2dd95SBruce Richardson 		else
347799a2dd95SBruce Richardson 			vhost_shadow_dequeue_single_packed(vq, buf_id,
347899a2dd95SBruce Richardson 					desc_count);
347999a2dd95SBruce Richardson 
348099a2dd95SBruce Richardson 		vq_inc_last_avail_packed(vq, desc_count);
348199a2dd95SBruce Richardson 	}
348299a2dd95SBruce Richardson 
348399a2dd95SBruce Richardson 	return ret;
348499a2dd95SBruce Richardson }
348599a2dd95SBruce Richardson 
348605675579SAndrey Ignatov static __rte_always_inline uint16_t
348705675579SAndrey Ignatov get_nb_avail_entries_packed(const struct vhost_virtqueue *__rte_restrict vq,
348805675579SAndrey Ignatov 			    uint16_t max_nb_avail_entries)
348905675579SAndrey Ignatov {
349005675579SAndrey Ignatov 	const struct vring_packed_desc *descs = vq->desc_packed;
349105675579SAndrey Ignatov 	bool avail_wrap = vq->avail_wrap_counter;
349205675579SAndrey Ignatov 	uint16_t avail_idx = vq->last_avail_idx;
349305675579SAndrey Ignatov 	uint16_t nb_avail_entries = 0;
349405675579SAndrey Ignatov 	uint16_t flags;
349505675579SAndrey Ignatov 
349605675579SAndrey Ignatov 	while (nb_avail_entries < max_nb_avail_entries) {
349705675579SAndrey Ignatov 		flags = descs[avail_idx].flags;
349805675579SAndrey Ignatov 
349905675579SAndrey Ignatov 		if ((avail_wrap != !!(flags & VRING_DESC_F_AVAIL)) ||
350005675579SAndrey Ignatov 		    (avail_wrap == !!(flags & VRING_DESC_F_USED)))
350105675579SAndrey Ignatov 			return nb_avail_entries;
350205675579SAndrey Ignatov 
350305675579SAndrey Ignatov 		if (!(flags & VRING_DESC_F_NEXT))
350405675579SAndrey Ignatov 			++nb_avail_entries;
350505675579SAndrey Ignatov 
350605675579SAndrey Ignatov 		if (unlikely(++avail_idx >= vq->size)) {
350705675579SAndrey Ignatov 			avail_idx -= vq->size;
350805675579SAndrey Ignatov 			avail_wrap = !avail_wrap;
350905675579SAndrey Ignatov 		}
351005675579SAndrey Ignatov 	}
351105675579SAndrey Ignatov 
351205675579SAndrey Ignatov 	return nb_avail_entries;
351305675579SAndrey Ignatov }
351405675579SAndrey Ignatov 
3515ca7036b4SDavid Marchand __rte_always_inline
3516ca7036b4SDavid Marchand static uint16_t
351799a2dd95SBruce Richardson virtio_dev_tx_packed(struct virtio_net *dev,
351899a2dd95SBruce Richardson 		     struct vhost_virtqueue *__rte_restrict vq,
351999a2dd95SBruce Richardson 		     struct rte_mempool *mbuf_pool,
352099a2dd95SBruce Richardson 		     struct rte_mbuf **__rte_restrict pkts,
3521ca7036b4SDavid Marchand 		     uint32_t count,
3522ca7036b4SDavid Marchand 		     bool legacy_ol_flags)
352303f77d66SEelco Chaudron 	__rte_shared_locks_required(&vq->access_lock)
3524bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
352599a2dd95SBruce Richardson {
352699a2dd95SBruce Richardson 	uint32_t pkt_idx = 0;
352799a2dd95SBruce Richardson 
352805675579SAndrey Ignatov 	count = get_nb_avail_entries_packed(vq, count);
352905675579SAndrey Ignatov 	if (count == 0)
353005675579SAndrey Ignatov 		return 0;
353105675579SAndrey Ignatov 
3532458dc624SMaxime Coquelin 	if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, count)) {
3533458dc624SMaxime Coquelin 		vq->stats.mbuf_alloc_failed += count;
3534a287ac28SBalazs Nemeth 		return 0;
3535458dc624SMaxime Coquelin 	}
3536a287ac28SBalazs Nemeth 
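	/* Dequeue a full batch when enough descriptors remain, otherwise fall back to the single-descriptor path. */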
353799a2dd95SBruce Richardson 	do {
353899a2dd95SBruce Richardson 		rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
353999a2dd95SBruce Richardson 
354056fa2791SBalazs Nemeth 		if (count - pkt_idx >= PACKED_BATCH_SIZE) {
3541a287ac28SBalazs Nemeth 			if (!virtio_dev_tx_batch_packed(dev, vq,
3542ca7036b4SDavid Marchand 							&pkts[pkt_idx],
3543ca7036b4SDavid Marchand 							legacy_ol_flags)) {
354499a2dd95SBruce Richardson 				pkt_idx += PACKED_BATCH_SIZE;
354599a2dd95SBruce Richardson 				continue;
354699a2dd95SBruce Richardson 			}
354799a2dd95SBruce Richardson 		}
354899a2dd95SBruce Richardson 
354999a2dd95SBruce Richardson 		if (virtio_dev_tx_single_packed(dev, vq, mbuf_pool,
3550ca7036b4SDavid Marchand 						pkts[pkt_idx],
3551ca7036b4SDavid Marchand 						legacy_ol_flags))
355299a2dd95SBruce Richardson 			break;
355399a2dd95SBruce Richardson 		pkt_idx++;
355456fa2791SBalazs Nemeth 	} while (pkt_idx < count);
355599a2dd95SBruce Richardson 
3556a287ac28SBalazs Nemeth 	if (pkt_idx != count)
3557a287ac28SBalazs Nemeth 		rte_pktmbuf_free_bulk(&pkts[pkt_idx], count - pkt_idx);
3558a287ac28SBalazs Nemeth 
355999a2dd95SBruce Richardson 	if (vq->shadow_used_idx) {
356099a2dd95SBruce Richardson 		do_data_copy_dequeue(vq);
356199a2dd95SBruce Richardson 
356299a2dd95SBruce Richardson 		vhost_flush_dequeue_shadow_packed(dev, vq);
356399a2dd95SBruce Richardson 		vhost_vring_call_packed(dev, vq);
356499a2dd95SBruce Richardson 	}
356599a2dd95SBruce Richardson 
356699a2dd95SBruce Richardson 	return pkt_idx;
356799a2dd95SBruce Richardson }
356899a2dd95SBruce Richardson 
3569ca7036b4SDavid Marchand __rte_noinline
3570ca7036b4SDavid Marchand static uint16_t
3571ca7036b4SDavid Marchand virtio_dev_tx_packed_legacy(struct virtio_net *dev,
3572ca7036b4SDavid Marchand 	struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool,
3573ca7036b4SDavid Marchand 	struct rte_mbuf **__rte_restrict pkts, uint32_t count)
357403f77d66SEelco Chaudron 	__rte_shared_locks_required(&vq->access_lock)
3575bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
3576ca7036b4SDavid Marchand {
3577ca7036b4SDavid Marchand 	return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, true);
3578ca7036b4SDavid Marchand }
3579ca7036b4SDavid Marchand 
3580ca7036b4SDavid Marchand __rte_noinline
3581ca7036b4SDavid Marchand static uint16_t
3582ca7036b4SDavid Marchand virtio_dev_tx_packed_compliant(struct virtio_net *dev,
3583ca7036b4SDavid Marchand 	struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool,
3584ca7036b4SDavid Marchand 	struct rte_mbuf **__rte_restrict pkts, uint32_t count)
358503f77d66SEelco Chaudron 	__rte_shared_locks_required(&vq->access_lock)
3586bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
3587ca7036b4SDavid Marchand {
3588ca7036b4SDavid Marchand 	return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, false);
3589ca7036b4SDavid Marchand }
3590ca7036b4SDavid Marchand 
359199a2dd95SBruce Richardson uint16_t
359299a2dd95SBruce Richardson rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
359399a2dd95SBruce Richardson 	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
359499a2dd95SBruce Richardson {
359599a2dd95SBruce Richardson 	struct virtio_net *dev;
359699a2dd95SBruce Richardson 	struct vhost_virtqueue *vq;
359799a2dd95SBruce Richardson 	int16_t success = 1;
35986ee0cf80SMaxime Coquelin 	uint16_t nb_rx = 0;
359999a2dd95SBruce Richardson 
360099a2dd95SBruce Richardson 	dev = get_device(vid);
360199a2dd95SBruce Richardson 	if (!dev)
360299a2dd95SBruce Richardson 		return 0;
360399a2dd95SBruce Richardson 
360499a2dd95SBruce Richardson 	if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
36050e21c7c0SDavid Marchand 		VHOST_DATA_LOG(dev->ifname, ERR,
36060e21c7c0SDavid Marchand 			"%s: built-in vhost net backend is disabled.",
360736c525a0SDavid Marchand 			__func__);
36086ee0cf80SMaxime Coquelin 		goto out_no_unlock;
360999a2dd95SBruce Richardson 	}
361099a2dd95SBruce Richardson 
361199a2dd95SBruce Richardson 	if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) {
36120e21c7c0SDavid Marchand 		VHOST_DATA_LOG(dev->ifname, ERR,
36130e21c7c0SDavid Marchand 			"%s: invalid virtqueue idx %d.",
361436c525a0SDavid Marchand 			__func__, queue_id);
36156ee0cf80SMaxime Coquelin 		goto out_no_unlock;
361699a2dd95SBruce Richardson 	}
361799a2dd95SBruce Richardson 
361899a2dd95SBruce Richardson 	vq = dev->virtqueue[queue_id];
361999a2dd95SBruce Richardson 
362003f77d66SEelco Chaudron 	if (unlikely(rte_rwlock_read_trylock(&vq->access_lock) != 0))
36216ee0cf80SMaxime Coquelin 		goto out_no_unlock;
362299a2dd95SBruce Richardson 
36236ee0cf80SMaxime Coquelin 	if (unlikely(!vq->enabled))
362499a2dd95SBruce Richardson 		goto out_access_unlock;
362599a2dd95SBruce Richardson 
362699a2dd95SBruce Richardson 	vhost_user_iotlb_rd_lock(vq);
362799a2dd95SBruce Richardson 
36289fc93a1eSDavid Marchand 	if (unlikely(!vq->access_ok)) {
36299fc93a1eSDavid Marchand 		vhost_user_iotlb_rd_unlock(vq);
36309fc93a1eSDavid Marchand 		rte_rwlock_read_unlock(&vq->access_lock);
36319fc93a1eSDavid Marchand 
36329fc93a1eSDavid Marchand 		virtio_dev_vring_translate(dev, vq);
36338b96508aSMaxime Coquelin 
36349fc93a1eSDavid Marchand 		goto out_no_unlock;
363599a2dd95SBruce Richardson 	}
363699a2dd95SBruce Richardson 
363799a2dd95SBruce Richardson 	/*
363899a2dd95SBruce Richardson 	 * Construct a RARP broadcast packet and inject it into the "pkts"
363999a2dd95SBruce Richardson 	 * array, to make it look like the guest actually sent such a packet.
364099a2dd95SBruce Richardson 	 *
364199a2dd95SBruce Richardson 	 * Check user_send_rarp() for more information.
364299a2dd95SBruce Richardson 	 *
364399a2dd95SBruce Richardson 	 * broadcast_rarp shares a cacheline in the virtio_net structure
364499a2dd95SBruce Richardson 	 * with some fields that are accessed during enqueue and
36455147b641STyler Retzlaff 	 * rte_atomic_compare_exchange_strong_explicit causes a write if it performs
364699a2dd95SBruce Richardson 	 * the compare and exchange. This could result in false sharing between enqueue
364799a2dd95SBruce Richardson 	 * and dequeue.
364899a2dd95SBruce Richardson 	 *
364999a2dd95SBruce Richardson 	 * Prevent unnecessary false sharing by reading broadcast_rarp first
365099a2dd95SBruce Richardson 	 * and only performing compare and exchange if the read indicates it
365199a2dd95SBruce Richardson 	 * is likely to be set.
365299a2dd95SBruce Richardson 	 */
36535147b641STyler Retzlaff 	if (unlikely(rte_atomic_load_explicit(&dev->broadcast_rarp, rte_memory_order_acquire) &&
36545147b641STyler Retzlaff 			rte_atomic_compare_exchange_strong_explicit(&dev->broadcast_rarp,
36555147b641STyler Retzlaff 			&success, 0, rte_memory_order_release, rte_memory_order_relaxed))) {
3656*6d7e741bSMaxime Coquelin 		/*
3657*6d7e741bSMaxime Coquelin 		 * Inject the RARP packet at the head of the "pkts" array,
3658*6d7e741bSMaxime Coquelin 		 * so that the switch's MAC learning table gets updated first.
3659*6d7e741bSMaxime Coquelin 		 */
3660*6d7e741bSMaxime Coquelin 		pkts[nb_rx] = rte_net_make_rarp_packet(mbuf_pool, &dev->mac);
3661*6d7e741bSMaxime Coquelin 		if (pkts[nb_rx] == NULL) {
36620e21c7c0SDavid Marchand 			VHOST_DATA_LOG(dev->ifname, ERR, "failed to make RARP packet.");
366399a2dd95SBruce Richardson 			goto out;
366499a2dd95SBruce Richardson 		}
3665*6d7e741bSMaxime Coquelin 		nb_rx += 1;
366699a2dd95SBruce Richardson 	}
366799a2dd95SBruce Richardson 
3668ca7036b4SDavid Marchand 	if (vq_is_packed(dev)) {
3669ca7036b4SDavid Marchand 		if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS)
3670*6d7e741bSMaxime Coquelin 			nb_rx += virtio_dev_tx_packed_legacy(dev, vq, mbuf_pool,
3671*6d7e741bSMaxime Coquelin 					pkts + nb_rx, count - nb_rx);
367299a2dd95SBruce Richardson 		else
3673*6d7e741bSMaxime Coquelin 			nb_rx += virtio_dev_tx_packed_compliant(dev, vq, mbuf_pool,
3674*6d7e741bSMaxime Coquelin 					pkts + nb_rx, count - nb_rx);
3675ca7036b4SDavid Marchand 	} else {
3676ca7036b4SDavid Marchand 		if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS)
3677*6d7e741bSMaxime Coquelin 			nb_rx += virtio_dev_tx_split_legacy(dev, vq, mbuf_pool,
3678*6d7e741bSMaxime Coquelin 					pkts + nb_rx, count - nb_rx);
3679ca7036b4SDavid Marchand 		else
3680*6d7e741bSMaxime Coquelin 			nb_rx += virtio_dev_tx_split_compliant(dev, vq, mbuf_pool,
3681*6d7e741bSMaxime Coquelin 					pkts + nb_rx, count - nb_rx);
3682ca7036b4SDavid Marchand 	}
368399a2dd95SBruce Richardson 
36846ee0cf80SMaxime Coquelin 	vhost_queue_stats_update(dev, vq, pkts, nb_rx);
3685be75dc99SMaxime Coquelin 
368699a2dd95SBruce Richardson out:
368799a2dd95SBruce Richardson 	vhost_user_iotlb_rd_unlock(vq);
368899a2dd95SBruce Richardson 
368999a2dd95SBruce Richardson out_access_unlock:
369003f77d66SEelco Chaudron 	rte_rwlock_read_unlock(&vq->access_lock);
369199a2dd95SBruce Richardson 
36929fc93a1eSDavid Marchand out_no_unlock:
36936ee0cf80SMaxime Coquelin 	return nb_rx;
369499a2dd95SBruce Richardson }
369584d52043SXuan Ding 
369684d52043SXuan Ding static __rte_always_inline uint16_t
3697fe8477ebSCheng Jiang async_poll_dequeue_completed(struct virtio_net *dev, struct vhost_virtqueue *vq,
369884d52043SXuan Ding 		struct rte_mbuf **pkts, uint16_t count, int16_t dma_id,
369984d52043SXuan Ding 		uint16_t vchan_id, bool legacy_ol_flags)
370003f77d66SEelco Chaudron 	__rte_shared_locks_required(&vq->access_lock)
370184d52043SXuan Ding {
370284d52043SXuan Ding 	uint16_t start_idx, from, i;
370384d52043SXuan Ding 	uint16_t nr_cpl_pkts = 0;
370484d52043SXuan Ding 	struct async_inflight_info *pkts_info = vq->async->pkts_info;
370584d52043SXuan Ding 
370684d52043SXuan Ding 	vhost_async_dma_check_completed(dev, dma_id, vchan_id, VHOST_DMA_MAX_COPY_COMPLETE);
370784d52043SXuan Ding 
370884d52043SXuan Ding 	start_idx = async_get_first_inflight_pkt_idx(vq);
370984d52043SXuan Ding 
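	/* Scan forward from the oldest in-flight packet and collect the contiguous completed copies. */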
371084d52043SXuan Ding 	from = start_idx;
371184d52043SXuan Ding 	while (vq->async->pkts_cmpl_flag[from] && count--) {
371284d52043SXuan Ding 		vq->async->pkts_cmpl_flag[from] = false;
3713fe8477ebSCheng Jiang 		from = (from + 1) % vq->size;
371484d52043SXuan Ding 		nr_cpl_pkts++;
371584d52043SXuan Ding 	}
371684d52043SXuan Ding 
371784d52043SXuan Ding 	if (nr_cpl_pkts == 0)
371884d52043SXuan Ding 		return 0;
371984d52043SXuan Ding 
372084d52043SXuan Ding 	for (i = 0; i < nr_cpl_pkts; i++) {
3721fe8477ebSCheng Jiang 		from = (start_idx + i) % vq->size;
372284d52043SXuan Ding 		pkts[i] = pkts_info[from].mbuf;
372384d52043SXuan Ding 
372484d52043SXuan Ding 		if (virtio_net_with_host_offload(dev))
372584d52043SXuan Ding 			vhost_dequeue_offload(dev, &pkts_info[from].nethdr, pkts[i],
372684d52043SXuan Ding 					      legacy_ol_flags);
372784d52043SXuan Ding 	}
372884d52043SXuan Ding 
372984d52043SXuan Ding 	/* write back completed descs to used ring and update used idx */
3730fe8477ebSCheng Jiang 	if (vq_is_packed(dev)) {
3731fe8477ebSCheng Jiang 		write_back_completed_descs_packed(vq, nr_cpl_pkts);
3732fe8477ebSCheng Jiang 		vhost_vring_call_packed(dev, vq);
3733fe8477ebSCheng Jiang 	} else {
373484d52043SXuan Ding 		write_back_completed_descs_split(vq, nr_cpl_pkts);
37355147b641STyler Retzlaff 		rte_atomic_fetch_add_explicit((unsigned short __rte_atomic *)&vq->used->idx,
37365147b641STyler Retzlaff 			nr_cpl_pkts, rte_memory_order_release);
373784d52043SXuan Ding 		vhost_vring_call_split(dev, vq);
3738fe8477ebSCheng Jiang 	}
373984d52043SXuan Ding 	vq->async->pkts_inflight_n -= nr_cpl_pkts;
374084d52043SXuan Ding 
374184d52043SXuan Ding 	return nr_cpl_pkts;
374284d52043SXuan Ding }
374384d52043SXuan Ding 
374484d52043SXuan Ding static __rte_always_inline uint16_t
374584d52043SXuan Ding virtio_dev_tx_async_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
374684d52043SXuan Ding 		struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count,
374784d52043SXuan Ding 		int16_t dma_id, uint16_t vchan_id, bool legacy_ol_flags)
374803f77d66SEelco Chaudron 	__rte_shared_locks_required(&vq->access_lock)
3749bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
375084d52043SXuan Ding {
375184d52043SXuan Ding 	static bool allocerr_warned;
375284d52043SXuan Ding 	bool dropped = false;
3753f4284e2dSMaxime Coquelin 	uint16_t avail_entries;
375484d52043SXuan Ding 	uint16_t pkt_idx, slot_idx = 0;
375584d52043SXuan Ding 	uint16_t nr_done_pkts = 0;
375684d52043SXuan Ding 	uint16_t pkt_err = 0;
375784d52043SXuan Ding 	uint16_t n_xfer;
375884d52043SXuan Ding 	struct vhost_async *async = vq->async;
375984d52043SXuan Ding 	struct async_inflight_info *pkts_info = async->pkts_info;
376084d52043SXuan Ding 	struct rte_mbuf *pkts_prealloc[MAX_PKT_BURST];
376184d52043SXuan Ding 	uint16_t pkts_size = count;
376284d52043SXuan Ding 
376384d52043SXuan Ding 	/**
376484d52043SXuan Ding 	 * The ordering between avail index and
376584d52043SXuan Ding 	 * desc reads needs to be enforced.
376684d52043SXuan Ding 	 */
37675147b641STyler Retzlaff 	avail_entries = rte_atomic_load_explicit((unsigned short __rte_atomic *)&vq->avail->idx,
37685147b641STyler Retzlaff 		rte_memory_order_acquire) - vq->last_avail_idx;
3769f4284e2dSMaxime Coquelin 	if (avail_entries == 0)
377084d52043SXuan Ding 		goto out;
377184d52043SXuan Ding 
377284d52043SXuan Ding 	rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
377384d52043SXuan Ding 
377484d52043SXuan Ding 	async_iter_reset(async);
377584d52043SXuan Ding 
377684d52043SXuan Ding 	count = RTE_MIN(count, MAX_PKT_BURST);
3777f4284e2dSMaxime Coquelin 	count = RTE_MIN(count, avail_entries);
37780e21c7c0SDavid Marchand 	VHOST_DATA_LOG(dev->ifname, DEBUG, "about to dequeue %u buffers", count);
377984d52043SXuan Ding 
3780458dc624SMaxime Coquelin 	if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts_prealloc, count)) {
3781458dc624SMaxime Coquelin 		vq->stats.mbuf_alloc_failed += count;
378284d52043SXuan Ding 		goto out;
3783458dc624SMaxime Coquelin 	}
378484d52043SXuan Ding 
378584d52043SXuan Ding 	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
378684d52043SXuan Ding 		uint16_t head_idx = 0;
378784d52043SXuan Ding 		uint16_t nr_vec = 0;
378884d52043SXuan Ding 		uint16_t to;
378984d52043SXuan Ding 		uint32_t buf_len;
379084d52043SXuan Ding 		int err;
379184d52043SXuan Ding 		struct buf_vector buf_vec[BUF_VECTOR_MAX];
379284d52043SXuan Ding 		struct rte_mbuf *pkt = pkts_prealloc[pkt_idx];
379384d52043SXuan Ding 
379484d52043SXuan Ding 		if (unlikely(fill_vec_buf_split(dev, vq, vq->last_avail_idx,
379584d52043SXuan Ding 						&nr_vec, buf_vec,
379684d52043SXuan Ding 						&head_idx, &buf_len,
379784d52043SXuan Ding 						VHOST_ACCESS_RO) < 0)) {
379884d52043SXuan Ding 			dropped = true;
379984d52043SXuan Ding 			break;
380084d52043SXuan Ding 		}
380184d52043SXuan Ding 
380271bd0cc5SMaxime Coquelin 		if (unlikely(buf_len <= dev->vhost_hlen)) {
380371bd0cc5SMaxime Coquelin 			dropped = true;
380471bd0cc5SMaxime Coquelin 			break;
380571bd0cc5SMaxime Coquelin 		}
380671bd0cc5SMaxime Coquelin 
380771bd0cc5SMaxime Coquelin 		buf_len -= dev->vhost_hlen;
380871bd0cc5SMaxime Coquelin 
380984d52043SXuan Ding 		err = virtio_dev_pktmbuf_prep(dev, pkt, buf_len);
381084d52043SXuan Ding 		if (unlikely(err)) {
381184d52043SXuan Ding 			/**
381284d52043SXuan Ding 			 * mbuf allocation fails for jumbo packets when external
381384d52043SXuan Ding 			 * buffer allocation is not allowed and linear buffer
381484d52043SXuan Ding 			 * is required. Drop this packet.
381584d52043SXuan Ding 			 */
381684d52043SXuan Ding 			if (!allocerr_warned) {
38170e21c7c0SDavid Marchand 				VHOST_DATA_LOG(dev->ifname, ERR,
38180e21c7c0SDavid Marchand 					"%s: Failed mbuf alloc of size %d from %s",
381936c525a0SDavid Marchand 					__func__, buf_len, mbuf_pool->name);
382084d52043SXuan Ding 				allocerr_warned = true;
382184d52043SXuan Ding 			}
382284d52043SXuan Ding 			dropped = true;
3823fd03876eSCheng Jiang 			slot_idx--;
382484d52043SXuan Ding 			break;
382584d52043SXuan Ding 		}
382684d52043SXuan Ding 
382784d52043SXuan Ding 		slot_idx = (async->pkts_idx + pkt_idx) & (vq->size - 1);
382884d52043SXuan Ding 		err = desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkt, mbuf_pool,
382984d52043SXuan Ding 					legacy_ol_flags, slot_idx, true);
383084d52043SXuan Ding 		if (unlikely(err)) {
383184d52043SXuan Ding 			if (!allocerr_warned) {
38320e21c7c0SDavid Marchand 				VHOST_DATA_LOG(dev->ifname, ERR,
38330e21c7c0SDavid Marchand 					"%s: Failed to offload copies to async channel.",
383436c525a0SDavid Marchand 					__func__);
383584d52043SXuan Ding 				allocerr_warned = true;
383684d52043SXuan Ding 			}
383784d52043SXuan Ding 			dropped = true;
3838baac7d2cSCheng Jiang 			slot_idx--;
383984d52043SXuan Ding 			break;
384084d52043SXuan Ding 		}
384184d52043SXuan Ding 
384284d52043SXuan Ding 		pkts_info[slot_idx].mbuf = pkt;
384384d52043SXuan Ding 
384484d52043SXuan Ding 		/* store used descs */
384584d52043SXuan Ding 		to = async->desc_idx_split & (vq->size - 1);
384684d52043SXuan Ding 		async->descs_split[to].id = head_idx;
384784d52043SXuan Ding 		async->descs_split[to].len = 0;
384884d52043SXuan Ding 		async->desc_idx_split++;
384984d52043SXuan Ding 
385084d52043SXuan Ding 		vq->last_avail_idx++;
385115677ca2SMaxime Coquelin 		vhost_virtqueue_reconnect_log_split(vq);
385284d52043SXuan Ding 	}
385384d52043SXuan Ding 
385484d52043SXuan Ding 	if (unlikely(dropped))
385584d52043SXuan Ding 		rte_pktmbuf_free_bulk(&pkts_prealloc[pkt_idx], count - pkt_idx);
385684d52043SXuan Ding 
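	/* Submit the gathered copies to the DMA engine; n_xfer is the number of packets actually accepted. */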
385784d52043SXuan Ding 	n_xfer = vhost_async_dma_transfer(dev, vq, dma_id, vchan_id, async->pkts_idx,
385884d52043SXuan Ding 					  async->iov_iter, pkt_idx);
385984d52043SXuan Ding 
386084d52043SXuan Ding 	async->pkts_inflight_n += n_xfer;
386184d52043SXuan Ding 
386284d52043SXuan Ding 	pkt_err = pkt_idx - n_xfer;
386384d52043SXuan Ding 	if (unlikely(pkt_err)) {
38640e21c7c0SDavid Marchand 		VHOST_DATA_LOG(dev->ifname, DEBUG, "%s: failed to transfer data.",
386536c525a0SDavid Marchand 			__func__);
386684d52043SXuan Ding 
386784d52043SXuan Ding 		pkt_idx = n_xfer;
386884d52043SXuan Ding 		/* recover available ring */
386984d52043SXuan Ding 		vq->last_avail_idx -= pkt_err;
387015677ca2SMaxime Coquelin 		vhost_virtqueue_reconnect_log_split(vq);
387184d52043SXuan Ding 
387284d52043SXuan Ding 		/**
387384d52043SXuan Ding 		 * recover async channel copy related structures and free pktmbufs
387484d52043SXuan Ding 		 * for error pkts.
387584d52043SXuan Ding 		 */
387684d52043SXuan Ding 		async->desc_idx_split -= pkt_err;
387784d52043SXuan Ding 		while (pkt_err-- > 0) {
387884d52043SXuan Ding 			rte_pktmbuf_free(pkts_info[slot_idx & (vq->size - 1)].mbuf);
387984d52043SXuan Ding 			slot_idx--;
388084d52043SXuan Ding 		}
388184d52043SXuan Ding 	}
388284d52043SXuan Ding 
388384d52043SXuan Ding 	async->pkts_idx += pkt_idx;
388484d52043SXuan Ding 	if (async->pkts_idx >= vq->size)
388584d52043SXuan Ding 		async->pkts_idx -= vq->size;
388684d52043SXuan Ding 
388784d52043SXuan Ding 	/* The DMA device may serve other queues, so unconditionally check for completed copies. */
388884d52043SXuan Ding 	/* DMA device may serve other queues, unconditionally check completed. */
3889fe8477ebSCheng Jiang 	nr_done_pkts = async_poll_dequeue_completed(dev, vq, pkts, pkts_size,
389084d52043SXuan Ding 							dma_id, vchan_id, legacy_ol_flags);
389184d52043SXuan Ding 
389284d52043SXuan Ding 	return nr_done_pkts;
389384d52043SXuan Ding }
389484d52043SXuan Ding 
389584d52043SXuan Ding __rte_noinline
389684d52043SXuan Ding static uint16_t
389784d52043SXuan Ding virtio_dev_tx_async_split_legacy(struct virtio_net *dev,
389884d52043SXuan Ding 		struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool,
389984d52043SXuan Ding 		struct rte_mbuf **pkts, uint16_t count,
390084d52043SXuan Ding 		int16_t dma_id, uint16_t vchan_id)
390103f77d66SEelco Chaudron 	__rte_shared_locks_required(&vq->access_lock)
3902bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
390384d52043SXuan Ding {
390484d52043SXuan Ding 	return virtio_dev_tx_async_split(dev, vq, mbuf_pool,
390584d52043SXuan Ding 				pkts, count, dma_id, vchan_id, true);
390684d52043SXuan Ding }
390784d52043SXuan Ding 
390884d52043SXuan Ding __rte_noinline
390984d52043SXuan Ding static uint16_t
391084d52043SXuan Ding virtio_dev_tx_async_split_compliant(struct virtio_net *dev,
391184d52043SXuan Ding 		struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool,
391284d52043SXuan Ding 		struct rte_mbuf **pkts, uint16_t count,
391384d52043SXuan Ding 		int16_t dma_id, uint16_t vchan_id)
391403f77d66SEelco Chaudron 	__rte_shared_locks_required(&vq->access_lock)
3915bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
391684d52043SXuan Ding {
391784d52043SXuan Ding 	return virtio_dev_tx_async_split(dev, vq, mbuf_pool,
391884d52043SXuan Ding 				pkts, count, dma_id, vchan_id, false);
391984d52043SXuan Ding }
392084d52043SXuan Ding 
3921fe8477ebSCheng Jiang static __rte_always_inline void
39225c3a6987SCheng Jiang vhost_async_shadow_dequeue_single_packed(struct vhost_virtqueue *vq,
39235c3a6987SCheng Jiang 				uint16_t buf_id, uint16_t count)
392403f77d66SEelco Chaudron 	__rte_shared_locks_required(&vq->access_lock)
3925fe8477ebSCheng Jiang {
3926fe8477ebSCheng Jiang 	struct vhost_async *async = vq->async;
3927fe8477ebSCheng Jiang 	uint16_t idx = async->buffer_idx_packed;
3928fe8477ebSCheng Jiang 
3929fe8477ebSCheng Jiang 	async->buffers_packed[idx].id = buf_id;
3930fe8477ebSCheng Jiang 	async->buffers_packed[idx].len = 0;
39315c3a6987SCheng Jiang 	async->buffers_packed[idx].count = count;
3932fe8477ebSCheng Jiang 
3933fe8477ebSCheng Jiang 	async->buffer_idx_packed++;
3934fe8477ebSCheng Jiang 	if (async->buffer_idx_packed >= vq->size)
3935fe8477ebSCheng Jiang 		async->buffer_idx_packed -= vq->size;
3936fe8477ebSCheng Jiang 
3937fe8477ebSCheng Jiang }
3938fe8477ebSCheng Jiang 
3939fe8477ebSCheng Jiang static __rte_always_inline int
3940fe8477ebSCheng Jiang virtio_dev_tx_async_single_packed(struct virtio_net *dev,
3941fe8477ebSCheng Jiang 			struct vhost_virtqueue *vq,
3942fe8477ebSCheng Jiang 			struct rte_mempool *mbuf_pool,
3943fe8477ebSCheng Jiang 			struct rte_mbuf *pkts,
3944fe8477ebSCheng Jiang 			uint16_t slot_idx,
3945fe8477ebSCheng Jiang 			bool legacy_ol_flags)
394603f77d66SEelco Chaudron 	__rte_shared_locks_required(&vq->access_lock)
3947bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
3948fe8477ebSCheng Jiang {
3949fe8477ebSCheng Jiang 	int err;
3950fe8477ebSCheng Jiang 	uint16_t buf_id, desc_count = 0;
3951fe8477ebSCheng Jiang 	uint16_t nr_vec = 0;
3952fe8477ebSCheng Jiang 	uint32_t buf_len;
3953fe8477ebSCheng Jiang 	struct buf_vector buf_vec[BUF_VECTOR_MAX];
39545c3a6987SCheng Jiang 	struct vhost_async *async = vq->async;
39555c3a6987SCheng Jiang 	struct async_inflight_info *pkts_info = async->pkts_info;
3956fe8477ebSCheng Jiang 	static bool allocerr_warned;
3957fe8477ebSCheng Jiang 
3958fe8477ebSCheng Jiang 	if (unlikely(fill_vec_buf_packed(dev, vq, vq->last_avail_idx, &desc_count,
3959fe8477ebSCheng Jiang 					 buf_vec, &nr_vec, &buf_id, &buf_len,
3960fe8477ebSCheng Jiang 					 VHOST_ACCESS_RO) < 0))
3961fe8477ebSCheng Jiang 		return -1;
3962fe8477ebSCheng Jiang 
3963fe8477ebSCheng Jiang 	if (unlikely(virtio_dev_pktmbuf_prep(dev, pkts, buf_len))) {
3964fe8477ebSCheng Jiang 		if (!allocerr_warned) {
39650e21c7c0SDavid Marchand 			VHOST_DATA_LOG(dev->ifname, ERR, "Failed mbuf alloc of size %d from %s.",
396636c525a0SDavid Marchand 				buf_len, mbuf_pool->name);
3967fe8477ebSCheng Jiang 
3968fe8477ebSCheng Jiang 			allocerr_warned = true;
3969fe8477ebSCheng Jiang 		}
3970fe8477ebSCheng Jiang 		return -1;
3971fe8477ebSCheng Jiang 	}
3972fe8477ebSCheng Jiang 
3973fe8477ebSCheng Jiang 	err = desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts, mbuf_pool,
3974fe8477ebSCheng Jiang 		legacy_ol_flags, slot_idx, true);
3975fe8477ebSCheng Jiang 	if (unlikely(err)) {
3976fe8477ebSCheng Jiang 		rte_pktmbuf_free(pkts);
3977fe8477ebSCheng Jiang 		if (!allocerr_warned) {
39780e21c7c0SDavid Marchand 			VHOST_DATA_LOG(dev->ifname, ERR, "Failed to copy desc to mbuf.");
3979fe8477ebSCheng Jiang 			allocerr_warned = true;
3980fe8477ebSCheng Jiang 		}
3981fe8477ebSCheng Jiang 		return -1;
3982fe8477ebSCheng Jiang 	}
3983fe8477ebSCheng Jiang 
39845c3a6987SCheng Jiang 	pkts_info[slot_idx].descs = desc_count;
39855c3a6987SCheng Jiang 
3986fe8477ebSCheng Jiang 	/* update async shadow packed ring */
39875c3a6987SCheng Jiang 	vhost_async_shadow_dequeue_single_packed(vq, buf_id, desc_count);
39885c3a6987SCheng Jiang 
39895c3a6987SCheng Jiang 	vq_inc_last_avail_packed(vq, desc_count);
3990fe8477ebSCheng Jiang 
3991fe8477ebSCheng Jiang 	return err;
3992fe8477ebSCheng Jiang }
3993fe8477ebSCheng Jiang 
3994c2fa52bfSCheng Jiang static __rte_always_inline int
3995c2fa52bfSCheng Jiang virtio_dev_tx_async_packed_batch(struct virtio_net *dev,
3996c2fa52bfSCheng Jiang 			   struct vhost_virtqueue *vq,
3997c2fa52bfSCheng Jiang 			   struct rte_mbuf **pkts, uint16_t slot_idx,
3998c2fa52bfSCheng Jiang 			   uint16_t dma_id, uint16_t vchan_id)
399903f77d66SEelco Chaudron 	__rte_shared_locks_required(&vq->access_lock)
4000bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
4001c2fa52bfSCheng Jiang {
4002c2fa52bfSCheng Jiang 	uint16_t avail_idx = vq->last_avail_idx;
4003c2fa52bfSCheng Jiang 	uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
4004c2fa52bfSCheng Jiang 	struct vhost_async *async = vq->async;
4005c2fa52bfSCheng Jiang 	struct async_inflight_info *pkts_info = async->pkts_info;
4006c2fa52bfSCheng Jiang 	struct virtio_net_hdr *hdr;
4007c2fa52bfSCheng Jiang 	uint32_t mbuf_offset = 0;
4008c2fa52bfSCheng Jiang 	uintptr_t desc_addrs[PACKED_BATCH_SIZE];
4009c2fa52bfSCheng Jiang 	uint64_t desc_vva;
4010c2fa52bfSCheng Jiang 	uint64_t lens[PACKED_BATCH_SIZE];
4011c2fa52bfSCheng Jiang 	void *host_iova[PACKED_BATCH_SIZE];
4012c2fa52bfSCheng Jiang 	uint64_t mapped_len[PACKED_BATCH_SIZE];
4013c2fa52bfSCheng Jiang 	uint16_t ids[PACKED_BATCH_SIZE];
4014c2fa52bfSCheng Jiang 	uint16_t i;
4015c2fa52bfSCheng Jiang 
4016c2fa52bfSCheng Jiang 	if (vhost_async_tx_batch_packed_check(dev, vq, pkts, avail_idx,
4017c2fa52bfSCheng Jiang 					     desc_addrs, lens, ids, dma_id, vchan_id))
4018c2fa52bfSCheng Jiang 		return -1;
4019c2fa52bfSCheng Jiang 
4020c2fa52bfSCheng Jiang 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
4021c2fa52bfSCheng Jiang 		rte_prefetch0((void *)(uintptr_t)desc_addrs[i]);
4022c2fa52bfSCheng Jiang 
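	/* Translate each descriptor's guest physical address into a host IOVA usable for the DMA copy. */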
4023c2fa52bfSCheng Jiang 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
4024c2fa52bfSCheng Jiang 		host_iova[i] = (void *)(uintptr_t)gpa_to_first_hpa(dev,
4025c2fa52bfSCheng Jiang 			desc_addrs[i] + buf_offset, pkts[i]->pkt_len, &mapped_len[i]);
4026c2fa52bfSCheng Jiang 	}
4027c2fa52bfSCheng Jiang 
4028c2fa52bfSCheng Jiang 	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
4029c2fa52bfSCheng Jiang 		async_iter_initialize(dev, async);
4030c2fa52bfSCheng Jiang 		async_iter_add_iovec(dev, async,
4031c2fa52bfSCheng Jiang 		host_iova[i],
4032c2fa52bfSCheng Jiang 		(void *)(uintptr_t)rte_pktmbuf_iova_offset(pkts[i], mbuf_offset),
4033c2fa52bfSCheng Jiang 		mapped_len[i]);
4034c2fa52bfSCheng Jiang 		async->iter_idx++;
4035c2fa52bfSCheng Jiang 	}
4036c2fa52bfSCheng Jiang 
4037c2fa52bfSCheng Jiang 	if (virtio_net_with_host_offload(dev)) {
4038c2fa52bfSCheng Jiang 		vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
4039c2fa52bfSCheng Jiang 			desc_vva = vhost_iova_to_vva(dev, vq, desc_addrs[i],
4040c2fa52bfSCheng Jiang 						&lens[i], VHOST_ACCESS_RO);
4041c2fa52bfSCheng Jiang 			hdr = (struct virtio_net_hdr *)(uintptr_t)desc_vva;
4042c2fa52bfSCheng Jiang 			pkts_info[slot_idx + i].nethdr = *hdr;
4043c2fa52bfSCheng Jiang 		}
4044c2fa52bfSCheng Jiang 	}
4045c2fa52bfSCheng Jiang 
4046c2fa52bfSCheng Jiang 	vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE);
4047c2fa52bfSCheng Jiang 
4048c2fa52bfSCheng Jiang 	vhost_async_shadow_dequeue_packed_batch(vq, ids);
4049c2fa52bfSCheng Jiang 
4050c2fa52bfSCheng Jiang 	return 0;
4051c2fa52bfSCheng Jiang }
4052c2fa52bfSCheng Jiang 
4053fe8477ebSCheng Jiang static __rte_always_inline uint16_t
4054fe8477ebSCheng Jiang virtio_dev_tx_async_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
4055fe8477ebSCheng Jiang 		struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts,
4056fe8477ebSCheng Jiang 		uint16_t count, uint16_t dma_id, uint16_t vchan_id, bool legacy_ol_flags)
405703f77d66SEelco Chaudron 	__rte_shared_locks_required(&vq->access_lock)
4058bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
4059fe8477ebSCheng Jiang {
4060c2fa52bfSCheng Jiang 	uint32_t pkt_idx = 0;
4061fe8477ebSCheng Jiang 	uint16_t slot_idx = 0;
4062fe8477ebSCheng Jiang 	uint16_t nr_done_pkts = 0;
4063fe8477ebSCheng Jiang 	uint16_t pkt_err = 0;
4064fe8477ebSCheng Jiang 	uint32_t n_xfer;
4065c2fa52bfSCheng Jiang 	uint16_t i;
4066fe8477ebSCheng Jiang 	struct vhost_async *async = vq->async;
4067fe8477ebSCheng Jiang 	struct async_inflight_info *pkts_info = async->pkts_info;
4068fe8477ebSCheng Jiang 	struct rte_mbuf *pkts_prealloc[MAX_PKT_BURST];
4069fe8477ebSCheng Jiang 
40700e21c7c0SDavid Marchand 	VHOST_DATA_LOG(dev->ifname, DEBUG, "(%d) about to dequeue %u buffers", dev->vid, count);
4071fe8477ebSCheng Jiang 
4072fe8477ebSCheng Jiang 	async_iter_reset(async);
4073fe8477ebSCheng Jiang 
4074458dc624SMaxime Coquelin 	if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts_prealloc, count)) {
4075458dc624SMaxime Coquelin 		vq->stats.mbuf_alloc_failed += count;
4076fe8477ebSCheng Jiang 		goto out;
4077458dc624SMaxime Coquelin 	}
4078fe8477ebSCheng Jiang 
4079c2fa52bfSCheng Jiang 	do {
4080fe8477ebSCheng Jiang 		struct rte_mbuf *pkt = pkts_prealloc[pkt_idx];
4081fe8477ebSCheng Jiang 
4082fe8477ebSCheng Jiang 		rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
4083fe8477ebSCheng Jiang 
4084fe8477ebSCheng Jiang 		slot_idx = (async->pkts_idx + pkt_idx) % vq->size;
4085c2fa52bfSCheng Jiang 		if (count - pkt_idx >= PACKED_BATCH_SIZE) {
4086c2fa52bfSCheng Jiang 			if (!virtio_dev_tx_async_packed_batch(dev, vq, &pkts_prealloc[pkt_idx],
4087c2fa52bfSCheng Jiang 						slot_idx, dma_id, vchan_id)) {
4088c2fa52bfSCheng Jiang 				for (i = 0; i < PACKED_BATCH_SIZE; i++) {
4089c2fa52bfSCheng Jiang 					slot_idx = (async->pkts_idx + pkt_idx) % vq->size;
4090c2fa52bfSCheng Jiang 					pkts_info[slot_idx].descs = 1;
4091c2fa52bfSCheng Jiang 					pkts_info[slot_idx].nr_buffers = 1;
4092c2fa52bfSCheng Jiang 					pkts_info[slot_idx].mbuf = pkts_prealloc[pkt_idx];
4093c2fa52bfSCheng Jiang 					pkt_idx++;
4094c2fa52bfSCheng Jiang 				}
4095c2fa52bfSCheng Jiang 				continue;
4096c2fa52bfSCheng Jiang 			}
4097c2fa52bfSCheng Jiang 		}
4098c2fa52bfSCheng Jiang 
4099fe8477ebSCheng Jiang 		if (unlikely(virtio_dev_tx_async_single_packed(dev, vq, mbuf_pool, pkt,
4100fe8477ebSCheng Jiang 				slot_idx, legacy_ol_flags))) {
4101fe8477ebSCheng Jiang 			rte_pktmbuf_free_bulk(&pkts_prealloc[pkt_idx], count - pkt_idx);
4102fd03876eSCheng Jiang 
4103fd03876eSCheng Jiang 			if (slot_idx == 0)
4104fd03876eSCheng Jiang 				slot_idx = vq->size - 1;
4105fd03876eSCheng Jiang 			else
4106fd03876eSCheng Jiang 				slot_idx--;
4107fd03876eSCheng Jiang 
4108fe8477ebSCheng Jiang 			break;
4109fe8477ebSCheng Jiang 		}
4110fe8477ebSCheng Jiang 
4111fe8477ebSCheng Jiang 		pkts_info[slot_idx].mbuf = pkt;
4112c2fa52bfSCheng Jiang 		pkt_idx++;
4113c2fa52bfSCheng Jiang 	} while (pkt_idx < count);
4114fe8477ebSCheng Jiang 
4115fe8477ebSCheng Jiang 	n_xfer = vhost_async_dma_transfer(dev, vq, dma_id, vchan_id, async->pkts_idx,
4116fe8477ebSCheng Jiang 					async->iov_iter, pkt_idx);
4117fe8477ebSCheng Jiang 
4118fe8477ebSCheng Jiang 	async->pkts_inflight_n += n_xfer;
4119fe8477ebSCheng Jiang 
4120fe8477ebSCheng Jiang 	pkt_err = pkt_idx - n_xfer;
4121fe8477ebSCheng Jiang 
4122fe8477ebSCheng Jiang 	if (unlikely(pkt_err)) {
41235c3a6987SCheng Jiang 		uint16_t descs_err = 0;
41245c3a6987SCheng Jiang 
4125fe8477ebSCheng Jiang 		pkt_idx -= pkt_err;
4126fe8477ebSCheng Jiang 
4127fe8477ebSCheng Jiang 		/**
4128fe8477ebSCheng Jiang 		 * recover DMA-copy related structures and free pktmbuf for DMA-error pkts.
4129fe8477ebSCheng Jiang 		 */
4130fe8477ebSCheng Jiang 		if (async->buffer_idx_packed >= pkt_err)
4131fe8477ebSCheng Jiang 			async->buffer_idx_packed -= pkt_err;
4132fe8477ebSCheng Jiang 		else
4133fe8477ebSCheng Jiang 			async->buffer_idx_packed += vq->size - pkt_err;
4134fe8477ebSCheng Jiang 
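		/* Walk back over the failed slots, freeing their mbufs and counting the descriptors to reclaim. */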
4135fe8477ebSCheng Jiang 		while (pkt_err-- > 0) {
4136fd03876eSCheng Jiang 			rte_pktmbuf_free(pkts_info[slot_idx].mbuf);
4137fd03876eSCheng Jiang 			descs_err += pkts_info[slot_idx].descs;
4138fd03876eSCheng Jiang 
4139fd03876eSCheng Jiang 			if (slot_idx == 0)
4140fd03876eSCheng Jiang 				slot_idx = vq->size - 1;
4141fd03876eSCheng Jiang 			else
4142fe8477ebSCheng Jiang 				slot_idx--;
4143fe8477ebSCheng Jiang 		}
4144fe8477ebSCheng Jiang 
4145fe8477ebSCheng Jiang 		/* recover available ring */
41465c3a6987SCheng Jiang 		if (vq->last_avail_idx >= descs_err) {
41475c3a6987SCheng Jiang 			vq->last_avail_idx -= descs_err;
4148fe8477ebSCheng Jiang 		} else {
41495c3a6987SCheng Jiang 			vq->last_avail_idx += vq->size - descs_err;
4150fe8477ebSCheng Jiang 			vq->avail_wrap_counter ^= 1;
4151fe8477ebSCheng Jiang 		}
415215677ca2SMaxime Coquelin 		vhost_virtqueue_reconnect_log_packed(vq);
4153fe8477ebSCheng Jiang 	}
4154fe8477ebSCheng Jiang 
4155fe8477ebSCheng Jiang 	async->pkts_idx += pkt_idx;
4156fe8477ebSCheng Jiang 	if (async->pkts_idx >= vq->size)
4157fe8477ebSCheng Jiang 		async->pkts_idx -= vq->size;
4158fe8477ebSCheng Jiang 
4159fe8477ebSCheng Jiang out:
4160fe8477ebSCheng Jiang 	nr_done_pkts = async_poll_dequeue_completed(dev, vq, pkts, count,
4161fe8477ebSCheng Jiang 					dma_id, vchan_id, legacy_ol_flags);
4162fe8477ebSCheng Jiang 
4163fe8477ebSCheng Jiang 	return nr_done_pkts;
4164fe8477ebSCheng Jiang }
4165fe8477ebSCheng Jiang 
4166fe8477ebSCheng Jiang __rte_noinline
4167fe8477ebSCheng Jiang static uint16_t
4168fe8477ebSCheng Jiang virtio_dev_tx_async_packed_legacy(struct virtio_net *dev, struct vhost_virtqueue *vq,
4169fe8477ebSCheng Jiang 		struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts,
4170fe8477ebSCheng Jiang 		uint16_t count, uint16_t dma_id, uint16_t vchan_id)
417103f77d66SEelco Chaudron 	__rte_shared_locks_required(&vq->access_lock)
4172bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
4173fe8477ebSCheng Jiang {
4174fe8477ebSCheng Jiang 	return virtio_dev_tx_async_packed(dev, vq, mbuf_pool,
4175fe8477ebSCheng Jiang 				pkts, count, dma_id, vchan_id, true);
4176fe8477ebSCheng Jiang }
4177fe8477ebSCheng Jiang 
4178fe8477ebSCheng Jiang __rte_noinline
4179fe8477ebSCheng Jiang static uint16_t
4180fe8477ebSCheng Jiang virtio_dev_tx_async_packed_compliant(struct virtio_net *dev, struct vhost_virtqueue *vq,
4181fe8477ebSCheng Jiang 		struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts,
4182fe8477ebSCheng Jiang 		uint16_t count, uint16_t dma_id, uint16_t vchan_id)
418303f77d66SEelco Chaudron 	__rte_shared_locks_required(&vq->access_lock)
4184bf42fb30SDavid Marchand 	__rte_shared_locks_required(&vq->iotlb_lock)
4185fe8477ebSCheng Jiang {
4186fe8477ebSCheng Jiang 	return virtio_dev_tx_async_packed(dev, vq, mbuf_pool,
4187fe8477ebSCheng Jiang 				pkts, count, dma_id, vchan_id, false);
4188fe8477ebSCheng Jiang }
4189fe8477ebSCheng Jiang 
419084d52043SXuan Ding uint16_t
419184d52043SXuan Ding rte_vhost_async_try_dequeue_burst(int vid, uint16_t queue_id,
419284d52043SXuan Ding 	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count,
419384d52043SXuan Ding 	int *nr_inflight, int16_t dma_id, uint16_t vchan_id)
419484d52043SXuan Ding {
419584d52043SXuan Ding 	struct virtio_net *dev;
419684d52043SXuan Ding 	struct vhost_virtqueue *vq;
419784d52043SXuan Ding 	int16_t success = 1;
41986ee0cf80SMaxime Coquelin 	uint16_t nb_rx = 0;
419984d52043SXuan Ding 
420084d52043SXuan Ding 	dev = get_device(vid);
420184d52043SXuan Ding 	if (!dev || !nr_inflight)
42026ee0cf80SMaxime Coquelin 		goto out_no_unlock;
420384d52043SXuan Ding 
420484d52043SXuan Ding 	*nr_inflight = -1;
420584d52043SXuan Ding 
420684d52043SXuan Ding 	if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
42070e21c7c0SDavid Marchand 		VHOST_DATA_LOG(dev->ifname, ERR, "%s: built-in vhost net backend is disabled.",
420836c525a0SDavid Marchand 			__func__);
42096ee0cf80SMaxime Coquelin 		goto out_no_unlock;
421084d52043SXuan Ding 	}
421184d52043SXuan Ding 
421284d52043SXuan Ding 	if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) {
42130e21c7c0SDavid Marchand 		VHOST_DATA_LOG(dev->ifname, ERR, "%s: invalid virtqueue idx %d.",
421436c525a0SDavid Marchand 			__func__, queue_id);
42156ee0cf80SMaxime Coquelin 		goto out_no_unlock;
421684d52043SXuan Ding 	}
421784d52043SXuan Ding 
421884d52043SXuan Ding 	if (unlikely(dma_id < 0 || dma_id >= RTE_DMADEV_DEFAULT_MAX)) {
42190e21c7c0SDavid Marchand 		VHOST_DATA_LOG(dev->ifname, ERR, "%s: invalid dma id %d.",
422036c525a0SDavid Marchand 			__func__, dma_id);
42216ee0cf80SMaxime Coquelin 		goto out_no_unlock;
422284d52043SXuan Ding 	}
422384d52043SXuan Ding 
422484d52043SXuan Ding 	if (unlikely(!dma_copy_track[dma_id].vchans ||
422584d52043SXuan Ding 				!dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) {
42260e21c7c0SDavid Marchand 		VHOST_DATA_LOG(dev->ifname, ERR, "%s: invalid channel %d:%u.",
422736c525a0SDavid Marchand 			__func__, dma_id, vchan_id);
42286ee0cf80SMaxime Coquelin 		goto out_no_unlock;
422984d52043SXuan Ding 	}
423084d52043SXuan Ding 
423184d52043SXuan Ding 	vq = dev->virtqueue[queue_id];
423284d52043SXuan Ding 
423303f77d66SEelco Chaudron 	if (unlikely(rte_rwlock_read_trylock(&vq->access_lock) != 0))
42346ee0cf80SMaxime Coquelin 		goto out_no_unlock;
423584d52043SXuan Ding 
42366ee0cf80SMaxime Coquelin 	if (unlikely(vq->enabled == 0))
423784d52043SXuan Ding 		goto out_access_unlock;
423884d52043SXuan Ding 
423984d52043SXuan Ding 	if (unlikely(!vq->async)) {
42400e21c7c0SDavid Marchand 		VHOST_DATA_LOG(dev->ifname, ERR, "%s: async not registered for queue id %d.",
424136c525a0SDavid Marchand 			__func__, queue_id);
424284d52043SXuan Ding 		goto out_access_unlock;
424384d52043SXuan Ding 	}
424484d52043SXuan Ding 
424584d52043SXuan Ding 	vhost_user_iotlb_rd_lock(vq);
424684d52043SXuan Ding 
42479fc93a1eSDavid Marchand 	if (unlikely(vq->access_ok == 0)) {
42489fc93a1eSDavid Marchand 		vhost_user_iotlb_rd_unlock(vq);
42499fc93a1eSDavid Marchand 		rte_rwlock_read_unlock(&vq->access_lock);
42509fc93a1eSDavid Marchand 
42519fc93a1eSDavid Marchand 		virtio_dev_vring_translate(dev, vq);
42529fc93a1eSDavid Marchand 		goto out_no_unlock;
425384d52043SXuan Ding 	}
425484d52043SXuan Ding 
425584d52043SXuan Ding 	/*
425684d52043SXuan Ding 	 * Construct a RARP broadcast packet and inject it into the "pkts"
425784d52043SXuan Ding 	 * array, to make it look like the guest actually sent such a packet.
425884d52043SXuan Ding 	 *
425984d52043SXuan Ding 	 * Check user_send_rarp() for more information.
426084d52043SXuan Ding 	 *
426184d52043SXuan Ding 	 * broadcast_rarp shares a cacheline in the virtio_net structure
426284d52043SXuan Ding 	 * with some fields that are accessed during enqueue and
42635147b641STyler Retzlaff 	 * rte_atomic_compare_exchange_strong_explicit causes a write when it
426484d52043SXuan Ding 	 * performs the compare and exchange. This could result in false sharing
426584d52043SXuan Ding 	 * between enqueue and dequeue.
426684d52043SXuan Ding 	 *
426784d52043SXuan Ding 	 * Prevent unnecessary false sharing by reading broadcast_rarp first
426884d52043SXuan Ding 	 * and only performing compare and exchange if the read indicates it
426984d52043SXuan Ding 	 * is likely to be set.
427084d52043SXuan Ding 	 */
42715147b641STyler Retzlaff 	if (unlikely(rte_atomic_load_explicit(&dev->broadcast_rarp, rte_memory_order_acquire) &&
42725147b641STyler Retzlaff 			rte_atomic_compare_exchange_strong_explicit(&dev->broadcast_rarp,
42735147b641STyler Retzlaff 			&success, 0, rte_memory_order_release, rte_memory_order_relaxed))) {
4274*6d7e741bSMaxime Coquelin 		/*
4275*6d7e741bSMaxime Coquelin 		 * Inject the RARP packet at the head of the "pkts" array,
4276*6d7e741bSMaxime Coquelin 		 * so that the switch's MAC learning table gets updated first.
4277*6d7e741bSMaxime Coquelin 		 */
4278*6d7e741bSMaxime Coquelin 		pkts[nb_rx] = rte_net_make_rarp_packet(mbuf_pool, &dev->mac);
4279*6d7e741bSMaxime Coquelin 		if (pkts[nb_rx] == NULL) {
42800e21c7c0SDavid Marchand 			VHOST_DATA_LOG(dev->ifname, ERR, "failed to make RARP packet.");
428184d52043SXuan Ding 			goto out;
428284d52043SXuan Ding 		}
4283*6d7e741bSMaxime Coquelin 		nb_rx += 1;
428484d52043SXuan Ding 	}
428584d52043SXuan Ding 
4286fe8477ebSCheng Jiang 	if (vq_is_packed(dev)) {
428784d52043SXuan Ding 		if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS)
4288*6d7e741bSMaxime Coquelin 			nb_rx += virtio_dev_tx_async_packed_legacy(dev, vq, mbuf_pool,
4289*6d7e741bSMaxime Coquelin 					pkts + nb_rx, count - nb_rx, dma_id, vchan_id);
429084d52043SXuan Ding 		else
4291*6d7e741bSMaxime Coquelin 			nb_rx += virtio_dev_tx_async_packed_compliant(dev, vq, mbuf_pool,
4292*6d7e741bSMaxime Coquelin 					pkts + nb_rx, count - nb_rx, dma_id, vchan_id);
4293fe8477ebSCheng Jiang 	} else {
4294fe8477ebSCheng Jiang 		if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS)
4295*6d7e741bSMaxime Coquelin 			nb_rx += virtio_dev_tx_async_split_legacy(dev, vq, mbuf_pool,
4296*6d7e741bSMaxime Coquelin 					pkts + nb_rx, count - nb_rx, dma_id, vchan_id);
4297fe8477ebSCheng Jiang 		else
4298*6d7e741bSMaxime Coquelin 			nb_rx += virtio_dev_tx_async_split_compliant(dev, vq, mbuf_pool,
4299*6d7e741bSMaxime Coquelin 					pkts + nb_rx, count - nb_rx, dma_id, vchan_id);
4300fe8477ebSCheng Jiang 	}
430184d52043SXuan Ding 
430284d52043SXuan Ding 	*nr_inflight = vq->async->pkts_inflight_n;
43036ee0cf80SMaxime Coquelin 	vhost_queue_stats_update(dev, vq, pkts, nb_rx);
430484d52043SXuan Ding 
430584d52043SXuan Ding out:
430684d52043SXuan Ding 	vhost_user_iotlb_rd_unlock(vq);
430784d52043SXuan Ding 
430884d52043SXuan Ding out_access_unlock:
430903f77d66SEelco Chaudron 	rte_rwlock_read_unlock(&vq->access_lock);
431084d52043SXuan Ding 
43119fc93a1eSDavid Marchand out_no_unlock:
43126ee0cf80SMaxime Coquelin 	return nb_rx;
431384d52043SXuan Ding }
4314
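/*
 * Editor's note: illustrative usage sketch, not part of this file, showing
 * how an application might call rte_vhost_async_try_dequeue_burst() defined
 * above. BURST_SZ and example_async_dequeue() are hypothetical names chosen
 * here; in some DPDK releases the API is experimental, so the application
 * may need to build with ALLOW_EXPERIMENTAL_API.
 */
#include <stdint.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_vhost_async.h>

#define BURST_SZ 32

static uint16_t
example_async_dequeue(int vid, uint16_t queue_id, struct rte_mempool *mp,
		int16_t dma_id, uint16_t vchan_id)
{
	struct rte_mbuf *pkts[BURST_SZ];
	int nr_inflight = 0;
	uint16_t i, nb_rx;

	/*
	 * Only packets whose DMA copies have already completed are returned;
	 * copies still in flight are reported through nr_inflight and handed
	 * back by a later call.
	 */
	nb_rx = rte_vhost_async_try_dequeue_burst(vid, queue_id, mp, pkts,
			BURST_SZ, &nr_inflight, dma_id, vchan_id);

	for (i = 0; i < nb_rx; i++)
		rte_pktmbuf_free(pkts[i]); /* a real app would forward these */

	return nb_rx;
}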