xref: /dpdk/drivers/net/virtio/virtio_rxtx.c (revision 6958e40dbc189ccd287983ea91e257d5ccf26810)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4 
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <errno.h>
10 
11 #include <rte_cycles.h>
12 #include <rte_memory.h>
13 #include <rte_branch_prediction.h>
14 #include <rte_mempool.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_ether.h>
18 #include <rte_ethdev_driver.h>
19 #include <rte_prefetch.h>
20 #include <rte_string_fns.h>
21 #include <rte_errno.h>
22 #include <rte_byteorder.h>
23 #include <rte_net.h>
24 #include <rte_ip.h>
25 #include <rte_udp.h>
26 #include <rte_tcp.h>
27 
28 #include "virtio_logs.h"
29 #include "virtio_ethdev.h"
30 #include "virtio_pci.h"
31 #include "virtqueue.h"
32 #include "virtio_rxtx.h"
33 #include "virtio_rxtx_simple.h"
34 #include "virtio_ring.h"
35 
36 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
37 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
38 #else
39 #define VIRTIO_DUMP_PACKET(m, len) do { } while (0)
40 #endif
41 
42 int
43 virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
44 {
45 	struct virtnet_rx *rxvq = rxq;
46 	struct virtqueue *vq = rxvq->vq;
47 
48 	return VIRTQUEUE_NUSED(vq) >= offset;
49 }
50 
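/* In-order free: return 'num' descriptors to the free pool and make
 * desc_idx the new tail of the free descriptor chain.
 */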
51 void
52 vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx, uint16_t num)
53 {
54 	vq->vq_free_cnt += num;
55 	vq->vq_desc_tail_idx = desc_idx & (vq->vq_nentries - 1);
56 }
57 
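/* Walk the descriptor chain starting at desc_idx and return it to the
 * split ring free list, linking it after the current tail.
 */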
58 void
59 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
60 {
61 	struct vring_desc *dp, *dp_tail;
62 	struct vq_desc_extra *dxp;
63 	uint16_t desc_idx_last = desc_idx;
64 
65 	dp  = &vq->vq_split.ring.desc[desc_idx];
66 	dxp = &vq->vq_descx[desc_idx];
67 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
68 	if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
69 		while (dp->flags & VRING_DESC_F_NEXT) {
70 			desc_idx_last = dp->next;
71 			dp = &vq->vq_split.ring.desc[dp->next];
72 		}
73 	}
74 	dxp->ndescs = 0;
75 
76 	/*
77 	 * We must append the existing free chain, if any, to the end of
78 	 * the newly freed chain. If the virtqueue was completely used,
79 	 * then the free-chain tail would be VQ_RING_DESC_CHAIN_END.
80 	 */
81 	if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
82 		vq->vq_desc_head_idx = desc_idx;
83 	} else {
84 		dp_tail = &vq->vq_split.ring.desc[vq->vq_desc_tail_idx];
85 		dp_tail->next = desc_idx;
86 	}
87 
88 	vq->vq_desc_tail_idx = desc_idx_last;
89 	dp->next = VQ_RING_DESC_CHAIN_END;
90 }
91 
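/* Packed ring: return the descriptors tracked under buffer id 'id' to
 * the free list.
 */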
92 static void
93 vq_ring_free_id_packed(struct virtqueue *vq, uint16_t id)
94 {
95 	struct vq_desc_extra *dxp;
96 
97 	dxp = &vq->vq_descx[id];
98 	vq->vq_free_cnt += dxp->ndescs;
99 
100 	if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END)
101 		vq->vq_desc_head_idx = id;
102 	else
103 		vq->vq_descx[vq->vq_desc_tail_idx].next = id;
104 
105 	vq->vq_desc_tail_idx = id;
106 	dxp->next = VQ_RING_DESC_CHAIN_END;
107 }
108 
109 void
110 virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
111 {
112 	uint32_t s = mbuf->pkt_len;
113 	struct rte_ether_addr *ea;
114 
115 	stats->bytes += s;
116 
117 	if (s == 64) {
118 		stats->size_bins[1]++;
119 	} else if (s > 64 && s < 1024) {
120 		uint32_t bin;
121 
122 		/* count leading zeros and offset into the correct bin */
123 		bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
124 		stats->size_bins[bin]++;
125 	} else {
126 		if (s < 64)
127 			stats->size_bins[0]++;
128 		else if (s < 1519)
129 			stats->size_bins[6]++;
130 		else
131 			stats->size_bins[7]++;
132 	}
133 
134 	ea = rte_pktmbuf_mtod(mbuf, struct rte_ether_addr *);
135 	if (rte_is_multicast_ether_addr(ea)) {
136 		if (rte_is_broadcast_ether_addr(ea))
137 			stats->broadcast++;
138 		else
139 			stats->multicast++;
140 	}
141 }
142 
143 static inline void
144 virtio_rx_stats_updated(struct virtnet_rx *rxvq, struct rte_mbuf *m)
145 {
146 	VIRTIO_DUMP_PACKET(m, m->data_len);
147 
148 	virtio_update_packet_stats(&rxvq->stats, m);
149 }
150 
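/* Dequeue up to 'num' used Rx buffers from the packed ring; lengths go
 * to len[] and mbufs to rx_pkts[]. Returns the number actually dequeued.
 */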
151 static uint16_t
152 virtqueue_dequeue_burst_rx_packed(struct virtqueue *vq,
153 				  struct rte_mbuf **rx_pkts,
154 				  uint32_t *len,
155 				  uint16_t num)
156 {
157 	struct rte_mbuf *cookie;
158 	uint16_t used_idx;
159 	uint16_t id;
160 	struct vring_packed_desc *desc;
161 	uint16_t i;
162 
163 	desc = vq->vq_packed.ring.desc;
164 
165 	for (i = 0; i < num; i++) {
166 		used_idx = vq->vq_used_cons_idx;
167 		if (!desc_is_used(&desc[used_idx], vq))
168 			return i;
169 		virtio_rmb(vq->hw->weak_barriers);
170 		len[i] = desc[used_idx].len;
171 		id = desc[used_idx].id;
172 		cookie = (struct rte_mbuf *)vq->vq_descx[id].cookie;
173 		if (unlikely(cookie == NULL)) {
174 			PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
175 				vq->vq_used_cons_idx);
176 			break;
177 		}
178 		rte_prefetch0(cookie);
179 		rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
180 		rx_pkts[i] = cookie;
181 
182 		vq->vq_free_cnt++;
183 		vq->vq_used_cons_idx++;
184 		if (vq->vq_used_cons_idx >= vq->vq_nentries) {
185 			vq->vq_used_cons_idx -= vq->vq_nentries;
186 			vq->vq_packed.used_wrap_counter ^= 1;
187 		}
188 	}
189 
190 	return i;
191 }
192 
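/* Split ring counterpart of the packed dequeue above: pop up to 'num'
 * used Rx buffers and free their descriptor chains.
 */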
193 static uint16_t
194 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
195 			   uint32_t *len, uint16_t num)
196 {
197 	struct vring_used_elem *uep;
198 	struct rte_mbuf *cookie;
199 	uint16_t used_idx, desc_idx;
200 	uint16_t i;
201 
202 	/* Caller ensures at least 'num' used entries are available */
203 	for (i = 0; i < num ; i++) {
204 		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
205 		uep = &vq->vq_split.ring.used->ring[used_idx];
206 		desc_idx = (uint16_t) uep->id;
207 		len[i] = uep->len;
208 		cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
209 
210 		if (unlikely(cookie == NULL)) {
211 			PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
212 				vq->vq_used_cons_idx);
213 			break;
214 		}
215 
216 		rte_prefetch0(cookie);
217 		rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
218 		rx_pkts[i]  = cookie;
219 		vq->vq_used_cons_idx++;
220 		vq_ring_free_chain(vq, desc_idx);
221 		vq->vq_descx[desc_idx].cookie = NULL;
222 	}
223 
224 	return i;
225 }
226 
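/* In-order dequeue: the used index equals the descriptor index, so the
 * descriptors are released in one batch at the end.
 */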
227 static uint16_t
228 virtqueue_dequeue_rx_inorder(struct virtqueue *vq,
229 			struct rte_mbuf **rx_pkts,
230 			uint32_t *len,
231 			uint16_t num)
232 {
233 	struct vring_used_elem *uep;
234 	struct rte_mbuf *cookie;
235 	uint16_t used_idx = 0;
236 	uint16_t i;
237 
238 	if (unlikely(num == 0))
239 		return 0;
240 
241 	for (i = 0; i < num; i++) {
242 		used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
243 		/* Desc idx same as used idx */
244 		uep = &vq->vq_split.ring.used->ring[used_idx];
245 		len[i] = uep->len;
246 		cookie = (struct rte_mbuf *)vq->vq_descx[used_idx].cookie;
247 
248 		if (unlikely(cookie == NULL)) {
249 			PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
250 				vq->vq_used_cons_idx);
251 			break;
252 		}
253 
254 		rte_prefetch0(cookie);
255 		rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
256 		rx_pkts[i]  = cookie;
257 		vq->vq_used_cons_idx++;
258 		vq->vq_descx[used_idx].cookie = NULL;
259 	}
260 
261 	vq_ring_free_inorder(vq, used_idx, i);
262 	return i;
263 }
264 
265 #ifndef DEFAULT_TX_FREE_THRESH
266 #define DEFAULT_TX_FREE_THRESH 32
267 #endif
268 
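/* Packed ring Tx cleanup for in-order completion: used descriptors are
 * walked in batches up to the id reported by the device.
 */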
269 static void
270 virtio_xmit_cleanup_inorder_packed(struct virtqueue *vq, int num)
271 {
272 	uint16_t used_idx, id, curr_id, free_cnt = 0;
273 	uint16_t size = vq->vq_nentries;
274 	struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
275 	struct vq_desc_extra *dxp;
276 
277 	used_idx = vq->vq_used_cons_idx;
278 	while (num > 0 && desc_is_used(&desc[used_idx], vq)) {
279 		virtio_rmb(vq->hw->weak_barriers);
280 		id = desc[used_idx].id;
281 		do {
282 			curr_id = used_idx;
283 			dxp = &vq->vq_descx[used_idx];
284 			used_idx += dxp->ndescs;
285 			free_cnt += dxp->ndescs;
286 			num -= dxp->ndescs;
287 			if (used_idx >= size) {
288 				used_idx -= size;
289 				vq->vq_packed.used_wrap_counter ^= 1;
290 			}
291 			if (dxp->cookie != NULL) {
292 				rte_pktmbuf_free(dxp->cookie);
293 				dxp->cookie = NULL;
294 			}
295 		} while (curr_id != id);
296 	}
297 	vq->vq_used_cons_idx = used_idx;
298 	vq->vq_free_cnt += free_cnt;
299 }
300 
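/* Packed ring Tx cleanup for out-of-order completion: each completed
 * buffer is freed individually by its descriptor id.
 */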
301 static void
302 virtio_xmit_cleanup_normal_packed(struct virtqueue *vq, int num)
303 {
304 	uint16_t used_idx, id;
305 	uint16_t size = vq->vq_nentries;
306 	struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
307 	struct vq_desc_extra *dxp;
308 
309 	used_idx = vq->vq_used_cons_idx;
310 	while (num-- && desc_is_used(&desc[used_idx], vq)) {
311 		virtio_rmb(vq->hw->weak_barriers);
312 		id = desc[used_idx].id;
313 		dxp = &vq->vq_descx[id];
314 		vq->vq_used_cons_idx += dxp->ndescs;
315 		if (vq->vq_used_cons_idx >= size) {
316 			vq->vq_used_cons_idx -= size;
317 			vq->vq_packed.used_wrap_counter ^= 1;
318 		}
319 		vq_ring_free_id_packed(vq, id);
320 		if (dxp->cookie != NULL) {
321 			rte_pktmbuf_free(dxp->cookie);
322 			dxp->cookie = NULL;
323 		}
324 		used_idx = vq->vq_used_cons_idx;
325 	}
326 }
327 
328 /* Cleanup from completed transmits. */
329 static inline void
330 virtio_xmit_cleanup_packed(struct virtqueue *vq, int num, int in_order)
331 {
332 	if (in_order)
333 		virtio_xmit_cleanup_inorder_packed(vq, num);
334 	else
335 		virtio_xmit_cleanup_normal_packed(vq, num);
336 }
337 
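/* Reclaim 'num' completed Tx descriptor chains from the split ring and
 * free the transmitted mbufs.
 */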
338 static void
339 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
340 {
341 	uint16_t i, used_idx, desc_idx;
342 	for (i = 0; i < num; i++) {
343 		struct vring_used_elem *uep;
344 		struct vq_desc_extra *dxp;
345 
346 		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
347 		uep = &vq->vq_split.ring.used->ring[used_idx];
348 
349 		desc_idx = (uint16_t) uep->id;
350 		dxp = &vq->vq_descx[desc_idx];
351 		vq->vq_used_cons_idx++;
352 		vq_ring_free_chain(vq, desc_idx);
353 
354 		if (dxp->cookie != NULL) {
355 			rte_pktmbuf_free(dxp->cookie);
356 			dxp->cookie = NULL;
357 		}
358 	}
359 }
360 
361 /* Cleanup from completed inorder transmits. */
362 static __rte_always_inline void
363 virtio_xmit_cleanup_inorder(struct virtqueue *vq, uint16_t num)
364 {
365 	uint16_t i, idx = vq->vq_used_cons_idx;
366 	int16_t free_cnt = 0;
367 	struct vq_desc_extra *dxp = NULL;
368 
369 	if (unlikely(num == 0))
370 		return;
371 
372 	for (i = 0; i < num; i++) {
373 		dxp = &vq->vq_descx[idx++ & (vq->vq_nentries - 1)];
374 		free_cnt += dxp->ndescs;
375 		if (dxp->cookie != NULL) {
376 			rte_pktmbuf_free(dxp->cookie);
377 			dxp->cookie = NULL;
378 		}
379 	}
380 
381 	vq->vq_free_cnt += free_cnt;
382 	vq->vq_used_cons_idx = idx;
383 }
384 
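/* Refill the in-order Rx ring with 'num' receive buffers, one
 * descriptor per mbuf.
 */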
385 static inline int
386 virtqueue_enqueue_refill_inorder(struct virtqueue *vq,
387 			struct rte_mbuf **cookies,
388 			uint16_t num)
389 {
390 	struct vq_desc_extra *dxp;
391 	struct virtio_hw *hw = vq->hw;
392 	struct vring_desc *start_dp;
393 	uint16_t head_idx, idx, i = 0;
394 
395 	if (unlikely(vq->vq_free_cnt == 0))
396 		return -ENOSPC;
397 	if (unlikely(vq->vq_free_cnt < num))
398 		return -EMSGSIZE;
399 
400 	head_idx = vq->vq_desc_head_idx & (vq->vq_nentries - 1);
401 	start_dp = vq->vq_split.ring.desc;
402 
403 	while (i < num) {
404 		idx = head_idx & (vq->vq_nentries - 1);
405 		dxp = &vq->vq_descx[idx];
406 		dxp->cookie = (void *)cookies[i];
407 		dxp->ndescs = 1;
408 
409 		start_dp[idx].addr =
410 				VIRTIO_MBUF_ADDR(cookies[i], vq) +
411 				RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
412 		start_dp[idx].len =
413 				cookies[i]->buf_len -
414 				RTE_PKTMBUF_HEADROOM +
415 				hw->vtnet_hdr_size;
416 		start_dp[idx].flags = VRING_DESC_F_WRITE;
417 
418 		vq_update_avail_ring(vq, idx);
419 		head_idx++;
420 		i++;
421 	}
422 
423 	vq->vq_desc_head_idx += num;
424 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
425 	return 0;
426 }
427 
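/* Post 'num' receive buffers on the split ring. Each buffer starts
 * vtnet_hdr_size bytes inside the headroom so the device can write the
 * virtio-net header right before the packet data.
 */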
428 static inline int
429 virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf **cookie,
430 				uint16_t num)
431 {
432 	struct vq_desc_extra *dxp;
433 	struct virtio_hw *hw = vq->hw;
434 	struct vring_desc *start_dp = vq->vq_split.ring.desc;
435 	uint16_t idx, i;
436 
437 	if (unlikely(vq->vq_free_cnt == 0))
438 		return -ENOSPC;
439 	if (unlikely(vq->vq_free_cnt < num))
440 		return -EMSGSIZE;
441 
442 	if (unlikely(vq->vq_desc_head_idx >= vq->vq_nentries))
443 		return -EFAULT;
444 
445 	for (i = 0; i < num; i++) {
446 		idx = vq->vq_desc_head_idx;
447 		dxp = &vq->vq_descx[idx];
448 		dxp->cookie = (void *)cookie[i];
449 		dxp->ndescs = 1;
450 
451 		start_dp[idx].addr =
452 			VIRTIO_MBUF_ADDR(cookie[i], vq) +
453 			RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
454 		start_dp[idx].len =
455 			cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM +
456 			hw->vtnet_hdr_size;
457 		start_dp[idx].flags = VRING_DESC_F_WRITE;
458 		vq->vq_desc_head_idx = start_dp[idx].next;
459 		vq_update_avail_ring(vq, idx);
460 		if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) {
461 			vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
462 			break;
463 		}
464 	}
465 
466 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
467 
468 	return 0;
469 }
470 
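/* Packed ring Rx refill: the descriptor address and length are written
 * first; the flags that expose it to the device are set only after a
 * write barrier.
 */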
471 static inline int
472 virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
473 				     struct rte_mbuf **cookie, uint16_t num)
474 {
475 	struct vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
476 	uint16_t flags = vq->vq_packed.cached_flags;
477 	struct virtio_hw *hw = vq->hw;
478 	struct vq_desc_extra *dxp;
479 	uint16_t idx;
480 	int i;
481 
482 	if (unlikely(vq->vq_free_cnt == 0))
483 		return -ENOSPC;
484 	if (unlikely(vq->vq_free_cnt < num))
485 		return -EMSGSIZE;
486 
487 	for (i = 0; i < num; i++) {
488 		idx = vq->vq_avail_idx;
489 		dxp = &vq->vq_descx[idx];
490 		dxp->cookie = (void *)cookie[i];
491 		dxp->ndescs = 1;
492 
493 		start_dp[idx].addr = VIRTIO_MBUF_ADDR(cookie[i], vq) +
494 				RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
495 		start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM
496 					+ hw->vtnet_hdr_size;
497 
498 		vq->vq_desc_head_idx = dxp->next;
499 		if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
500 			vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
501 		virtio_wmb(hw->weak_barriers);
502 		start_dp[idx].flags = flags;
503 		if (++vq->vq_avail_idx >= vq->vq_nentries) {
504 			vq->vq_avail_idx -= vq->vq_nentries;
505 			vq->vq_packed.cached_flags ^=
506 				VRING_PACKED_DESC_F_AVAIL_USED;
507 			flags = vq->vq_packed.cached_flags;
508 		}
509 	}
510 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
511 	return 0;
512 }
513 
514 /* When doing TSO, the IP length is not included in the pseudo header
515  * checksum of the packet given to the PMD, but virtio expects it to
516  * be included.
517  */
518 static void
519 virtio_tso_fix_cksum(struct rte_mbuf *m)
520 {
521 	/* common case: header is not fragmented */
522 	if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
523 			m->l4_len)) {
524 		struct rte_ipv4_hdr *iph;
525 		struct rte_ipv6_hdr *ip6h;
526 		struct rte_tcp_hdr *th;
527 		uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
528 		uint32_t tmp;
529 
530 		iph = rte_pktmbuf_mtod_offset(m,
531 					struct rte_ipv4_hdr *, m->l2_len);
532 		th = RTE_PTR_ADD(iph, m->l3_len);
533 		if ((iph->version_ihl >> 4) == 4) {
534 			iph->hdr_checksum = 0;
535 			iph->hdr_checksum = rte_ipv4_cksum(iph);
536 			ip_len = iph->total_length;
537 			ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
538 				m->l3_len);
539 		} else {
540 			ip6h = (struct rte_ipv6_hdr *)iph;
541 			ip_paylen = ip6h->payload_len;
542 		}
543 
544 		/* calculate the new phdr checksum, this time including ip_paylen */
545 		prev_cksum = th->cksum;
546 		tmp = prev_cksum;
547 		tmp += ip_paylen;
548 		tmp = (tmp & 0xffff) + (tmp >> 16);
549 		new_cksum = tmp;
550 
551 		/* replace it in the packet */
552 		th->cksum = new_cksum;
553 	}
554 }
555 
556 
557 /* avoid the write operation when it is not needed, to lessen cache issues */
558 #define ASSIGN_UNLESS_EQUAL(var, val) do {	\
559 	if ((var) != (val))			\
560 		(var) = (val);			\
561 } while (0)
562 
563 #define virtqueue_clear_net_hdr(_hdr) do {		\
564 	ASSIGN_UNLESS_EQUAL((_hdr)->csum_start, 0);	\
565 	ASSIGN_UNLESS_EQUAL((_hdr)->csum_offset, 0);	\
566 	ASSIGN_UNLESS_EQUAL((_hdr)->flags, 0);		\
567 	ASSIGN_UNLESS_EQUAL((_hdr)->gso_type, 0);	\
568 	ASSIGN_UNLESS_EQUAL((_hdr)->gso_size, 0);	\
569 	ASSIGN_UNLESS_EQUAL((_hdr)->hdr_len, 0);	\
570 } while (0)
571 
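/* Translate mbuf Tx offload flags (L4 checksum, TSO) into the
 * virtio-net header; does nothing when offloads are disabled.
 */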
572 static inline void
573 virtqueue_xmit_offload(struct virtio_net_hdr *hdr,
574 			struct rte_mbuf *cookie,
575 			bool offload)
576 {
577 	if (offload) {
578 		if (cookie->ol_flags & PKT_TX_TCP_SEG)
579 			cookie->ol_flags |= PKT_TX_TCP_CKSUM;
580 
581 		switch (cookie->ol_flags & PKT_TX_L4_MASK) {
582 		case PKT_TX_UDP_CKSUM:
583 			hdr->csum_start = cookie->l2_len + cookie->l3_len;
584 			hdr->csum_offset = offsetof(struct rte_udp_hdr,
585 				dgram_cksum);
586 			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
587 			break;
588 
589 		case PKT_TX_TCP_CKSUM:
590 			hdr->csum_start = cookie->l2_len + cookie->l3_len;
591 			hdr->csum_offset = offsetof(struct rte_tcp_hdr, cksum);
592 			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
593 			break;
594 
595 		default:
596 			ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
597 			ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
598 			ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
599 			break;
600 		}
601 
602 		/* TCP Segmentation Offload */
603 		if (cookie->ol_flags & PKT_TX_TCP_SEG) {
604 			hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
605 				VIRTIO_NET_HDR_GSO_TCPV6 :
606 				VIRTIO_NET_HDR_GSO_TCPV4;
607 			hdr->gso_size = cookie->tso_segsz;
608 			hdr->hdr_len =
609 				cookie->l2_len +
610 				cookie->l3_len +
611 				cookie->l4_len;
612 		} else {
613 			ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
614 			ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
615 			ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
616 		}
617 	}
618 }
619 
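/* In-order Tx enqueue for single-segment packets: the virtio-net header
 * is prepended into the mbuf headroom, so each packet consumes exactly
 * one descriptor.
 */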
620 static inline void
621 virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
622 			struct rte_mbuf **cookies,
623 			uint16_t num)
624 {
625 	struct vq_desc_extra *dxp;
626 	struct virtqueue *vq = txvq->vq;
627 	struct vring_desc *start_dp;
628 	struct virtio_net_hdr *hdr;
629 	uint16_t idx;
630 	uint16_t head_size = vq->hw->vtnet_hdr_size;
631 	uint16_t i = 0;
632 
633 	idx = vq->vq_desc_head_idx;
634 	start_dp = vq->vq_split.ring.desc;
635 
636 	while (i < num) {
637 		idx = idx & (vq->vq_nentries - 1);
638 		dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
639 		dxp->cookie = (void *)cookies[i];
640 		dxp->ndescs = 1;
641 		virtio_update_packet_stats(&txvq->stats, cookies[i]);
642 
643 		hdr = (struct virtio_net_hdr *)
644 			rte_pktmbuf_prepend(cookies[i], head_size);
645 		cookies[i]->pkt_len -= head_size;
646 
647 		/* if offload disabled, hdr is not zeroed yet, do it now */
648 		if (!vq->hw->has_tx_offload)
649 			virtqueue_clear_net_hdr(hdr);
650 		else
651 			virtqueue_xmit_offload(hdr, cookies[i], true);
652 
653 		start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookies[i], vq);
654 		start_dp[idx].len   = cookies[i]->data_len;
655 		start_dp[idx].flags = 0;
656 
657 		vq_update_avail_ring(vq, idx);
658 
659 		idx++;
660 		i++;
661 	}
662 
663 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
664 	vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
665 }
666 
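/* Fast Tx path for a single-segment packet whose header fits in the
 * mbuf headroom: one packed descriptor, with its flags written after a
 * write barrier.
 */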
667 static inline void
668 virtqueue_enqueue_xmit_packed_fast(struct virtnet_tx *txvq,
669 				   struct rte_mbuf *cookie,
670 				   int in_order)
671 {
672 	struct virtqueue *vq = txvq->vq;
673 	struct vring_packed_desc *dp;
674 	struct vq_desc_extra *dxp;
675 	uint16_t idx, id, flags;
676 	uint16_t head_size = vq->hw->vtnet_hdr_size;
677 	struct virtio_net_hdr *hdr;
678 
679 	id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
680 	idx = vq->vq_avail_idx;
681 	dp = &vq->vq_packed.ring.desc[idx];
682 
683 	dxp = &vq->vq_descx[id];
684 	dxp->ndescs = 1;
685 	dxp->cookie = cookie;
686 
687 	flags = vq->vq_packed.cached_flags;
688 
689 	/* prepend cannot fail, checked by caller */
690 	hdr = (struct virtio_net_hdr *)
691 		rte_pktmbuf_prepend(cookie, head_size);
692 	cookie->pkt_len -= head_size;
693 
694 	/* if offload disabled, hdr is not zeroed yet, do it now */
695 	if (!vq->hw->has_tx_offload)
696 		virtqueue_clear_net_hdr(hdr);
697 	else
698 		virtqueue_xmit_offload(hdr, cookie, true);
699 
700 	dp->addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
701 	dp->len  = cookie->data_len;
702 	dp->id   = id;
703 
704 	if (++vq->vq_avail_idx >= vq->vq_nentries) {
705 		vq->vq_avail_idx -= vq->vq_nentries;
706 		vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
707 	}
708 
709 	vq->vq_free_cnt--;
710 
711 	if (!in_order) {
712 		vq->vq_desc_head_idx = dxp->next;
713 		if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
714 			vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
715 	}
716 
717 	virtio_wmb(vq->hw->weak_barriers);
718 	dp->flags = flags;
719 }
720 
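/* General packed ring Tx enqueue: one descriptor per segment, plus an
 * extra one for the virtio-net header when it cannot be pushed into the
 * mbuf. The head descriptor flags are written last, after a write
 * barrier.
 */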
721 static inline void
722 virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
723 			      uint16_t needed, int can_push, int in_order)
724 {
725 	struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
726 	struct vq_desc_extra *dxp;
727 	struct virtqueue *vq = txvq->vq;
728 	struct vring_packed_desc *start_dp, *head_dp;
729 	uint16_t idx, id, head_idx, head_flags;
730 	uint16_t head_size = vq->hw->vtnet_hdr_size;
731 	struct virtio_net_hdr *hdr;
732 	uint16_t prev;
733 
734 	id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
735 
736 	dxp = &vq->vq_descx[id];
737 	dxp->ndescs = needed;
738 	dxp->cookie = cookie;
739 
740 	head_idx = vq->vq_avail_idx;
741 	idx = head_idx;
742 	prev = head_idx;
743 	start_dp = vq->vq_packed.ring.desc;
744 
745 	head_dp = &vq->vq_packed.ring.desc[idx];
746 	head_flags = cookie->next ? VRING_DESC_F_NEXT : 0;
747 	head_flags |= vq->vq_packed.cached_flags;
748 
749 	if (can_push) {
750 		/* prepend cannot fail, checked by caller */
751 		hdr = (struct virtio_net_hdr *)
752 			rte_pktmbuf_prepend(cookie, head_size);
753 		/* rte_pktmbuf_prepend() adds the hdr size to the pkt length,
754 		 * which is not wanted here; the subtract below restores it.
755 		 */
756 		cookie->pkt_len -= head_size;
757 
758 		/* if offload disabled, it is not zeroed below, do it now */
759 		if (!vq->hw->has_tx_offload)
760 			virtqueue_clear_net_hdr(hdr);
761 	} else {
762 		/* setup first tx ring slot to point to header
763 		 * stored in reserved region.
764 		 */
765 		start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
766 			RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
767 		start_dp[idx].len   = vq->hw->vtnet_hdr_size;
768 		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
769 		idx++;
770 		if (idx >= vq->vq_nentries) {
771 			idx -= vq->vq_nentries;
772 			vq->vq_packed.cached_flags ^=
773 				VRING_PACKED_DESC_F_AVAIL_USED;
774 		}
775 	}
776 
777 	virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
778 
779 	do {
780 		uint16_t flags;
781 
782 		start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
783 		start_dp[idx].len  = cookie->data_len;
784 		if (likely(idx != head_idx)) {
785 			flags = cookie->next ? VRING_DESC_F_NEXT : 0;
786 			flags |= vq->vq_packed.cached_flags;
787 			start_dp[idx].flags = flags;
788 		}
789 		prev = idx;
790 		idx++;
791 		if (idx >= vq->vq_nentries) {
792 			idx -= vq->vq_nentries;
793 			vq->vq_packed.cached_flags ^=
794 				VRING_PACKED_DESC_F_AVAIL_USED;
795 		}
796 	} while ((cookie = cookie->next) != NULL);
797 
798 	start_dp[prev].id = id;
799 
800 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
801 	vq->vq_avail_idx = idx;
802 
803 	if (!in_order) {
804 		vq->vq_desc_head_idx = dxp->next;
805 		if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
806 			vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
807 	}
808 
809 	virtio_wmb(vq->hw->weak_barriers);
810 	head_dp->flags = head_flags;
811 }
812 
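/* Split ring Tx enqueue supporting three layouts: header pushed into
 * the mbuf headroom (can_push), an indirect descriptor table
 * (use_indirect), or a separate descriptor for the header.
 */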
813 static inline void
814 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
815 			uint16_t needed, int use_indirect, int can_push,
816 			int in_order)
817 {
818 	struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
819 	struct vq_desc_extra *dxp;
820 	struct virtqueue *vq = txvq->vq;
821 	struct vring_desc *start_dp;
822 	uint16_t seg_num = cookie->nb_segs;
823 	uint16_t head_idx, idx;
824 	uint16_t head_size = vq->hw->vtnet_hdr_size;
825 	struct virtio_net_hdr *hdr;
826 
827 	head_idx = vq->vq_desc_head_idx;
828 	idx = head_idx;
829 	if (in_order)
830 		dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
831 	else
832 		dxp = &vq->vq_descx[idx];
833 	dxp->cookie = (void *)cookie;
834 	dxp->ndescs = needed;
835 
836 	start_dp = vq->vq_split.ring.desc;
837 
838 	if (can_push) {
839 		/* prepend cannot fail, checked by caller */
840 		hdr = (struct virtio_net_hdr *)
841 			rte_pktmbuf_prepend(cookie, head_size);
842 		/* rte_pktmbuf_prepend() adds the hdr size to the pkt length,
843 		 * which is not wanted here; the subtract below restores it.
844 		 */
845 		cookie->pkt_len -= head_size;
846 
847 		/* if offload disabled, it is not zeroed below, do it now */
848 		if (!vq->hw->has_tx_offload)
849 			virtqueue_clear_net_hdr(hdr);
850 	} else if (use_indirect) {
851 		/* setup tx ring slot to point to indirect
852 		 * descriptor list stored in reserved region.
853 		 *
854 		 * the first slot in indirect ring is already preset
855 		 * to point to the header in reserved region
856 		 */
857 		start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
858 			RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
859 		start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
860 		start_dp[idx].flags = VRING_DESC_F_INDIRECT;
861 		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
862 
863 		/* loop below will fill in rest of the indirect elements */
864 		start_dp = txr[idx].tx_indir;
865 		idx = 1;
866 	} else {
867 		/* setup first tx ring slot to point to header
868 		 * stored in reserved region.
869 		 */
870 		start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
871 			RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
872 		start_dp[idx].len   = vq->hw->vtnet_hdr_size;
873 		start_dp[idx].flags = VRING_DESC_F_NEXT;
874 		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
875 
876 		idx = start_dp[idx].next;
877 	}
878 
879 	virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
880 
881 	do {
882 		start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
883 		start_dp[idx].len   = cookie->data_len;
884 		start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
885 		idx = start_dp[idx].next;
886 	} while ((cookie = cookie->next) != NULL);
887 
888 	if (use_indirect)
889 		idx = vq->vq_split.ring.desc[head_idx].next;
890 
891 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
892 
893 	vq->vq_desc_head_idx = idx;
894 	vq_update_avail_ring(vq, head_idx);
895 
896 	if (!in_order) {
897 		if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
898 			vq->vq_desc_tail_idx = idx;
899 	}
900 }
901 
902 void
903 virtio_dev_cq_start(struct rte_eth_dev *dev)
904 {
905 	struct virtio_hw *hw = dev->data->dev_private;
906 
907 	if (hw->cvq && hw->cvq->vq) {
908 		rte_spinlock_init(&hw->cvq->lock);
909 		VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
910 	}
911 }
912 
913 int
914 virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
915 			uint16_t queue_idx,
916 			uint16_t nb_desc,
917 			unsigned int socket_id __rte_unused,
918 			const struct rte_eth_rxconf *rx_conf __rte_unused,
919 			struct rte_mempool *mp)
920 {
921 	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
922 	struct virtio_hw *hw = dev->data->dev_private;
923 	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
924 	struct virtnet_rx *rxvq;
925 
926 	PMD_INIT_FUNC_TRACE();
927 
928 	if (nb_desc == 0 || nb_desc > vq->vq_nentries)
929 		nb_desc = vq->vq_nentries;
930 	vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
931 
932 	rxvq = &vq->rxq;
933 	rxvq->queue_id = queue_idx;
934 	rxvq->mpool = mp;
935 	dev->data->rx_queues[queue_idx] = rxvq;
936 
937 	return 0;
938 }
939 
940 int
941 virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
942 {
943 	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
944 	struct virtio_hw *hw = dev->data->dev_private;
945 	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
946 	struct virtnet_rx *rxvq = &vq->rxq;
947 	struct rte_mbuf *m;
948 	uint16_t desc_idx;
949 	int error, nbufs, i;
950 
951 	PMD_INIT_FUNC_TRACE();
952 	/* Allocate blank mbufs for each rx descriptor */
953 	/* Allocate blank mbufs for the each rx descriptor */
954 	nbufs = 0;
955 
956 	if (hw->use_simple_rx) {
957 		for (desc_idx = 0; desc_idx < vq->vq_nentries;
958 		     desc_idx++) {
959 			vq->vq_split.ring.avail->ring[desc_idx] = desc_idx;
960 			vq->vq_split.ring.desc[desc_idx].flags =
961 				VRING_DESC_F_WRITE;
962 		}
963 
964 		virtio_rxq_vec_setup(rxvq);
965 	}
966 
967 	memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
968 	for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
969 	     desc_idx++) {
970 		vq->sw_ring[vq->vq_nentries + desc_idx] =
971 			&rxvq->fake_mbuf;
972 	}
973 
974 	if (hw->use_simple_rx) {
975 		while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
976 			virtio_rxq_rearm_vec(rxvq);
977 			nbufs += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
978 		}
979 	} else if (hw->use_inorder_rx) {
980 		if (!virtqueue_full(vq)) {
981 			uint16_t free_cnt = vq->vq_free_cnt;
982 			struct rte_mbuf *pkts[free_cnt];
983 
984 			if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, pkts,
985 				free_cnt)) {
986 				error = virtqueue_enqueue_refill_inorder(vq,
987 						pkts,
988 						free_cnt);
989 				if (unlikely(error)) {
990 					for (i = 0; i < free_cnt; i++)
991 						rte_pktmbuf_free(pkts[i]);
992 				}
993 			}
994 
995 			nbufs += free_cnt;
996 			vq_update_avail_idx(vq);
997 		}
998 	} else {
999 		while (!virtqueue_full(vq)) {
1000 			m = rte_mbuf_raw_alloc(rxvq->mpool);
1001 			if (m == NULL)
1002 				break;
1003 
1004 			/* Enqueue allocated buffers */
1005 			if (vtpci_packed_queue(vq->hw))
1006 				error = virtqueue_enqueue_recv_refill_packed(vq,
1007 						&m, 1);
1008 			else
1009 				error = virtqueue_enqueue_recv_refill(vq,
1010 						&m, 1);
1011 			if (error) {
1012 				rte_pktmbuf_free(m);
1013 				break;
1014 			}
1015 			nbufs++;
1016 		}
1017 
1018 		if (!vtpci_packed_queue(vq->hw))
1019 			vq_update_avail_idx(vq);
1020 	}
1021 
1022 	PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
1023 
1024 	VIRTQUEUE_DUMP(vq);
1025 
1026 	return 0;
1027 }
1028 
1029 /*
1030  * struct rte_eth_dev *dev: device whose Tx queue is being set up
1031  * uint16_t nb_desc: if 0, defaults to the value read from config space
1032  * unsigned int socket_id: used to allocate the memzone
1033  * const struct rte_eth_txconf *tx_conf: used to set up the Tx engine
1034  * uint16_t queue_idx: index into the device Tx queue list
1035  */
1036 int
1037 virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
1038 			uint16_t queue_idx,
1039 			uint16_t nb_desc,
1040 			unsigned int socket_id __rte_unused,
1041 			const struct rte_eth_txconf *tx_conf)
1042 {
1043 	uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
1044 	struct virtio_hw *hw = dev->data->dev_private;
1045 	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
1046 	struct virtnet_tx *txvq;
1047 	uint16_t tx_free_thresh;
1048 
1049 	PMD_INIT_FUNC_TRACE();
1050 
1051 	if (nb_desc == 0 || nb_desc > vq->vq_nentries)
1052 		nb_desc = vq->vq_nentries;
1053 	vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
1054 
1055 	txvq = &vq->txq;
1056 	txvq->queue_id = queue_idx;
1057 
1058 	tx_free_thresh = tx_conf->tx_free_thresh;
1059 	if (tx_free_thresh == 0)
1060 		tx_free_thresh =
1061 			RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
1062 
1063 	if (tx_free_thresh >= (vq->vq_nentries - 3)) {
1064 		RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
1065 			"number of TX entries minus 3 (%u)."
1066 			" (tx_free_thresh=%u port=%u queue=%u)\n",
1067 			vq->vq_nentries - 3,
1068 			tx_free_thresh, dev->data->port_id, queue_idx);
1069 		return -EINVAL;
1070 	}
1071 
1072 	vq->vq_free_thresh = tx_free_thresh;
1073 
1074 	dev->data->tx_queues[queue_idx] = txvq;
1075 	return 0;
1076 }
1077 
1078 int
1079 virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
1080 				uint16_t queue_idx)
1081 {
1082 	uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
1083 	struct virtio_hw *hw = dev->data->dev_private;
1084 	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
1085 
1086 	PMD_INIT_FUNC_TRACE();
1087 
1088 	if (!vtpci_packed_queue(hw)) {
1089 		if (hw->use_inorder_tx)
1090 			vq->vq_split.ring.desc[vq->vq_nentries - 1].next = 0;
1091 	}
1092 
1093 	VIRTQUEUE_DUMP(vq);
1094 
1095 	return 0;
1096 }
1097 
1098 static inline void
1099 virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
1100 {
1101 	int error;
1102 	/*
1103 	 * Requeue the discarded mbuf. This should always be
1104 	 * successful since it was just dequeued.
1105 	 */
1106 	if (vtpci_packed_queue(vq->hw))
1107 		error = virtqueue_enqueue_recv_refill_packed(vq, &m, 1);
1108 	else
1109 		error = virtqueue_enqueue_recv_refill(vq, &m, 1);
1110 
1111 	if (unlikely(error)) {
1112 		RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf\n");
1113 		rte_pktmbuf_free(m);
1114 	}
1115 }
1116 
1117 static inline void
1118 virtio_discard_rxbuf_inorder(struct virtqueue *vq, struct rte_mbuf *m)
1119 {
1120 	int error;
1121 
1122 	error = virtqueue_enqueue_refill_inorder(vq, &m, 1);
1123 	if (unlikely(error)) {
1124 		RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf\n");
1125 		rte_pktmbuf_free(m);
1126 	}
1127 }
1128 
1129 /* Optionally fill offload information in structure */
1130 static inline int
1131 virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
1132 {
1133 	struct rte_net_hdr_lens hdr_lens;
1134 	uint32_t hdrlen, ptype;
1135 	int l4_supported = 0;
1136 
1137 	/* nothing to do */
1138 	if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
1139 		return 0;
1140 
1141 	m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
1142 
1143 	ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
1144 	m->packet_type = ptype;
1145 	if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
1146 	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
1147 	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
1148 		l4_supported = 1;
1149 
1150 	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
1151 		hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
1152 		if (hdr->csum_start <= hdrlen && l4_supported) {
1153 			m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
1154 		} else {
1155 			/* Unknown proto or tunnel, do sw cksum. We can assume
1156 			 * the cksum field is in the first segment since the
1157 			 * buffers we provided to the host are large enough.
1158 			 * In case of SCTP, this will be wrong since it's a CRC
1159 			 * but there's nothing we can do.
1160 			 */
1161 			uint16_t csum = 0, off;
1162 
1163 			rte_raw_cksum_mbuf(m, hdr->csum_start,
1164 				rte_pktmbuf_pkt_len(m) - hdr->csum_start,
1165 				&csum);
1166 			if (likely(csum != 0xffff))
1167 				csum = ~csum;
1168 			off = hdr->csum_offset + hdr->csum_start;
1169 			if (rte_pktmbuf_data_len(m) >= off + 1)
1170 				*rte_pktmbuf_mtod_offset(m, uint16_t *,
1171 					off) = csum;
1172 		}
1173 	} else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
1174 		m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
1175 	}
1176 
1177 	/* GSO request, save required information in mbuf */
1178 	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
1179 		/* Check unsupported modes */
1180 		if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
1181 		    (hdr->gso_size == 0)) {
1182 			return -EINVAL;
1183 		}
1184 
1185 		/* Update mss length in mbuf */
1186 		m->tso_segsz = hdr->gso_size;
1187 		switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
1188 		case VIRTIO_NET_HDR_GSO_TCPV4:
1189 		case VIRTIO_NET_HDR_GSO_TCPV6:
1190 			m->ol_flags |= PKT_RX_LRO |
1191 				PKT_RX_L4_CKSUM_NONE;
1192 			break;
1193 		default:
1194 			return -EINVAL;
1195 		}
1196 	}
1197 
1198 	return 0;
1199 }
1200 
1201 #define VIRTIO_MBUF_BURST_SZ 64
1202 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
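/* Receive burst for the split ring without mergeable buffers: one
 * descriptor (and one mbuf) per packet.
 */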
1203 uint16_t
1204 virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
1205 {
1206 	struct virtnet_rx *rxvq = rx_queue;
1207 	struct virtqueue *vq = rxvq->vq;
1208 	struct virtio_hw *hw = vq->hw;
1209 	struct rte_mbuf *rxm;
1210 	uint16_t nb_used, num, nb_rx;
1211 	uint32_t len[VIRTIO_MBUF_BURST_SZ];
1212 	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1213 	int error;
1214 	uint32_t i, nb_enqueued;
1215 	uint32_t hdr_size;
1216 	struct virtio_net_hdr *hdr;
1217 
1218 	nb_rx = 0;
1219 	if (unlikely(hw->started == 0))
1220 		return nb_rx;
1221 
1222 	nb_used = VIRTQUEUE_NUSED(vq);
1223 
1224 	virtio_rmb(hw->weak_barriers);
1225 
1226 	num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1227 	if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1228 		num = VIRTIO_MBUF_BURST_SZ;
1229 	if (likely(num > DESC_PER_CACHELINE))
1230 		num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1231 
1232 	num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1233 	PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
1234 
1235 	nb_enqueued = 0;
1236 	hdr_size = hw->vtnet_hdr_size;
1237 
1238 	for (i = 0; i < num ; i++) {
1239 		rxm = rcv_pkts[i];
1240 
1241 		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1242 
1243 		if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1244 			PMD_RX_LOG(ERR, "Packet drop");
1245 			nb_enqueued++;
1246 			virtio_discard_rxbuf(vq, rxm);
1247 			rxvq->stats.errors++;
1248 			continue;
1249 		}
1250 
1251 		rxm->port = rxvq->port_id;
1252 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
1253 		rxm->ol_flags = 0;
1254 		rxm->vlan_tci = 0;
1255 
1256 		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1257 		rxm->data_len = (uint16_t)(len[i] - hdr_size);
1258 
1259 		hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1260 			RTE_PKTMBUF_HEADROOM - hdr_size);
1261 
1262 		if (hw->vlan_strip)
1263 			rte_vlan_strip(rxm);
1264 
1265 		if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1266 			virtio_discard_rxbuf(vq, rxm);
1267 			rxvq->stats.errors++;
1268 			continue;
1269 		}
1270 
1271 		virtio_rx_stats_updated(rxvq, rxm);
1272 
1273 		rx_pkts[nb_rx++] = rxm;
1274 	}
1275 
1276 	rxvq->stats.packets += nb_rx;
1277 
1278 	/* Allocate new mbufs for the used descriptors */
1279 	if (likely(!virtqueue_full(vq))) {
1280 		uint16_t free_cnt = vq->vq_free_cnt;
1281 		struct rte_mbuf *new_pkts[free_cnt];
1282 
1283 		if (likely(rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts,
1284 						free_cnt) == 0)) {
1285 			error = virtqueue_enqueue_recv_refill(vq, new_pkts,
1286 					free_cnt);
1287 			if (unlikely(error)) {
1288 				for (i = 0; i < free_cnt; i++)
1289 					rte_pktmbuf_free(new_pkts[i]);
1290 			}
1291 			nb_enqueued += free_cnt;
1292 		} else {
1293 			struct rte_eth_dev *dev =
1294 				&rte_eth_devices[rxvq->port_id];
1295 			dev->data->rx_mbuf_alloc_failed += free_cnt;
1296 		}
1297 	}
1298 
1299 	if (likely(nb_enqueued)) {
1300 		vq_update_avail_idx(vq);
1301 
1302 		if (unlikely(virtqueue_kick_prepare(vq))) {
1303 			virtqueue_notify(vq);
1304 			PMD_RX_LOG(DEBUG, "Notified");
1305 		}
1306 	}
1307 
1308 	return nb_rx;
1309 }
1310 
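/* Receive burst for the packed ring without mergeable buffers. */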
1311 uint16_t
1312 virtio_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
1313 			uint16_t nb_pkts)
1314 {
1315 	struct virtnet_rx *rxvq = rx_queue;
1316 	struct virtqueue *vq = rxvq->vq;
1317 	struct virtio_hw *hw = vq->hw;
1318 	struct rte_mbuf *rxm;
1319 	uint16_t num, nb_rx;
1320 	uint32_t len[VIRTIO_MBUF_BURST_SZ];
1321 	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1322 	int error;
1323 	uint32_t i, nb_enqueued;
1324 	uint32_t hdr_size;
1325 	struct virtio_net_hdr *hdr;
1326 
1327 	nb_rx = 0;
1328 	if (unlikely(hw->started == 0))
1329 		return nb_rx;
1330 
1331 	num = RTE_MIN(VIRTIO_MBUF_BURST_SZ, nb_pkts);
1332 	if (likely(num > DESC_PER_CACHELINE))
1333 		num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1334 
1335 	num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1336 	PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1337 
1338 	nb_enqueued = 0;
1339 	hdr_size = hw->vtnet_hdr_size;
1340 
1341 	for (i = 0; i < num; i++) {
1342 		rxm = rcv_pkts[i];
1343 
1344 		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1345 
1346 		if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1347 			PMD_RX_LOG(ERR, "Packet drop");
1348 			nb_enqueued++;
1349 			virtio_discard_rxbuf(vq, rxm);
1350 			rxvq->stats.errors++;
1351 			continue;
1352 		}
1353 
1354 		rxm->port = rxvq->port_id;
1355 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
1356 		rxm->ol_flags = 0;
1357 		rxm->vlan_tci = 0;
1358 
1359 		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1360 		rxm->data_len = (uint16_t)(len[i] - hdr_size);
1361 
1362 		hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1363 			RTE_PKTMBUF_HEADROOM - hdr_size);
1364 
1365 		if (hw->vlan_strip)
1366 			rte_vlan_strip(rxm);
1367 
1368 		if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1369 			virtio_discard_rxbuf(vq, rxm);
1370 			rxvq->stats.errors++;
1371 			continue;
1372 		}
1373 
1374 		virtio_rx_stats_updated(rxvq, rxm);
1375 
1376 		rx_pkts[nb_rx++] = rxm;
1377 	}
1378 
1379 	rxvq->stats.packets += nb_rx;
1380 
1381 	/* Allocate new mbufs for the used descriptors */
1382 	if (likely(!virtqueue_full(vq))) {
1383 		uint16_t free_cnt = vq->vq_free_cnt;
1384 		struct rte_mbuf *new_pkts[free_cnt];
1385 
1386 		if (likely(rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts,
1387 						free_cnt) == 0)) {
1388 			error = virtqueue_enqueue_recv_refill_packed(vq,
1389 					new_pkts, free_cnt);
1390 			if (unlikely(error)) {
1391 				for (i = 0; i < free_cnt; i++)
1392 					rte_pktmbuf_free(new_pkts[i]);
1393 			}
1394 			nb_enqueued += free_cnt;
1395 		} else {
1396 			struct rte_eth_dev *dev =
1397 				&rte_eth_devices[rxvq->port_id];
1398 			dev->data->rx_mbuf_alloc_failed += free_cnt;
1399 		}
1400 	}
1401 
1402 	if (likely(nb_enqueued)) {
1403 		if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1404 			virtqueue_notify(vq);
1405 			PMD_RX_LOG(DEBUG, "Notified");
1406 		}
1407 	}
1408 
1409 	return nb_rx;
1410 }
1411 
1412 
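/* Receive burst for the in-order split ring; mergeable buffers are
 * handled when VIRTIO_NET_F_MRG_RXBUF is negotiated.
 */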
1413 uint16_t
1414 virtio_recv_pkts_inorder(void *rx_queue,
1415 			struct rte_mbuf **rx_pkts,
1416 			uint16_t nb_pkts)
1417 {
1418 	struct virtnet_rx *rxvq = rx_queue;
1419 	struct virtqueue *vq = rxvq->vq;
1420 	struct virtio_hw *hw = vq->hw;
1421 	struct rte_mbuf *rxm;
1422 	struct rte_mbuf *prev = NULL;
1423 	uint16_t nb_used, num, nb_rx;
1424 	uint32_t len[VIRTIO_MBUF_BURST_SZ];
1425 	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1426 	int error;
1427 	uint32_t nb_enqueued;
1428 	uint32_t seg_num;
1429 	uint32_t seg_res;
1430 	uint32_t hdr_size;
1431 	int32_t i;
1432 
1433 	nb_rx = 0;
1434 	if (unlikely(hw->started == 0))
1435 		return nb_rx;
1436 
1437 	nb_used = VIRTQUEUE_NUSED(vq);
1438 	nb_used = RTE_MIN(nb_used, nb_pkts);
1439 	nb_used = RTE_MIN(nb_used, VIRTIO_MBUF_BURST_SZ);
1440 
1441 	virtio_rmb(hw->weak_barriers);
1442 
1443 	PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1444 
1445 	nb_enqueued = 0;
1446 	seg_num = 1;
1447 	seg_res = 0;
1448 	hdr_size = hw->vtnet_hdr_size;
1449 
1450 	num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len, nb_used);
1451 
1452 	for (i = 0; i < num; i++) {
1453 		struct virtio_net_hdr_mrg_rxbuf *header;
1454 
1455 		PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1456 		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1457 
1458 		rxm = rcv_pkts[i];
1459 
1460 		if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1461 			PMD_RX_LOG(ERR, "Packet drop");
1462 			nb_enqueued++;
1463 			virtio_discard_rxbuf_inorder(vq, rxm);
1464 			rxvq->stats.errors++;
1465 			continue;
1466 		}
1467 
1468 		header = (struct virtio_net_hdr_mrg_rxbuf *)
1469 			 ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1470 			 - hdr_size);
1471 
1472 		if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1473 			seg_num = header->num_buffers;
1474 			if (seg_num == 0)
1475 				seg_num = 1;
1476 		} else {
1477 			seg_num = 1;
1478 		}
1479 
1480 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
1481 		rxm->nb_segs = seg_num;
1482 		rxm->ol_flags = 0;
1483 		rxm->vlan_tci = 0;
1484 		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1485 		rxm->data_len = (uint16_t)(len[i] - hdr_size);
1486 
1487 		rxm->port = rxvq->port_id;
1488 
1489 		rx_pkts[nb_rx] = rxm;
1490 		prev = rxm;
1491 
1492 		if (vq->hw->has_rx_offload &&
1493 				virtio_rx_offload(rxm, &header->hdr) < 0) {
1494 			virtio_discard_rxbuf_inorder(vq, rxm);
1495 			rxvq->stats.errors++;
1496 			continue;
1497 		}
1498 
1499 		if (hw->vlan_strip)
1500 			rte_vlan_strip(rx_pkts[nb_rx]);
1501 
1502 		seg_res = seg_num - 1;
1503 
1504 		/* Merge remaining segments */
1505 		while (seg_res != 0 && i < (num - 1)) {
1506 			i++;
1507 
1508 			rxm = rcv_pkts[i];
1509 			rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1510 			rxm->pkt_len = (uint32_t)(len[i]);
1511 			rxm->data_len = (uint16_t)(len[i]);
1512 
1513 			rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1514 
1515 			prev->next = rxm;
1516 			prev = rxm;
1517 			seg_res -= 1;
1518 		}
1519 
1520 		if (!seg_res) {
1521 			virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1522 			nb_rx++;
1523 		}
1524 	}
1525 
1526 	/* The last packet may still need more segments merged */
1527 	while (seg_res != 0) {
1528 		uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1529 					VIRTIO_MBUF_BURST_SZ);
1530 
1531 		if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1532 			virtio_rmb(hw->weak_barriers);
1533 			num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len,
1534 							   rcv_cnt);
1535 			uint16_t extra_idx = 0;
1536 
1537 			rcv_cnt = num;
1538 			while (extra_idx < rcv_cnt) {
1539 				rxm = rcv_pkts[extra_idx];
1540 				rxm->data_off =
1541 					RTE_PKTMBUF_HEADROOM - hdr_size;
1542 				rxm->pkt_len = (uint32_t)(len[extra_idx]);
1543 				rxm->data_len = (uint16_t)(len[extra_idx]);
1544 				prev->next = rxm;
1545 				prev = rxm;
1546 				rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1547 				extra_idx += 1;
1548 			}
1549 			seg_res -= rcv_cnt;
1550 
1551 			if (!seg_res) {
1552 				virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1553 				nb_rx++;
1554 			}
1555 		} else {
1556 			PMD_RX_LOG(ERR,
1557 					"Not enough segments for packet.");
1558 			rte_pktmbuf_free(rx_pkts[nb_rx]);
1559 			rxvq->stats.errors++;
1560 			break;
1561 		}
1562 	}
1563 
1564 	rxvq->stats.packets += nb_rx;
1565 
1566 	/* Allocate new mbufs for the used descriptors */
1567 
1568 	if (likely(!virtqueue_full(vq))) {
1569 		/* free_cnt may include mrg descs */
1570 		uint16_t free_cnt = vq->vq_free_cnt;
1571 		struct rte_mbuf *new_pkts[free_cnt];
1572 
1573 		if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1574 			error = virtqueue_enqueue_refill_inorder(vq, new_pkts,
1575 					free_cnt);
1576 			if (unlikely(error)) {
1577 				for (i = 0; i < free_cnt; i++)
1578 					rte_pktmbuf_free(new_pkts[i]);
1579 			}
1580 			nb_enqueued += free_cnt;
1581 		} else {
1582 			struct rte_eth_dev *dev =
1583 				&rte_eth_devices[rxvq->port_id];
1584 			dev->data->rx_mbuf_alloc_failed += free_cnt;
1585 		}
1586 	}
1587 
1588 	if (likely(nb_enqueued)) {
1589 		vq_update_avail_idx(vq);
1590 
1591 		if (unlikely(virtqueue_kick_prepare(vq))) {
1592 			virtqueue_notify(vq);
1593 			PMD_RX_LOG(DEBUG, "Notified");
1594 		}
1595 	}
1596 
1597 	return nb_rx;
1598 }
1599 
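/* Receive burst for the split ring with mergeable buffers: the segments
 * of one packet may span several descriptors and are chained here.
 */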
1600 uint16_t
1601 virtio_recv_mergeable_pkts(void *rx_queue,
1602 			struct rte_mbuf **rx_pkts,
1603 			uint16_t nb_pkts)
1604 {
1605 	struct virtnet_rx *rxvq = rx_queue;
1606 	struct virtqueue *vq = rxvq->vq;
1607 	struct virtio_hw *hw = vq->hw;
1608 	struct rte_mbuf *rxm;
1609 	struct rte_mbuf *prev = NULL;
1610 	uint16_t nb_used, num, nb_rx = 0;
1611 	uint32_t len[VIRTIO_MBUF_BURST_SZ];
1612 	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1613 	int error;
1614 	uint32_t nb_enqueued = 0;
1615 	uint32_t seg_num = 0;
1616 	uint32_t seg_res = 0;
1617 	uint32_t hdr_size = hw->vtnet_hdr_size;
1618 	int32_t i;
1619 
1620 	if (unlikely(hw->started == 0))
1621 		return nb_rx;
1622 
1623 	nb_used = VIRTQUEUE_NUSED(vq);
1624 
1625 	virtio_rmb(hw->weak_barriers);
1626 
1627 	PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1628 
1629 	num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1630 	if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1631 		num = VIRTIO_MBUF_BURST_SZ;
1632 	if (likely(num > DESC_PER_CACHELINE))
1633 		num = num - ((vq->vq_used_cons_idx + num) %
1634 				DESC_PER_CACHELINE);
1635 
1636 
1637 	num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1638 
1639 	for (i = 0; i < num; i++) {
1640 		struct virtio_net_hdr_mrg_rxbuf *header;
1641 
1642 		PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1643 		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1644 
1645 		rxm = rcv_pkts[i];
1646 
1647 		if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1648 			PMD_RX_LOG(ERR, "Packet drop");
1649 			nb_enqueued++;
1650 			virtio_discard_rxbuf(vq, rxm);
1651 			rxvq->stats.errors++;
1652 			continue;
1653 		}
1654 
1655 		header = (struct virtio_net_hdr_mrg_rxbuf *)
1656 			 ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1657 			 - hdr_size);
1658 		seg_num = header->num_buffers;
1659 		if (seg_num == 0)
1660 			seg_num = 1;
1661 
1662 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
1663 		rxm->nb_segs = seg_num;
1664 		rxm->ol_flags = 0;
1665 		rxm->vlan_tci = 0;
1666 		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1667 		rxm->data_len = (uint16_t)(len[i] - hdr_size);
1668 
1669 		rxm->port = rxvq->port_id;
1670 
1671 		rx_pkts[nb_rx] = rxm;
1672 		prev = rxm;
1673 
1674 		if (hw->has_rx_offload &&
1675 				virtio_rx_offload(rxm, &header->hdr) < 0) {
1676 			virtio_discard_rxbuf(vq, rxm);
1677 			rxvq->stats.errors++;
1678 			continue;
1679 		}
1680 
1681 		if (hw->vlan_strip)
1682 			rte_vlan_strip(rx_pkts[nb_rx]);
1683 
1684 		seg_res = seg_num - 1;
1685 
1686 		/* Merge remaining segments */
1687 		while (seg_res != 0 && i < (num - 1)) {
1688 			i++;
1689 
1690 			rxm = rcv_pkts[i];
1691 			rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1692 			rxm->pkt_len = (uint32_t)(len[i]);
1693 			rxm->data_len = (uint16_t)(len[i]);
1694 
1695 			rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1696 
1697 			prev->next = rxm;
1698 			prev = rxm;
1699 			seg_res -= 1;
1700 		}
1701 
1702 		if (!seg_res) {
1703 			virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1704 			nb_rx++;
1705 		}
1706 	}
1707 
1708 	/* Last packet still need merge segments */
1709 	/* The last packet may still need more segments merged */
1710 		uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1711 					VIRTIO_MBUF_BURST_SZ);
1712 
1713 		if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1714 			virtio_rmb(hw->weak_barriers);
1715 			num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len,
1716 							   rcv_cnt);
1717 			uint16_t extra_idx = 0;
1718 
1719 			rcv_cnt = num;
1720 			while (extra_idx < rcv_cnt) {
1721 				rxm = rcv_pkts[extra_idx];
1722 				rxm->data_off =
1723 					RTE_PKTMBUF_HEADROOM - hdr_size;
1724 				rxm->pkt_len = (uint32_t)(len[extra_idx]);
1725 				rxm->data_len = (uint16_t)(len[extra_idx]);
1726 				prev->next = rxm;
1727 				prev = rxm;
1728 				rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1729 				extra_idx += 1;
1730 			}
1731 			seg_res -= rcv_cnt;
1732 
1733 			if (!seg_res) {
1734 				virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1735 				nb_rx++;
1736 			}
1737 		} else {
1738 			PMD_RX_LOG(ERR,
1739 					"Not enough segments for packet.");
1740 			rte_pktmbuf_free(rx_pkts[nb_rx]);
1741 			rxvq->stats.errors++;
1742 			break;
1743 		}
1744 	}
1745 
1746 	rxvq->stats.packets += nb_rx;
1747 
1748 	/* Allocate new mbufs for the used descriptors */
1749 	if (likely(!virtqueue_full(vq))) {
1750 		/* free_cnt may include mrg descs */
1751 		uint16_t free_cnt = vq->vq_free_cnt;
1752 		struct rte_mbuf *new_pkts[free_cnt];
1753 
1754 		if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1755 			error = virtqueue_enqueue_recv_refill(vq, new_pkts,
1756 					free_cnt);
1757 			if (unlikely(error)) {
1758 				for (i = 0; i < free_cnt; i++)
1759 					rte_pktmbuf_free(new_pkts[i]);
1760 			}
1761 			nb_enqueued += free_cnt;
1762 		} else {
1763 			struct rte_eth_dev *dev =
1764 				&rte_eth_devices[rxvq->port_id];
1765 			dev->data->rx_mbuf_alloc_failed += free_cnt;
1766 		}
1767 	}
1768 
1769 	if (likely(nb_enqueued)) {
1770 		vq_update_avail_idx(vq);
1771 
1772 		if (unlikely(virtqueue_kick_prepare(vq))) {
1773 			virtqueue_notify(vq);
1774 			PMD_RX_LOG(DEBUG, "Notified");
1775 		}
1776 	}
1777 
1778 	return nb_rx;
1779 }
1780 
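/* Receive burst for the packed ring with mergeable buffers. */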
1781 uint16_t
1782 virtio_recv_mergeable_pkts_packed(void *rx_queue,
1783 			struct rte_mbuf **rx_pkts,
1784 			uint16_t nb_pkts)
1785 {
1786 	struct virtnet_rx *rxvq = rx_queue;
1787 	struct virtqueue *vq = rxvq->vq;
1788 	struct virtio_hw *hw = vq->hw;
1789 	struct rte_mbuf *rxm;
1790 	struct rte_mbuf *prev = NULL;
1791 	uint16_t num, nb_rx = 0;
1792 	uint32_t len[VIRTIO_MBUF_BURST_SZ];
1793 	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1794 	uint32_t nb_enqueued = 0;
1795 	uint32_t seg_num = 0;
1796 	uint32_t seg_res = 0;
1797 	uint32_t hdr_size = hw->vtnet_hdr_size;
1798 	int32_t i;
1799 	int error;
1800 
1801 	if (unlikely(hw->started == 0))
1802 		return nb_rx;
1803 
1804 
1805 	num = nb_pkts;
1806 	if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1807 		num = VIRTIO_MBUF_BURST_SZ;
1808 	if (likely(num > DESC_PER_CACHELINE))
1809 		num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1810 
1811 	num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1812 
1813 	for (i = 0; i < num; i++) {
1814 		struct virtio_net_hdr_mrg_rxbuf *header;
1815 
1816 		PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1817 		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1818 
1819 		rxm = rcv_pkts[i];
1820 
1821 		if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1822 			PMD_RX_LOG(ERR, "Packet drop");
1823 			nb_enqueued++;
1824 			virtio_discard_rxbuf(vq, rxm);
1825 			rxvq->stats.errors++;
1826 			continue;
1827 		}
1828 
1829 		header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)
1830 			  rxm->buf_addr + RTE_PKTMBUF_HEADROOM - hdr_size);
1831 		seg_num = header->num_buffers;
1832 
1833 		if (seg_num == 0)
1834 			seg_num = 1;
1835 
1836 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
1837 		rxm->nb_segs = seg_num;
1838 		rxm->ol_flags = 0;
1839 		rxm->vlan_tci = 0;
1840 		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1841 		rxm->data_len = (uint16_t)(len[i] - hdr_size);
1842 
1843 		rxm->port = rxvq->port_id;
1844 		rx_pkts[nb_rx] = rxm;
1845 		prev = rxm;
1846 
1847 		if (hw->has_rx_offload &&
1848 				virtio_rx_offload(rxm, &header->hdr) < 0) {
1849 			virtio_discard_rxbuf(vq, rxm);
1850 			rxvq->stats.errors++;
1851 			continue;
1852 		}
1853 
1854 		if (hw->vlan_strip)
1855 			rte_vlan_strip(rx_pkts[nb_rx]);
1856 
1857 		seg_res = seg_num - 1;
1858 
1859 		/* Merge remaining segments */
1860 		while (seg_res != 0 && i < (num - 1)) {
1861 			i++;
1862 
1863 			rxm = rcv_pkts[i];
1864 			rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1865 			rxm->pkt_len = (uint32_t)(len[i]);
1866 			rxm->data_len = (uint16_t)(len[i]);
1867 
1868 			rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1869 
1870 			prev->next = rxm;
1871 			prev = rxm;
1872 			seg_res -= 1;
1873 		}
1874 
1875 		if (!seg_res) {
1876 			virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1877 			nb_rx++;
1878 		}
1879 	}
1880 
1881 	/* The last packet may still need more segments merged */
1882 	while (seg_res != 0) {
1883 		uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1884 					VIRTIO_MBUF_BURST_SZ);
1885 		uint16_t extra_idx = 0;
1886 
1887 		rcv_cnt = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts,
1888 				len, rcv_cnt);
1889 		if (unlikely(rcv_cnt == 0)) {
1890 			PMD_RX_LOG(ERR, "Not enough segments for packet.");
1891 			rte_pktmbuf_free(rx_pkts[nb_rx]);
1892 			rxvq->stats.errors++;
1893 			break;
1894 		}
1895 
1896 		while (extra_idx < rcv_cnt) {
1897 			rxm = rcv_pkts[extra_idx];
1898 
1899 			rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1900 			rxm->pkt_len = (uint32_t)(len[extra_idx]);
1901 			rxm->data_len = (uint16_t)(len[extra_idx]);
1902 
1903 			prev->next = rxm;
1904 			prev = rxm;
1905 			rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1906 			extra_idx += 1;
1907 		}
1908 		seg_res -= rcv_cnt;
1909 		if (!seg_res) {
1910 			virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1911 			nb_rx++;
1912 		}
1913 	}
1914 
1915 	rxvq->stats.packets += nb_rx;
1916 
1917 	/* Allocate new mbufs for the used descriptors */
1918 	if (likely(!virtqueue_full(vq))) {
1919 		/* free_cnt may include mrg descs */
1920 		uint16_t free_cnt = vq->vq_free_cnt;
1921 		struct rte_mbuf *new_pkts[free_cnt];
1922 
1923 		if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1924 			error = virtqueue_enqueue_recv_refill_packed(vq,
1925 					new_pkts, free_cnt);
1926 			if (unlikely(error)) {
1927 				for (i = 0; i < free_cnt; i++)
1928 					rte_pktmbuf_free(new_pkts[i]);
1929 			}
1930 			nb_enqueued += free_cnt;
1931 		} else {
1932 			struct rte_eth_dev *dev =
1933 				&rte_eth_devices[rxvq->port_id];
1934 			dev->data->rx_mbuf_alloc_failed += free_cnt;
1935 		}
1936 	}
1937 
1938 	if (likely(nb_enqueued)) {
1939 		if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1940 			virtqueue_notify(vq);
1941 			PMD_RX_LOG(DEBUG, "Notified");
1942 		}
1943 	}
1944 
1945 	return nb_rx;
1946 }
1947 
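/*
 * Tx prepare callback: validates Tx offload flags (debug builds only),
 * performs software VLAN insertion when PKT_TX_VLAN_PKT is set, prepares
 * checksum fields via rte_net_intel_cksum_prepare() and fixes the TCP
 * checksum of TSO packets. Returns the number of packets ready to send.
 */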
1948 uint16_t
1949 virtio_xmit_pkts_prepare(void *tx_queue __rte_unused, struct rte_mbuf **tx_pkts,
1950 			uint16_t nb_pkts)
1951 {
1952 	uint16_t nb_tx;
1953 	int error;
1954 
1955 	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1956 		struct rte_mbuf *m = tx_pkts[nb_tx];
1957 
1958 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
1959 		error = rte_validate_tx_offload(m);
1960 		if (unlikely(error)) {
1961 			rte_errno = -error;
1962 			break;
1963 		}
1964 #endif
1965 
1966 		/* Do VLAN tag insertion */
1967 		if (unlikely(m->ol_flags & PKT_TX_VLAN_PKT)) {
1968 			error = rte_vlan_insert(&m);
1969 			/* rte_vlan_insert() may change pointer
1970 			 * even in the case of failure
1971 			 */
1972 			tx_pkts[nb_tx] = m;
1973 
1974 			if (unlikely(error)) {
1975 				rte_errno = -error;
1976 				break;
1977 			}
1978 		}
1979 
1980 		error = rte_net_intel_cksum_prepare(m);
1981 		if (unlikely(error)) {
1982 			rte_errno = -error;
1983 			break;
1984 		}
1985 
1986 		if (m->ol_flags & PKT_TX_TCP_SEG)
1987 			virtio_tso_fix_cksum(m);
1988 	}
1989 
1990 	return nb_tx;
1991 }
1992 
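/* Transmit burst on a packed virtqueue */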
1993 uint16_t
1994 virtio_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts,
1995 			uint16_t nb_pkts)
1996 {
1997 	struct virtnet_tx *txvq = tx_queue;
1998 	struct virtqueue *vq = txvq->vq;
1999 	struct virtio_hw *hw = vq->hw;
2000 	uint16_t hdr_size = hw->vtnet_hdr_size;
2001 	uint16_t nb_tx = 0;
2002 	bool in_order = hw->use_inorder_tx;
2003 
2004 	if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2005 		return nb_tx;
2006 
2007 	if (unlikely(nb_pkts < 1))
2008 		return nb_pkts;
2009 
2010 	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2011 
2012 	if (nb_pkts > vq->vq_free_cnt)
2013 		virtio_xmit_cleanup_packed(vq, nb_pkts - vq->vq_free_cnt,
2014 					   in_order);
2015 
2016 	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
2017 		struct rte_mbuf *txm = tx_pkts[nb_tx];
2018 		int can_push = 0, slots, need;
2019 
2020 		/* optimize ring usage */
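		/* can_push: the virtio-net header is prepended into the mbuf
		 * headroom, so header and data share a single descriptor.
		 */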
2021 		if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2022 		      vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2023 		    rte_mbuf_refcnt_read(txm) == 1 &&
2024 		    RTE_MBUF_DIRECT(txm) &&
2025 		    txm->nb_segs == 1 &&
2026 		    rte_pktmbuf_headroom(txm) >= hdr_size &&
2027 		    rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2028 			   __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
2029 			can_push = 1;
2030 
2031 		/* How many main ring entries are needed for this Tx?
2032 		 * any_layout => number of segments
2033 		 * default    => number of segments + 1
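		 * e.g. a 3-segment mbuf: 3 slots (any_layout), 4 slots (default)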
2034 		 */
2035 		slots = txm->nb_segs + !can_push;
2036 		need = slots - vq->vq_free_cnt;
2037 
2038 		/* A positive value means more free vring descriptors are needed */
2039 		if (unlikely(need > 0)) {
2040 			virtio_xmit_cleanup_packed(vq, need, in_order);
2041 			need = slots - vq->vq_free_cnt;
2042 			if (unlikely(need > 0)) {
2043 				PMD_TX_LOG(ERR,
2044 					   "No free tx descriptors to transmit");
2045 				break;
2046 			}
2047 		}
2048 
2049 		/* Enqueue Packet buffers */
2050 		if (can_push)
2051 			virtqueue_enqueue_xmit_packed_fast(txvq, txm, in_order);
2052 		else
2053 			virtqueue_enqueue_xmit_packed(txvq, txm, slots, 0,
2054 						      in_order);
2055 
2056 		virtio_update_packet_stats(&txvq->stats, txm);
2057 	}
2058 
2059 	txvq->stats.packets += nb_tx;
2060 
2061 	if (likely(nb_tx)) {
2062 		if (unlikely(virtqueue_kick_prepare_packed(vq))) {
2063 			virtqueue_notify(vq);
2064 			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2065 		}
2066 	}
2067 
2068 	return nb_tx;
2069 }
2070 
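/* Transmit burst on a split virtqueue */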
2071 uint16_t
2072 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
2073 {
2074 	struct virtnet_tx *txvq = tx_queue;
2075 	struct virtqueue *vq = txvq->vq;
2076 	struct virtio_hw *hw = vq->hw;
2077 	uint16_t hdr_size = hw->vtnet_hdr_size;
2078 	uint16_t nb_used, nb_tx = 0;
2079 
2080 	if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2081 		return nb_tx;
2082 
2083 	if (unlikely(nb_pkts < 1))
2084 		return nb_pkts;
2085 
2086 	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2087 	nb_used = VIRTQUEUE_NUSED(vq);
2088 
2089 	virtio_rmb(hw->weak_barriers);
2090 	if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2091 		virtio_xmit_cleanup(vq, nb_used);
2092 
2093 	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
2094 		struct rte_mbuf *txm = tx_pkts[nb_tx];
2095 		int can_push = 0, use_indirect = 0, slots, need;
2096 
2097 		/* optimize ring usage */
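		/* can_push: prepend the virtio-net header into the mbuf
		 * headroom; use_indirect: place the chain in an indirect
		 * descriptor table so only one main-ring slot is consumed.
		 */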
2098 		if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2099 		      vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2100 		    rte_mbuf_refcnt_read(txm) == 1 &&
2101 		    RTE_MBUF_DIRECT(txm) &&
2102 		    txm->nb_segs == 1 &&
2103 		    rte_pktmbuf_headroom(txm) >= hdr_size &&
2104 		    rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2105 				   __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
2106 			can_push = 1;
2107 		else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
2108 			 txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
2109 			use_indirect = 1;
2110 
2111 		/* How many main ring entries are needed for this Tx?
2112 		 * any_layout => number of segments
2113 		 * indirect   => 1
2114 		 * default    => number of segments + 1
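		 * e.g. a 3-segment mbuf: 3 slots (any_layout), 1 (indirect),
		 * 4 (default)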
2115 		 */
2116 		slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
2117 		need = slots - vq->vq_free_cnt;
2118 
2119 		/* A positive value means more free vring descriptors are needed */
2120 		if (unlikely(need > 0)) {
2121 			nb_used = VIRTQUEUE_NUSED(vq);
2122 			virtio_rmb(hw->weak_barriers);
2123 			need = RTE_MIN(need, (int)nb_used);
2124 
2125 			virtio_xmit_cleanup(vq, need);
2126 			need = slots - vq->vq_free_cnt;
2127 			if (unlikely(need > 0)) {
2128 				PMD_TX_LOG(ERR,
2129 					   "No free tx descriptors to transmit");
2130 				break;
2131 			}
2132 		}
2133 
2134 		/* Enqueue Packet buffers */
2135 		virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect,
2136 			can_push, 0);
2137 
2138 		virtio_update_packet_stats(&txvq->stats, txm);
2139 	}
2140 
2141 	txvq->stats.packets += nb_tx;
2142 
2143 	if (likely(nb_tx)) {
2144 		vq_update_avail_idx(vq);
2145 
2146 		if (unlikely(virtqueue_kick_prepare(vq))) {
2147 			virtqueue_notify(vq);
2148 			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2149 		}
2150 	}
2151 
2152 	return nb_tx;
2153 }
2154 
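/*
 * Try to reclaim up to 'need' used descriptors for the in-order Tx path.
 * Returns how many descriptors are still missing; a value <= 0 means
 * enough descriptors are now free.
 */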
2155 static __rte_always_inline int
2156 virtio_xmit_try_cleanup_inorder(struct virtqueue *vq, uint16_t need)
2157 {
2158 	uint16_t nb_used, nb_clean, nb_descs;
2159 	struct virtio_hw *hw = vq->hw;
2160 
2161 	nb_descs = vq->vq_free_cnt + need;
2162 	nb_used = VIRTQUEUE_NUSED(vq);
2163 	virtio_rmb(hw->weak_barriers);
2164 	nb_clean = RTE_MIN(need, (int)nb_used);
2165 
2166 	virtio_xmit_cleanup_inorder(vq, nb_clean);
2167 
2168 	return nb_descs - vq->vq_free_cnt;
2169 }
2170 
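/*
 * In-order transmit path for split virtqueues: mbufs whose header can be
 * pushed into the headroom are batched and enqueued together in order;
 * any other mbuf first flushes the batch and is then enqueued on its own.
 */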
2171 uint16_t
2172 virtio_xmit_pkts_inorder(void *tx_queue,
2173 			struct rte_mbuf **tx_pkts,
2174 			uint16_t nb_pkts)
2175 {
2176 	struct virtnet_tx *txvq = tx_queue;
2177 	struct virtqueue *vq = txvq->vq;
2178 	struct virtio_hw *hw = vq->hw;
2179 	uint16_t hdr_size = hw->vtnet_hdr_size;
2180 	uint16_t nb_used, nb_tx = 0, nb_inorder_pkts = 0;
2181 	struct rte_mbuf *inorder_pkts[nb_pkts];
2182 	int need;
2183 
2184 	if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2185 		return nb_tx;
2186 
2187 	if (unlikely(nb_pkts < 1))
2188 		return nb_pkts;
2189 
2190 	VIRTQUEUE_DUMP(vq);
2191 	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2192 	nb_used = VIRTQUEUE_NUSED(vq);
2193 
2194 	virtio_rmb(hw->weak_barriers);
2195 	if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2196 		virtio_xmit_cleanup_inorder(vq, nb_used);
2197 
2198 	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
2199 		struct rte_mbuf *txm = tx_pkts[nb_tx];
2200 		int slots;
2201 
2202 		/* optimize ring usage */
2203 		if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2204 		     vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2205 		     rte_mbuf_refcnt_read(txm) == 1 &&
2206 		     RTE_MBUF_DIRECT(txm) &&
2207 		     txm->nb_segs == 1 &&
2208 		     rte_pktmbuf_headroom(txm) >= hdr_size &&
2209 		     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2210 				__alignof__(struct virtio_net_hdr_mrg_rxbuf))) {
2211 			inorder_pkts[nb_inorder_pkts] = txm;
2212 			nb_inorder_pkts++;
2213 
2214 			continue;
2215 		}
2216 
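		/* This packet cannot take the batched fast path, so flush the
		 * pending in-order packets first to preserve ring order.
		 */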
2217 		if (nb_inorder_pkts) {
2218 			need = nb_inorder_pkts - vq->vq_free_cnt;
2219 			if (unlikely(need > 0)) {
2220 				need = virtio_xmit_try_cleanup_inorder(vq,
2221 								       need);
2222 				if (unlikely(need > 0)) {
2223 					PMD_TX_LOG(ERR,
2224 						"No free tx descriptors to "
2225 						"transmit");
2226 					break;
2227 				}
2228 			}
2229 			virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2230 							nb_inorder_pkts);
2231 			nb_inorder_pkts = 0;
2232 		}
2233 
2234 		slots = txm->nb_segs + 1;
2235 		need = slots - vq->vq_free_cnt;
2236 		if (unlikely(need > 0)) {
2237 			need = virtio_xmit_try_cleanup_inorder(vq, slots);
2238 
2239 			if (unlikely(need > 0)) {
2240 				PMD_TX_LOG(ERR,
2241 					"No free tx descriptors to transmit");
2242 				break;
2243 			}
2244 		}
2245 		/* Enqueue Packet buffers */
2246 		virtqueue_enqueue_xmit(txvq, txm, slots, 0, 0, 1);
2247 
2248 		virtio_update_packet_stats(&txvq->stats, txm);
2249 	}
2250 
2251 	/* Transmit all inorder packets */
2252 	if (nb_inorder_pkts) {
2253 		need = nb_inorder_pkts - vq->vq_free_cnt;
2254 		if (unlikely(need > 0)) {
2255 			need = virtio_xmit_try_cleanup_inorder(vq,
2256 								  need);
2257 			if (unlikely(need > 0)) {
2258 				PMD_TX_LOG(ERR,
2259 					"No free tx descriptors to transmit");
2260 				nb_inorder_pkts = vq->vq_free_cnt;
2261 				nb_tx -= need;
2262 			}
2263 		}
2264 
2265 		virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2266 						nb_inorder_pkts);
2267 	}
2268 
2269 	txvq->stats.packets += nb_tx;
2270 
2271 	if (likely(nb_tx)) {
2272 		vq_update_avail_idx(vq);
2273 
2274 		if (unlikely(virtqueue_kick_prepare(vq))) {
2275 			virtqueue_notify(vq);
2276 			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2277 		}
2278 	}
2279 
2280 	VIRTQUEUE_DUMP(vq);
2281 
2282 	return nb_tx;
2283 }
2284