xref: /dpdk/drivers/net/virtio/virtio_rxtx.c (revision a76290c8f1cf9c4774c23592921302a04a90bded)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4 
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <errno.h>
10 
11 #include <rte_cycles.h>
12 #include <rte_memory.h>
13 #include <rte_branch_prediction.h>
14 #include <rte_mempool.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_ether.h>
18 #include <rte_ethdev_driver.h>
19 #include <rte_prefetch.h>
20 #include <rte_string_fns.h>
21 #include <rte_errno.h>
22 #include <rte_byteorder.h>
23 #include <rte_net.h>
24 #include <rte_ip.h>
25 #include <rte_udp.h>
26 #include <rte_tcp.h>
27 
28 #include "virtio_logs.h"
29 #include "virtio_ethdev.h"
30 #include "virtio_pci.h"
31 #include "virtqueue.h"
32 #include "virtio_rxtx.h"
33 #include "virtio_rxtx_simple.h"
34 #include "virtio_ring.h"
35 
36 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
37 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
38 #else
39 #define VIRTIO_DUMP_PACKET(m, len) do { } while (0)
40 #endif
41 
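/* Report whether at least 'offset' used entries are pending on the Rx ring. */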
42 int
43 virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
44 {
45 	struct virtnet_rx *rxvq = rxq;
46 	struct virtqueue *vq = rxvq->vq;
47 
48 	return VIRTQUEUE_NUSED(vq) >= offset;
49 }
50 
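/*
 * In-order queues recycle descriptors contiguously, so freeing only needs
 * to bump the free count and advance the tail index.
 */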
51 void
52 vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx, uint16_t num)
53 {
54 	vq->vq_free_cnt += num;
55 	vq->vq_desc_tail_idx = desc_idx & (vq->vq_nentries - 1);
56 }
57 
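/*
 * Free a descriptor chain starting at desc_idx: walk the chain to find its
 * last descriptor, then append the chain to the tail of the free list.
 */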
58 void
59 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
60 {
61 	struct vring_desc *dp, *dp_tail;
62 	struct vq_desc_extra *dxp;
63 	uint16_t desc_idx_last = desc_idx;
64 
65 	dp  = &vq->vq_ring.desc[desc_idx];
66 	dxp = &vq->vq_descx[desc_idx];
67 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
68 	if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
69 		while (dp->flags & VRING_DESC_F_NEXT) {
70 			desc_idx_last = dp->next;
71 			dp = &vq->vq_ring.desc[dp->next];
72 		}
73 	}
74 	dxp->ndescs = 0;
75 
76 	/*
77 	 * We must append the existing free chain, if any, to the end of
78 	 * the newly freed chain. If the virtqueue was completely used,
79 	 * then the head would be VQ_RING_DESC_CHAIN_END.
80 	 */
81 	if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
82 		vq->vq_desc_head_idx = desc_idx;
83 	} else {
84 		dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
85 		dp_tail->next = desc_idx;
86 	}
87 
88 	vq->vq_desc_tail_idx = desc_idx_last;
89 	dp->next = VQ_RING_DESC_CHAIN_END;
90 }
91 
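/* Return the descriptors held by buffer 'id' to the free list of a packed ring. */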
92 static void
93 vq_ring_free_id_packed(struct virtqueue *vq, uint16_t id)
94 {
95 	struct vq_desc_extra *dxp;
96 
97 	dxp = &vq->vq_descx[id];
98 	vq->vq_free_cnt += dxp->ndescs;
99 
100 	if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END)
101 		vq->vq_desc_head_idx = id;
102 	else
103 		vq->vq_descx[vq->vq_desc_tail_idx].next = id;
104 
105 	vq->vq_desc_tail_idx = id;
106 	dxp->next = VQ_RING_DESC_CHAIN_END;
107 }
108 
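/*
 * Dequeue up to 'num' used buffers from a packed ring. Returns how many
 * mbufs were placed in rx_pkts[], with their lengths in len[].
 */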
109 static uint16_t
110 virtqueue_dequeue_burst_rx_packed(struct virtqueue *vq,
111 				  struct rte_mbuf **rx_pkts,
112 				  uint32_t *len,
113 				  uint16_t num)
114 {
115 	struct rte_mbuf *cookie;
116 	uint16_t used_idx;
117 	uint16_t id;
118 	struct vring_packed_desc *desc;
119 	uint16_t i;
120 
121 	desc = vq->ring_packed.desc_packed;
122 
123 	for (i = 0; i < num; i++) {
124 		used_idx = vq->vq_used_cons_idx;
125 		if (!desc_is_used(&desc[used_idx], vq))
126 			return i;
127 		len[i] = desc[used_idx].len;
128 		id = desc[used_idx].id;
129 		cookie = (struct rte_mbuf *)vq->vq_descx[id].cookie;
130 		if (unlikely(cookie == NULL)) {
131 			PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
132 				vq->vq_used_cons_idx);
133 			break;
134 		}
135 		rte_prefetch0(cookie);
136 		rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
137 		rx_pkts[i] = cookie;
138 
139 		vq->vq_free_cnt++;
140 		vq->vq_used_cons_idx++;
141 		if (vq->vq_used_cons_idx >= vq->vq_nentries) {
142 			vq->vq_used_cons_idx -= vq->vq_nentries;
143 			vq->used_wrap_counter ^= 1;
144 		}
145 	}
146 
147 	return i;
148 }
149 
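/*
 * Dequeue up to 'num' used buffers from a split ring and free their
 * descriptor chains.
 */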
150 static uint16_t
151 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
152 			   uint32_t *len, uint16_t num)
153 {
154 	struct vring_used_elem *uep;
155 	struct rte_mbuf *cookie;
156 	uint16_t used_idx, desc_idx;
157 	uint16_t i;
158 
159 	/* Caller does the check */
160 	for (i = 0; i < num ; i++) {
161 		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
162 		uep = &vq->vq_ring.used->ring[used_idx];
163 		desc_idx = (uint16_t) uep->id;
164 		len[i] = uep->len;
165 		cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
166 
167 		if (unlikely(cookie == NULL)) {
168 			PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
169 				vq->vq_used_cons_idx);
170 			break;
171 		}
172 
173 		rte_prefetch0(cookie);
174 		rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
175 		rx_pkts[i]  = cookie;
176 		vq->vq_used_cons_idx++;
177 		vq_ring_free_chain(vq, desc_idx);
178 		vq->vq_descx[desc_idx].cookie = NULL;
179 	}
180 
181 	return i;
182 }
183 
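/*
 * Dequeue used buffers from an in-order split ring, where the descriptor
 * index always equals the used-ring index.
 */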
184 static uint16_t
185 virtqueue_dequeue_rx_inorder(struct virtqueue *vq,
186 			struct rte_mbuf **rx_pkts,
187 			uint32_t *len,
188 			uint16_t num)
189 {
190 	struct vring_used_elem *uep;
191 	struct rte_mbuf *cookie;
192 	uint16_t used_idx = 0;
193 	uint16_t i;
194 
195 	if (unlikely(num == 0))
196 		return 0;
197 
198 	for (i = 0; i < num; i++) {
199 		used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
200 		/* Desc idx same as used idx */
201 		uep = &vq->vq_ring.used->ring[used_idx];
202 		len[i] = uep->len;
203 		cookie = (struct rte_mbuf *)vq->vq_descx[used_idx].cookie;
204 
205 		if (unlikely(cookie == NULL)) {
206 			PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
207 				vq->vq_used_cons_idx);
208 			break;
209 		}
210 
211 		rte_prefetch0(cookie);
212 		rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
213 		rx_pkts[i]  = cookie;
214 		vq->vq_used_cons_idx++;
215 		vq->vq_descx[used_idx].cookie = NULL;
216 	}
217 
218 	vq_ring_free_inorder(vq, used_idx, i);
219 	return i;
220 }
221 
222 #ifndef DEFAULT_TX_FREE_THRESH
223 #define DEFAULT_TX_FREE_THRESH 32
224 #endif
225 
226 /* Cleanup from completed transmits. */
227 static void
228 virtio_xmit_cleanup_packed(struct virtqueue *vq, int num)
229 {
230 	uint16_t used_idx, id;
231 	uint16_t size = vq->vq_nentries;
232 	struct vring_packed_desc *desc = vq->ring_packed.desc_packed;
233 	struct vq_desc_extra *dxp;
234 
235 	used_idx = vq->vq_used_cons_idx;
236 	while (num-- && desc_is_used(&desc[used_idx], vq)) {
237 		used_idx = vq->vq_used_cons_idx;
238 		id = desc[used_idx].id;
239 		dxp = &vq->vq_descx[id];
240 		vq->vq_used_cons_idx += dxp->ndescs;
241 		if (vq->vq_used_cons_idx >= size) {
242 			vq->vq_used_cons_idx -= size;
243 			vq->used_wrap_counter ^= 1;
244 		}
245 		vq_ring_free_id_packed(vq, id);
246 		if (dxp->cookie != NULL) {
247 			rte_pktmbuf_free(dxp->cookie);
248 			dxp->cookie = NULL;
249 		}
250 		used_idx = vq->vq_used_cons_idx;
251 	}
252 }
253 
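/* Cleanup from completed split-ring transmits. */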
254 static void
255 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
256 {
257 	uint16_t i, used_idx, desc_idx;
258 	for (i = 0; i < num; i++) {
259 		struct vring_used_elem *uep;
260 		struct vq_desc_extra *dxp;
261 
262 		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
263 		uep = &vq->vq_ring.used->ring[used_idx];
264 
265 		desc_idx = (uint16_t) uep->id;
266 		dxp = &vq->vq_descx[desc_idx];
267 		vq->vq_used_cons_idx++;
268 		vq_ring_free_chain(vq, desc_idx);
269 
270 		if (dxp->cookie != NULL) {
271 			rte_pktmbuf_free(dxp->cookie);
272 			dxp->cookie = NULL;
273 		}
274 	}
275 }
276 
277 /* Cleanup from completed inorder transmits. */
278 static void
279 virtio_xmit_cleanup_inorder(struct virtqueue *vq, uint16_t num)
280 {
281 	uint16_t i, used_idx, desc_idx = 0, last_idx;
282 	int16_t free_cnt = 0;
283 	struct vq_desc_extra *dxp = NULL;
284 
285 	if (unlikely(num == 0))
286 		return;
287 
288 	for (i = 0; i < num; i++) {
289 		struct vring_used_elem *uep;
290 
291 		used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
292 		uep = &vq->vq_ring.used->ring[used_idx];
293 		desc_idx = (uint16_t)uep->id;
294 
295 		dxp = &vq->vq_descx[desc_idx];
296 		vq->vq_used_cons_idx++;
297 
298 		if (dxp->cookie != NULL) {
299 			rte_pktmbuf_free(dxp->cookie);
300 			dxp->cookie = NULL;
301 		}
302 	}
303 
304 	last_idx = desc_idx + dxp->ndescs - 1;
305 	free_cnt = last_idx - vq->vq_desc_tail_idx;
306 	if (free_cnt <= 0)
307 		free_cnt += vq->vq_nentries;
308 
309 	vq_ring_free_inorder(vq, last_idx, free_cnt);
310 }
311 
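/*
 * Refill an in-order Rx ring with 'num' buffers, one descriptor per mbuf.
 * Room for the virtio-net header is left in the mbuf headroom.
 */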
312 static inline int
313 virtqueue_enqueue_refill_inorder(struct virtqueue *vq,
314 			struct rte_mbuf **cookies,
315 			uint16_t num)
316 {
317 	struct vq_desc_extra *dxp;
318 	struct virtio_hw *hw = vq->hw;
319 	struct vring_desc *start_dp;
320 	uint16_t head_idx, idx, i = 0;
321 
322 	if (unlikely(vq->vq_free_cnt == 0))
323 		return -ENOSPC;
324 	if (unlikely(vq->vq_free_cnt < num))
325 		return -EMSGSIZE;
326 
327 	head_idx = vq->vq_desc_head_idx & (vq->vq_nentries - 1);
328 	start_dp = vq->vq_ring.desc;
329 
330 	while (i < num) {
331 		idx = head_idx & (vq->vq_nentries - 1);
332 		dxp = &vq->vq_descx[idx];
333 		dxp->cookie = (void *)cookies[i];
334 		dxp->ndescs = 1;
335 
336 		start_dp[idx].addr =
337 				VIRTIO_MBUF_ADDR(cookies[i], vq) +
338 				RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
339 		start_dp[idx].len =
340 				cookies[i]->buf_len -
341 				RTE_PKTMBUF_HEADROOM +
342 				hw->vtnet_hdr_size;
343 		start_dp[idx].flags = VRING_DESC_F_WRITE;
344 
345 		vq_update_avail_ring(vq, idx);
346 		head_idx++;
347 		i++;
348 	}
349 
350 	vq->vq_desc_head_idx += num;
351 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
352 	return 0;
353 }
354 
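/*
 * Post a single receive buffer on a split ring. The virtio-net header lands
 * in the mbuf headroom, just before the packet data.
 */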
355 static inline int
356 virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
357 {
358 	struct vq_desc_extra *dxp;
359 	struct virtio_hw *hw = vq->hw;
360 	struct vring_desc *start_dp;
361 	uint16_t needed = 1;
362 	uint16_t head_idx, idx;
363 
364 	if (unlikely(vq->vq_free_cnt == 0))
365 		return -ENOSPC;
366 	if (unlikely(vq->vq_free_cnt < needed))
367 		return -EMSGSIZE;
368 
369 	head_idx = vq->vq_desc_head_idx;
370 	if (unlikely(head_idx >= vq->vq_nentries))
371 		return -EFAULT;
372 
373 	idx = head_idx;
374 	dxp = &vq->vq_descx[idx];
375 	dxp->cookie = (void *)cookie;
376 	dxp->ndescs = needed;
377 
378 	start_dp = vq->vq_ring.desc;
379 	start_dp[idx].addr =
380 		VIRTIO_MBUF_ADDR(cookie, vq) +
381 		RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
382 	start_dp[idx].len =
383 		cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
384 	start_dp[idx].flags = VRING_DESC_F_WRITE;
385 	idx = start_dp[idx].next;
386 	vq->vq_desc_head_idx = idx;
387 	if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
388 		vq->vq_desc_tail_idx = idx;
389 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
390 	vq_update_avail_ring(vq, head_idx);
391 
392 	return 0;
393 }
394 
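/*
 * Post 'num' receive buffers on a packed ring, toggling the wrap counter
 * and the avail/used flags whenever the avail index wraps around.
 */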
395 static inline int
396 virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
397 				     struct rte_mbuf **cookie, uint16_t num)
398 {
399 	struct vring_packed_desc *start_dp = vq->ring_packed.desc_packed;
400 	uint16_t flags = VRING_DESC_F_WRITE | vq->avail_used_flags;
401 	struct virtio_hw *hw = vq->hw;
402 	struct vq_desc_extra *dxp;
403 	uint16_t idx;
404 	int i;
405 
406 	if (unlikely(vq->vq_free_cnt == 0))
407 		return -ENOSPC;
408 	if (unlikely(vq->vq_free_cnt < num))
409 		return -EMSGSIZE;
410 
411 	for (i = 0; i < num; i++) {
412 		idx = vq->vq_avail_idx;
413 		dxp = &vq->vq_descx[idx];
414 		dxp->cookie = (void *)cookie[i];
415 		dxp->ndescs = 1;
416 
417 		start_dp[idx].addr = VIRTIO_MBUF_ADDR(cookie[i], vq) +
418 				RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
419 		start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM
420 					+ hw->vtnet_hdr_size;
421 
422 		vq->vq_desc_head_idx = dxp->next;
423 		if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
424 			vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
425 		rte_smp_wmb();
426 		start_dp[idx].flags = flags;
427 		if (++vq->vq_avail_idx >= vq->vq_nentries) {
428 			vq->vq_avail_idx -= vq->vq_nentries;
429 			vq->avail_wrap_counter ^= 1;
430 			vq->avail_used_flags =
431 				VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
432 				VRING_DESC_F_USED(!vq->avail_wrap_counter);
433 			flags = VRING_DESC_F_WRITE | vq->avail_used_flags;
434 		}
435 	}
436 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
437 	return 0;
438 }
439 
440 /* When doing TSO, the IP length is not included in the pseudo-header
441  * checksum of the packet given to the PMD, but virtio expects it to be.
442  * Add it back into the TCP pseudo-header checksum here.
443  */
444 static void
445 virtio_tso_fix_cksum(struct rte_mbuf *m)
446 {
447 	/* common case: header is not fragmented */
448 	if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
449 			m->l4_len)) {
450 		struct ipv4_hdr *iph;
451 		struct ipv6_hdr *ip6h;
452 		struct tcp_hdr *th;
453 		uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
454 		uint32_t tmp;
455 
456 		iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
457 		th = RTE_PTR_ADD(iph, m->l3_len);
458 		if ((iph->version_ihl >> 4) == 4) {
459 			iph->hdr_checksum = 0;
460 			iph->hdr_checksum = rte_ipv4_cksum(iph);
461 			ip_len = iph->total_length;
462 			ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
463 				m->l3_len);
464 		} else {
465 			ip6h = (struct ipv6_hdr *)iph;
466 			ip_paylen = ip6h->payload_len;
467 		}
468 
469 		/* add the IP payload length into the pseudo-header checksum */
470 		prev_cksum = th->cksum;
471 		tmp = prev_cksum;
472 		tmp += ip_paylen;
473 		tmp = (tmp & 0xffff) + (tmp >> 16);
474 		new_cksum = tmp;
475 
476 		/* replace it in the packet */
477 		th->cksum = new_cksum;
478 	}
479 }
480 
481 
482 /* avoid write operations when not necessary, to lessen cache issues */
483 #define ASSIGN_UNLESS_EQUAL(var, val) do {	\
484 	if ((var) != (val))			\
485 		(var) = (val);			\
486 } while (0)
487 
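/*
 * Fill the virtio-net header from the mbuf offload flags: checksum offload
 * request and, for TSO, the GSO type and size. When no offload is requested,
 * the relevant header fields are cleared instead.
 */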
488 static inline void
489 virtqueue_xmit_offload(struct virtio_net_hdr *hdr,
490 			struct rte_mbuf *cookie,
491 			bool offload)
492 {
493 	if (offload) {
494 		if (cookie->ol_flags & PKT_TX_TCP_SEG)
495 			cookie->ol_flags |= PKT_TX_TCP_CKSUM;
496 
497 		switch (cookie->ol_flags & PKT_TX_L4_MASK) {
498 		case PKT_TX_UDP_CKSUM:
499 			hdr->csum_start = cookie->l2_len + cookie->l3_len;
500 			hdr->csum_offset = offsetof(struct udp_hdr,
501 				dgram_cksum);
502 			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
503 			break;
504 
505 		case PKT_TX_TCP_CKSUM:
506 			hdr->csum_start = cookie->l2_len + cookie->l3_len;
507 			hdr->csum_offset = offsetof(struct tcp_hdr, cksum);
508 			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
509 			break;
510 
511 		default:
512 			ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
513 			ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
514 			ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
515 			break;
516 		}
517 
518 		/* TCP Segmentation Offload */
519 		if (cookie->ol_flags & PKT_TX_TCP_SEG) {
520 			virtio_tso_fix_cksum(cookie);
521 			hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
522 				VIRTIO_NET_HDR_GSO_TCPV6 :
523 				VIRTIO_NET_HDR_GSO_TCPV4;
524 			hdr->gso_size = cookie->tso_segsz;
525 			hdr->hdr_len =
526 				cookie->l2_len +
527 				cookie->l3_len +
528 				cookie->l4_len;
529 		} else {
530 			ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
531 			ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
532 			ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
533 		}
534 	}
535 }
536 
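/*
 * Enqueue single-segment packets on an in-order Tx ring, one descriptor per
 * packet, with the virtio-net header prepended into each mbuf's headroom.
 */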
537 static inline void
538 virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
539 			struct rte_mbuf **cookies,
540 			uint16_t num)
541 {
542 	struct vq_desc_extra *dxp;
543 	struct virtqueue *vq = txvq->vq;
544 	struct vring_desc *start_dp;
545 	struct virtio_net_hdr *hdr;
546 	uint16_t idx;
547 	uint16_t head_size = vq->hw->vtnet_hdr_size;
548 	uint16_t i = 0;
549 
550 	idx = vq->vq_desc_head_idx;
551 	start_dp = vq->vq_ring.desc;
552 
553 	while (i < num) {
554 		idx = idx & (vq->vq_nentries - 1);
555 		dxp = &vq->vq_descx[idx];
556 		dxp->cookie = (void *)cookies[i];
557 		dxp->ndescs = 1;
558 
559 		hdr = (struct virtio_net_hdr *)
560 			rte_pktmbuf_prepend(cookies[i], head_size);
561 		cookies[i]->pkt_len -= head_size;
562 
563 		/* if offload is disabled, the header is not zeroed below, do it now */
564 		if (!vq->hw->has_tx_offload) {
565 			ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
566 			ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
567 			ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
568 			ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
569 			ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
570 			ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
571 		}
572 
573 		virtqueue_xmit_offload(hdr, cookies[i],
574 				vq->hw->has_tx_offload);
575 
576 		start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookies[i], vq);
577 		start_dp[idx].len   = cookies[i]->data_len;
578 		start_dp[idx].flags = 0;
579 
580 		vq_update_avail_ring(vq, idx);
581 
582 		idx++;
583 		i++;
584 	}
585 
586 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
587 	vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
588 }
589 
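/*
 * Enqueue one packet on a packed Tx ring. With can_push the header is
 * prepended into the mbuf headroom; otherwise the first descriptor points to
 * the header kept in the reserved region. The head descriptor's flags are
 * written last, after a write barrier, to publish the whole chain at once.
 */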
590 static inline void
591 virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
592 			      uint16_t needed, int can_push)
593 {
594 	struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
595 	struct vq_desc_extra *dxp;
596 	struct virtqueue *vq = txvq->vq;
597 	struct vring_packed_desc *start_dp, *head_dp;
598 	uint16_t idx, id, head_idx, head_flags;
599 	uint16_t head_size = vq->hw->vtnet_hdr_size;
600 	struct virtio_net_hdr *hdr;
601 	uint16_t prev;
602 
603 	id = vq->vq_desc_head_idx;
604 
605 	dxp = &vq->vq_descx[id];
606 	dxp->ndescs = needed;
607 	dxp->cookie = cookie;
608 
609 	head_idx = vq->vq_avail_idx;
610 	idx = head_idx;
611 	prev = head_idx;
612 	start_dp = vq->ring_packed.desc_packed;
613 
614 	head_dp = &vq->ring_packed.desc_packed[idx];
615 	head_flags = cookie->next ? VRING_DESC_F_NEXT : 0;
616 	head_flags |= vq->avail_used_flags;
617 
618 	if (can_push) {
619 		/* prepend cannot fail, checked by caller */
620 		hdr = (struct virtio_net_hdr *)
621 			rte_pktmbuf_prepend(cookie, head_size);
622 		/* rte_pktmbuf_prepend() counts the hdr size in the pkt length,
623 		 * which is wrong here; the subtraction below restores the correct size.
624 		 */
625 		cookie->pkt_len -= head_size;
626 
627 		/* if offload is disabled, the header is not zeroed below, do it now */
628 		if (!vq->hw->has_tx_offload) {
629 			ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
630 			ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
631 			ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
632 			ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
633 			ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
634 			ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
635 		}
636 	} else {
637 		/* setup first tx ring slot to point to header
638 		 * stored in reserved region.
639 		 */
640 		start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
641 			RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
642 		start_dp[idx].len   = vq->hw->vtnet_hdr_size;
643 		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
644 		idx++;
645 		if (idx >= vq->vq_nentries) {
646 			idx -= vq->vq_nentries;
647 			vq->avail_wrap_counter ^= 1;
648 			vq->avail_used_flags =
649 				VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
650 				VRING_DESC_F_USED(!vq->avail_wrap_counter);
651 		}
652 	}
653 
654 	virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
655 
656 	do {
657 		uint16_t flags;
658 
659 		start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
660 		start_dp[idx].len  = cookie->data_len;
661 		if (likely(idx != head_idx)) {
662 			flags = cookie->next ? VRING_DESC_F_NEXT : 0;
663 			flags |= vq->avail_used_flags;
664 			start_dp[idx].flags = flags;
665 		}
666 		prev = idx;
667 		idx++;
668 		if (idx >= vq->vq_nentries) {
669 			idx -= vq->vq_nentries;
670 			vq->avail_wrap_counter ^= 1;
671 			vq->avail_used_flags =
672 				VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
673 				VRING_DESC_F_USED(!vq->avail_wrap_counter);
674 		}
675 	} while ((cookie = cookie->next) != NULL);
676 
677 	start_dp[prev].id = id;
678 
679 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
680 
681 	vq->vq_desc_head_idx = dxp->next;
682 	if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
683 		vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
684 
685 	vq->vq_avail_idx = idx;
686 
687 	rte_smp_wmb();
688 	head_dp->flags = head_flags;
689 }
690 
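/*
 * Enqueue one packet on a split Tx ring. Depending on the caller's choice,
 * the virtio-net header is pushed into the mbuf headroom, placed in an
 * indirect descriptor list, or given its own descriptor chained in front of
 * the data descriptors.
 */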
691 static inline void
692 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
693 			uint16_t needed, int use_indirect, int can_push,
694 			int in_order)
695 {
696 	struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
697 	struct vq_desc_extra *dxp;
698 	struct virtqueue *vq = txvq->vq;
699 	struct vring_desc *start_dp;
700 	uint16_t seg_num = cookie->nb_segs;
701 	uint16_t head_idx, idx;
702 	uint16_t head_size = vq->hw->vtnet_hdr_size;
703 	struct virtio_net_hdr *hdr;
704 
705 	head_idx = vq->vq_desc_head_idx;
706 	idx = head_idx;
707 	dxp = &vq->vq_descx[idx];
708 	dxp->cookie = (void *)cookie;
709 	dxp->ndescs = needed;
710 
711 	start_dp = vq->vq_ring.desc;
712 
713 	if (can_push) {
714 		/* prepend cannot fail, checked by caller */
715 		hdr = (struct virtio_net_hdr *)
716 			rte_pktmbuf_prepend(cookie, head_size);
717 		/* rte_pktmbuf_prepend() counts the hdr size in the pkt length,
718 		 * which is wrong here; the subtraction below restores the correct size.
719 		 */
720 		cookie->pkt_len -= head_size;
721 
722 		/* if offload is disabled, the header is not zeroed below, do it now */
723 		if (!vq->hw->has_tx_offload) {
724 			ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
725 			ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
726 			ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
727 			ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
728 			ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
729 			ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
730 		}
731 	} else if (use_indirect) {
732 		/* setup tx ring slot to point to indirect
733 		 * descriptor list stored in reserved region.
734 		 *
735 		 * the first slot in indirect ring is already preset
736 		 * to point to the header in reserved region
737 		 */
738 		start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
739 			RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
740 		start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
741 		start_dp[idx].flags = VRING_DESC_F_INDIRECT;
742 		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
743 
744 		/* loop below will fill in rest of the indirect elements */
745 		start_dp = txr[idx].tx_indir;
746 		idx = 1;
747 	} else {
748 		/* setup first tx ring slot to point to header
749 		 * stored in reserved region.
750 		 */
751 		start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
752 			RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
753 		start_dp[idx].len   = vq->hw->vtnet_hdr_size;
754 		start_dp[idx].flags = VRING_DESC_F_NEXT;
755 		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
756 
757 		idx = start_dp[idx].next;
758 	}
759 
760 	virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
761 
762 	do {
763 		start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
764 		start_dp[idx].len   = cookie->data_len;
765 		start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
766 		idx = start_dp[idx].next;
767 	} while ((cookie = cookie->next) != NULL);
768 
769 	if (use_indirect)
770 		idx = vq->vq_ring.desc[head_idx].next;
771 
772 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
773 
774 	vq->vq_desc_head_idx = idx;
775 	vq_update_avail_ring(vq, head_idx);
776 
777 	if (!in_order) {
778 		if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
779 			vq->vq_desc_tail_idx = idx;
780 	}
781 }
782 
783 void
784 virtio_dev_cq_start(struct rte_eth_dev *dev)
785 {
786 	struct virtio_hw *hw = dev->data->dev_private;
787 
788 	if (hw->cvq && hw->cvq->vq) {
789 		rte_spinlock_init(&hw->cvq->lock);
790 		VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
791 	}
792 }
793 
794 int
795 virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
796 			uint16_t queue_idx,
797 			uint16_t nb_desc,
798 			unsigned int socket_id __rte_unused,
799 			const struct rte_eth_rxconf *rx_conf __rte_unused,
800 			struct rte_mempool *mp)
801 {
802 	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
803 	struct virtio_hw *hw = dev->data->dev_private;
804 	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
805 	struct virtnet_rx *rxvq;
806 
807 	PMD_INIT_FUNC_TRACE();
808 
809 	if (nb_desc == 0 || nb_desc > vq->vq_nentries)
810 		nb_desc = vq->vq_nentries;
811 	vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
812 
813 	rxvq = &vq->rxq;
814 	rxvq->queue_id = queue_idx;
815 	rxvq->mpool = mp;
816 	if (rxvq->mpool == NULL) {
817 		rte_exit(EXIT_FAILURE,
818 			"Cannot allocate mbufs for rx virtqueue");
819 	}
820 
821 	dev->data->rx_queues[queue_idx] = rxvq;
822 
823 	return 0;
824 }
825 
826 int
827 virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
828 {
829 	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
830 	struct virtio_hw *hw = dev->data->dev_private;
831 	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
832 	struct virtnet_rx *rxvq = &vq->rxq;
833 	struct rte_mbuf *m;
834 	uint16_t desc_idx;
835 	int error, nbufs, i;
836 
837 	PMD_INIT_FUNC_TRACE();
838 
839 	/* Allocate blank mbufs for each rx descriptor */
840 	nbufs = 0;
841 
842 	if (hw->use_simple_rx) {
843 		for (desc_idx = 0; desc_idx < vq->vq_nentries;
844 		     desc_idx++) {
845 			vq->vq_ring.avail->ring[desc_idx] = desc_idx;
846 			vq->vq_ring.desc[desc_idx].flags =
847 				VRING_DESC_F_WRITE;
848 		}
849 
850 		virtio_rxq_vec_setup(rxvq);
851 	}
852 
853 	memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
854 	for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
855 	     desc_idx++) {
856 		vq->sw_ring[vq->vq_nentries + desc_idx] =
857 			&rxvq->fake_mbuf;
858 	}
859 
860 	if (hw->use_simple_rx) {
861 		while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
862 			virtio_rxq_rearm_vec(rxvq);
863 			nbufs += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
864 		}
865 	} else if (hw->use_inorder_rx) {
866 		if (!virtqueue_full(vq)) {
867 			uint16_t free_cnt = vq->vq_free_cnt;
868 			struct rte_mbuf *pkts[free_cnt];
869 
870 			if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, pkts,
871 				free_cnt)) {
872 				error = virtqueue_enqueue_refill_inorder(vq,
873 						pkts,
874 						free_cnt);
875 				if (unlikely(error)) {
876 					for (i = 0; i < free_cnt; i++)
877 						rte_pktmbuf_free(pkts[i]);
878 				}
879 			}
880 
881 			nbufs += free_cnt;
882 			vq_update_avail_idx(vq);
883 		}
884 	} else {
885 		while (!virtqueue_full(vq)) {
886 			m = rte_mbuf_raw_alloc(rxvq->mpool);
887 			if (m == NULL)
888 				break;
889 
890 			/* Enqueue allocated buffers */
891 			if (vtpci_packed_queue(vq->hw))
892 				error = virtqueue_enqueue_recv_refill_packed(vq,
893 						&m, 1);
894 			else
895 				error = virtqueue_enqueue_recv_refill(vq, m);
896 			if (error) {
897 				rte_pktmbuf_free(m);
898 				break;
899 			}
900 			nbufs++;
901 		}
902 
903 		if (!vtpci_packed_queue(vq->hw))
904 			vq_update_avail_idx(vq);
905 	}
906 
907 	PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
908 
909 	VIRTQUEUE_DUMP(vq);
910 
911 	return 0;
912 }
913 
914 /*
915  * struct rte_eth_dev *dev: Used to update dev
916  * uint16_t nb_desc: Defaults to values read from config space
917  * unsigned int socket_id: Used to allocate memzone
918  * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
919  * uint16_t queue_idx: Just used as an index in dev txq list
920  */
921 int
922 virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
923 			uint16_t queue_idx,
924 			uint16_t nb_desc,
925 			unsigned int socket_id __rte_unused,
926 			const struct rte_eth_txconf *tx_conf)
927 {
928 	uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
929 	struct virtio_hw *hw = dev->data->dev_private;
930 	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
931 	struct virtnet_tx *txvq;
932 	uint16_t tx_free_thresh;
933 
934 	PMD_INIT_FUNC_TRACE();
935 
936 	if (nb_desc == 0 || nb_desc > vq->vq_nentries)
937 		nb_desc = vq->vq_nentries;
938 	vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
939 
940 	txvq = &vq->txq;
941 	txvq->queue_id = queue_idx;
942 
943 	tx_free_thresh = tx_conf->tx_free_thresh;
944 	if (tx_free_thresh == 0)
945 		tx_free_thresh =
946 			RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
947 
948 	if (tx_free_thresh >= (vq->vq_nentries - 3)) {
949 		RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
950 			"number of TX entries minus 3 (%u)."
951 			" (tx_free_thresh=%u port=%u queue=%u)\n",
952 			vq->vq_nentries - 3,
953 			tx_free_thresh, dev->data->port_id, queue_idx);
954 		return -EINVAL;
955 	}
956 
957 	vq->vq_free_thresh = tx_free_thresh;
958 
959 	dev->data->tx_queues[queue_idx] = txvq;
960 	return 0;
961 }
962 
963 int
964 virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
965 				uint16_t queue_idx)
966 {
967 	uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
968 	struct virtio_hw *hw = dev->data->dev_private;
969 	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
970 
971 	PMD_INIT_FUNC_TRACE();
972 
973 	if (!vtpci_packed_queue(hw)) {
974 		if (hw->use_inorder_tx)
975 			vq->vq_ring.desc[vq->vq_nentries - 1].next = 0;
976 	}
977 
978 	VIRTQUEUE_DUMP(vq);
979 
980 	return 0;
981 }
982 
983 static void
984 virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
985 {
986 	int error;
987 	/*
988 	 * Requeue the discarded mbuf. This should always be
989 	 * successful since it was just dequeued.
990 	 */
991 	if (vtpci_packed_queue(vq->hw))
992 		error = virtqueue_enqueue_recv_refill_packed(vq, &m, 1);
993 	else
994 		error = virtqueue_enqueue_recv_refill(vq, m);
995 
996 	if (unlikely(error)) {
997 		RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
998 		rte_pktmbuf_free(m);
999 	}
1000 }
1001 
1002 static void
1003 virtio_discard_rxbuf_inorder(struct virtqueue *vq, struct rte_mbuf *m)
1004 {
1005 	int error;
1006 
1007 	error = virtqueue_enqueue_refill_inorder(vq, &m, 1);
1008 	if (unlikely(error)) {
1009 		RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
1010 		rte_pktmbuf_free(m);
1011 	}
1012 }
1013 
1014 static void
1015 virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
1016 {
1017 	uint32_t s = mbuf->pkt_len;
1018 	struct ether_addr *ea;
1019 
1020 	stats->bytes += s;
1021 
1022 	if (s == 64) {
1023 		stats->size_bins[1]++;
1024 	} else if (s > 64 && s < 1024) {
1025 		uint32_t bin;
1026 
1027 		/* count leading zeros to find the offset into the correct bin */
1028 		bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
1029 		stats->size_bins[bin]++;
1030 	} else {
1031 		if (s < 64)
1032 			stats->size_bins[0]++;
1033 		else if (s < 1519)
1034 			stats->size_bins[6]++;
1035 		else if (s >= 1519)
1036 			stats->size_bins[7]++;
1037 	}
1038 
1039 	ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *);
1040 	if (is_multicast_ether_addr(ea)) {
1041 		if (is_broadcast_ether_addr(ea))
1042 			stats->broadcast++;
1043 		else
1044 			stats->multicast++;
1045 	}
1046 }
1047 
1048 static inline void
1049 virtio_rx_stats_updated(struct virtnet_rx *rxvq, struct rte_mbuf *m)
1050 {
1051 	VIRTIO_DUMP_PACKET(m, m->data_len);
1052 
1053 	virtio_update_packet_stats(&rxvq->stats, m);
1054 }
1055 
1056 /* Optionally fill offload information in structure */
1057 static int
1058 virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
1059 {
1060 	struct rte_net_hdr_lens hdr_lens;
1061 	uint32_t hdrlen, ptype;
1062 	int l4_supported = 0;
1063 
1064 	/* nothing to do */
1065 	if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
1066 		return 0;
1067 
1068 	m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
1069 
1070 	ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
1071 	m->packet_type = ptype;
1072 	if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
1073 	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
1074 	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
1075 		l4_supported = 1;
1076 
1077 	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
1078 		hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
1079 		if (hdr->csum_start <= hdrlen && l4_supported) {
1080 			m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
1081 		} else {
1082 			/* Unknown proto or tunnel, do sw cksum. We can assume
1083 			 * the cksum field is in the first segment since the
1084 			 * buffers we provided to the host are large enough.
1085 			 * In case of SCTP, this will be wrong since it's a CRC
1086 			 * but there's nothing we can do.
1087 			 */
1088 			uint16_t csum = 0, off;
1089 
1090 			rte_raw_cksum_mbuf(m, hdr->csum_start,
1091 				rte_pktmbuf_pkt_len(m) - hdr->csum_start,
1092 				&csum);
1093 			if (likely(csum != 0xffff))
1094 				csum = ~csum;
1095 			off = hdr->csum_offset + hdr->csum_start;
1096 			if (rte_pktmbuf_data_len(m) >= off + 1)
1097 				*rte_pktmbuf_mtod_offset(m, uint16_t *,
1098 					off) = csum;
1099 		}
1100 	} else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
1101 		m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
1102 	}
1103 
1104 	/* GSO request, save required information in mbuf */
1105 	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
1106 		/* Check unsupported modes */
1107 		if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
1108 		    (hdr->gso_size == 0)) {
1109 			return -EINVAL;
1110 		}
1111 
1112 		/* Update mss length in mbuf */
1113 		m->tso_segsz = hdr->gso_size;
1114 		switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
1115 			case VIRTIO_NET_HDR_GSO_TCPV4:
1116 			case VIRTIO_NET_HDR_GSO_TCPV6:
1117 				m->ol_flags |= PKT_RX_LRO | \
1118 					PKT_RX_L4_CKSUM_NONE;
1119 				break;
1120 			default:
1121 				return -EINVAL;
1122 		}
1123 	}
1124 
1125 	return 0;
1126 }
1127 
1128 #define VIRTIO_MBUF_BURST_SZ 64
1129 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
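/*
 * Receive burst for split rings without mergeable buffers: dequeue used
 * buffers, strip the virtio-net header, apply Rx offload info and refill
 * the ring with fresh mbufs.
 */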
1130 uint16_t
1131 virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
1132 {
1133 	struct virtnet_rx *rxvq = rx_queue;
1134 	struct virtqueue *vq = rxvq->vq;
1135 	struct virtio_hw *hw = vq->hw;
1136 	struct rte_mbuf *rxm, *new_mbuf;
1137 	uint16_t nb_used, num, nb_rx;
1138 	uint32_t len[VIRTIO_MBUF_BURST_SZ];
1139 	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1140 	int error;
1141 	uint32_t i, nb_enqueued;
1142 	uint32_t hdr_size;
1143 	struct virtio_net_hdr *hdr;
1144 
1145 	nb_rx = 0;
1146 	if (unlikely(hw->started == 0))
1147 		return nb_rx;
1148 
1149 	nb_used = VIRTQUEUE_NUSED(vq);
1150 
1151 	virtio_rmb();
1152 
1153 	num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1154 	if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1155 		num = VIRTIO_MBUF_BURST_SZ;
1156 	if (likely(num > DESC_PER_CACHELINE))
1157 		num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1158 
1159 	num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1160 	PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
1161 
1162 	nb_enqueued = 0;
1163 	hdr_size = hw->vtnet_hdr_size;
1164 
1165 	for (i = 0; i < num ; i++) {
1166 		rxm = rcv_pkts[i];
1167 
1168 		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1169 
1170 		if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1171 			PMD_RX_LOG(ERR, "Packet drop");
1172 			nb_enqueued++;
1173 			virtio_discard_rxbuf(vq, rxm);
1174 			rxvq->stats.errors++;
1175 			continue;
1176 		}
1177 
1178 		rxm->port = rxvq->port_id;
1179 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
1180 		rxm->ol_flags = 0;
1181 		rxm->vlan_tci = 0;
1182 
1183 		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1184 		rxm->data_len = (uint16_t)(len[i] - hdr_size);
1185 
1186 		hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1187 			RTE_PKTMBUF_HEADROOM - hdr_size);
1188 
1189 		if (hw->vlan_strip)
1190 			rte_vlan_strip(rxm);
1191 
1192 		if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1193 			virtio_discard_rxbuf(vq, rxm);
1194 			rxvq->stats.errors++;
1195 			continue;
1196 		}
1197 
1198 		virtio_rx_stats_updated(rxvq, rxm);
1199 
1200 		rx_pkts[nb_rx++] = rxm;
1201 	}
1202 
1203 	rxvq->stats.packets += nb_rx;
1204 
1205 	/* Allocate new mbuf for the used descriptor */
1206 	while (likely(!virtqueue_full(vq))) {
1207 		new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
1208 		if (unlikely(new_mbuf == NULL)) {
1209 			struct rte_eth_dev *dev
1210 				= &rte_eth_devices[rxvq->port_id];
1211 			dev->data->rx_mbuf_alloc_failed++;
1212 			break;
1213 		}
1214 		error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
1215 		if (unlikely(error)) {
1216 			rte_pktmbuf_free(new_mbuf);
1217 			break;
1218 		}
1219 		nb_enqueued++;
1220 	}
1221 
1222 	if (likely(nb_enqueued)) {
1223 		vq_update_avail_idx(vq);
1224 
1225 		if (unlikely(virtqueue_kick_prepare(vq))) {
1226 			virtqueue_notify(vq);
1227 			PMD_RX_LOG(DEBUG, "Notified");
1228 		}
1229 	}
1230 
1231 	return nb_rx;
1232 }
1233 
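/* Receive burst for packed rings without mergeable buffers. */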
1234 uint16_t
1235 virtio_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
1236 			uint16_t nb_pkts)
1237 {
1238 	struct virtnet_rx *rxvq = rx_queue;
1239 	struct virtqueue *vq = rxvq->vq;
1240 	struct virtio_hw *hw = vq->hw;
1241 	struct rte_mbuf *rxm, *new_mbuf;
1242 	uint16_t num, nb_rx;
1243 	uint32_t len[VIRTIO_MBUF_BURST_SZ];
1244 	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1245 	int error;
1246 	uint32_t i, nb_enqueued;
1247 	uint32_t hdr_size;
1248 	struct virtio_net_hdr *hdr;
1249 
1250 	nb_rx = 0;
1251 	if (unlikely(hw->started == 0))
1252 		return nb_rx;
1253 
1254 	num = RTE_MIN(VIRTIO_MBUF_BURST_SZ, nb_pkts);
1255 	if (likely(num > DESC_PER_CACHELINE))
1256 		num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1257 
1258 	num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1259 	PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1260 
1261 	nb_enqueued = 0;
1262 	hdr_size = hw->vtnet_hdr_size;
1263 
1264 	for (i = 0; i < num; i++) {
1265 		rxm = rcv_pkts[i];
1266 
1267 		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1268 
1269 		if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1270 			PMD_RX_LOG(ERR, "Packet drop");
1271 			nb_enqueued++;
1272 			virtio_discard_rxbuf(vq, rxm);
1273 			rxvq->stats.errors++;
1274 			continue;
1275 		}
1276 
1277 		rxm->port = rxvq->port_id;
1278 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
1279 		rxm->ol_flags = 0;
1280 		rxm->vlan_tci = 0;
1281 
1282 		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1283 		rxm->data_len = (uint16_t)(len[i] - hdr_size);
1284 
1285 		hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1286 			RTE_PKTMBUF_HEADROOM - hdr_size);
1287 
1288 		if (hw->vlan_strip)
1289 			rte_vlan_strip(rxm);
1290 
1291 		if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1292 			virtio_discard_rxbuf(vq, rxm);
1293 			rxvq->stats.errors++;
1294 			continue;
1295 		}
1296 
1297 		virtio_rx_stats_updated(rxvq, rxm);
1298 
1299 		rx_pkts[nb_rx++] = rxm;
1300 	}
1301 
1302 	rxvq->stats.packets += nb_rx;
1303 
1304 	/* Allocate new mbuf for the used descriptor */
1305 	while (likely(!virtqueue_full(vq))) {
1306 		new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
1307 		if (unlikely(new_mbuf == NULL)) {
1308 			struct rte_eth_dev *dev =
1309 				&rte_eth_devices[rxvq->port_id];
1310 			dev->data->rx_mbuf_alloc_failed++;
1311 			break;
1312 		}
1313 		error = virtqueue_enqueue_recv_refill_packed(vq, &new_mbuf, 1);
1314 		if (unlikely(error)) {
1315 			rte_pktmbuf_free(new_mbuf);
1316 			break;
1317 		}
1318 		nb_enqueued++;
1319 	}
1320 
1321 	if (likely(nb_enqueued)) {
1322 		if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1323 			virtqueue_notify(vq);
1324 			PMD_RX_LOG(DEBUG, "Notified");
1325 		}
1326 	}
1327 
1328 	return nb_rx;
1329 }
1330 
1331 
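/*
 * Receive burst for in-order split rings with mergeable Rx buffers: chain
 * the extra segments of each packet as indicated by num_buffers in the
 * mergeable Rx header.
 */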
1332 uint16_t
1333 virtio_recv_mergeable_pkts_inorder(void *rx_queue,
1334 			struct rte_mbuf **rx_pkts,
1335 			uint16_t nb_pkts)
1336 {
1337 	struct virtnet_rx *rxvq = rx_queue;
1338 	struct virtqueue *vq = rxvq->vq;
1339 	struct virtio_hw *hw = vq->hw;
1340 	struct rte_mbuf *rxm;
1341 	struct rte_mbuf *prev;
1342 	uint16_t nb_used, num, nb_rx;
1343 	uint32_t len[VIRTIO_MBUF_BURST_SZ];
1344 	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1345 	int error;
1346 	uint32_t nb_enqueued;
1347 	uint32_t seg_num;
1348 	uint32_t seg_res;
1349 	uint32_t hdr_size;
1350 	int32_t i;
1351 
1352 	nb_rx = 0;
1353 	if (unlikely(hw->started == 0))
1354 		return nb_rx;
1355 
1356 	nb_used = VIRTQUEUE_NUSED(vq);
1357 	nb_used = RTE_MIN(nb_used, nb_pkts);
1358 	nb_used = RTE_MIN(nb_used, VIRTIO_MBUF_BURST_SZ);
1359 
1360 	virtio_rmb();
1361 
1362 	PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1363 
1364 	nb_enqueued = 0;
1365 	seg_num = 1;
1366 	seg_res = 0;
1367 	hdr_size = hw->vtnet_hdr_size;
1368 
1369 	num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len, nb_used);
1370 
1371 	for (i = 0; i < num; i++) {
1372 		struct virtio_net_hdr_mrg_rxbuf *header;
1373 
1374 		PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1375 		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1376 
1377 		rxm = rcv_pkts[i];
1378 
1379 		if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1380 			PMD_RX_LOG(ERR, "Packet drop");
1381 			nb_enqueued++;
1382 			virtio_discard_rxbuf_inorder(vq, rxm);
1383 			rxvq->stats.errors++;
1384 			continue;
1385 		}
1386 
1387 		header = (struct virtio_net_hdr_mrg_rxbuf *)
1388 			 ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1389 			 - hdr_size);
1390 		seg_num = header->num_buffers;
1391 
1392 		if (seg_num == 0)
1393 			seg_num = 1;
1394 
1395 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
1396 		rxm->nb_segs = seg_num;
1397 		rxm->ol_flags = 0;
1398 		rxm->vlan_tci = 0;
1399 		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1400 		rxm->data_len = (uint16_t)(len[i] - hdr_size);
1401 
1402 		rxm->port = rxvq->port_id;
1403 
1404 		rx_pkts[nb_rx] = rxm;
1405 		prev = rxm;
1406 
1407 		if (vq->hw->has_rx_offload &&
1408 				virtio_rx_offload(rxm, &header->hdr) < 0) {
1409 			virtio_discard_rxbuf_inorder(vq, rxm);
1410 			rxvq->stats.errors++;
1411 			continue;
1412 		}
1413 
1414 		if (hw->vlan_strip)
1415 			rte_vlan_strip(rx_pkts[nb_rx]);
1416 
1417 		seg_res = seg_num - 1;
1418 
1419 		/* Merge remaining segments */
1420 		while (seg_res != 0 && i < (num - 1)) {
1421 			i++;
1422 
1423 			rxm = rcv_pkts[i];
1424 			rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1425 			rxm->pkt_len = (uint32_t)(len[i]);
1426 			rxm->data_len = (uint16_t)(len[i]);
1427 
1428 			rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1429 			rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1430 
1431 			if (prev)
1432 				prev->next = rxm;
1433 
1434 			prev = rxm;
1435 			seg_res -= 1;
1436 		}
1437 
1438 		if (!seg_res) {
1439 			virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1440 			nb_rx++;
1441 		}
1442 	}
1443 
1444 	/* The last packet may still need its remaining segments merged */
1445 	while (seg_res != 0) {
1446 		uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1447 					VIRTIO_MBUF_BURST_SZ);
1448 
1449 		prev = rcv_pkts[nb_rx];
1450 		if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1451 			num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len,
1452 							   rcv_cnt);
1453 			uint16_t extra_idx = 0;
1454 
1455 			rcv_cnt = num;
1456 			while (extra_idx < rcv_cnt) {
1457 				rxm = rcv_pkts[extra_idx];
1458 				rxm->data_off =
1459 					RTE_PKTMBUF_HEADROOM - hdr_size;
1460 				rxm->pkt_len = (uint32_t)(len[extra_idx]);
1461 				rxm->data_len = (uint16_t)(len[extra_idx]);
1462 				prev->next = rxm;
1463 				prev = rxm;
1464 				rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1465 				rx_pkts[nb_rx]->data_len += len[extra_idx];
1466 				extra_idx += 1;
1467 			}
1468 			seg_res -= rcv_cnt;
1469 
1470 			if (!seg_res) {
1471 				virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1472 				nb_rx++;
1473 			}
1474 		} else {
1475 			PMD_RX_LOG(ERR,
1476 					"Not enough segments for packet.");
1477 			virtio_discard_rxbuf_inorder(vq, prev);
1478 			rxvq->stats.errors++;
1479 			break;
1480 		}
1481 	}
1482 
1483 	rxvq->stats.packets += nb_rx;
1484 
1485 	/* Allocate new mbuf for the used descriptor */
1486 
1487 	if (likely(!virtqueue_full(vq))) {
1488 		/* free_cnt may include mrg descs */
1489 		uint16_t free_cnt = vq->vq_free_cnt;
1490 		struct rte_mbuf *new_pkts[free_cnt];
1491 
1492 		if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1493 			error = virtqueue_enqueue_refill_inorder(vq, new_pkts,
1494 					free_cnt);
1495 			if (unlikely(error)) {
1496 				for (i = 0; i < free_cnt; i++)
1497 					rte_pktmbuf_free(new_pkts[i]);
1498 			}
1499 			nb_enqueued += free_cnt;
1500 		} else {
1501 			struct rte_eth_dev *dev =
1502 				&rte_eth_devices[rxvq->port_id];
1503 			dev->data->rx_mbuf_alloc_failed += free_cnt;
1504 		}
1505 	}
1506 
1507 	if (likely(nb_enqueued)) {
1508 		vq_update_avail_idx(vq);
1509 
1510 		if (unlikely(virtqueue_kick_prepare(vq))) {
1511 			virtqueue_notify(vq);
1512 			PMD_RX_LOG(DEBUG, "Notified");
1513 		}
1514 	}
1515 
1516 	return nb_rx;
1517 }
1518 
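/* Receive burst for split rings with mergeable Rx buffers. */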
1519 uint16_t
1520 virtio_recv_mergeable_pkts(void *rx_queue,
1521 			struct rte_mbuf **rx_pkts,
1522 			uint16_t nb_pkts)
1523 {
1524 	struct virtnet_rx *rxvq = rx_queue;
1525 	struct virtqueue *vq = rxvq->vq;
1526 	struct virtio_hw *hw = vq->hw;
1527 	struct rte_mbuf *rxm, *new_mbuf;
1528 	uint16_t nb_used, num, nb_rx;
1529 	uint32_t len[VIRTIO_MBUF_BURST_SZ];
1530 	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1531 	struct rte_mbuf *prev;
1532 	int error;
1533 	uint32_t i, nb_enqueued;
1534 	uint32_t seg_num;
1535 	uint16_t extra_idx;
1536 	uint32_t seg_res;
1537 	uint32_t hdr_size;
1538 
1539 	nb_rx = 0;
1540 	if (unlikely(hw->started == 0))
1541 		return nb_rx;
1542 
1543 	nb_used = VIRTQUEUE_NUSED(vq);
1544 
1545 	virtio_rmb();
1546 
1547 	PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1548 
1549 	i = 0;
1550 	nb_enqueued = 0;
1551 	seg_num = 0;
1552 	extra_idx = 0;
1553 	seg_res = 0;
1554 	hdr_size = hw->vtnet_hdr_size;
1555 
1556 	while (i < nb_used) {
1557 		struct virtio_net_hdr_mrg_rxbuf *header;
1558 
1559 		if (nb_rx == nb_pkts)
1560 			break;
1561 
1562 		num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, 1);
1563 		if (num != 1)
1564 			continue;
1565 
1566 		i++;
1567 
1568 		PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1569 		PMD_RX_LOG(DEBUG, "packet len:%d", len[0]);
1570 
1571 		rxm = rcv_pkts[0];
1572 
1573 		if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) {
1574 			PMD_RX_LOG(ERR, "Packet drop");
1575 			nb_enqueued++;
1576 			virtio_discard_rxbuf(vq, rxm);
1577 			rxvq->stats.errors++;
1578 			continue;
1579 		}
1580 
1581 		header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr +
1582 			RTE_PKTMBUF_HEADROOM - hdr_size);
1583 		seg_num = header->num_buffers;
1584 
1585 		if (seg_num == 0)
1586 			seg_num = 1;
1587 
1588 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
1589 		rxm->nb_segs = seg_num;
1590 		rxm->ol_flags = 0;
1591 		rxm->vlan_tci = 0;
1592 		rxm->pkt_len = (uint32_t)(len[0] - hdr_size);
1593 		rxm->data_len = (uint16_t)(len[0] - hdr_size);
1594 
1595 		rxm->port = rxvq->port_id;
1596 		rx_pkts[nb_rx] = rxm;
1597 		prev = rxm;
1598 
1599 		if (hw->has_rx_offload &&
1600 				virtio_rx_offload(rxm, &header->hdr) < 0) {
1601 			virtio_discard_rxbuf(vq, rxm);
1602 			rxvq->stats.errors++;
1603 			continue;
1604 		}
1605 
1606 		seg_res = seg_num - 1;
1607 
1608 		while (seg_res != 0) {
1609 			/*
1610 			 * Get extra segments for the current incomplete packet.
1611 			 */
1612 			uint16_t  rcv_cnt =
1613 				RTE_MIN(seg_res, RTE_DIM(rcv_pkts));
1614 			if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1615 				uint32_t rx_num =
1616 					virtqueue_dequeue_burst_rx(vq,
1617 					rcv_pkts, len, rcv_cnt);
1618 				i += rx_num;
1619 				rcv_cnt = rx_num;
1620 			} else {
1621 				PMD_RX_LOG(ERR,
1622 					   "Not enough segments for packet.");
1623 				nb_enqueued++;
1624 				virtio_discard_rxbuf(vq, rxm);
1625 				rxvq->stats.errors++;
1626 				break;
1627 			}
1628 
1629 			extra_idx = 0;
1630 
1631 			while (extra_idx < rcv_cnt) {
1632 				rxm = rcv_pkts[extra_idx];
1633 
1634 				rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1635 				rxm->pkt_len = (uint32_t)(len[extra_idx]);
1636 				rxm->data_len = (uint16_t)(len[extra_idx]);
1637 
1638 				if (prev)
1639 					prev->next = rxm;
1640 
1641 				prev = rxm;
1642 				rx_pkts[nb_rx]->pkt_len += rxm->pkt_len;
1643 				extra_idx++;
1644 			}
1645 			seg_res -= rcv_cnt;
1646 		}
1647 
1648 		if (hw->vlan_strip)
1649 			rte_vlan_strip(rx_pkts[nb_rx]);
1650 
1651 		VIRTIO_DUMP_PACKET(rx_pkts[nb_rx],
1652 			rx_pkts[nb_rx]->data_len);
1653 
1654 		virtio_update_packet_stats(&rxvq->stats, rx_pkts[nb_rx]);
1655 		nb_rx++;
1656 	}
1657 
1658 	rxvq->stats.packets += nb_rx;
1659 
1660 	/* Allocate new mbuf for the used descriptor */
1661 	while (likely(!virtqueue_full(vq))) {
1662 		new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
1663 		if (unlikely(new_mbuf == NULL)) {
1664 			struct rte_eth_dev *dev
1665 				= &rte_eth_devices[rxvq->port_id];
1666 			dev->data->rx_mbuf_alloc_failed++;
1667 			break;
1668 		}
1669 		error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
1670 		if (unlikely(error)) {
1671 			rte_pktmbuf_free(new_mbuf);
1672 			break;
1673 		}
1674 		nb_enqueued++;
1675 	}
1676 
1677 	if (likely(nb_enqueued)) {
1678 		vq_update_avail_idx(vq);
1679 
1680 		if (unlikely(virtqueue_kick_prepare(vq))) {
1681 			virtqueue_notify(vq);
1682 			PMD_RX_LOG(DEBUG, "Notified");
1683 		}
1684 	}
1685 
1686 	return nb_rx;
1687 }
1688 
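/* Receive burst for packed rings with mergeable Rx buffers. */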
1689 uint16_t
1690 virtio_recv_mergeable_pkts_packed(void *rx_queue,
1691 			struct rte_mbuf **rx_pkts,
1692 			uint16_t nb_pkts)
1693 {
1694 	struct virtnet_rx *rxvq = rx_queue;
1695 	struct virtqueue *vq = rxvq->vq;
1696 	struct virtio_hw *hw = vq->hw;
1697 	struct rte_mbuf *rxm;
1698 	struct rte_mbuf *prev = NULL;
1699 	uint16_t num, nb_rx = 0;
1700 	uint32_t len[VIRTIO_MBUF_BURST_SZ];
1701 	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1702 	uint32_t nb_enqueued = 0;
1703 	uint32_t seg_num = 0;
1704 	uint32_t seg_res = 0;
1705 	uint32_t hdr_size = hw->vtnet_hdr_size;
1706 	int32_t i;
1707 	int error;
1708 
1709 	if (unlikely(hw->started == 0))
1710 		return nb_rx;
1711 
1712 
1713 	num = nb_pkts;
1714 	if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1715 		num = VIRTIO_MBUF_BURST_SZ;
1716 	if (likely(num > DESC_PER_CACHELINE))
1717 		num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1718 
1719 	num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1720 
1721 	for (i = 0; i < num; i++) {
1722 		struct virtio_net_hdr_mrg_rxbuf *header;
1723 
1724 		PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1725 		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1726 
1727 		rxm = rcv_pkts[i];
1728 
1729 		if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1730 			PMD_RX_LOG(ERR, "Packet drop");
1731 			nb_enqueued++;
1732 			virtio_discard_rxbuf(vq, rxm);
1733 			rxvq->stats.errors++;
1734 			continue;
1735 		}
1736 
1737 		header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)
1738 			  rxm->buf_addr + RTE_PKTMBUF_HEADROOM - hdr_size);
1739 		seg_num = header->num_buffers;
1740 
1741 		if (seg_num == 0)
1742 			seg_num = 1;
1743 
1744 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
1745 		rxm->nb_segs = seg_num;
1746 		rxm->ol_flags = 0;
1747 		rxm->vlan_tci = 0;
1748 		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1749 		rxm->data_len = (uint16_t)(len[i] - hdr_size);
1750 
1751 		rxm->port = rxvq->port_id;
1752 		rx_pkts[nb_rx] = rxm;
1753 		prev = rxm;
1754 
1755 		if (hw->has_rx_offload &&
1756 				virtio_rx_offload(rxm, &header->hdr) < 0) {
1757 			virtio_discard_rxbuf(vq, rxm);
1758 			rxvq->stats.errors++;
1759 			continue;
1760 		}
1761 
1762 		if (hw->vlan_strip)
1763 			rte_vlan_strip(rx_pkts[nb_rx]);
1764 
1765 		seg_res = seg_num - 1;
1766 
1767 		/* Merge remaining segments */
1768 		while (seg_res != 0 && i < (num - 1)) {
1769 			i++;
1770 
1771 			rxm = rcv_pkts[i];
1772 			rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1773 			rxm->pkt_len = (uint32_t)(len[i]);
1774 			rxm->data_len = (uint16_t)(len[i]);
1775 
1776 			rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1777 			rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1778 
1779 			if (prev)
1780 				prev->next = rxm;
1781 
1782 			prev = rxm;
1783 			seg_res -= 1;
1784 		}
1785 
1786 		if (!seg_res) {
1787 			virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1788 			nb_rx++;
1789 		}
1790 	}
1791 
1792 	/* The last packet may still need its remaining segments merged */
1793 	while (seg_res != 0) {
1794 		uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1795 					VIRTIO_MBUF_BURST_SZ);
1796 		if (likely(vq->vq_free_cnt >= rcv_cnt)) {
1797 			num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts,
1798 					len, rcv_cnt);
1799 			uint16_t extra_idx = 0;
1800 
1801 			rcv_cnt = num;
1802 
1803 			while (extra_idx < rcv_cnt) {
1804 				rxm = rcv_pkts[extra_idx];
1805 
1806 				rxm->data_off =
1807 					RTE_PKTMBUF_HEADROOM - hdr_size;
1808 				rxm->pkt_len = (uint32_t)(len[extra_idx]);
1809 				rxm->data_len = (uint16_t)(len[extra_idx]);
1810 
1811 				prev->next = rxm;
1812 				prev = rxm;
1813 				rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1814 				rx_pkts[nb_rx]->data_len += len[extra_idx];
1815 				extra_idx += 1;
1816 			}
1817 			seg_res -= rcv_cnt;
1818 			if (!seg_res) {
1819 				virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1820 				nb_rx++;
1821 			}
1822 		} else {
1823 			PMD_RX_LOG(ERR,
1824 					"Not enough segments for packet.");
1825 			if (prev)
1826 				virtio_discard_rxbuf(vq, prev);
1827 			rxvq->stats.errors++;
1828 			break;
1829 		}
1830 	}
1831 
1832 	rxvq->stats.packets += nb_rx;
1833 
1834 	/* Allocate new mbuf for the used descriptor */
1835 	if (likely(!virtqueue_full(vq))) {
1836 		/* free_cnt may include mrg descs */
1837 		uint16_t free_cnt = vq->vq_free_cnt;
1838 		struct rte_mbuf *new_pkts[free_cnt];
1839 
1840 		if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1841 			error = virtqueue_enqueue_recv_refill_packed(vq,
1842 					new_pkts, free_cnt);
1843 			if (unlikely(error)) {
1844 				for (i = 0; i < free_cnt; i++)
1845 					rte_pktmbuf_free(new_pkts[i]);
1846 			}
1847 			nb_enqueued += free_cnt;
1848 		} else {
1849 			struct rte_eth_dev *dev =
1850 				&rte_eth_devices[rxvq->port_id];
1851 			dev->data->rx_mbuf_alloc_failed += free_cnt;
1852 		}
1853 	}
1854 
1855 	if (likely(nb_enqueued)) {
1856 		if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1857 			virtqueue_notify(vq);
1858 			PMD_RX_LOG(DEBUG, "Notified");
1859 		}
1860 	}
1861 
1862 	return nb_rx;
1863 }
1864 
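/*
 * Transmit burst for packed rings: reclaim completed descriptors as needed,
 * enqueue each packet and kick the backend if it expects a notification.
 */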
1865 uint16_t
1866 virtio_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts,
1867 			uint16_t nb_pkts)
1868 {
1869 	struct virtnet_tx *txvq = tx_queue;
1870 	struct virtqueue *vq = txvq->vq;
1871 	struct virtio_hw *hw = vq->hw;
1872 	uint16_t hdr_size = hw->vtnet_hdr_size;
1873 	uint16_t nb_tx = 0;
1874 	int error;
1875 
1876 	if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
1877 		return nb_tx;
1878 
1879 	if (unlikely(nb_pkts < 1))
1880 		return nb_pkts;
1881 
1882 	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
1883 
1884 	if (nb_pkts > vq->vq_free_cnt)
1885 		virtio_xmit_cleanup_packed(vq, nb_pkts - vq->vq_free_cnt);
1886 
1887 	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1888 		struct rte_mbuf *txm = tx_pkts[nb_tx];
1889 		int can_push = 0, slots, need;
1890 
1891 		/* Do VLAN tag insertion */
1892 		if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
1893 			error = rte_vlan_insert(&txm);
1894 			if (unlikely(error)) {
1895 				rte_pktmbuf_free(txm);
1896 				continue;
1897 			}
1898 		}
1899 
1900 		/* optimize ring usage */
1901 		if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
1902 		      vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
1903 		    rte_mbuf_refcnt_read(txm) == 1 &&
1904 		    RTE_MBUF_DIRECT(txm) &&
1905 		    txm->nb_segs == 1 &&
1906 		    rte_pktmbuf_headroom(txm) >= hdr_size &&
1907 		    rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
1908 			   __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
1909 			can_push = 1;
1910 
1911 		/* How many main ring entries are needed for this Tx?
1912 		 * any_layout => number of segments
1913 		 * default    => number of segments + 1
1914 		 */
1915 		slots = txm->nb_segs + !can_push;
1916 		need = slots - vq->vq_free_cnt;
1917 
1918 		/* A positive value indicates we need to free vring descriptors */
1919 		if (unlikely(need > 0)) {
1920 			virtio_rmb();
1921 			need = RTE_MIN(need, (int)nb_pkts);
1922 			virtio_xmit_cleanup_packed(vq, need);
1923 			need = slots - vq->vq_free_cnt;
1924 			if (unlikely(need > 0)) {
1925 				PMD_TX_LOG(ERR,
1926 					   "No free tx descriptors to transmit");
1927 				break;
1928 			}
1929 		}
1930 
1931 		/* Enqueue Packet buffers */
1932 		virtqueue_enqueue_xmit_packed(txvq, txm, slots, can_push);
1933 
1935 		virtio_update_packet_stats(&txvq->stats, txm);
1936 	}
1937 
1938 	txvq->stats.packets += nb_tx;
1939 
1940 	if (likely(nb_tx)) {
1941 		if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1942 			virtqueue_notify(vq);
1943 			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
1944 		}
1945 	}
1946 
1947 	return nb_tx;
1948 }
1949 
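/* Transmit burst for split rings. */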
1950 uint16_t
1951 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1952 {
1953 	struct virtnet_tx *txvq = tx_queue;
1954 	struct virtqueue *vq = txvq->vq;
1955 	struct virtio_hw *hw = vq->hw;
1956 	uint16_t hdr_size = hw->vtnet_hdr_size;
1957 	uint16_t nb_used, nb_tx = 0;
1958 	int error;
1959 
1960 	if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
1961 		return nb_tx;
1962 
1963 	if (unlikely(nb_pkts < 1))
1964 		return nb_pkts;
1965 
1966 	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
1967 	nb_used = VIRTQUEUE_NUSED(vq);
1968 
1969 	virtio_rmb();
1970 	if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
1971 		virtio_xmit_cleanup(vq, nb_used);
1972 
1973 	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1974 		struct rte_mbuf *txm = tx_pkts[nb_tx];
1975 		int can_push = 0, use_indirect = 0, slots, need;
1976 
1977 		/* Do VLAN tag insertion */
1978 		if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
1979 			error = rte_vlan_insert(&txm);
1980 			if (unlikely(error)) {
1981 				rte_pktmbuf_free(txm);
1982 				continue;
1983 			}
1984 		}
1985 
1986 		/* optimize ring usage */
1987 		if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
1988 		      vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
1989 		    rte_mbuf_refcnt_read(txm) == 1 &&
1990 		    RTE_MBUF_DIRECT(txm) &&
1991 		    txm->nb_segs == 1 &&
1992 		    rte_pktmbuf_headroom(txm) >= hdr_size &&
1993 		    rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
1994 				   __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
1995 			can_push = 1;
1996 		else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
1997 			 txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
1998 			use_indirect = 1;
1999 
2000 		/* How many main ring entries are needed for this Tx?
2001 		 * any_layout => number of segments
2002 		 * indirect   => 1
2003 		 * default    => number of segments + 1
2004 		 */
2005 		slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
2006 		need = slots - vq->vq_free_cnt;
2007 
2008 		/* A positive value indicates we need to free vring descriptors */
2009 		if (unlikely(need > 0)) {
2010 			nb_used = VIRTQUEUE_NUSED(vq);
2011 			virtio_rmb();
2012 			need = RTE_MIN(need, (int)nb_used);
2013 
2014 			virtio_xmit_cleanup(vq, need);
2015 			need = slots - vq->vq_free_cnt;
2016 			if (unlikely(need > 0)) {
2017 				PMD_TX_LOG(ERR,
2018 					   "No free tx descriptors to transmit");
2019 				break;
2020 			}
2021 		}
2022 
2023 		/* Enqueue Packet buffers */
2024 		virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect,
2025 			can_push, 0);
2026 
2027 		virtio_update_packet_stats(&txvq->stats, txm);
2028 	}
2029 
2030 	txvq->stats.packets += nb_tx;
2031 
2032 	if (likely(nb_tx)) {
2033 		vq_update_avail_idx(vq);
2034 
2035 		if (unlikely(virtqueue_kick_prepare(vq))) {
2036 			virtqueue_notify(vq);
2037 			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2038 		}
2039 	}
2040 
2041 	return nb_tx;
2042 }
2043 
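/*
 * Transmit burst for in-order split rings: single-segment packets that allow
 * header push are batched and enqueued one descriptor each; the rest go
 * through the generic enqueue path.
 */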
2044 uint16_t
2045 virtio_xmit_pkts_inorder(void *tx_queue,
2046 			struct rte_mbuf **tx_pkts,
2047 			uint16_t nb_pkts)
2048 {
2049 	struct virtnet_tx *txvq = tx_queue;
2050 	struct virtqueue *vq = txvq->vq;
2051 	struct virtio_hw *hw = vq->hw;
2052 	uint16_t hdr_size = hw->vtnet_hdr_size;
2053 	uint16_t nb_used, nb_avail, nb_tx = 0, nb_inorder_pkts = 0;
2054 	struct rte_mbuf *inorder_pkts[nb_pkts];
2055 	int error;
2056 
2057 	if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2058 		return nb_tx;
2059 
2060 	if (unlikely(nb_pkts < 1))
2061 		return nb_pkts;
2062 
2063 	VIRTQUEUE_DUMP(vq);
2064 	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2065 	nb_used = VIRTQUEUE_NUSED(vq);
2066 
2067 	virtio_rmb();
2068 	if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2069 		virtio_xmit_cleanup_inorder(vq, nb_used);
2070 
2071 	if (unlikely(!vq->vq_free_cnt))
2072 		virtio_xmit_cleanup_inorder(vq, nb_used);
2073 
2074 	nb_avail = RTE_MIN(vq->vq_free_cnt, nb_pkts);
2075 
2076 	for (nb_tx = 0; nb_tx < nb_avail; nb_tx++) {
2077 		struct rte_mbuf *txm = tx_pkts[nb_tx];
2078 		int slots, need;
2079 
2080 		/* Do VLAN tag insertion */
2081 		if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
2082 			error = rte_vlan_insert(&txm);
2083 			if (unlikely(error)) {
2084 				rte_pktmbuf_free(txm);
2085 				continue;
2086 			}
2087 		}
2088 
2089 		/* optimize ring usage */
2090 		if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2091 		     vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2092 		     rte_mbuf_refcnt_read(txm) == 1 &&
2093 		     RTE_MBUF_DIRECT(txm) &&
2094 		     txm->nb_segs == 1 &&
2095 		     rte_pktmbuf_headroom(txm) >= hdr_size &&
2096 		     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2097 				__alignof__(struct virtio_net_hdr_mrg_rxbuf))) {
2098 			inorder_pkts[nb_inorder_pkts] = txm;
2099 			nb_inorder_pkts++;
2100 
2101 			virtio_update_packet_stats(&txvq->stats, txm);
2102 			continue;
2103 		}
2104 
2105 		if (nb_inorder_pkts) {
2106 			virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2107 							nb_inorder_pkts);
2108 			nb_inorder_pkts = 0;
2109 		}
2110 
2111 		slots = txm->nb_segs + 1;
2112 		need = slots - vq->vq_free_cnt;
2113 		if (unlikely(need > 0)) {
2114 			nb_used = VIRTQUEUE_NUSED(vq);
2115 			virtio_rmb();
2116 			need = RTE_MIN(need, (int)nb_used);
2117 
2118 			virtio_xmit_cleanup_inorder(vq, need);
2119 
2120 			need = slots - vq->vq_free_cnt;
2121 
2122 			if (unlikely(need > 0)) {
2123 				PMD_TX_LOG(ERR,
2124 					"No free tx descriptors to transmit");
2125 				break;
2126 			}
2127 		}
2128 		/* Enqueue Packet buffers */
2129 		virtqueue_enqueue_xmit(txvq, txm, slots, 0, 0, 1);
2130 
2131 		virtio_update_packet_stats(&txvq->stats, txm);
2132 	}
2133 
2134 	/* Transmit all inorder packets */
2135 	if (nb_inorder_pkts)
2136 		virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2137 						nb_inorder_pkts);
2138 
2139 	txvq->stats.packets += nb_tx;
2140 
2141 	if (likely(nb_tx)) {
2142 		vq_update_avail_idx(vq);
2143 
2144 		if (unlikely(virtqueue_kick_prepare(vq))) {
2145 			virtqueue_notify(vq);
2146 			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2147 		}
2148 	}
2149 
2150 	VIRTQUEUE_DUMP(vq);
2151 
2152 	return nb_tx;
2153 }
2154