xref: /dpdk/drivers/net/virtio/virtio_rxtx.c (revision efcda13648d25ed21bc9de2f19082fe8eb5ab8ff)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4 
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <errno.h>
10 
11 #include <rte_cycles.h>
12 #include <rte_memory.h>
13 #include <rte_branch_prediction.h>
14 #include <rte_mempool.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_ether.h>
18 #include <rte_ethdev_driver.h>
19 #include <rte_prefetch.h>
20 #include <rte_string_fns.h>
21 #include <rte_errno.h>
22 #include <rte_byteorder.h>
23 #include <rte_net.h>
24 #include <rte_ip.h>
25 #include <rte_udp.h>
26 #include <rte_tcp.h>
27 
28 #include "virtio_logs.h"
29 #include "virtio_ethdev.h"
30 #include "virtio_pci.h"
31 #include "virtqueue.h"
32 #include "virtio_rxtx.h"
33 #include "virtio_rxtx_simple.h"
34 #include "virtio_ring.h"
35 
36 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
37 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
38 #else
39 #define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
40 #endif
41 
42 int
43 virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
44 {
45 	struct virtnet_rx *rxvq = rxq;
46 	struct virtqueue *vq = rxvq->vq;
47 
48 	return VIRTQUEUE_NUSED(vq) >= offset;
49 }
50 
51 void
52 vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx, uint16_t num)
53 {
54 	vq->vq_free_cnt += num;
55 	vq->vq_desc_tail_idx = desc_idx & (vq->vq_nentries - 1);
56 }
57 
58 void
59 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
60 {
61 	struct vring_desc *dp, *dp_tail;
62 	struct vq_desc_extra *dxp;
63 	uint16_t desc_idx_last = desc_idx;
64 
65 	dp  = &vq->vq_ring.desc[desc_idx];
66 	dxp = &vq->vq_descx[desc_idx];
67 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
68 	if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
69 		while (dp->flags & VRING_DESC_F_NEXT) {
70 			desc_idx_last = dp->next;
71 			dp = &vq->vq_ring.desc[dp->next];
72 		}
73 	}
74 	dxp->ndescs = 0;
75 
76 	/*
77 	 * We must append the existing free chain, if any, to the end of
78 	 * newly freed chain. If the virtqueue was completely used, then
79 	 * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
80 	 */
81 	if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
82 		vq->vq_desc_head_idx = desc_idx;
83 	} else {
84 		dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
85 		dp_tail->next = desc_idx;
86 	}
87 
88 	vq->vq_desc_tail_idx = desc_idx_last;
89 	dp->next = VQ_RING_DESC_CHAIN_END;
90 }
91 
92 static void
93 vq_ring_free_id_packed(struct virtqueue *vq, uint16_t id)
94 {
95 	struct vq_desc_extra *dxp;
96 
97 	dxp = &vq->vq_descx[id];
98 	vq->vq_free_cnt += dxp->ndescs;
99 
100 	if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END)
101 		vq->vq_desc_head_idx = id;
102 	else
103 		vq->vq_descx[vq->vq_desc_tail_idx].next = id;
104 
105 	vq->vq_desc_tail_idx = id;
106 	dxp->next = VQ_RING_DESC_CHAIN_END;
107 }
108 
109 static uint16_t
110 virtqueue_dequeue_burst_rx_packed(struct virtqueue *vq,
111 				  struct rte_mbuf **rx_pkts,
112 				  uint32_t *len,
113 				  uint16_t num)
114 {
115 	struct rte_mbuf *cookie;
116 	uint16_t used_idx;
117 	uint16_t id;
118 	struct vring_packed_desc *desc;
119 	uint16_t i;
120 
121 	desc = vq->ring_packed.desc_packed;
122 
123 	for (i = 0; i < num; i++) {
124 		used_idx = vq->vq_used_cons_idx;
125 		if (!desc_is_used(&desc[used_idx], vq))
126 			return i;
127 		len[i] = desc[used_idx].len;
128 		id = desc[used_idx].id;
129 		cookie = (struct rte_mbuf *)vq->vq_descx[id].cookie;
130 		if (unlikely(cookie == NULL)) {
131 			PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
132 				vq->vq_used_cons_idx);
133 			break;
134 		}
135 		rte_prefetch0(cookie);
136 		rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
137 		rx_pkts[i] = cookie;
138 
139 		vq->vq_free_cnt++;
140 		vq->vq_used_cons_idx++;
141 		if (vq->vq_used_cons_idx >= vq->vq_nentries) {
142 			vq->vq_used_cons_idx -= vq->vq_nentries;
143 			vq->used_wrap_counter ^= 1;
144 		}
145 	}
146 
147 	return i;
148 }
149 
150 static uint16_t
151 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
152 			   uint32_t *len, uint16_t num)
153 {
154 	struct vring_used_elem *uep;
155 	struct rte_mbuf *cookie;
156 	uint16_t used_idx, desc_idx;
157 	uint16_t i;
158 
159 	/*  Caller does the check */
160 	for (i = 0; i < num ; i++) {
161 		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
162 		uep = &vq->vq_ring.used->ring[used_idx];
163 		desc_idx = (uint16_t) uep->id;
164 		len[i] = uep->len;
165 		cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
166 
167 		if (unlikely(cookie == NULL)) {
168 			PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
169 				vq->vq_used_cons_idx);
170 			break;
171 		}
172 
173 		rte_prefetch0(cookie);
174 		rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
175 		rx_pkts[i]  = cookie;
176 		vq->vq_used_cons_idx++;
177 		vq_ring_free_chain(vq, desc_idx);
178 		vq->vq_descx[desc_idx].cookie = NULL;
179 	}
180 
181 	return i;
182 }
183 
184 static uint16_t
185 virtqueue_dequeue_rx_inorder(struct virtqueue *vq,
186 			struct rte_mbuf **rx_pkts,
187 			uint32_t *len,
188 			uint16_t num)
189 {
190 	struct vring_used_elem *uep;
191 	struct rte_mbuf *cookie;
192 	uint16_t used_idx = 0;
193 	uint16_t i;
194 
195 	if (unlikely(num == 0))
196 		return 0;
197 
198 	for (i = 0; i < num; i++) {
199 		used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
200 		/* Desc idx same as used idx */
201 		uep = &vq->vq_ring.used->ring[used_idx];
202 		len[i] = uep->len;
203 		cookie = (struct rte_mbuf *)vq->vq_descx[used_idx].cookie;
204 
205 		if (unlikely(cookie == NULL)) {
206 			PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
207 				vq->vq_used_cons_idx);
208 			break;
209 		}
210 
211 		rte_prefetch0(cookie);
212 		rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
213 		rx_pkts[i]  = cookie;
214 		vq->vq_used_cons_idx++;
215 		vq->vq_descx[used_idx].cookie = NULL;
216 	}
217 
218 	vq_ring_free_inorder(vq, used_idx, i);
219 	return i;
220 }
221 
222 #ifndef DEFAULT_TX_FREE_THRESH
223 #define DEFAULT_TX_FREE_THRESH 32
224 #endif
225 
226 /* Cleanup from completed transmits. */
227 static void
228 virtio_xmit_cleanup_packed(struct virtqueue *vq, int num)
229 {
230 	uint16_t used_idx, id;
231 	uint16_t size = vq->vq_nentries;
232 	struct vring_packed_desc *desc = vq->ring_packed.desc_packed;
233 	struct vq_desc_extra *dxp;
234 
235 	used_idx = vq->vq_used_cons_idx;
236 	while (num-- && desc_is_used(&desc[used_idx], vq)) {
237 		used_idx = vq->vq_used_cons_idx;
238 		id = desc[used_idx].id;
239 		dxp = &vq->vq_descx[id];
240 		vq->vq_used_cons_idx += dxp->ndescs;
241 		if (vq->vq_used_cons_idx >= size) {
242 			vq->vq_used_cons_idx -= size;
243 			vq->used_wrap_counter ^= 1;
244 		}
245 		vq_ring_free_id_packed(vq, id);
246 		if (dxp->cookie != NULL) {
247 			rte_pktmbuf_free(dxp->cookie);
248 			dxp->cookie = NULL;
249 		}
250 		used_idx = vq->vq_used_cons_idx;
251 	}
252 }
253 
254 static void
255 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
256 {
257 	uint16_t i, used_idx, desc_idx;
258 	for (i = 0; i < num; i++) {
259 		struct vring_used_elem *uep;
260 		struct vq_desc_extra *dxp;
261 
262 		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
263 		uep = &vq->vq_ring.used->ring[used_idx];
264 
265 		desc_idx = (uint16_t) uep->id;
266 		dxp = &vq->vq_descx[desc_idx];
267 		vq->vq_used_cons_idx++;
268 		vq_ring_free_chain(vq, desc_idx);
269 
270 		if (dxp->cookie != NULL) {
271 			rte_pktmbuf_free(dxp->cookie);
272 			dxp->cookie = NULL;
273 		}
274 	}
275 }
276 
277 /* Cleanup from completed inorder transmits. */
278 static void
279 virtio_xmit_cleanup_inorder(struct virtqueue *vq, uint16_t num)
280 {
281 	uint16_t i, used_idx, desc_idx = 0, last_idx;
282 	int16_t free_cnt = 0;
283 	struct vq_desc_extra *dxp = NULL;
284 
285 	if (unlikely(num == 0))
286 		return;
287 
288 	for (i = 0; i < num; i++) {
289 		struct vring_used_elem *uep;
290 
291 		used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
292 		uep = &vq->vq_ring.used->ring[used_idx];
293 		desc_idx = (uint16_t)uep->id;
294 
295 		dxp = &vq->vq_descx[desc_idx];
296 		vq->vq_used_cons_idx++;
297 
298 		if (dxp->cookie != NULL) {
299 			rte_pktmbuf_free(dxp->cookie);
300 			dxp->cookie = NULL;
301 		}
302 	}
303 
304 	last_idx = desc_idx + dxp->ndescs - 1;
305 	free_cnt = last_idx - vq->vq_desc_tail_idx;
306 	if (free_cnt <= 0)
307 		free_cnt += vq->vq_nentries;
308 
309 	vq_ring_free_inorder(vq, last_idx, free_cnt);
310 }
311 
312 static inline int
313 virtqueue_enqueue_refill_inorder(struct virtqueue *vq,
314 			struct rte_mbuf **cookies,
315 			uint16_t num)
316 {
317 	struct vq_desc_extra *dxp;
318 	struct virtio_hw *hw = vq->hw;
319 	struct vring_desc *start_dp;
320 	uint16_t head_idx, idx, i = 0;
321 
322 	if (unlikely(vq->vq_free_cnt == 0))
323 		return -ENOSPC;
324 	if (unlikely(vq->vq_free_cnt < num))
325 		return -EMSGSIZE;
326 
327 	head_idx = vq->vq_desc_head_idx & (vq->vq_nentries - 1);
328 	start_dp = vq->vq_ring.desc;
329 
330 	while (i < num) {
331 		idx = head_idx & (vq->vq_nentries - 1);
332 		dxp = &vq->vq_descx[idx];
333 		dxp->cookie = (void *)cookies[i];
334 		dxp->ndescs = 1;
335 
336 		start_dp[idx].addr =
337 				VIRTIO_MBUF_ADDR(cookies[i], vq) +
338 				RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
339 		start_dp[idx].len =
340 				cookies[i]->buf_len -
341 				RTE_PKTMBUF_HEADROOM +
342 				hw->vtnet_hdr_size;
343 		start_dp[idx].flags =  VRING_DESC_F_WRITE;
344 
345 		vq_update_avail_ring(vq, idx);
346 		head_idx++;
347 		i++;
348 	}
349 
350 	vq->vq_desc_head_idx += num;
351 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
352 	return 0;
353 }
354 
355 static inline int
356 virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
357 {
358 	struct vq_desc_extra *dxp;
359 	struct virtio_hw *hw = vq->hw;
360 	struct vring_desc *start_dp;
361 	uint16_t needed = 1;
362 	uint16_t head_idx, idx;
363 
364 	if (unlikely(vq->vq_free_cnt == 0))
365 		return -ENOSPC;
366 	if (unlikely(vq->vq_free_cnt < needed))
367 		return -EMSGSIZE;
368 
369 	head_idx = vq->vq_desc_head_idx;
370 	if (unlikely(head_idx >= vq->vq_nentries))
371 		return -EFAULT;
372 
373 	idx = head_idx;
374 	dxp = &vq->vq_descx[idx];
375 	dxp->cookie = (void *)cookie;
376 	dxp->ndescs = needed;
377 
378 	start_dp = vq->vq_ring.desc;
379 	start_dp[idx].addr =
380 		VIRTIO_MBUF_ADDR(cookie, vq) +
381 		RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
382 	start_dp[idx].len =
383 		cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
384 	start_dp[idx].flags =  VRING_DESC_F_WRITE;
385 	idx = start_dp[idx].next;
386 	vq->vq_desc_head_idx = idx;
387 	if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
388 		vq->vq_desc_tail_idx = idx;
389 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
390 	vq_update_avail_ring(vq, head_idx);
391 
392 	return 0;
393 }
394 
395 static inline int
396 virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
397 				     struct rte_mbuf **cookie, uint16_t num)
398 {
399 	struct vring_packed_desc *start_dp = vq->ring_packed.desc_packed;
400 	uint16_t flags = VRING_DESC_F_WRITE | vq->avail_used_flags;
401 	struct virtio_hw *hw = vq->hw;
402 	struct vq_desc_extra *dxp;
403 	uint16_t idx;
404 	int i;
405 
406 	if (unlikely(vq->vq_free_cnt == 0))
407 		return -ENOSPC;
408 	if (unlikely(vq->vq_free_cnt < num))
409 		return -EMSGSIZE;
410 
411 	for (i = 0; i < num; i++) {
412 		idx = vq->vq_avail_idx;
413 		dxp = &vq->vq_descx[idx];
414 		dxp->cookie = (void *)cookie[i];
415 		dxp->ndescs = 1;
416 
417 		start_dp[idx].addr = VIRTIO_MBUF_ADDR(cookie[i], vq) +
418 				RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
419 		start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM
420 					+ hw->vtnet_hdr_size;
421 
422 		vq->vq_desc_head_idx = dxp->next;
423 		if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
424 			vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
425 		rte_smp_wmb();
426 		start_dp[idx].flags = flags;
427 		if (++vq->vq_avail_idx >= vq->vq_nentries) {
428 			vq->vq_avail_idx -= vq->vq_nentries;
429 			vq->avail_wrap_counter ^= 1;
430 			vq->avail_used_flags =
431 				VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
432 				VRING_DESC_F_USED(!vq->avail_wrap_counter);
433 			flags = VRING_DESC_F_WRITE | vq->avail_used_flags;
434 		}
435 	}
436 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
437 	return 0;
438 }
439 
440 /* When doing TSO, the IP length is not included in the pseudo header
441  * checksum of the packet given to the PMD, but for virtio it is
442  * expected.
443  */
444 static void
445 virtio_tso_fix_cksum(struct rte_mbuf *m)
446 {
447 	/* common case: header is not fragmented */
448 	if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
449 			m->l4_len)) {
450 		struct ipv4_hdr *iph;
451 		struct ipv6_hdr *ip6h;
452 		struct tcp_hdr *th;
453 		uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
454 		uint32_t tmp;
455 
456 		iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
457 		th = RTE_PTR_ADD(iph, m->l3_len);
458 		if ((iph->version_ihl >> 4) == 4) {
459 			iph->hdr_checksum = 0;
460 			iph->hdr_checksum = rte_ipv4_cksum(iph);
461 			ip_len = iph->total_length;
462 			ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
463 				m->l3_len);
464 		} else {
465 			ip6h = (struct ipv6_hdr *)iph;
466 			ip_paylen = ip6h->payload_len;
467 		}
468 
469 		/* calculate the new phdr checksum not including ip_paylen */
470 		prev_cksum = th->cksum;
471 		tmp = prev_cksum;
472 		tmp += ip_paylen;
473 		tmp = (tmp & 0xffff) + (tmp >> 16);
474 		new_cksum = tmp;
475 
476 		/* replace it in the packet */
477 		th->cksum = new_cksum;
478 	}
479 }
480 
481 
482 /* avoid write operation when necessary, to lessen cache issues */
483 #define ASSIGN_UNLESS_EQUAL(var, val) do {	\
484 	if ((var) != (val))			\
485 		(var) = (val);			\
486 } while (0)
487 
488 static inline void
489 virtqueue_xmit_offload(struct virtio_net_hdr *hdr,
490 			struct rte_mbuf *cookie,
491 			bool offload)
492 {
493 	if (offload) {
494 		if (cookie->ol_flags & PKT_TX_TCP_SEG)
495 			cookie->ol_flags |= PKT_TX_TCP_CKSUM;
496 
497 		switch (cookie->ol_flags & PKT_TX_L4_MASK) {
498 		case PKT_TX_UDP_CKSUM:
499 			hdr->csum_start = cookie->l2_len + cookie->l3_len;
500 			hdr->csum_offset = offsetof(struct udp_hdr,
501 				dgram_cksum);
502 			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
503 			break;
504 
505 		case PKT_TX_TCP_CKSUM:
506 			hdr->csum_start = cookie->l2_len + cookie->l3_len;
507 			hdr->csum_offset = offsetof(struct tcp_hdr, cksum);
508 			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
509 			break;
510 
511 		default:
512 			ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
513 			ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
514 			ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
515 			break;
516 		}
517 
518 		/* TCP Segmentation Offload */
519 		if (cookie->ol_flags & PKT_TX_TCP_SEG) {
520 			virtio_tso_fix_cksum(cookie);
521 			hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
522 				VIRTIO_NET_HDR_GSO_TCPV6 :
523 				VIRTIO_NET_HDR_GSO_TCPV4;
524 			hdr->gso_size = cookie->tso_segsz;
525 			hdr->hdr_len =
526 				cookie->l2_len +
527 				cookie->l3_len +
528 				cookie->l4_len;
529 		} else {
530 			ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
531 			ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
532 			ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
533 		}
534 	}
535 }
536 
537 static inline void
538 virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
539 			struct rte_mbuf **cookies,
540 			uint16_t num)
541 {
542 	struct vq_desc_extra *dxp;
543 	struct virtqueue *vq = txvq->vq;
544 	struct vring_desc *start_dp;
545 	struct virtio_net_hdr *hdr;
546 	uint16_t idx;
547 	uint16_t head_size = vq->hw->vtnet_hdr_size;
548 	uint16_t i = 0;
549 
550 	idx = vq->vq_desc_head_idx;
551 	start_dp = vq->vq_ring.desc;
552 
553 	while (i < num) {
554 		idx = idx & (vq->vq_nentries - 1);
555 		dxp = &vq->vq_descx[idx];
556 		dxp->cookie = (void *)cookies[i];
557 		dxp->ndescs = 1;
558 
559 		hdr = (struct virtio_net_hdr *)
560 			rte_pktmbuf_prepend(cookies[i], head_size);
561 		cookies[i]->pkt_len -= head_size;
562 
563 		/* if offload disabled, it is not zeroed below, do it now */
564 		if (!vq->hw->has_tx_offload) {
565 			ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
566 			ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
567 			ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
568 			ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
569 			ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
570 			ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
571 		}
572 
573 		virtqueue_xmit_offload(hdr, cookies[i],
574 				vq->hw->has_tx_offload);
575 
576 		start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookies[i], vq);
577 		start_dp[idx].len   = cookies[i]->data_len;
578 		start_dp[idx].flags = 0;
579 
580 		vq_update_avail_ring(vq, idx);
581 
582 		idx++;
583 		i++;
584 	};
585 
586 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
587 	vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
588 }
589 
590 static inline void
591 virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
592 			      uint16_t needed, int can_push)
593 {
594 	struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
595 	struct vq_desc_extra *dxp;
596 	struct virtqueue *vq = txvq->vq;
597 	struct vring_packed_desc *start_dp, *head_dp;
598 	uint16_t idx, id, head_idx, head_flags;
599 	uint16_t head_size = vq->hw->vtnet_hdr_size;
600 	struct virtio_net_hdr *hdr;
601 	uint16_t prev;
602 
603 	id = vq->vq_desc_head_idx;
604 
605 	dxp = &vq->vq_descx[id];
606 	dxp->ndescs = needed;
607 	dxp->cookie = cookie;
608 
609 	head_idx = vq->vq_avail_idx;
610 	idx = head_idx;
611 	prev = head_idx;
612 	start_dp = vq->ring_packed.desc_packed;
613 
614 	head_dp = &vq->ring_packed.desc_packed[idx];
615 	head_flags = cookie->next ? VRING_DESC_F_NEXT : 0;
616 	head_flags |= vq->avail_used_flags;
617 
618 	if (can_push) {
619 		/* prepend cannot fail, checked by caller */
620 		hdr = (struct virtio_net_hdr *)
621 			rte_pktmbuf_prepend(cookie, head_size);
622 		/* rte_pktmbuf_prepend() counts the hdr size to the pkt length,
623 		 * which is wrong. Below subtract restores correct pkt size.
624 		 */
625 		cookie->pkt_len -= head_size;
626 
627 		/* if offload disabled, it is not zeroed below, do it now */
628 		if (!vq->hw->has_tx_offload) {
629 			ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
630 			ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
631 			ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
632 			ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
633 			ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
634 			ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
635 		}
636 	} else {
637 		/* setup first tx ring slot to point to header
638 		 * stored in reserved region.
639 		 */
640 		start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
641 			RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
642 		start_dp[idx].len   = vq->hw->vtnet_hdr_size;
643 		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
644 		idx++;
645 		if (idx >= vq->vq_nentries) {
646 			idx -= vq->vq_nentries;
647 			vq->avail_wrap_counter ^= 1;
648 			vq->avail_used_flags =
649 				VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
650 				VRING_DESC_F_USED(!vq->avail_wrap_counter);
651 		}
652 	}
653 
654 	virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
655 
656 	do {
657 		uint16_t flags;
658 
659 		start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
660 		start_dp[idx].len  = cookie->data_len;
661 		if (likely(idx != head_idx)) {
662 			flags = cookie->next ? VRING_DESC_F_NEXT : 0;
663 			flags |= vq->avail_used_flags;
664 			start_dp[idx].flags = flags;
665 		}
666 		prev = idx;
667 		idx++;
668 		if (idx >= vq->vq_nentries) {
669 			idx -= vq->vq_nentries;
670 			vq->avail_wrap_counter ^= 1;
671 			vq->avail_used_flags =
672 				VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
673 				VRING_DESC_F_USED(!vq->avail_wrap_counter);
674 		}
675 	} while ((cookie = cookie->next) != NULL);
676 
677 	start_dp[prev].id = id;
678 
679 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
680 
681 	vq->vq_desc_head_idx = dxp->next;
682 	if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
683 		vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
684 
685 	vq->vq_avail_idx = idx;
686 
687 	rte_smp_wmb();
688 	head_dp->flags = head_flags;
689 }
690 
691 static inline void
692 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
693 			uint16_t needed, int use_indirect, int can_push,
694 			int in_order)
695 {
696 	struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
697 	struct vq_desc_extra *dxp;
698 	struct virtqueue *vq = txvq->vq;
699 	struct vring_desc *start_dp;
700 	uint16_t seg_num = cookie->nb_segs;
701 	uint16_t head_idx, idx;
702 	uint16_t head_size = vq->hw->vtnet_hdr_size;
703 	struct virtio_net_hdr *hdr;
704 
705 	head_idx = vq->vq_desc_head_idx;
706 	idx = head_idx;
707 	dxp = &vq->vq_descx[idx];
708 	dxp->cookie = (void *)cookie;
709 	dxp->ndescs = needed;
710 
711 	start_dp = vq->vq_ring.desc;
712 
713 	if (can_push) {
714 		/* prepend cannot fail, checked by caller */
715 		hdr = (struct virtio_net_hdr *)
716 			rte_pktmbuf_prepend(cookie, head_size);
717 		/* rte_pktmbuf_prepend() counts the hdr size to the pkt length,
718 		 * which is wrong. Below subtract restores correct pkt size.
719 		 */
720 		cookie->pkt_len -= head_size;
721 
722 		/* if offload disabled, it is not zeroed below, do it now */
723 		if (!vq->hw->has_tx_offload) {
724 			ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
725 			ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
726 			ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
727 			ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
728 			ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
729 			ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
730 		}
731 	} else if (use_indirect) {
732 		/* setup tx ring slot to point to indirect
733 		 * descriptor list stored in reserved region.
734 		 *
735 		 * the first slot in indirect ring is already preset
736 		 * to point to the header in reserved region
737 		 */
738 		start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
739 			RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
740 		start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
741 		start_dp[idx].flags = VRING_DESC_F_INDIRECT;
742 		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
743 
744 		/* loop below will fill in rest of the indirect elements */
745 		start_dp = txr[idx].tx_indir;
746 		idx = 1;
747 	} else {
748 		/* setup first tx ring slot to point to header
749 		 * stored in reserved region.
750 		 */
751 		start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
752 			RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
753 		start_dp[idx].len   = vq->hw->vtnet_hdr_size;
754 		start_dp[idx].flags = VRING_DESC_F_NEXT;
755 		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
756 
757 		idx = start_dp[idx].next;
758 	}
759 
760 	virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
761 
762 	do {
763 		start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
764 		start_dp[idx].len   = cookie->data_len;
765 		start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
766 		idx = start_dp[idx].next;
767 	} while ((cookie = cookie->next) != NULL);
768 
769 	if (use_indirect)
770 		idx = vq->vq_ring.desc[head_idx].next;
771 
772 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
773 
774 	vq->vq_desc_head_idx = idx;
775 	vq_update_avail_ring(vq, head_idx);
776 
777 	if (!in_order) {
778 		if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
779 			vq->vq_desc_tail_idx = idx;
780 	}
781 }
782 
783 void
784 virtio_dev_cq_start(struct rte_eth_dev *dev)
785 {
786 	struct virtio_hw *hw = dev->data->dev_private;
787 
788 	if (hw->cvq && hw->cvq->vq) {
789 		rte_spinlock_init(&hw->cvq->lock);
790 		VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
791 	}
792 }
793 
794 int
795 virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
796 			uint16_t queue_idx,
797 			uint16_t nb_desc,
798 			unsigned int socket_id __rte_unused,
799 			const struct rte_eth_rxconf *rx_conf __rte_unused,
800 			struct rte_mempool *mp)
801 {
802 	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
803 	struct virtio_hw *hw = dev->data->dev_private;
804 	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
805 	struct virtnet_rx *rxvq;
806 
807 	PMD_INIT_FUNC_TRACE();
808 
809 	if (nb_desc == 0 || nb_desc > vq->vq_nentries)
810 		nb_desc = vq->vq_nentries;
811 	vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
812 
813 	rxvq = &vq->rxq;
814 	rxvq->queue_id = queue_idx;
815 	rxvq->mpool = mp;
816 	if (rxvq->mpool == NULL) {
817 		rte_exit(EXIT_FAILURE,
818 			"Cannot allocate mbufs for rx virtqueue");
819 	}
820 
821 	dev->data->rx_queues[queue_idx] = rxvq;
822 
823 	return 0;
824 }
825 
826 int
827 virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
828 {
829 	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
830 	struct virtio_hw *hw = dev->data->dev_private;
831 	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
832 	struct virtnet_rx *rxvq = &vq->rxq;
833 	struct rte_mbuf *m;
834 	uint16_t desc_idx;
835 	int error, nbufs, i;
836 
837 	PMD_INIT_FUNC_TRACE();
838 
839 	/* Allocate blank mbufs for the each rx descriptor */
840 	nbufs = 0;
841 
842 	if (hw->use_simple_rx) {
843 		for (desc_idx = 0; desc_idx < vq->vq_nentries;
844 		     desc_idx++) {
845 			vq->vq_ring.avail->ring[desc_idx] = desc_idx;
846 			vq->vq_ring.desc[desc_idx].flags =
847 				VRING_DESC_F_WRITE;
848 		}
849 
850 		virtio_rxq_vec_setup(rxvq);
851 	}
852 
853 	memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
854 	for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
855 	     desc_idx++) {
856 		vq->sw_ring[vq->vq_nentries + desc_idx] =
857 			&rxvq->fake_mbuf;
858 	}
859 
860 	if (hw->use_simple_rx) {
861 		while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
862 			virtio_rxq_rearm_vec(rxvq);
863 			nbufs += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
864 		}
865 	} else if (hw->use_inorder_rx) {
866 		if ((!virtqueue_full(vq))) {
867 			uint16_t free_cnt = vq->vq_free_cnt;
868 			struct rte_mbuf *pkts[free_cnt];
869 
870 			if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, pkts,
871 				free_cnt)) {
872 				error = virtqueue_enqueue_refill_inorder(vq,
873 						pkts,
874 						free_cnt);
875 				if (unlikely(error)) {
876 					for (i = 0; i < free_cnt; i++)
877 						rte_pktmbuf_free(pkts[i]);
878 				}
879 			}
880 
881 			nbufs += free_cnt;
882 			vq_update_avail_idx(vq);
883 		}
884 	} else {
885 		while (!virtqueue_full(vq)) {
886 			m = rte_mbuf_raw_alloc(rxvq->mpool);
887 			if (m == NULL)
888 				break;
889 
890 			/* Enqueue allocated buffers */
891 			if (vtpci_packed_queue(vq->hw))
892 				error = virtqueue_enqueue_recv_refill_packed(vq,
893 						&m, 1);
894 			else
895 				error = virtqueue_enqueue_recv_refill(vq, m);
896 			if (error) {
897 				rte_pktmbuf_free(m);
898 				break;
899 			}
900 			nbufs++;
901 		}
902 
903 		if (!vtpci_packed_queue(vq->hw))
904 			vq_update_avail_idx(vq);
905 	}
906 
907 	PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
908 
909 	VIRTQUEUE_DUMP(vq);
910 
911 	return 0;
912 }
913 
914 /*
915  * struct rte_eth_dev *dev: Used to update dev
916  * uint16_t nb_desc: Defaults to values read from config space
917  * unsigned int socket_id: Used to allocate memzone
918  * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
919  * uint16_t queue_idx: Just used as an index in dev txq list
920  */
921 int
922 virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
923 			uint16_t queue_idx,
924 			uint16_t nb_desc,
925 			unsigned int socket_id __rte_unused,
926 			const struct rte_eth_txconf *tx_conf)
927 {
928 	uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
929 	struct virtio_hw *hw = dev->data->dev_private;
930 	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
931 	struct virtnet_tx *txvq;
932 	uint16_t tx_free_thresh;
933 
934 	PMD_INIT_FUNC_TRACE();
935 
936 	if (nb_desc == 0 || nb_desc > vq->vq_nentries)
937 		nb_desc = vq->vq_nentries;
938 	vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
939 
940 	txvq = &vq->txq;
941 	txvq->queue_id = queue_idx;
942 
943 	tx_free_thresh = tx_conf->tx_free_thresh;
944 	if (tx_free_thresh == 0)
945 		tx_free_thresh =
946 			RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
947 
948 	if (tx_free_thresh >= (vq->vq_nentries - 3)) {
949 		RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
950 			"number of TX entries minus 3 (%u)."
951 			" (tx_free_thresh=%u port=%u queue=%u)\n",
952 			vq->vq_nentries - 3,
953 			tx_free_thresh, dev->data->port_id, queue_idx);
954 		return -EINVAL;
955 	}
956 
957 	vq->vq_free_thresh = tx_free_thresh;
958 
959 	dev->data->tx_queues[queue_idx] = txvq;
960 	return 0;
961 }
962 
963 int
964 virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
965 				uint16_t queue_idx)
966 {
967 	uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
968 	struct virtio_hw *hw = dev->data->dev_private;
969 	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
970 
971 	PMD_INIT_FUNC_TRACE();
972 
973 	if (!vtpci_packed_queue(hw)) {
974 		if (hw->use_inorder_tx)
975 			vq->vq_ring.desc[vq->vq_nentries - 1].next = 0;
976 	}
977 
978 	VIRTQUEUE_DUMP(vq);
979 
980 	return 0;
981 }
982 
983 static inline void
984 virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
985 {
986 	int error;
987 	/*
988 	 * Requeue the discarded mbuf. This should always be
989 	 * successful since it was just dequeued.
990 	 */
991 	if (vtpci_packed_queue(vq->hw))
992 		error = virtqueue_enqueue_recv_refill_packed(vq, &m, 1);
993 	else
994 		error = virtqueue_enqueue_recv_refill(vq, m);
995 
996 	if (unlikely(error)) {
997 		RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
998 		rte_pktmbuf_free(m);
999 	}
1000 }
1001 
1002 static inline void
1003 virtio_discard_rxbuf_inorder(struct virtqueue *vq, struct rte_mbuf *m)
1004 {
1005 	int error;
1006 
1007 	error = virtqueue_enqueue_refill_inorder(vq, &m, 1);
1008 	if (unlikely(error)) {
1009 		RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
1010 		rte_pktmbuf_free(m);
1011 	}
1012 }
1013 
1014 static inline void
1015 virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
1016 {
1017 	uint32_t s = mbuf->pkt_len;
1018 	struct ether_addr *ea;
1019 
1020 	stats->bytes += s;
1021 
1022 	if (s == 64) {
1023 		stats->size_bins[1]++;
1024 	} else if (s > 64 && s < 1024) {
1025 		uint32_t bin;
1026 
1027 		/* count zeros, and offset into correct bin */
1028 		bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
1029 		stats->size_bins[bin]++;
1030 	} else {
1031 		if (s < 64)
1032 			stats->size_bins[0]++;
1033 		else if (s < 1519)
1034 			stats->size_bins[6]++;
1035 		else if (s >= 1519)
1036 			stats->size_bins[7]++;
1037 	}
1038 
1039 	ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *);
1040 	if (is_multicast_ether_addr(ea)) {
1041 		if (is_broadcast_ether_addr(ea))
1042 			stats->broadcast++;
1043 		else
1044 			stats->multicast++;
1045 	}
1046 }
1047 
1048 static inline void
1049 virtio_rx_stats_updated(struct virtnet_rx *rxvq, struct rte_mbuf *m)
1050 {
1051 	VIRTIO_DUMP_PACKET(m, m->data_len);
1052 
1053 	virtio_update_packet_stats(&rxvq->stats, m);
1054 }
1055 
1056 /* Optionally fill offload information in structure */
1057 static inline int
1058 virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
1059 {
1060 	struct rte_net_hdr_lens hdr_lens;
1061 	uint32_t hdrlen, ptype;
1062 	int l4_supported = 0;
1063 
1064 	/* nothing to do */
1065 	if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
1066 		return 0;
1067 
1068 	m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
1069 
1070 	ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
1071 	m->packet_type = ptype;
1072 	if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
1073 	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
1074 	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
1075 		l4_supported = 1;
1076 
1077 	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
1078 		hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
1079 		if (hdr->csum_start <= hdrlen && l4_supported) {
1080 			m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
1081 		} else {
1082 			/* Unknown proto or tunnel, do sw cksum. We can assume
1083 			 * the cksum field is in the first segment since the
1084 			 * buffers we provided to the host are large enough.
1085 			 * In case of SCTP, this will be wrong since it's a CRC
1086 			 * but there's nothing we can do.
1087 			 */
1088 			uint16_t csum = 0, off;
1089 
1090 			rte_raw_cksum_mbuf(m, hdr->csum_start,
1091 				rte_pktmbuf_pkt_len(m) - hdr->csum_start,
1092 				&csum);
1093 			if (likely(csum != 0xffff))
1094 				csum = ~csum;
1095 			off = hdr->csum_offset + hdr->csum_start;
1096 			if (rte_pktmbuf_data_len(m) >= off + 1)
1097 				*rte_pktmbuf_mtod_offset(m, uint16_t *,
1098 					off) = csum;
1099 		}
1100 	} else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
1101 		m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
1102 	}
1103 
1104 	/* GSO request, save required information in mbuf */
1105 	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
1106 		/* Check unsupported modes */
1107 		if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
1108 		    (hdr->gso_size == 0)) {
1109 			return -EINVAL;
1110 		}
1111 
1112 		/* Update mss lengthes in mbuf */
1113 		m->tso_segsz = hdr->gso_size;
1114 		switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
1115 			case VIRTIO_NET_HDR_GSO_TCPV4:
1116 			case VIRTIO_NET_HDR_GSO_TCPV6:
1117 				m->ol_flags |= PKT_RX_LRO | \
1118 					PKT_RX_L4_CKSUM_NONE;
1119 				break;
1120 			default:
1121 				return -EINVAL;
1122 		}
1123 	}
1124 
1125 	return 0;
1126 }
1127 
1128 #define VIRTIO_MBUF_BURST_SZ 64
1129 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
1130 uint16_t
1131 virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
1132 {
1133 	struct virtnet_rx *rxvq = rx_queue;
1134 	struct virtqueue *vq = rxvq->vq;
1135 	struct virtio_hw *hw = vq->hw;
1136 	struct rte_mbuf *rxm, *new_mbuf;
1137 	uint16_t nb_used, num, nb_rx;
1138 	uint32_t len[VIRTIO_MBUF_BURST_SZ];
1139 	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1140 	int error;
1141 	uint32_t i, nb_enqueued;
1142 	uint32_t hdr_size;
1143 	struct virtio_net_hdr *hdr;
1144 
1145 	nb_rx = 0;
1146 	if (unlikely(hw->started == 0))
1147 		return nb_rx;
1148 
1149 	nb_used = VIRTQUEUE_NUSED(vq);
1150 
1151 	virtio_rmb();
1152 
1153 	num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1154 	if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1155 		num = VIRTIO_MBUF_BURST_SZ;
1156 	if (likely(num > DESC_PER_CACHELINE))
1157 		num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1158 
1159 	num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1160 	PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
1161 
1162 	nb_enqueued = 0;
1163 	hdr_size = hw->vtnet_hdr_size;
1164 
1165 	for (i = 0; i < num ; i++) {
1166 		rxm = rcv_pkts[i];
1167 
1168 		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1169 
1170 		if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1171 			PMD_RX_LOG(ERR, "Packet drop");
1172 			nb_enqueued++;
1173 			virtio_discard_rxbuf(vq, rxm);
1174 			rxvq->stats.errors++;
1175 			continue;
1176 		}
1177 
1178 		rxm->port = rxvq->port_id;
1179 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
1180 		rxm->ol_flags = 0;
1181 		rxm->vlan_tci = 0;
1182 
1183 		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1184 		rxm->data_len = (uint16_t)(len[i] - hdr_size);
1185 
1186 		hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1187 			RTE_PKTMBUF_HEADROOM - hdr_size);
1188 
1189 		if (hw->vlan_strip)
1190 			rte_vlan_strip(rxm);
1191 
1192 		if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1193 			virtio_discard_rxbuf(vq, rxm);
1194 			rxvq->stats.errors++;
1195 			continue;
1196 		}
1197 
1198 		virtio_rx_stats_updated(rxvq, rxm);
1199 
1200 		rx_pkts[nb_rx++] = rxm;
1201 	}
1202 
1203 	rxvq->stats.packets += nb_rx;
1204 
1205 	/* Allocate new mbuf for the used descriptor */
1206 	while (likely(!virtqueue_full(vq))) {
1207 		new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
1208 		if (unlikely(new_mbuf == NULL)) {
1209 			struct rte_eth_dev *dev
1210 				= &rte_eth_devices[rxvq->port_id];
1211 			dev->data->rx_mbuf_alloc_failed++;
1212 			break;
1213 		}
1214 		error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
1215 		if (unlikely(error)) {
1216 			rte_pktmbuf_free(new_mbuf);
1217 			break;
1218 		}
1219 		nb_enqueued++;
1220 	}
1221 
1222 	if (likely(nb_enqueued)) {
1223 		vq_update_avail_idx(vq);
1224 
1225 		if (unlikely(virtqueue_kick_prepare(vq))) {
1226 			virtqueue_notify(vq);
1227 			PMD_RX_LOG(DEBUG, "Notified");
1228 		}
1229 	}
1230 
1231 	return nb_rx;
1232 }
1233 
1234 uint16_t
1235 virtio_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
1236 			uint16_t nb_pkts)
1237 {
1238 	struct virtnet_rx *rxvq = rx_queue;
1239 	struct virtqueue *vq = rxvq->vq;
1240 	struct virtio_hw *hw = vq->hw;
1241 	struct rte_mbuf *rxm, *new_mbuf;
1242 	uint16_t num, nb_rx;
1243 	uint32_t len[VIRTIO_MBUF_BURST_SZ];
1244 	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1245 	int error;
1246 	uint32_t i, nb_enqueued;
1247 	uint32_t hdr_size;
1248 	struct virtio_net_hdr *hdr;
1249 
1250 	nb_rx = 0;
1251 	if (unlikely(hw->started == 0))
1252 		return nb_rx;
1253 
1254 	num = RTE_MIN(VIRTIO_MBUF_BURST_SZ, nb_pkts);
1255 	if (likely(num > DESC_PER_CACHELINE))
1256 		num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1257 
1258 	num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1259 	PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1260 
1261 	nb_enqueued = 0;
1262 	hdr_size = hw->vtnet_hdr_size;
1263 
1264 	for (i = 0; i < num; i++) {
1265 		rxm = rcv_pkts[i];
1266 
1267 		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1268 
1269 		if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1270 			PMD_RX_LOG(ERR, "Packet drop");
1271 			nb_enqueued++;
1272 			virtio_discard_rxbuf(vq, rxm);
1273 			rxvq->stats.errors++;
1274 			continue;
1275 		}
1276 
1277 		rxm->port = rxvq->port_id;
1278 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
1279 		rxm->ol_flags = 0;
1280 		rxm->vlan_tci = 0;
1281 
1282 		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1283 		rxm->data_len = (uint16_t)(len[i] - hdr_size);
1284 
1285 		hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1286 			RTE_PKTMBUF_HEADROOM - hdr_size);
1287 
1288 		if (hw->vlan_strip)
1289 			rte_vlan_strip(rxm);
1290 
1291 		if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1292 			virtio_discard_rxbuf(vq, rxm);
1293 			rxvq->stats.errors++;
1294 			continue;
1295 		}
1296 
1297 		virtio_rx_stats_updated(rxvq, rxm);
1298 
1299 		rx_pkts[nb_rx++] = rxm;
1300 	}
1301 
1302 	rxvq->stats.packets += nb_rx;
1303 
1304 	/* Allocate new mbuf for the used descriptor */
1305 	while (likely(!virtqueue_full(vq))) {
1306 		new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
1307 		if (unlikely(new_mbuf == NULL)) {
1308 			struct rte_eth_dev *dev =
1309 				&rte_eth_devices[rxvq->port_id];
1310 			dev->data->rx_mbuf_alloc_failed++;
1311 			break;
1312 		}
1313 		error = virtqueue_enqueue_recv_refill_packed(vq, &new_mbuf, 1);
1314 		if (unlikely(error)) {
1315 			rte_pktmbuf_free(new_mbuf);
1316 			break;
1317 		}
1318 		nb_enqueued++;
1319 	}
1320 
1321 	if (likely(nb_enqueued)) {
1322 		if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1323 			virtqueue_notify(vq);
1324 			PMD_RX_LOG(DEBUG, "Notified");
1325 		}
1326 	}
1327 
1328 	return nb_rx;
1329 }
1330 
1331 
1332 uint16_t
1333 virtio_recv_pkts_inorder(void *rx_queue,
1334 			struct rte_mbuf **rx_pkts,
1335 			uint16_t nb_pkts)
1336 {
1337 	struct virtnet_rx *rxvq = rx_queue;
1338 	struct virtqueue *vq = rxvq->vq;
1339 	struct virtio_hw *hw = vq->hw;
1340 	struct rte_mbuf *rxm;
1341 	struct rte_mbuf *prev;
1342 	uint16_t nb_used, num, nb_rx;
1343 	uint32_t len[VIRTIO_MBUF_BURST_SZ];
1344 	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1345 	int error;
1346 	uint32_t nb_enqueued;
1347 	uint32_t seg_num;
1348 	uint32_t seg_res;
1349 	uint32_t hdr_size;
1350 	int32_t i;
1351 
1352 	nb_rx = 0;
1353 	if (unlikely(hw->started == 0))
1354 		return nb_rx;
1355 
1356 	nb_used = VIRTQUEUE_NUSED(vq);
1357 	nb_used = RTE_MIN(nb_used, nb_pkts);
1358 	nb_used = RTE_MIN(nb_used, VIRTIO_MBUF_BURST_SZ);
1359 
1360 	virtio_rmb();
1361 
1362 	PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1363 
1364 	nb_enqueued = 0;
1365 	seg_num = 1;
1366 	seg_res = 0;
1367 	hdr_size = hw->vtnet_hdr_size;
1368 
1369 	num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len, nb_used);
1370 
1371 	for (i = 0; i < num; i++) {
1372 		struct virtio_net_hdr_mrg_rxbuf *header;
1373 
1374 		PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1375 		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1376 
1377 		rxm = rcv_pkts[i];
1378 
1379 		if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1380 			PMD_RX_LOG(ERR, "Packet drop");
1381 			nb_enqueued++;
1382 			virtio_discard_rxbuf_inorder(vq, rxm);
1383 			rxvq->stats.errors++;
1384 			continue;
1385 		}
1386 
1387 		header = (struct virtio_net_hdr_mrg_rxbuf *)
1388 			 ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1389 			 - hdr_size);
1390 
1391 		if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1392 			seg_num = header->num_buffers;
1393 			if (seg_num == 0)
1394 				seg_num = 1;
1395 		} else {
1396 			seg_num = 1;
1397 		}
1398 
1399 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
1400 		rxm->nb_segs = seg_num;
1401 		rxm->ol_flags = 0;
1402 		rxm->vlan_tci = 0;
1403 		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1404 		rxm->data_len = (uint16_t)(len[i] - hdr_size);
1405 
1406 		rxm->port = rxvq->port_id;
1407 
1408 		rx_pkts[nb_rx] = rxm;
1409 		prev = rxm;
1410 
1411 		if (vq->hw->has_rx_offload &&
1412 				virtio_rx_offload(rxm, &header->hdr) < 0) {
1413 			virtio_discard_rxbuf_inorder(vq, rxm);
1414 			rxvq->stats.errors++;
1415 			continue;
1416 		}
1417 
1418 		if (hw->vlan_strip)
1419 			rte_vlan_strip(rx_pkts[nb_rx]);
1420 
1421 		seg_res = seg_num - 1;
1422 
1423 		/* Merge remaining segments */
1424 		while (seg_res != 0 && i < (num - 1)) {
1425 			i++;
1426 
1427 			rxm = rcv_pkts[i];
1428 			rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1429 			rxm->pkt_len = (uint32_t)(len[i]);
1430 			rxm->data_len = (uint16_t)(len[i]);
1431 
1432 			rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1433 			rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1434 
1435 			if (prev)
1436 				prev->next = rxm;
1437 
1438 			prev = rxm;
1439 			seg_res -= 1;
1440 		}
1441 
1442 		if (!seg_res) {
1443 			virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1444 			nb_rx++;
1445 		}
1446 	}
1447 
1448 	/* Last packet still need merge segments */
1449 	while (seg_res != 0) {
1450 		uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1451 					VIRTIO_MBUF_BURST_SZ);
1452 
1453 		prev = rcv_pkts[nb_rx];
1454 		if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1455 			num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len,
1456 							   rcv_cnt);
1457 			uint16_t extra_idx = 0;
1458 
1459 			rcv_cnt = num;
1460 			while (extra_idx < rcv_cnt) {
1461 				rxm = rcv_pkts[extra_idx];
1462 				rxm->data_off =
1463 					RTE_PKTMBUF_HEADROOM - hdr_size;
1464 				rxm->pkt_len = (uint32_t)(len[extra_idx]);
1465 				rxm->data_len = (uint16_t)(len[extra_idx]);
1466 				prev->next = rxm;
1467 				prev = rxm;
1468 				rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1469 				rx_pkts[nb_rx]->data_len += len[extra_idx];
1470 				extra_idx += 1;
1471 			};
1472 			seg_res -= rcv_cnt;
1473 
1474 			if (!seg_res) {
1475 				virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1476 				nb_rx++;
1477 			}
1478 		} else {
1479 			PMD_RX_LOG(ERR,
1480 					"No enough segments for packet.");
1481 			virtio_discard_rxbuf_inorder(vq, prev);
1482 			rxvq->stats.errors++;
1483 			break;
1484 		}
1485 	}
1486 
1487 	rxvq->stats.packets += nb_rx;
1488 
1489 	/* Allocate new mbuf for the used descriptor */
1490 
1491 	if (likely(!virtqueue_full(vq))) {
1492 		/* free_cnt may include mrg descs */
1493 		uint16_t free_cnt = vq->vq_free_cnt;
1494 		struct rte_mbuf *new_pkts[free_cnt];
1495 
1496 		if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1497 			error = virtqueue_enqueue_refill_inorder(vq, new_pkts,
1498 					free_cnt);
1499 			if (unlikely(error)) {
1500 				for (i = 0; i < free_cnt; i++)
1501 					rte_pktmbuf_free(new_pkts[i]);
1502 			}
1503 			nb_enqueued += free_cnt;
1504 		} else {
1505 			struct rte_eth_dev *dev =
1506 				&rte_eth_devices[rxvq->port_id];
1507 			dev->data->rx_mbuf_alloc_failed += free_cnt;
1508 		}
1509 	}
1510 
1511 	if (likely(nb_enqueued)) {
1512 		vq_update_avail_idx(vq);
1513 
1514 		if (unlikely(virtqueue_kick_prepare(vq))) {
1515 			virtqueue_notify(vq);
1516 			PMD_RX_LOG(DEBUG, "Notified");
1517 		}
1518 	}
1519 
1520 	return nb_rx;
1521 }
1522 
1523 uint16_t
1524 virtio_recv_mergeable_pkts(void *rx_queue,
1525 			struct rte_mbuf **rx_pkts,
1526 			uint16_t nb_pkts)
1527 {
1528 	struct virtnet_rx *rxvq = rx_queue;
1529 	struct virtqueue *vq = rxvq->vq;
1530 	struct virtio_hw *hw = vq->hw;
1531 	struct rte_mbuf *rxm, *new_mbuf;
1532 	uint16_t nb_used, num, nb_rx;
1533 	uint32_t len[VIRTIO_MBUF_BURST_SZ];
1534 	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1535 	struct rte_mbuf *prev;
1536 	int error;
1537 	uint32_t i, nb_enqueued;
1538 	uint32_t seg_num;
1539 	uint16_t extra_idx;
1540 	uint32_t seg_res;
1541 	uint32_t hdr_size;
1542 
1543 	nb_rx = 0;
1544 	if (unlikely(hw->started == 0))
1545 		return nb_rx;
1546 
1547 	nb_used = VIRTQUEUE_NUSED(vq);
1548 
1549 	virtio_rmb();
1550 
1551 	PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1552 
1553 	i = 0;
1554 	nb_enqueued = 0;
1555 	seg_num = 0;
1556 	extra_idx = 0;
1557 	seg_res = 0;
1558 	hdr_size = hw->vtnet_hdr_size;
1559 
1560 	while (i < nb_used) {
1561 		struct virtio_net_hdr_mrg_rxbuf *header;
1562 
1563 		if (nb_rx == nb_pkts)
1564 			break;
1565 
1566 		num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, 1);
1567 		if (num != 1)
1568 			continue;
1569 
1570 		i++;
1571 
1572 		PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1573 		PMD_RX_LOG(DEBUG, "packet len:%d", len[0]);
1574 
1575 		rxm = rcv_pkts[0];
1576 
1577 		if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) {
1578 			PMD_RX_LOG(ERR, "Packet drop");
1579 			nb_enqueued++;
1580 			virtio_discard_rxbuf(vq, rxm);
1581 			rxvq->stats.errors++;
1582 			continue;
1583 		}
1584 
1585 		header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr +
1586 			RTE_PKTMBUF_HEADROOM - hdr_size);
1587 		seg_num = header->num_buffers;
1588 
1589 		if (seg_num == 0)
1590 			seg_num = 1;
1591 
1592 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
1593 		rxm->nb_segs = seg_num;
1594 		rxm->ol_flags = 0;
1595 		rxm->vlan_tci = 0;
1596 		rxm->pkt_len = (uint32_t)(len[0] - hdr_size);
1597 		rxm->data_len = (uint16_t)(len[0] - hdr_size);
1598 
1599 		rxm->port = rxvq->port_id;
1600 		rx_pkts[nb_rx] = rxm;
1601 		prev = rxm;
1602 
1603 		if (hw->has_rx_offload &&
1604 				virtio_rx_offload(rxm, &header->hdr) < 0) {
1605 			virtio_discard_rxbuf(vq, rxm);
1606 			rxvq->stats.errors++;
1607 			continue;
1608 		}
1609 
1610 		seg_res = seg_num - 1;
1611 
1612 		while (seg_res != 0) {
1613 			/*
1614 			 * Get extra segments for current uncompleted packet.
1615 			 */
1616 			uint16_t  rcv_cnt =
1617 				RTE_MIN(seg_res, RTE_DIM(rcv_pkts));
1618 			if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1619 				uint32_t rx_num =
1620 					virtqueue_dequeue_burst_rx(vq,
1621 					rcv_pkts, len, rcv_cnt);
1622 				i += rx_num;
1623 				rcv_cnt = rx_num;
1624 			} else {
1625 				PMD_RX_LOG(ERR,
1626 					   "No enough segments for packet.");
1627 				nb_enqueued++;
1628 				virtio_discard_rxbuf(vq, rxm);
1629 				rxvq->stats.errors++;
1630 				break;
1631 			}
1632 
1633 			extra_idx = 0;
1634 
1635 			while (extra_idx < rcv_cnt) {
1636 				rxm = rcv_pkts[extra_idx];
1637 
1638 				rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1639 				rxm->pkt_len = (uint32_t)(len[extra_idx]);
1640 				rxm->data_len = (uint16_t)(len[extra_idx]);
1641 
1642 				if (prev)
1643 					prev->next = rxm;
1644 
1645 				prev = rxm;
1646 				rx_pkts[nb_rx]->pkt_len += rxm->pkt_len;
1647 				extra_idx++;
1648 			};
1649 			seg_res -= rcv_cnt;
1650 		}
1651 
1652 		if (hw->vlan_strip)
1653 			rte_vlan_strip(rx_pkts[nb_rx]);
1654 
1655 		VIRTIO_DUMP_PACKET(rx_pkts[nb_rx],
1656 			rx_pkts[nb_rx]->data_len);
1657 
1658 		virtio_update_packet_stats(&rxvq->stats, rx_pkts[nb_rx]);
1659 		nb_rx++;
1660 	}
1661 
1662 	rxvq->stats.packets += nb_rx;
1663 
1664 	/* Allocate new mbuf for the used descriptor */
1665 	while (likely(!virtqueue_full(vq))) {
1666 		new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
1667 		if (unlikely(new_mbuf == NULL)) {
1668 			struct rte_eth_dev *dev
1669 				= &rte_eth_devices[rxvq->port_id];
1670 			dev->data->rx_mbuf_alloc_failed++;
1671 			break;
1672 		}
1673 		error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
1674 		if (unlikely(error)) {
1675 			rte_pktmbuf_free(new_mbuf);
1676 			break;
1677 		}
1678 		nb_enqueued++;
1679 	}
1680 
1681 	if (likely(nb_enqueued)) {
1682 		vq_update_avail_idx(vq);
1683 
1684 		if (unlikely(virtqueue_kick_prepare(vq))) {
1685 			virtqueue_notify(vq);
1686 			PMD_RX_LOG(DEBUG, "Notified");
1687 		}
1688 	}
1689 
1690 	return nb_rx;
1691 }
1692 
1693 uint16_t
1694 virtio_recv_mergeable_pkts_packed(void *rx_queue,
1695 			struct rte_mbuf **rx_pkts,
1696 			uint16_t nb_pkts)
1697 {
1698 	struct virtnet_rx *rxvq = rx_queue;
1699 	struct virtqueue *vq = rxvq->vq;
1700 	struct virtio_hw *hw = vq->hw;
1701 	struct rte_mbuf *rxm;
1702 	struct rte_mbuf *prev = NULL;
1703 	uint16_t num, nb_rx = 0;
1704 	uint32_t len[VIRTIO_MBUF_BURST_SZ];
1705 	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1706 	uint32_t nb_enqueued = 0;
1707 	uint32_t seg_num = 0;
1708 	uint32_t seg_res = 0;
1709 	uint32_t hdr_size = hw->vtnet_hdr_size;
1710 	int32_t i;
1711 	int error;
1712 
1713 	if (unlikely(hw->started == 0))
1714 		return nb_rx;
1715 
1716 
1717 	num = nb_pkts;
1718 	if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1719 		num = VIRTIO_MBUF_BURST_SZ;
1720 	if (likely(num > DESC_PER_CACHELINE))
1721 		num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1722 
1723 	num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1724 
1725 	for (i = 0; i < num; i++) {
1726 		struct virtio_net_hdr_mrg_rxbuf *header;
1727 
1728 		PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1729 		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1730 
1731 		rxm = rcv_pkts[i];
1732 
1733 		if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1734 			PMD_RX_LOG(ERR, "Packet drop");
1735 			nb_enqueued++;
1736 			virtio_discard_rxbuf(vq, rxm);
1737 			rxvq->stats.errors++;
1738 			continue;
1739 		}
1740 
1741 		header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)
1742 			  rxm->buf_addr + RTE_PKTMBUF_HEADROOM - hdr_size);
1743 		seg_num = header->num_buffers;
1744 
1745 		if (seg_num == 0)
1746 			seg_num = 1;
1747 
1748 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
1749 		rxm->nb_segs = seg_num;
1750 		rxm->ol_flags = 0;
1751 		rxm->vlan_tci = 0;
1752 		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1753 		rxm->data_len = (uint16_t)(len[i] - hdr_size);
1754 
1755 		rxm->port = rxvq->port_id;
1756 		rx_pkts[nb_rx] = rxm;
1757 		prev = rxm;
1758 
1759 		if (hw->has_rx_offload &&
1760 				virtio_rx_offload(rxm, &header->hdr) < 0) {
1761 			virtio_discard_rxbuf(vq, rxm);
1762 			rxvq->stats.errors++;
1763 			continue;
1764 		}
1765 
1766 		if (hw->vlan_strip)
1767 			rte_vlan_strip(rx_pkts[nb_rx]);
1768 
1769 		seg_res = seg_num - 1;
1770 
1771 		/* Merge remaining segments */
1772 		while (seg_res != 0 && i < (num - 1)) {
1773 			i++;
1774 
1775 			rxm = rcv_pkts[i];
1776 			rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1777 			rxm->pkt_len = (uint32_t)(len[i]);
1778 			rxm->data_len = (uint16_t)(len[i]);
1779 
1780 			rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1781 			rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1782 
1783 			if (prev)
1784 				prev->next = rxm;
1785 
1786 			prev = rxm;
1787 			seg_res -= 1;
1788 		}
1789 
1790 		if (!seg_res) {
1791 			virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1792 			nb_rx++;
1793 		}
1794 	}
1795 
1796 	/* Last packet still need merge segments */
1797 	while (seg_res != 0) {
1798 		uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1799 					VIRTIO_MBUF_BURST_SZ);
1800 		if (likely(vq->vq_free_cnt >= rcv_cnt)) {
1801 			num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts,
1802 					len, rcv_cnt);
1803 			uint16_t extra_idx = 0;
1804 
1805 			rcv_cnt = num;
1806 
1807 			while (extra_idx < rcv_cnt) {
1808 				rxm = rcv_pkts[extra_idx];
1809 
1810 				rxm->data_off =
1811 					RTE_PKTMBUF_HEADROOM - hdr_size;
1812 				rxm->pkt_len = (uint32_t)(len[extra_idx]);
1813 				rxm->data_len = (uint16_t)(len[extra_idx]);
1814 
1815 				prev->next = rxm;
1816 				prev = rxm;
1817 				rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1818 				rx_pkts[nb_rx]->data_len += len[extra_idx];
1819 				extra_idx += 1;
1820 			}
1821 			seg_res -= rcv_cnt;
1822 			if (!seg_res) {
1823 				virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1824 				nb_rx++;
1825 			}
1826 		} else {
1827 			PMD_RX_LOG(ERR,
1828 					"No enough segments for packet.");
1829 			if (prev)
1830 				virtio_discard_rxbuf(vq, prev);
1831 			rxvq->stats.errors++;
1832 			break;
1833 		}
1834 	}
1835 
1836 	rxvq->stats.packets += nb_rx;
1837 
1838 	/* Allocate new mbuf for the used descriptor */
1839 	if (likely(!virtqueue_full(vq))) {
1840 		/* free_cnt may include mrg descs */
1841 		uint16_t free_cnt = vq->vq_free_cnt;
1842 		struct rte_mbuf *new_pkts[free_cnt];
1843 
1844 		if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1845 			error = virtqueue_enqueue_recv_refill_packed(vq,
1846 					new_pkts, free_cnt);
1847 			if (unlikely(error)) {
1848 				for (i = 0; i < free_cnt; i++)
1849 					rte_pktmbuf_free(new_pkts[i]);
1850 			}
1851 			nb_enqueued += free_cnt;
1852 		} else {
1853 			struct rte_eth_dev *dev =
1854 				&rte_eth_devices[rxvq->port_id];
1855 			dev->data->rx_mbuf_alloc_failed += free_cnt;
1856 		}
1857 	}
1858 
1859 	if (likely(nb_enqueued)) {
1860 		if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1861 			virtqueue_notify(vq);
1862 			PMD_RX_LOG(DEBUG, "Notified");
1863 		}
1864 	}
1865 
1866 	return nb_rx;
1867 }
1868 
1869 uint16_t
1870 virtio_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts,
1871 			uint16_t nb_pkts)
1872 {
1873 	struct virtnet_tx *txvq = tx_queue;
1874 	struct virtqueue *vq = txvq->vq;
1875 	struct virtio_hw *hw = vq->hw;
1876 	uint16_t hdr_size = hw->vtnet_hdr_size;
1877 	uint16_t nb_tx = 0;
1878 	int error;
1879 
1880 	if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
1881 		return nb_tx;
1882 
1883 	if (unlikely(nb_pkts < 1))
1884 		return nb_pkts;
1885 
1886 	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
1887 
1888 	if (nb_pkts > vq->vq_free_cnt)
1889 		virtio_xmit_cleanup_packed(vq, nb_pkts - vq->vq_free_cnt);
1890 
1891 	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1892 		struct rte_mbuf *txm = tx_pkts[nb_tx];
1893 		int can_push = 0, slots, need;
1894 
1895 		/* Do VLAN tag insertion */
1896 		if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
1897 			error = rte_vlan_insert(&txm);
1898 			if (unlikely(error)) {
1899 				rte_pktmbuf_free(txm);
1900 				continue;
1901 			}
1902 		}
1903 
1904 		/* optimize ring usage */
1905 		if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
1906 		      vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
1907 		    rte_mbuf_refcnt_read(txm) == 1 &&
1908 		    RTE_MBUF_DIRECT(txm) &&
1909 		    txm->nb_segs == 1 &&
1910 		    rte_pktmbuf_headroom(txm) >= hdr_size &&
1911 		    rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
1912 			   __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
1913 			can_push = 1;
1914 
1915 		/* How many main ring entries are needed to this Tx?
1916 		 * any_layout => number of segments
1917 		 * default    => number of segments + 1
1918 		 */
1919 		slots = txm->nb_segs + !can_push;
1920 		need = slots - vq->vq_free_cnt;
1921 
1922 		/* Positive value indicates it need free vring descriptors */
1923 		if (unlikely(need > 0)) {
1924 			virtio_rmb();
1925 			need = RTE_MIN(need, (int)nb_pkts);
1926 			virtio_xmit_cleanup_packed(vq, need);
1927 			need = slots - vq->vq_free_cnt;
1928 			if (unlikely(need > 0)) {
1929 				PMD_TX_LOG(ERR,
1930 					   "No free tx descriptors to transmit");
1931 				break;
1932 			}
1933 		}
1934 
1935 		/* Enqueue Packet buffers */
1936 		virtqueue_enqueue_xmit_packed(txvq, txm, slots, can_push);
1937 
1938 		virtio_update_packet_stats(&txvq->stats, txm);
1939 	}
1940 
1941 	txvq->stats.packets += nb_tx;
1942 
1943 	if (likely(nb_tx)) {
1944 		if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1945 			virtqueue_notify(vq);
1946 			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
1947 		}
1948 	}
1949 
1950 	return nb_tx;
1951 }
1952 
1953 uint16_t
1954 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1955 {
1956 	struct virtnet_tx *txvq = tx_queue;
1957 	struct virtqueue *vq = txvq->vq;
1958 	struct virtio_hw *hw = vq->hw;
1959 	uint16_t hdr_size = hw->vtnet_hdr_size;
1960 	uint16_t nb_used, nb_tx = 0;
1961 	int error;
1962 
1963 	if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
1964 		return nb_tx;
1965 
1966 	if (unlikely(nb_pkts < 1))
1967 		return nb_pkts;
1968 
1969 	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
1970 	nb_used = VIRTQUEUE_NUSED(vq);
1971 
1972 	virtio_rmb();
1973 	if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
1974 		virtio_xmit_cleanup(vq, nb_used);
1975 
1976 	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1977 		struct rte_mbuf *txm = tx_pkts[nb_tx];
1978 		int can_push = 0, use_indirect = 0, slots, need;
1979 
1980 		/* Do VLAN tag insertion */
1981 		if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
1982 			error = rte_vlan_insert(&txm);
1983 			if (unlikely(error)) {
1984 				rte_pktmbuf_free(txm);
1985 				continue;
1986 			}
1987 		}
1988 
1989 		/* optimize ring usage */
1990 		if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
1991 		      vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
1992 		    rte_mbuf_refcnt_read(txm) == 1 &&
1993 		    RTE_MBUF_DIRECT(txm) &&
1994 		    txm->nb_segs == 1 &&
1995 		    rte_pktmbuf_headroom(txm) >= hdr_size &&
1996 		    rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
1997 				   __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
1998 			can_push = 1;
1999 		else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
2000 			 txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
2001 			use_indirect = 1;
2002 
2003 		/* How many main ring entries are needed to this Tx?
2004 		 * any_layout => number of segments
2005 		 * indirect   => 1
2006 		 * default    => number of segments + 1
2007 		 */
2008 		slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
2009 		need = slots - vq->vq_free_cnt;
2010 
2011 		/* Positive value indicates it need free vring descriptors */
2012 		if (unlikely(need > 0)) {
2013 			nb_used = VIRTQUEUE_NUSED(vq);
2014 			virtio_rmb();
2015 			need = RTE_MIN(need, (int)nb_used);
2016 
2017 			virtio_xmit_cleanup(vq, need);
2018 			need = slots - vq->vq_free_cnt;
2019 			if (unlikely(need > 0)) {
2020 				PMD_TX_LOG(ERR,
2021 					   "No free tx descriptors to transmit");
2022 				break;
2023 			}
2024 		}
2025 
2026 		/* Enqueue Packet buffers */
2027 		virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect,
2028 			can_push, 0);
2029 
2030 		virtio_update_packet_stats(&txvq->stats, txm);
2031 	}
2032 
2033 	txvq->stats.packets += nb_tx;
2034 
2035 	if (likely(nb_tx)) {
2036 		vq_update_avail_idx(vq);
2037 
2038 		if (unlikely(virtqueue_kick_prepare(vq))) {
2039 			virtqueue_notify(vq);
2040 			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2041 		}
2042 	}
2043 
2044 	return nb_tx;
2045 }
2046 
2047 uint16_t
2048 virtio_xmit_pkts_inorder(void *tx_queue,
2049 			struct rte_mbuf **tx_pkts,
2050 			uint16_t nb_pkts)
2051 {
2052 	struct virtnet_tx *txvq = tx_queue;
2053 	struct virtqueue *vq = txvq->vq;
2054 	struct virtio_hw *hw = vq->hw;
2055 	uint16_t hdr_size = hw->vtnet_hdr_size;
2056 	uint16_t nb_used, nb_avail, nb_tx = 0, nb_inorder_pkts = 0;
2057 	struct rte_mbuf *inorder_pkts[nb_pkts];
2058 	int error;
2059 
2060 	if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2061 		return nb_tx;
2062 
2063 	if (unlikely(nb_pkts < 1))
2064 		return nb_pkts;
2065 
2066 	VIRTQUEUE_DUMP(vq);
2067 	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2068 	nb_used = VIRTQUEUE_NUSED(vq);
2069 
2070 	virtio_rmb();
2071 	if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2072 		virtio_xmit_cleanup_inorder(vq, nb_used);
2073 
2074 	if (unlikely(!vq->vq_free_cnt))
2075 		virtio_xmit_cleanup_inorder(vq, nb_used);
2076 
2077 	nb_avail = RTE_MIN(vq->vq_free_cnt, nb_pkts);
2078 
2079 	for (nb_tx = 0; nb_tx < nb_avail; nb_tx++) {
2080 		struct rte_mbuf *txm = tx_pkts[nb_tx];
2081 		int slots, need;
2082 
2083 		/* Do VLAN tag insertion */
2084 		if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
2085 			error = rte_vlan_insert(&txm);
2086 			if (unlikely(error)) {
2087 				rte_pktmbuf_free(txm);
2088 				continue;
2089 			}
2090 		}
2091 
2092 		/* optimize ring usage */
2093 		if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2094 		     vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2095 		     rte_mbuf_refcnt_read(txm) == 1 &&
2096 		     RTE_MBUF_DIRECT(txm) &&
2097 		     txm->nb_segs == 1 &&
2098 		     rte_pktmbuf_headroom(txm) >= hdr_size &&
2099 		     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2100 				__alignof__(struct virtio_net_hdr_mrg_rxbuf))) {
2101 			inorder_pkts[nb_inorder_pkts] = txm;
2102 			nb_inorder_pkts++;
2103 
2104 			virtio_update_packet_stats(&txvq->stats, txm);
2105 			continue;
2106 		}
2107 
2108 		if (nb_inorder_pkts) {
2109 			virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2110 							nb_inorder_pkts);
2111 			nb_inorder_pkts = 0;
2112 		}
2113 
2114 		slots = txm->nb_segs + 1;
2115 		need = slots - vq->vq_free_cnt;
2116 		if (unlikely(need > 0)) {
2117 			nb_used = VIRTQUEUE_NUSED(vq);
2118 			virtio_rmb();
2119 			need = RTE_MIN(need, (int)nb_used);
2120 
2121 			virtio_xmit_cleanup_inorder(vq, need);
2122 
2123 			need = slots - vq->vq_free_cnt;
2124 
2125 			if (unlikely(need > 0)) {
2126 				PMD_TX_LOG(ERR,
2127 					"No free tx descriptors to transmit");
2128 				break;
2129 			}
2130 		}
2131 		/* Enqueue Packet buffers */
2132 		virtqueue_enqueue_xmit(txvq, txm, slots, 0, 0, 1);
2133 
2134 		virtio_update_packet_stats(&txvq->stats, txm);
2135 	}
2136 
2137 	/* Transmit all inorder packets */
2138 	if (nb_inorder_pkts)
2139 		virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2140 						nb_inorder_pkts);
2141 
2142 	txvq->stats.packets += nb_tx;
2143 
2144 	if (likely(nb_tx)) {
2145 		vq_update_avail_idx(vq);
2146 
2147 		if (unlikely(virtqueue_kick_prepare(vq))) {
2148 			virtqueue_notify(vq);
2149 			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2150 		}
2151 	}
2152 
2153 	VIRTQUEUE_DUMP(vq);
2154 
2155 	return nb_tx;
2156 }
2157