xref: /dpdk/drivers/net/virtio/virtio_rxtx.c (revision 0964a95120fa024888fbc0ea5e34d1abef1b93dc)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4 
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <errno.h>
10 
11 #include <rte_cycles.h>
12 #include <rte_memory.h>
13 #include <rte_branch_prediction.h>
14 #include <rte_mempool.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_ether.h>
18 #include <rte_ethdev_driver.h>
19 #include <rte_prefetch.h>
20 #include <rte_string_fns.h>
21 #include <rte_errno.h>
22 #include <rte_byteorder.h>
23 #include <rte_net.h>
24 #include <rte_ip.h>
25 #include <rte_udp.h>
26 #include <rte_tcp.h>
27 
28 #include "virtio_logs.h"
29 #include "virtio_ethdev.h"
30 #include "virtio_pci.h"
31 #include "virtqueue.h"
32 #include "virtio_rxtx.h"
33 #include "virtio_rxtx_simple.h"
34 #include "virtio_ring.h"
35 
36 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
37 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
38 #else
39 #define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
40 #endif
41 
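/*
 * Report whether at least 'offset' used descriptors are pending on the
 * Rx virtqueue.
 */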
42 int
43 virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
44 {
45 	struct virtnet_rx *rxvq = rxq;
46 	struct virtqueue *vq = rxvq->vq;
47 
48 	return VIRTQUEUE_NUSED(vq) >= offset;
49 }
50 
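/*
 * In-order free: descriptors are recycled in ring order, so returning
 * 'num' of them only requires bumping the free count and advancing the
 * tail index.
 */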
51 void
52 vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx, uint16_t num)
53 {
54 	vq->vq_free_cnt += num;
55 	vq->vq_desc_tail_idx = desc_idx & (vq->vq_nentries - 1);
56 }
57 
58 void
59 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
60 {
61 	struct vring_desc *dp, *dp_tail;
62 	struct vq_desc_extra *dxp;
63 	uint16_t desc_idx_last = desc_idx;
64 
65 	dp  = &vq->vq_split.ring.desc[desc_idx];
66 	dxp = &vq->vq_descx[desc_idx];
67 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
68 	if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
69 		while (dp->flags & VRING_DESC_F_NEXT) {
70 			desc_idx_last = dp->next;
71 			dp = &vq->vq_split.ring.desc[dp->next];
72 		}
73 	}
74 	dxp->ndescs = 0;
75 
76 	/*
77 	 * We must append the existing free chain, if any, to the end of
78 	 * newly freed chain. If the virtqueue was completely used, then
79 	 * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
80 	 */
81 	if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
82 		vq->vq_desc_head_idx = desc_idx;
83 	} else {
84 		dp_tail = &vq->vq_split.ring.desc[vq->vq_desc_tail_idx];
85 		dp_tail->next = desc_idx;
86 	}
87 
88 	vq->vq_desc_tail_idx = desc_idx_last;
89 	dp->next = VQ_RING_DESC_CHAIN_END;
90 }
91 
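/*
 * Return the buffer identified by 'id' to the packed ring free list by
 * linking its extra-data entry onto the chain of free ids.
 */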
92 static void
93 vq_ring_free_id_packed(struct virtqueue *vq, uint16_t id)
94 {
95 	struct vq_desc_extra *dxp;
96 
97 	dxp = &vq->vq_descx[id];
98 	vq->vq_free_cnt += dxp->ndescs;
99 
100 	if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END)
101 		vq->vq_desc_head_idx = id;
102 	else
103 		vq->vq_descx[vq->vq_desc_tail_idx].next = id;
104 
105 	vq->vq_desc_tail_idx = id;
106 	dxp->next = VQ_RING_DESC_CHAIN_END;
107 }
108 
109 void
110 virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
111 {
112 	uint32_t s = mbuf->pkt_len;
113 	struct rte_ether_addr *ea;
114 
115 	stats->bytes += s;
116 
117 	if (s == 64) {
118 		stats->size_bins[1]++;
119 	} else if (s > 64 && s < 1024) {
120 		uint32_t bin;
121 
122 		/* count leading zeros to map the size to its power-of-two bin */
123 		bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
124 		stats->size_bins[bin]++;
125 	} else {
126 		if (s < 64)
127 			stats->size_bins[0]++;
128 		else if (s < 1519)
129 			stats->size_bins[6]++;
130 		else
131 			stats->size_bins[7]++;
132 	}
133 
134 	ea = rte_pktmbuf_mtod(mbuf, struct rte_ether_addr *);
135 	if (rte_is_multicast_ether_addr(ea)) {
136 		if (rte_is_broadcast_ether_addr(ea))
137 			stats->broadcast++;
138 		else
139 			stats->multicast++;
140 	}
141 }
142 
143 static inline void
144 virtio_rx_stats_updated(struct virtnet_rx *rxvq, struct rte_mbuf *m)
145 {
146 	VIRTIO_DUMP_PACKET(m, m->data_len);
147 
148 	virtio_update_packet_stats(&rxvq->stats, m);
149 }
150 
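/*
 * Dequeue up to 'num' received buffers from a packed virtqueue, stopping
 * at the first descriptor not yet marked used by the device. Returns the
 * number of mbufs actually retrieved.
 */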
151 static uint16_t
152 virtqueue_dequeue_burst_rx_packed(struct virtqueue *vq,
153 				  struct rte_mbuf **rx_pkts,
154 				  uint32_t *len,
155 				  uint16_t num)
156 {
157 	struct rte_mbuf *cookie;
158 	uint16_t used_idx;
159 	uint16_t id;
160 	struct vring_packed_desc *desc;
161 	uint16_t i;
162 
163 	desc = vq->vq_packed.ring.desc;
164 
165 	for (i = 0; i < num; i++) {
166 		used_idx = vq->vq_used_cons_idx;
167 		if (!desc_is_used(&desc[used_idx], vq))
168 			return i;
169 		virtio_rmb(vq->hw->weak_barriers);
170 		len[i] = desc[used_idx].len;
171 		id = desc[used_idx].id;
172 		cookie = (struct rte_mbuf *)vq->vq_descx[id].cookie;
173 		if (unlikely(cookie == NULL)) {
174 			PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
175 				vq->vq_used_cons_idx);
176 			break;
177 		}
178 		rte_prefetch0(cookie);
179 		rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
180 		rx_pkts[i] = cookie;
181 
182 		vq->vq_free_cnt++;
183 		vq->vq_used_cons_idx++;
184 		if (vq->vq_used_cons_idx >= vq->vq_nentries) {
185 			vq->vq_used_cons_idx -= vq->vq_nentries;
186 			vq->vq_packed.used_wrap_counter ^= 1;
187 		}
188 	}
189 
190 	return i;
191 }
192 
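/*
 * Dequeue up to 'num' received buffers from the used ring of a split
 * virtqueue and free the corresponding descriptor chains.
 */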
193 static uint16_t
194 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
195 			   uint32_t *len, uint16_t num)
196 {
197 	struct vring_used_elem *uep;
198 	struct rte_mbuf *cookie;
199 	uint16_t used_idx, desc_idx;
200 	uint16_t i;
201 
202 	/* Caller does the check */
203 	for (i = 0; i < num ; i++) {
204 		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
205 		uep = &vq->vq_split.ring.used->ring[used_idx];
206 		desc_idx = (uint16_t) uep->id;
207 		len[i] = uep->len;
208 		cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
209 
210 		if (unlikely(cookie == NULL)) {
211 			PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
212 				vq->vq_used_cons_idx);
213 			break;
214 		}
215 
216 		rte_prefetch0(cookie);
217 		rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
218 		rx_pkts[i]  = cookie;
219 		vq->vq_used_cons_idx++;
220 		vq_ring_free_chain(vq, desc_idx);
221 		vq->vq_descx[desc_idx].cookie = NULL;
222 	}
223 
224 	return i;
225 }
226 
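/*
 * In-order dequeue: the descriptor index matches the used index, so the
 * consumed buffers form one contiguous range that is freed with a single
 * call to vq_ring_free_inorder().
 */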
227 static uint16_t
228 virtqueue_dequeue_rx_inorder(struct virtqueue *vq,
229 			struct rte_mbuf **rx_pkts,
230 			uint32_t *len,
231 			uint16_t num)
232 {
233 	struct vring_used_elem *uep;
234 	struct rte_mbuf *cookie;
235 	uint16_t used_idx = 0;
236 	uint16_t i;
237 
238 	if (unlikely(num == 0))
239 		return 0;
240 
241 	for (i = 0; i < num; i++) {
242 		used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
243 		/* Desc idx same as used idx */
244 		uep = &vq->vq_split.ring.used->ring[used_idx];
245 		len[i] = uep->len;
246 		cookie = (struct rte_mbuf *)vq->vq_descx[used_idx].cookie;
247 
248 		if (unlikely(cookie == NULL)) {
249 			PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
250 				vq->vq_used_cons_idx);
251 			break;
252 		}
253 
254 		rte_prefetch0(cookie);
255 		rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
256 		rx_pkts[i]  = cookie;
257 		vq->vq_used_cons_idx++;
258 		vq->vq_descx[used_idx].cookie = NULL;
259 	}
260 
261 	vq_ring_free_inorder(vq, used_idx, i);
262 	return i;
263 }
264 
265 #ifndef DEFAULT_TX_FREE_THRESH
266 #define DEFAULT_TX_FREE_THRESH 32
267 #endif
268 
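/*
 * Cleanup from completed transmits on an in-order packed ring: whole
 * descriptor chains are freed until the buffer id reported by the
 * device has been reached.
 */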
269 static void
270 virtio_xmit_cleanup_inorder_packed(struct virtqueue *vq, int num)
271 {
272 	uint16_t used_idx, id, curr_id, free_cnt = 0;
273 	uint16_t size = vq->vq_nentries;
274 	struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
275 	struct vq_desc_extra *dxp;
276 
277 	used_idx = vq->vq_used_cons_idx;
278 	while (num > 0 && desc_is_used(&desc[used_idx], vq)) {
279 		virtio_rmb(vq->hw->weak_barriers);
280 		id = desc[used_idx].id;
281 		do {
282 			curr_id = used_idx;
283 			dxp = &vq->vq_descx[used_idx];
284 			used_idx += dxp->ndescs;
285 			free_cnt += dxp->ndescs;
286 			num -= dxp->ndescs;
287 			if (used_idx >= size) {
288 				used_idx -= size;
289 				vq->vq_packed.used_wrap_counter ^= 1;
290 			}
291 			if (dxp->cookie != NULL) {
292 				rte_pktmbuf_free(dxp->cookie);
293 				dxp->cookie = NULL;
294 			}
295 		} while (curr_id != id);
296 	}
297 	vq->vq_used_cons_idx = used_idx;
298 	vq->vq_free_cnt += free_cnt;
299 }
300 
301 static void
302 virtio_xmit_cleanup_normal_packed(struct virtqueue *vq, int num)
303 {
304 	uint16_t used_idx, id;
305 	uint16_t size = vq->vq_nentries;
306 	struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
307 	struct vq_desc_extra *dxp;
308 
309 	used_idx = vq->vq_used_cons_idx;
310 	while (num-- && desc_is_used(&desc[used_idx], vq)) {
311 		virtio_rmb(vq->hw->weak_barriers);
312 		id = desc[used_idx].id;
313 		dxp = &vq->vq_descx[id];
314 		vq->vq_used_cons_idx += dxp->ndescs;
315 		if (vq->vq_used_cons_idx >= size) {
316 			vq->vq_used_cons_idx -= size;
317 			vq->vq_packed.used_wrap_counter ^= 1;
318 		}
319 		vq_ring_free_id_packed(vq, id);
320 		if (dxp->cookie != NULL) {
321 			rte_pktmbuf_free(dxp->cookie);
322 			dxp->cookie = NULL;
323 		}
324 		used_idx = vq->vq_used_cons_idx;
325 	}
326 }
327 
328 /* Cleanup from completed transmits. */
329 static inline void
330 virtio_xmit_cleanup_packed(struct virtqueue *vq, int num, int in_order)
331 {
332 	if (in_order)
333 		virtio_xmit_cleanup_inorder_packed(vq, num);
334 	else
335 		virtio_xmit_cleanup_normal_packed(vq, num);
336 }
337 
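/* Cleanup from completed transmits on a split ring. */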
338 static void
339 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
340 {
341 	uint16_t i, used_idx, desc_idx;
342 	for (i = 0; i < num; i++) {
343 		struct vring_used_elem *uep;
344 		struct vq_desc_extra *dxp;
345 
346 		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
347 		uep = &vq->vq_split.ring.used->ring[used_idx];
348 
349 		desc_idx = (uint16_t) uep->id;
350 		dxp = &vq->vq_descx[desc_idx];
351 		vq->vq_used_cons_idx++;
352 		vq_ring_free_chain(vq, desc_idx);
353 
354 		if (dxp->cookie != NULL) {
355 			rte_pktmbuf_free(dxp->cookie);
356 			dxp->cookie = NULL;
357 		}
358 	}
359 }
360 
361 /* Cleanup from completed inorder transmits. */
362 static __rte_always_inline void
363 virtio_xmit_cleanup_inorder(struct virtqueue *vq, uint16_t num)
364 {
365 	uint16_t i, idx = vq->vq_used_cons_idx;
366 	int16_t free_cnt = 0;
367 	struct vq_desc_extra *dxp = NULL;
368 
369 	if (unlikely(num == 0))
370 		return;
371 
372 	for (i = 0; i < num; i++) {
373 		dxp = &vq->vq_descx[idx++ & (vq->vq_nentries - 1)];
374 		free_cnt += dxp->ndescs;
375 		if (dxp->cookie != NULL) {
376 			rte_pktmbuf_free(dxp->cookie);
377 			dxp->cookie = NULL;
378 		}
379 	}
380 
381 	vq->vq_free_cnt += free_cnt;
382 	vq->vq_used_cons_idx = idx;
383 }
384 
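/*
 * Refill an in-order Rx queue with 'num' empty mbufs, one descriptor per
 * buffer, placed at consecutive slots starting from the current head.
 */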
385 static inline int
386 virtqueue_enqueue_refill_inorder(struct virtqueue *vq,
387 			struct rte_mbuf **cookies,
388 			uint16_t num)
389 {
390 	struct vq_desc_extra *dxp;
391 	struct virtio_hw *hw = vq->hw;
392 	struct vring_desc *start_dp;
393 	uint16_t head_idx, idx, i = 0;
394 
395 	if (unlikely(vq->vq_free_cnt == 0))
396 		return -ENOSPC;
397 	if (unlikely(vq->vq_free_cnt < num))
398 		return -EMSGSIZE;
399 
400 	head_idx = vq->vq_desc_head_idx & (vq->vq_nentries - 1);
401 	start_dp = vq->vq_split.ring.desc;
402 
403 	while (i < num) {
404 		idx = head_idx & (vq->vq_nentries - 1);
405 		dxp = &vq->vq_descx[idx];
406 		dxp->cookie = (void *)cookies[i];
407 		dxp->ndescs = 1;
408 
409 		start_dp[idx].addr =
410 				VIRTIO_MBUF_ADDR(cookies[i], vq) +
411 				RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
412 		start_dp[idx].len =
413 				cookies[i]->buf_len -
414 				RTE_PKTMBUF_HEADROOM +
415 				hw->vtnet_hdr_size;
416 		start_dp[idx].flags =  VRING_DESC_F_WRITE;
417 
418 		vq_update_avail_ring(vq, idx);
419 		head_idx++;
420 		i++;
421 	}
422 
423 	vq->vq_desc_head_idx += num;
424 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
425 	return 0;
426 }
427 
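/*
 * Refill a split-ring Rx queue with 'num' empty mbufs, one descriptor
 * per buffer, following the free descriptor chain.
 */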
428 static inline int
429 virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf **cookie,
430 				uint16_t num)
431 {
432 	struct vq_desc_extra *dxp;
433 	struct virtio_hw *hw = vq->hw;
434 	struct vring_desc *start_dp = vq->vq_split.ring.desc;
435 	uint16_t idx, i;
436 
437 	if (unlikely(vq->vq_free_cnt == 0))
438 		return -ENOSPC;
439 	if (unlikely(vq->vq_free_cnt < num))
440 		return -EMSGSIZE;
441 
442 	if (unlikely(vq->vq_desc_head_idx >= vq->vq_nentries))
443 		return -EFAULT;
444 
445 	for (i = 0; i < num; i++) {
446 		idx = vq->vq_desc_head_idx;
447 		dxp = &vq->vq_descx[idx];
448 		dxp->cookie = (void *)cookie[i];
449 		dxp->ndescs = 1;
450 
451 		start_dp[idx].addr =
452 			VIRTIO_MBUF_ADDR(cookie[i], vq) +
453 			RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
454 		start_dp[idx].len =
455 			cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM +
456 			hw->vtnet_hdr_size;
457 		start_dp[idx].flags = VRING_DESC_F_WRITE;
458 		vq->vq_desc_head_idx = start_dp[idx].next;
459 		vq_update_avail_ring(vq, idx);
460 		if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) {
461 			vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
462 			break;
463 		}
464 	}
465 
466 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
467 
468 	return 0;
469 }
470 
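/*
 * Refill a packed-ring Rx queue. The descriptor flags are written last,
 * after a write barrier, so the device never sees a partially filled
 * entry.
 */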
471 static inline int
472 virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
473 				     struct rte_mbuf **cookie, uint16_t num)
474 {
475 	struct vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
476 	uint16_t flags = vq->vq_packed.cached_flags;
477 	struct virtio_hw *hw = vq->hw;
478 	struct vq_desc_extra *dxp;
479 	uint16_t idx;
480 	int i;
481 
482 	if (unlikely(vq->vq_free_cnt == 0))
483 		return -ENOSPC;
484 	if (unlikely(vq->vq_free_cnt < num))
485 		return -EMSGSIZE;
486 
487 	for (i = 0; i < num; i++) {
488 		idx = vq->vq_avail_idx;
489 		dxp = &vq->vq_descx[idx];
490 		dxp->cookie = (void *)cookie[i];
491 		dxp->ndescs = 1;
492 
493 		start_dp[idx].addr = VIRTIO_MBUF_ADDR(cookie[i], vq) +
494 				RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
495 		start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM
496 					+ hw->vtnet_hdr_size;
497 
498 		vq->vq_desc_head_idx = dxp->next;
499 		if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
500 			vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
501 		virtio_wmb(hw->weak_barriers);
502 		start_dp[idx].flags = flags;
503 		if (++vq->vq_avail_idx >= vq->vq_nentries) {
504 			vq->vq_avail_idx -= vq->vq_nentries;
505 			vq->vq_packed.cached_flags ^=
506 				VRING_PACKED_DESC_F_AVAIL_USED;
507 			flags = vq->vq_packed.cached_flags;
508 		}
509 	}
510 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
511 	return 0;
512 }
513 
514 /* When doing TSO, the IP length is not included in the pseudo header
515  * checksum of the packet given to the PMD, but virtio expects it to be
516  * included.
517  */
518 static void
519 virtio_tso_fix_cksum(struct rte_mbuf *m)
520 {
521 	/* common case: header is not fragmented */
522 	if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
523 			m->l4_len)) {
524 		struct rte_ipv4_hdr *iph;
525 		struct rte_ipv6_hdr *ip6h;
526 		struct rte_tcp_hdr *th;
527 		uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
528 		uint32_t tmp;
529 
530 		iph = rte_pktmbuf_mtod_offset(m,
531 					struct rte_ipv4_hdr *, m->l2_len);
532 		th = RTE_PTR_ADD(iph, m->l3_len);
533 		if ((iph->version_ihl >> 4) == 4) {
534 			iph->hdr_checksum = 0;
535 			iph->hdr_checksum = rte_ipv4_cksum(iph);
536 			ip_len = iph->total_length;
537 			ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
538 				m->l3_len);
539 		} else {
540 			ip6h = (struct rte_ipv6_hdr *)iph;
541 			ip_paylen = ip6h->payload_len;
542 		}
543 
544 		/* calculate the new phdr checksum not including ip_paylen */
545 		prev_cksum = th->cksum;
546 		tmp = prev_cksum;
547 		tmp += ip_paylen;
548 		tmp = (tmp & 0xffff) + (tmp >> 16);
549 		new_cksum = tmp;
550 
551 		/* replace it in the packet */
552 		th->cksum = new_cksum;
553 	}
554 }
555 
556 
557 /* avoid the write when the value is already set, to lessen cache issues */
558 #define ASSIGN_UNLESS_EQUAL(var, val) do {	\
559 	if ((var) != (val))			\
560 		(var) = (val);			\
561 } while (0)
562 
563 #define virtqueue_clear_net_hdr(_hdr) do {		\
564 	ASSIGN_UNLESS_EQUAL((_hdr)->csum_start, 0);	\
565 	ASSIGN_UNLESS_EQUAL((_hdr)->csum_offset, 0);	\
566 	ASSIGN_UNLESS_EQUAL((_hdr)->flags, 0);		\
567 	ASSIGN_UNLESS_EQUAL((_hdr)->gso_type, 0);	\
568 	ASSIGN_UNLESS_EQUAL((_hdr)->gso_size, 0);	\
569 	ASSIGN_UNLESS_EQUAL((_hdr)->hdr_len, 0);	\
570 } while (0)
571 
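/*
 * Translate the mbuf Tx offload flags (L4 checksum, TSO) into the
 * virtio-net header consumed by the device; no-op when offloads are
 * disabled.
 */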
572 static inline void
573 virtqueue_xmit_offload(struct virtio_net_hdr *hdr,
574 			struct rte_mbuf *cookie,
575 			bool offload)
576 {
577 	if (offload) {
578 		if (cookie->ol_flags & PKT_TX_TCP_SEG)
579 			cookie->ol_flags |= PKT_TX_TCP_CKSUM;
580 
581 		switch (cookie->ol_flags & PKT_TX_L4_MASK) {
582 		case PKT_TX_UDP_CKSUM:
583 			hdr->csum_start = cookie->l2_len + cookie->l3_len;
584 			hdr->csum_offset = offsetof(struct rte_udp_hdr,
585 				dgram_cksum);
586 			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
587 			break;
588 
589 		case PKT_TX_TCP_CKSUM:
590 			hdr->csum_start = cookie->l2_len + cookie->l3_len;
591 			hdr->csum_offset = offsetof(struct rte_tcp_hdr, cksum);
592 			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
593 			break;
594 
595 		default:
596 			ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
597 			ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
598 			ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
599 			break;
600 		}
601 
602 		/* TCP Segmentation Offload */
603 		if (cookie->ol_flags & PKT_TX_TCP_SEG) {
604 			hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
605 				VIRTIO_NET_HDR_GSO_TCPV6 :
606 				VIRTIO_NET_HDR_GSO_TCPV4;
607 			hdr->gso_size = cookie->tso_segsz;
608 			hdr->hdr_len =
609 				cookie->l2_len +
610 				cookie->l3_len +
611 				cookie->l4_len;
612 		} else {
613 			ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
614 			ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
615 			ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
616 		}
617 	}
618 }
619 
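/*
 * In-order Tx enqueue for single-segment mbufs whose headroom holds the
 * virtio-net header: each packet consumes exactly one descriptor.
 */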
620 static inline void
621 virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
622 			struct rte_mbuf **cookies,
623 			uint16_t num)
624 {
625 	struct vq_desc_extra *dxp;
626 	struct virtqueue *vq = txvq->vq;
627 	struct vring_desc *start_dp;
628 	struct virtio_net_hdr *hdr;
629 	uint16_t idx;
630 	uint16_t head_size = vq->hw->vtnet_hdr_size;
631 	uint16_t i = 0;
632 
633 	idx = vq->vq_desc_head_idx;
634 	start_dp = vq->vq_split.ring.desc;
635 
636 	while (i < num) {
637 		idx = idx & (vq->vq_nentries - 1);
638 		dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
639 		dxp->cookie = (void *)cookies[i];
640 		dxp->ndescs = 1;
641 		virtio_update_packet_stats(&txvq->stats, cookies[i]);
642 
643 		hdr = rte_pktmbuf_mtod_offset(cookies[i],
644 			struct virtio_net_hdr *, -head_size);
645 
646 		/* if offload disabled, hdr is not zeroed yet, do it now */
647 		if (!vq->hw->has_tx_offload)
648 			virtqueue_clear_net_hdr(hdr);
649 		else
650 			virtqueue_xmit_offload(hdr, cookies[i], true);
651 
652 		start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookies[i], vq) - head_size;
653 		start_dp[idx].len   = cookies[i]->data_len + head_size;
654 		start_dp[idx].flags = 0;
655 
656 
657 		vq_update_avail_ring(vq, idx);
658 
659 		idx++;
660 		i++;
661 	}
662 
663 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
664 	vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
665 }
666 
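/*
 * Fast path for packed-ring Tx: a single-segment packet with the
 * virtio-net header pushed into the mbuf headroom, consuming exactly
 * one descriptor.
 */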
667 static inline void
668 virtqueue_enqueue_xmit_packed_fast(struct virtnet_tx *txvq,
669 				   struct rte_mbuf *cookie,
670 				   int in_order)
671 {
672 	struct virtqueue *vq = txvq->vq;
673 	struct vring_packed_desc *dp;
674 	struct vq_desc_extra *dxp;
675 	uint16_t idx, id, flags;
676 	uint16_t head_size = vq->hw->vtnet_hdr_size;
677 	struct virtio_net_hdr *hdr;
678 
679 	id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
680 	idx = vq->vq_avail_idx;
681 	dp = &vq->vq_packed.ring.desc[idx];
682 
683 	dxp = &vq->vq_descx[id];
684 	dxp->ndescs = 1;
685 	dxp->cookie = cookie;
686 
687 	flags = vq->vq_packed.cached_flags;
688 
689 	/* prepend cannot fail, checked by caller */
690 	hdr = rte_pktmbuf_mtod_offset(cookie, struct virtio_net_hdr *,
691 		-head_size);
692 
693 	/* if offload disabled, hdr is not zeroed yet, do it now */
694 	if (!vq->hw->has_tx_offload)
695 		virtqueue_clear_net_hdr(hdr);
696 	else
697 		virtqueue_xmit_offload(hdr, cookie, true);
698 
699 	dp->addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq) - head_size;
700 	dp->len  = cookie->data_len + head_size;
701 	dp->id   = id;
702 
703 	if (++vq->vq_avail_idx >= vq->vq_nentries) {
704 		vq->vq_avail_idx -= vq->vq_nentries;
705 		vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
706 	}
707 
708 	vq->vq_free_cnt--;
709 
710 	if (!in_order) {
711 		vq->vq_desc_head_idx = dxp->next;
712 		if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
713 			vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
714 	}
715 
716 	virtio_wmb(vq->hw->weak_barriers);
717 	dp->flags = flags;
718 }
719 
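/*
 * General packed-ring Tx enqueue supporting chained mbufs. The flags of
 * the head descriptor are written last, after a write barrier, so the
 * whole chain becomes visible to the device at once.
 */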
720 static inline void
721 virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
722 			      uint16_t needed, int can_push, int in_order)
723 {
724 	struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
725 	struct vq_desc_extra *dxp;
726 	struct virtqueue *vq = txvq->vq;
727 	struct vring_packed_desc *start_dp, *head_dp;
728 	uint16_t idx, id, head_idx, head_flags;
729 	uint16_t head_size = vq->hw->vtnet_hdr_size;
730 	struct virtio_net_hdr *hdr;
731 	uint16_t prev;
732 	bool prepend_header = false;
733 
734 	id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
735 
736 	dxp = &vq->vq_descx[id];
737 	dxp->ndescs = needed;
738 	dxp->cookie = cookie;
739 
740 	head_idx = vq->vq_avail_idx;
741 	idx = head_idx;
742 	prev = head_idx;
743 	start_dp = vq->vq_packed.ring.desc;
744 
745 	head_dp = &vq->vq_packed.ring.desc[idx];
746 	head_flags = cookie->next ? VRING_DESC_F_NEXT : 0;
747 	head_flags |= vq->vq_packed.cached_flags;
748 
749 	if (can_push) {
750 		/* prepend cannot fail, checked by caller */
751 		hdr = rte_pktmbuf_mtod_offset(cookie,
752 			struct virtio_net_hdr *, -head_size);
753 		prepend_header = true;
754 
755 		/* if offload disabled, it is not zeroed below, do it now */
756 		if (!vq->hw->has_tx_offload)
757 			virtqueue_clear_net_hdr(hdr);
758 	} else {
759 		/* setup first tx ring slot to point to header
760 		 * stored in reserved region.
761 		 */
762 		start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
763 			RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
764 		start_dp[idx].len   = vq->hw->vtnet_hdr_size;
765 		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
766 		idx++;
767 		if (idx >= vq->vq_nentries) {
768 			idx -= vq->vq_nentries;
769 			vq->vq_packed.cached_flags ^=
770 				VRING_PACKED_DESC_F_AVAIL_USED;
771 		}
772 	}
773 
774 	virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
775 
776 	do {
777 		uint16_t flags;
778 
779 		start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
780 		start_dp[idx].len  = cookie->data_len;
781 		if (prepend_header) {
			start_dp[idx].addr -= head_size;
782 			start_dp[idx].len += head_size;
783 			prepend_header = false;
784 		}
785 
786 		if (likely(idx != head_idx)) {
787 			flags = cookie->next ? VRING_DESC_F_NEXT : 0;
788 			flags |= vq->vq_packed.cached_flags;
789 			start_dp[idx].flags = flags;
790 		}
791 		prev = idx;
792 		idx++;
793 		if (idx >= vq->vq_nentries) {
794 			idx -= vq->vq_nentries;
795 			vq->vq_packed.cached_flags ^=
796 				VRING_PACKED_DESC_F_AVAIL_USED;
797 		}
798 	} while ((cookie = cookie->next) != NULL);
799 
800 	start_dp[prev].id = id;
801 
802 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
803 	vq->vq_avail_idx = idx;
804 
805 	if (!in_order) {
806 		vq->vq_desc_head_idx = dxp->next;
807 		if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
808 			vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
809 	}
810 
811 	virtio_wmb(vq->hw->weak_barriers);
812 	head_dp->flags = head_flags;
813 }
814 
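/*
 * General split-ring Tx enqueue: the virtio-net header is either pushed
 * into the mbuf headroom, placed in the reserved indirect descriptor
 * table, or given its own descriptor, followed by one descriptor per
 * packet segment.
 */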
815 static inline void
816 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
817 			uint16_t needed, int use_indirect, int can_push,
818 			int in_order)
819 {
820 	struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
821 	struct vq_desc_extra *dxp;
822 	struct virtqueue *vq = txvq->vq;
823 	struct vring_desc *start_dp;
824 	uint16_t seg_num = cookie->nb_segs;
825 	uint16_t head_idx, idx;
826 	uint16_t head_size = vq->hw->vtnet_hdr_size;
827 	bool prepend_header = false;
828 	struct virtio_net_hdr *hdr;
829 
830 	head_idx = vq->vq_desc_head_idx;
831 	idx = head_idx;
832 	if (in_order)
833 		dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
834 	else
835 		dxp = &vq->vq_descx[idx];
836 	dxp->cookie = (void *)cookie;
837 	dxp->ndescs = needed;
838 
839 	start_dp = vq->vq_split.ring.desc;
840 
841 	if (can_push) {
842 		/* prepend cannot fail, checked by caller */
843 		hdr = rte_pktmbuf_mtod_offset(cookie,
844 			struct virtio_net_hdr *, -head_size);
845 		prepend_header = true;
846 
847 		/* if offload disabled, it is not zeroed below, do it now */
848 		if (!vq->hw->has_tx_offload)
849 			virtqueue_clear_net_hdr(hdr);
850 	} else if (use_indirect) {
851 		/* setup tx ring slot to point to indirect
852 		 * descriptor list stored in reserved region.
853 		 *
854 		 * the first slot in indirect ring is already preset
855 		 * to point to the header in reserved region
856 		 */
857 		start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
858 			RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
859 		start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
860 		start_dp[idx].flags = VRING_DESC_F_INDIRECT;
861 		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
862 
863 		/* loop below will fill in rest of the indirect elements */
864 		start_dp = txr[idx].tx_indir;
865 		idx = 1;
866 	} else {
867 		/* setup first tx ring slot to point to header
868 		 * stored in reserved region.
869 		 */
870 		start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
871 			RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
872 		start_dp[idx].len   = vq->hw->vtnet_hdr_size;
873 		start_dp[idx].flags = VRING_DESC_F_NEXT;
874 		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
875 
876 		idx = start_dp[idx].next;
877 	}
878 
879 	virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
880 
881 	do {
882 		start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
883 		start_dp[idx].len   = cookie->data_len;
884 		if (prepend_header) {
			start_dp[idx].addr -= head_size;
885 			start_dp[idx].len += head_size;
886 			prepend_header = false;
887 		}
888 		start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
889 		idx = start_dp[idx].next;
890 	} while ((cookie = cookie->next) != NULL);
891 
892 	if (use_indirect)
893 		idx = vq->vq_split.ring.desc[head_idx].next;
894 
895 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
896 
897 	vq->vq_desc_head_idx = idx;
898 	vq_update_avail_ring(vq, head_idx);
899 
900 	if (!in_order) {
901 		if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
902 			vq->vq_desc_tail_idx = idx;
903 	}
904 }
905 
906 void
907 virtio_dev_cq_start(struct rte_eth_dev *dev)
908 {
909 	struct virtio_hw *hw = dev->data->dev_private;
910 
911 	if (hw->cvq && hw->cvq->vq) {
912 		rte_spinlock_init(&hw->cvq->lock);
913 		VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
914 	}
915 }
916 
917 int
918 virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
919 			uint16_t queue_idx,
920 			uint16_t nb_desc,
921 			unsigned int socket_id __rte_unused,
922 			const struct rte_eth_rxconf *rx_conf __rte_unused,
923 			struct rte_mempool *mp)
924 {
925 	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
926 	struct virtio_hw *hw = dev->data->dev_private;
927 	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
928 	struct virtnet_rx *rxvq;
929 
930 	PMD_INIT_FUNC_TRACE();
931 
932 	if (nb_desc == 0 || nb_desc > vq->vq_nentries)
933 		nb_desc = vq->vq_nentries;
934 	vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
935 
936 	rxvq = &vq->rxq;
937 	rxvq->queue_id = queue_idx;
938 	rxvq->mpool = mp;
939 	dev->data->rx_queues[queue_idx] = rxvq;
940 
941 	return 0;
942 }
943 
944 int
945 virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
946 {
947 	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
948 	struct virtio_hw *hw = dev->data->dev_private;
949 	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
950 	struct virtnet_rx *rxvq = &vq->rxq;
951 	struct rte_mbuf *m;
952 	uint16_t desc_idx;
953 	int error, nbufs, i;
954 
955 	PMD_INIT_FUNC_TRACE();
956 
957 	/* Allocate blank mbufs for each Rx descriptor */
958 	nbufs = 0;
959 
960 	if (hw->use_simple_rx) {
961 		for (desc_idx = 0; desc_idx < vq->vq_nentries;
962 		     desc_idx++) {
963 			vq->vq_split.ring.avail->ring[desc_idx] = desc_idx;
964 			vq->vq_split.ring.desc[desc_idx].flags =
965 				VRING_DESC_F_WRITE;
966 		}
967 
968 		virtio_rxq_vec_setup(rxvq);
969 	}
970 
971 	memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
972 	for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
973 	     desc_idx++) {
974 		vq->sw_ring[vq->vq_nentries + desc_idx] =
975 			&rxvq->fake_mbuf;
976 	}
977 
978 	if (hw->use_simple_rx) {
979 		while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
980 			virtio_rxq_rearm_vec(rxvq);
981 			nbufs += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
982 		}
983 	} else if (hw->use_inorder_rx) {
984 		if ((!virtqueue_full(vq))) {
985 			uint16_t free_cnt = vq->vq_free_cnt;
986 			struct rte_mbuf *pkts[free_cnt];
987 
988 			if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, pkts,
989 				free_cnt)) {
990 				error = virtqueue_enqueue_refill_inorder(vq,
991 						pkts,
992 						free_cnt);
993 				if (unlikely(error)) {
994 					for (i = 0; i < free_cnt; i++)
995 						rte_pktmbuf_free(pkts[i]);
996 				}
997 			}
998 
999 			nbufs += free_cnt;
1000 			vq_update_avail_idx(vq);
1001 		}
1002 	} else {
1003 		while (!virtqueue_full(vq)) {
1004 			m = rte_mbuf_raw_alloc(rxvq->mpool);
1005 			if (m == NULL)
1006 				break;
1007 
1008 			/* Enqueue allocated buffers */
1009 			if (vtpci_packed_queue(vq->hw))
1010 				error = virtqueue_enqueue_recv_refill_packed(vq,
1011 						&m, 1);
1012 			else
1013 				error = virtqueue_enqueue_recv_refill(vq,
1014 						&m, 1);
1015 			if (error) {
1016 				rte_pktmbuf_free(m);
1017 				break;
1018 			}
1019 			nbufs++;
1020 		}
1021 
1022 		if (!vtpci_packed_queue(vq->hw))
1023 			vq_update_avail_idx(vq);
1024 	}
1025 
1026 	PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
1027 
1028 	VIRTQUEUE_DUMP(vq);
1029 
1030 	return 0;
1031 }
1032 
1033 /*
1034  * struct rte_eth_dev *dev: Ethernet device being configured
1035  * uint16_t nb_desc: Ring size; defaults to the value read from config space
1036  * unsigned int socket_id: Used to allocate the memzone (unused here)
1037  * const struct rte_eth_txconf *tx_conf: Used to set up the Tx engine
1038  * uint16_t queue_idx: Index into the device Tx queue list
1039  */
1040 int
1041 virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
1042 			uint16_t queue_idx,
1043 			uint16_t nb_desc,
1044 			unsigned int socket_id __rte_unused,
1045 			const struct rte_eth_txconf *tx_conf)
1046 {
1047 	uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
1048 	struct virtio_hw *hw = dev->data->dev_private;
1049 	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
1050 	struct virtnet_tx *txvq;
1051 	uint16_t tx_free_thresh;
1052 
1053 	PMD_INIT_FUNC_TRACE();
1054 
1055 	if (nb_desc == 0 || nb_desc > vq->vq_nentries)
1056 		nb_desc = vq->vq_nentries;
1057 	vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
1058 
1059 	txvq = &vq->txq;
1060 	txvq->queue_id = queue_idx;
1061 
1062 	tx_free_thresh = tx_conf->tx_free_thresh;
1063 	if (tx_free_thresh == 0)
1064 		tx_free_thresh =
1065 			RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
1066 
1067 	if (tx_free_thresh >= (vq->vq_nentries - 3)) {
1068 		RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
1069 			"number of TX entries minus 3 (%u)."
1070 			" (tx_free_thresh=%u port=%u queue=%u)\n",
1071 			vq->vq_nentries - 3,
1072 			tx_free_thresh, dev->data->port_id, queue_idx);
1073 		return -EINVAL;
1074 	}
1075 
1076 	vq->vq_free_thresh = tx_free_thresh;
1077 
1078 	dev->data->tx_queues[queue_idx] = txvq;
1079 	return 0;
1080 }
1081 
1082 int
1083 virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
1084 				uint16_t queue_idx)
1085 {
1086 	uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
1087 	struct virtio_hw *hw = dev->data->dev_private;
1088 	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
1089 
1090 	PMD_INIT_FUNC_TRACE();
1091 
1092 	if (!vtpci_packed_queue(hw)) {
1093 		if (hw->use_inorder_tx)
1094 			vq->vq_split.ring.desc[vq->vq_nentries - 1].next = 0;
1095 	}
1096 
1097 	VIRTQUEUE_DUMP(vq);
1098 
1099 	return 0;
1100 }
1101 
1102 static inline void
1103 virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
1104 {
1105 	int error;
1106 	/*
1107 	 * Requeue the discarded mbuf. This should always be
1108 	 * successful since it was just dequeued.
1109 	 */
1110 	if (vtpci_packed_queue(vq->hw))
1111 		error = virtqueue_enqueue_recv_refill_packed(vq, &m, 1);
1112 	else
1113 		error = virtqueue_enqueue_recv_refill(vq, &m, 1);
1114 
1115 	if (unlikely(error)) {
1116 		RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf\n");
1117 		rte_pktmbuf_free(m);
1118 	}
1119 }
1120 
1121 static inline void
1122 virtio_discard_rxbuf_inorder(struct virtqueue *vq, struct rte_mbuf *m)
1123 {
1124 	int error;
1125 
1126 	error = virtqueue_enqueue_refill_inorder(vq, &m, 1);
1127 	if (unlikely(error)) {
1128 		RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf\n");
1129 		rte_pktmbuf_free(m);
1130 	}
1131 }
1132 
1133 /* Optionally fill offload information in structure */
1134 static inline int
1135 virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
1136 {
1137 	struct rte_net_hdr_lens hdr_lens;
1138 	uint32_t hdrlen, ptype;
1139 	int l4_supported = 0;
1140 
1141 	/* nothing to do */
1142 	if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
1143 		return 0;
1144 
1145 	m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
1146 
1147 	ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
1148 	m->packet_type = ptype;
1149 	if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
1150 	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
1151 	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
1152 		l4_supported = 1;
1153 
1154 	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
1155 		hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
1156 		if (hdr->csum_start <= hdrlen && l4_supported) {
1157 			m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
1158 		} else {
1159 			/* Unknown proto or tunnel, do sw cksum. We can assume
1160 			 * the cksum field is in the first segment since the
1161 			 * buffers we provided to the host are large enough.
1162 			 * In case of SCTP, this will be wrong since it's a CRC
1163 			 * but there's nothing we can do.
1164 			 */
1165 			uint16_t csum = 0, off;
1166 
1167 			rte_raw_cksum_mbuf(m, hdr->csum_start,
1168 				rte_pktmbuf_pkt_len(m) - hdr->csum_start,
1169 				&csum);
1170 			if (likely(csum != 0xffff))
1171 				csum = ~csum;
1172 			off = hdr->csum_offset + hdr->csum_start;
1173 			if (rte_pktmbuf_data_len(m) >= off + 1)
1174 				*rte_pktmbuf_mtod_offset(m, uint16_t *,
1175 					off) = csum;
1176 		}
1177 	} else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
1178 		m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
1179 	}
1180 
1181 	/* GSO request, save required information in mbuf */
1182 	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
1183 		/* Check unsupported modes */
1184 		if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
1185 		    (hdr->gso_size == 0)) {
1186 			return -EINVAL;
1187 		}
1188 
1189 		/* Update MSS length in the mbuf */
1190 		m->tso_segsz = hdr->gso_size;
1191 		switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
1192 		case VIRTIO_NET_HDR_GSO_TCPV4:
1193 		case VIRTIO_NET_HDR_GSO_TCPV6:
1194 			m->ol_flags |= PKT_RX_LRO |
1195 				PKT_RX_L4_CKSUM_NONE;
1196 			break;
1197 		default:
1198 			return -EINVAL;
1199 		}
1200 	}
1201 
1202 	return 0;
1203 }
1204 
1205 #define VIRTIO_MBUF_BURST_SZ 64
1206 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
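/*
 * Rx bursts below are trimmed so that the used index ends the burst on a
 * descriptor cache-line boundary (e.g. 64-byte lines and 16-byte split
 * descriptors give DESC_PER_CACHELINE == 4), avoiding partially consumed
 * cache lines of descriptors between polling rounds.
 */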
1207 uint16_t
1208 virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
1209 {
1210 	struct virtnet_rx *rxvq = rx_queue;
1211 	struct virtqueue *vq = rxvq->vq;
1212 	struct virtio_hw *hw = vq->hw;
1213 	struct rte_mbuf *rxm;
1214 	uint16_t nb_used, num, nb_rx;
1215 	uint32_t len[VIRTIO_MBUF_BURST_SZ];
1216 	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1217 	int error;
1218 	uint32_t i, nb_enqueued;
1219 	uint32_t hdr_size;
1220 	struct virtio_net_hdr *hdr;
1221 
1222 	nb_rx = 0;
1223 	if (unlikely(hw->started == 0))
1224 		return nb_rx;
1225 
1226 	nb_used = VIRTQUEUE_NUSED(vq);
1227 
1228 	virtio_rmb(hw->weak_barriers);
1229 
1230 	num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1231 	if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1232 		num = VIRTIO_MBUF_BURST_SZ;
1233 	if (likely(num > DESC_PER_CACHELINE))
1234 		num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1235 
1236 	num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1237 	PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
1238 
1239 	nb_enqueued = 0;
1240 	hdr_size = hw->vtnet_hdr_size;
1241 
1242 	for (i = 0; i < num ; i++) {
1243 		rxm = rcv_pkts[i];
1244 
1245 		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1246 
1247 		if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1248 			PMD_RX_LOG(ERR, "Packet drop");
1249 			nb_enqueued++;
1250 			virtio_discard_rxbuf(vq, rxm);
1251 			rxvq->stats.errors++;
1252 			continue;
1253 		}
1254 
1255 		rxm->port = rxvq->port_id;
1256 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
1257 		rxm->ol_flags = 0;
1258 		rxm->vlan_tci = 0;
1259 
1260 		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1261 		rxm->data_len = (uint16_t)(len[i] - hdr_size);
1262 
1263 		hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1264 			RTE_PKTMBUF_HEADROOM - hdr_size);
1265 
1266 		if (hw->vlan_strip)
1267 			rte_vlan_strip(rxm);
1268 
1269 		if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1270 			virtio_discard_rxbuf(vq, rxm);
1271 			rxvq->stats.errors++;
1272 			continue;
1273 		}
1274 
1275 		virtio_rx_stats_updated(rxvq, rxm);
1276 
1277 		rx_pkts[nb_rx++] = rxm;
1278 	}
1279 
1280 	rxvq->stats.packets += nb_rx;
1281 
1282 	/* Allocate new mbuf for the used descriptor */
1283 	if (likely(!virtqueue_full(vq))) {
1284 		uint16_t free_cnt = vq->vq_free_cnt;
1285 		struct rte_mbuf *new_pkts[free_cnt];
1286 
1287 		if (likely(rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts,
1288 						free_cnt) == 0)) {
1289 			error = virtqueue_enqueue_recv_refill(vq, new_pkts,
1290 					free_cnt);
1291 			if (unlikely(error)) {
1292 				for (i = 0; i < free_cnt; i++)
1293 					rte_pktmbuf_free(new_pkts[i]);
1294 			}
1295 			nb_enqueued += free_cnt;
1296 		} else {
1297 			struct rte_eth_dev *dev =
1298 				&rte_eth_devices[rxvq->port_id];
1299 			dev->data->rx_mbuf_alloc_failed += free_cnt;
1300 		}
1301 	}
1302 
1303 	if (likely(nb_enqueued)) {
1304 		vq_update_avail_idx(vq);
1305 
1306 		if (unlikely(virtqueue_kick_prepare(vq))) {
1307 			virtqueue_notify(vq);
1308 			PMD_RX_LOG(DEBUG, "Notified");
1309 		}
1310 	}
1311 
1312 	return nb_rx;
1313 }
1314 
1315 uint16_t
1316 virtio_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
1317 			uint16_t nb_pkts)
1318 {
1319 	struct virtnet_rx *rxvq = rx_queue;
1320 	struct virtqueue *vq = rxvq->vq;
1321 	struct virtio_hw *hw = vq->hw;
1322 	struct rte_mbuf *rxm;
1323 	uint16_t num, nb_rx;
1324 	uint32_t len[VIRTIO_MBUF_BURST_SZ];
1325 	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1326 	int error;
1327 	uint32_t i, nb_enqueued;
1328 	uint32_t hdr_size;
1329 	struct virtio_net_hdr *hdr;
1330 
1331 	nb_rx = 0;
1332 	if (unlikely(hw->started == 0))
1333 		return nb_rx;
1334 
1335 	num = RTE_MIN(VIRTIO_MBUF_BURST_SZ, nb_pkts);
1336 	if (likely(num > DESC_PER_CACHELINE))
1337 		num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1338 
1339 	num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1340 	PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1341 
1342 	nb_enqueued = 0;
1343 	hdr_size = hw->vtnet_hdr_size;
1344 
1345 	for (i = 0; i < num; i++) {
1346 		rxm = rcv_pkts[i];
1347 
1348 		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1349 
1350 		if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1351 			PMD_RX_LOG(ERR, "Packet drop");
1352 			nb_enqueued++;
1353 			virtio_discard_rxbuf(vq, rxm);
1354 			rxvq->stats.errors++;
1355 			continue;
1356 		}
1357 
1358 		rxm->port = rxvq->port_id;
1359 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
1360 		rxm->ol_flags = 0;
1361 		rxm->vlan_tci = 0;
1362 
1363 		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1364 		rxm->data_len = (uint16_t)(len[i] - hdr_size);
1365 
1366 		hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1367 			RTE_PKTMBUF_HEADROOM - hdr_size);
1368 
1369 		if (hw->vlan_strip)
1370 			rte_vlan_strip(rxm);
1371 
1372 		if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1373 			virtio_discard_rxbuf(vq, rxm);
1374 			rxvq->stats.errors++;
1375 			continue;
1376 		}
1377 
1378 		virtio_rx_stats_updated(rxvq, rxm);
1379 
1380 		rx_pkts[nb_rx++] = rxm;
1381 	}
1382 
1383 	rxvq->stats.packets += nb_rx;
1384 
1385 	/* Allocate new mbuf for the used descriptor */
1386 	if (likely(!virtqueue_full(vq))) {
1387 		uint16_t free_cnt = vq->vq_free_cnt;
1388 		struct rte_mbuf *new_pkts[free_cnt];
1389 
1390 		if (likely(rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts,
1391 						free_cnt) == 0)) {
1392 			error = virtqueue_enqueue_recv_refill_packed(vq,
1393 					new_pkts, free_cnt);
1394 			if (unlikely(error)) {
1395 				for (i = 0; i < free_cnt; i++)
1396 					rte_pktmbuf_free(new_pkts[i]);
1397 			}
1398 			nb_enqueued += free_cnt;
1399 		} else {
1400 			struct rte_eth_dev *dev =
1401 				&rte_eth_devices[rxvq->port_id];
1402 			dev->data->rx_mbuf_alloc_failed += free_cnt;
1403 		}
1404 	}
1405 
1406 	if (likely(nb_enqueued)) {
1407 		if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1408 			virtqueue_notify(vq);
1409 			PMD_RX_LOG(DEBUG, "Notified");
1410 		}
1411 	}
1412 
1413 	return nb_rx;
1414 }
1415 
1416 
1417 uint16_t
1418 virtio_recv_pkts_inorder(void *rx_queue,
1419 			struct rte_mbuf **rx_pkts,
1420 			uint16_t nb_pkts)
1421 {
1422 	struct virtnet_rx *rxvq = rx_queue;
1423 	struct virtqueue *vq = rxvq->vq;
1424 	struct virtio_hw *hw = vq->hw;
1425 	struct rte_mbuf *rxm;
1426 	struct rte_mbuf *prev = NULL;
1427 	uint16_t nb_used, num, nb_rx;
1428 	uint32_t len[VIRTIO_MBUF_BURST_SZ];
1429 	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1430 	int error;
1431 	uint32_t nb_enqueued;
1432 	uint32_t seg_num;
1433 	uint32_t seg_res;
1434 	uint32_t hdr_size;
1435 	int32_t i;
1436 
1437 	nb_rx = 0;
1438 	if (unlikely(hw->started == 0))
1439 		return nb_rx;
1440 
1441 	nb_used = VIRTQUEUE_NUSED(vq);
1442 	nb_used = RTE_MIN(nb_used, nb_pkts);
1443 	nb_used = RTE_MIN(nb_used, VIRTIO_MBUF_BURST_SZ);
1444 
1445 	virtio_rmb(hw->weak_barriers);
1446 
1447 	PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1448 
1449 	nb_enqueued = 0;
1450 	seg_num = 1;
1451 	seg_res = 0;
1452 	hdr_size = hw->vtnet_hdr_size;
1453 
1454 	num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len, nb_used);
1455 
1456 	for (i = 0; i < num; i++) {
1457 		struct virtio_net_hdr_mrg_rxbuf *header;
1458 
1459 		PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1460 		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1461 
1462 		rxm = rcv_pkts[i];
1463 
1464 		if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1465 			PMD_RX_LOG(ERR, "Packet drop");
1466 			nb_enqueued++;
1467 			virtio_discard_rxbuf_inorder(vq, rxm);
1468 			rxvq->stats.errors++;
1469 			continue;
1470 		}
1471 
1472 		header = (struct virtio_net_hdr_mrg_rxbuf *)
1473 			 ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1474 			 - hdr_size);
1475 
1476 		if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1477 			seg_num = header->num_buffers;
1478 			if (seg_num == 0)
1479 				seg_num = 1;
1480 		} else {
1481 			seg_num = 1;
1482 		}
1483 
1484 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
1485 		rxm->nb_segs = seg_num;
1486 		rxm->ol_flags = 0;
1487 		rxm->vlan_tci = 0;
1488 		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1489 		rxm->data_len = (uint16_t)(len[i] - hdr_size);
1490 
1491 		rxm->port = rxvq->port_id;
1492 
1493 		rx_pkts[nb_rx] = rxm;
1494 		prev = rxm;
1495 
1496 		if (vq->hw->has_rx_offload &&
1497 				virtio_rx_offload(rxm, &header->hdr) < 0) {
1498 			virtio_discard_rxbuf_inorder(vq, rxm);
1499 			rxvq->stats.errors++;
1500 			continue;
1501 		}
1502 
1503 		if (hw->vlan_strip)
1504 			rte_vlan_strip(rx_pkts[nb_rx]);
1505 
1506 		seg_res = seg_num - 1;
1507 
1508 		/* Merge remaining segments */
1509 		while (seg_res != 0 && i < (num - 1)) {
1510 			i++;
1511 
1512 			rxm = rcv_pkts[i];
1513 			rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1514 			rxm->pkt_len = (uint32_t)(len[i]);
1515 			rxm->data_len = (uint16_t)(len[i]);
1516 
1517 			rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1518 
1519 			prev->next = rxm;
1520 			prev = rxm;
1521 			seg_res -= 1;
1522 		}
1523 
1524 		if (!seg_res) {
1525 			virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1526 			nb_rx++;
1527 		}
1528 	}
1529 
1530 	/* The last packet may still need more segments merged */
1531 	while (seg_res != 0) {
1532 		uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1533 					VIRTIO_MBUF_BURST_SZ);
1534 
1535 		if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1536 			virtio_rmb(hw->weak_barriers);
1537 			num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len,
1538 							   rcv_cnt);
1539 			uint16_t extra_idx = 0;
1540 
1541 			rcv_cnt = num;
1542 			while (extra_idx < rcv_cnt) {
1543 				rxm = rcv_pkts[extra_idx];
1544 				rxm->data_off =
1545 					RTE_PKTMBUF_HEADROOM - hdr_size;
1546 				rxm->pkt_len = (uint32_t)(len[extra_idx]);
1547 				rxm->data_len = (uint16_t)(len[extra_idx]);
1548 				prev->next = rxm;
1549 				prev = rxm;
1550 				rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1551 				extra_idx += 1;
1552 			}
1553 			seg_res -= rcv_cnt;
1554 
1555 			if (!seg_res) {
1556 				virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1557 				nb_rx++;
1558 			}
1559 		} else {
1560 			PMD_RX_LOG(ERR,
1561 					"Not enough segments for packet.");
1562 			rte_pktmbuf_free(rx_pkts[nb_rx]);
1563 			rxvq->stats.errors++;
1564 			break;
1565 		}
1566 	}
1567 
1568 	rxvq->stats.packets += nb_rx;
1569 
1570 	/* Allocate new mbuf for the used descriptor */
1571 
1572 	if (likely(!virtqueue_full(vq))) {
1573 		/* free_cnt may include mrg descs */
1574 		uint16_t free_cnt = vq->vq_free_cnt;
1575 		struct rte_mbuf *new_pkts[free_cnt];
1576 
1577 		if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1578 			error = virtqueue_enqueue_refill_inorder(vq, new_pkts,
1579 					free_cnt);
1580 			if (unlikely(error)) {
1581 				for (i = 0; i < free_cnt; i++)
1582 					rte_pktmbuf_free(new_pkts[i]);
1583 			}
1584 			nb_enqueued += free_cnt;
1585 		} else {
1586 			struct rte_eth_dev *dev =
1587 				&rte_eth_devices[rxvq->port_id];
1588 			dev->data->rx_mbuf_alloc_failed += free_cnt;
1589 		}
1590 	}
1591 
1592 	if (likely(nb_enqueued)) {
1593 		vq_update_avail_idx(vq);
1594 
1595 		if (unlikely(virtqueue_kick_prepare(vq))) {
1596 			virtqueue_notify(vq);
1597 			PMD_RX_LOG(DEBUG, "Notified");
1598 		}
1599 	}
1600 
1601 	return nb_rx;
1602 }
1603 
1604 uint16_t
1605 virtio_recv_mergeable_pkts(void *rx_queue,
1606 			struct rte_mbuf **rx_pkts,
1607 			uint16_t nb_pkts)
1608 {
1609 	struct virtnet_rx *rxvq = rx_queue;
1610 	struct virtqueue *vq = rxvq->vq;
1611 	struct virtio_hw *hw = vq->hw;
1612 	struct rte_mbuf *rxm;
1613 	struct rte_mbuf *prev = NULL;
1614 	uint16_t nb_used, num, nb_rx = 0;
1615 	uint32_t len[VIRTIO_MBUF_BURST_SZ];
1616 	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1617 	int error;
1618 	uint32_t nb_enqueued = 0;
1619 	uint32_t seg_num = 0;
1620 	uint32_t seg_res = 0;
1621 	uint32_t hdr_size = hw->vtnet_hdr_size;
1622 	int32_t i;
1623 
1624 	if (unlikely(hw->started == 0))
1625 		return nb_rx;
1626 
1627 	nb_used = VIRTQUEUE_NUSED(vq);
1628 
1629 	virtio_rmb(hw->weak_barriers);
1630 
1631 	PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1632 
1633 	num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1634 	if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1635 		num = VIRTIO_MBUF_BURST_SZ;
1636 	if (likely(num > DESC_PER_CACHELINE))
1637 		num = num - ((vq->vq_used_cons_idx + num) %
1638 				DESC_PER_CACHELINE);
1639 
1640 
1641 	num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1642 
1643 	for (i = 0; i < num; i++) {
1644 		struct virtio_net_hdr_mrg_rxbuf *header;
1645 
1646 		PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1647 		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1648 
1649 		rxm = rcv_pkts[i];
1650 
1651 		if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1652 			PMD_RX_LOG(ERR, "Packet drop");
1653 			nb_enqueued++;
1654 			virtio_discard_rxbuf(vq, rxm);
1655 			rxvq->stats.errors++;
1656 			continue;
1657 		}
1658 
1659 		header = (struct virtio_net_hdr_mrg_rxbuf *)
1660 			 ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1661 			 - hdr_size);
1662 		seg_num = header->num_buffers;
1663 		if (seg_num == 0)
1664 			seg_num = 1;
1665 
1666 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
1667 		rxm->nb_segs = seg_num;
1668 		rxm->ol_flags = 0;
1669 		rxm->vlan_tci = 0;
1670 		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1671 		rxm->data_len = (uint16_t)(len[i] - hdr_size);
1672 
1673 		rxm->port = rxvq->port_id;
1674 
1675 		rx_pkts[nb_rx] = rxm;
1676 		prev = rxm;
1677 
1678 		if (hw->has_rx_offload &&
1679 				virtio_rx_offload(rxm, &header->hdr) < 0) {
1680 			virtio_discard_rxbuf(vq, rxm);
1681 			rxvq->stats.errors++;
1682 			continue;
1683 		}
1684 
1685 		if (hw->vlan_strip)
1686 			rte_vlan_strip(rx_pkts[nb_rx]);
1687 
1688 		seg_res = seg_num - 1;
1689 
1690 		/* Merge remaining segments */
1691 		while (seg_res != 0 && i < (num - 1)) {
1692 			i++;
1693 
1694 			rxm = rcv_pkts[i];
1695 			rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1696 			rxm->pkt_len = (uint32_t)(len[i]);
1697 			rxm->data_len = (uint16_t)(len[i]);
1698 
1699 			rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1700 
1701 			prev->next = rxm;
1702 			prev = rxm;
1703 			seg_res -= 1;
1704 		}
1705 
1706 		if (!seg_res) {
1707 			virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1708 			nb_rx++;
1709 		}
1710 	}
1711 
1712 	/* The last packet may still need more segments merged */
1713 	while (seg_res != 0) {
1714 		uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1715 					VIRTIO_MBUF_BURST_SZ);
1716 
1717 		if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1718 			virtio_rmb(hw->weak_barriers);
1719 			num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len,
1720 							   rcv_cnt);
1721 			uint16_t extra_idx = 0;
1722 
1723 			rcv_cnt = num;
1724 			while (extra_idx < rcv_cnt) {
1725 				rxm = rcv_pkts[extra_idx];
1726 				rxm->data_off =
1727 					RTE_PKTMBUF_HEADROOM - hdr_size;
1728 				rxm->pkt_len = (uint32_t)(len[extra_idx]);
1729 				rxm->data_len = (uint16_t)(len[extra_idx]);
1730 				prev->next = rxm;
1731 				prev = rxm;
1732 				rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1733 				extra_idx += 1;
1734 			}
1735 			seg_res -= rcv_cnt;
1736 
1737 			if (!seg_res) {
1738 				virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1739 				nb_rx++;
1740 			}
1741 		} else {
1742 			PMD_RX_LOG(ERR,
1743 					"Not enough segments for packet.");
1744 			rte_pktmbuf_free(rx_pkts[nb_rx]);
1745 			rxvq->stats.errors++;
1746 			break;
1747 		}
1748 	}
1749 
1750 	rxvq->stats.packets += nb_rx;
1751 
1752 	/* Allocate new mbuf for the used descriptor */
1753 	if (likely(!virtqueue_full(vq))) {
1754 		/* free_cnt may include mrg descs */
1755 		uint16_t free_cnt = vq->vq_free_cnt;
1756 		struct rte_mbuf *new_pkts[free_cnt];
1757 
1758 		if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1759 			error = virtqueue_enqueue_recv_refill(vq, new_pkts,
1760 					free_cnt);
1761 			if (unlikely(error)) {
1762 				for (i = 0; i < free_cnt; i++)
1763 					rte_pktmbuf_free(new_pkts[i]);
1764 			}
1765 			nb_enqueued += free_cnt;
1766 		} else {
1767 			struct rte_eth_dev *dev =
1768 				&rte_eth_devices[rxvq->port_id];
1769 			dev->data->rx_mbuf_alloc_failed += free_cnt;
1770 		}
1771 	}
1772 
1773 	if (likely(nb_enqueued)) {
1774 		vq_update_avail_idx(vq);
1775 
1776 		if (unlikely(virtqueue_kick_prepare(vq))) {
1777 			virtqueue_notify(vq);
1778 			PMD_RX_LOG(DEBUG, "Notified");
1779 		}
1780 	}
1781 
1782 	return nb_rx;
1783 }
1784 
1785 uint16_t
1786 virtio_recv_mergeable_pkts_packed(void *rx_queue,
1787 			struct rte_mbuf **rx_pkts,
1788 			uint16_t nb_pkts)
1789 {
1790 	struct virtnet_rx *rxvq = rx_queue;
1791 	struct virtqueue *vq = rxvq->vq;
1792 	struct virtio_hw *hw = vq->hw;
1793 	struct rte_mbuf *rxm;
1794 	struct rte_mbuf *prev = NULL;
1795 	uint16_t num, nb_rx = 0;
1796 	uint32_t len[VIRTIO_MBUF_BURST_SZ];
1797 	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1798 	uint32_t nb_enqueued = 0;
1799 	uint32_t seg_num = 0;
1800 	uint32_t seg_res = 0;
1801 	uint32_t hdr_size = hw->vtnet_hdr_size;
1802 	int32_t i;
1803 	int error;
1804 
1805 	if (unlikely(hw->started == 0))
1806 		return nb_rx;
1807 
1808 
1809 	num = nb_pkts;
1810 	if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1811 		num = VIRTIO_MBUF_BURST_SZ;
1812 	if (likely(num > DESC_PER_CACHELINE))
1813 		num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1814 
1815 	num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1816 
1817 	for (i = 0; i < num; i++) {
1818 		struct virtio_net_hdr_mrg_rxbuf *header;
1819 
1820 		PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1821 		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1822 
1823 		rxm = rcv_pkts[i];
1824 
1825 		if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1826 			PMD_RX_LOG(ERR, "Packet drop");
1827 			nb_enqueued++;
1828 			virtio_discard_rxbuf(vq, rxm);
1829 			rxvq->stats.errors++;
1830 			continue;
1831 		}
1832 
1833 		header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)
1834 			  rxm->buf_addr + RTE_PKTMBUF_HEADROOM - hdr_size);
1835 		seg_num = header->num_buffers;
1836 
1837 		if (seg_num == 0)
1838 			seg_num = 1;
1839 
1840 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
1841 		rxm->nb_segs = seg_num;
1842 		rxm->ol_flags = 0;
1843 		rxm->vlan_tci = 0;
1844 		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1845 		rxm->data_len = (uint16_t)(len[i] - hdr_size);
1846 
1847 		rxm->port = rxvq->port_id;
1848 		rx_pkts[nb_rx] = rxm;
1849 		prev = rxm;
1850 
1851 		if (hw->has_rx_offload &&
1852 				virtio_rx_offload(rxm, &header->hdr) < 0) {
1853 			virtio_discard_rxbuf(vq, rxm);
1854 			rxvq->stats.errors++;
1855 			continue;
1856 		}
1857 
1858 		if (hw->vlan_strip)
1859 			rte_vlan_strip(rx_pkts[nb_rx]);
1860 
1861 		seg_res = seg_num - 1;
1862 
1863 		/* Merge remaining segments */
1864 		while (seg_res != 0 && i < (num - 1)) {
1865 			i++;
1866 
1867 			rxm = rcv_pkts[i];
1868 			rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1869 			rxm->pkt_len = (uint32_t)(len[i]);
1870 			rxm->data_len = (uint16_t)(len[i]);
1871 
1872 			rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1873 
1874 			prev->next = rxm;
1875 			prev = rxm;
1876 			seg_res -= 1;
1877 		}
1878 
1879 		if (!seg_res) {
1880 			virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1881 			nb_rx++;
1882 		}
1883 	}
1884 
1885 	/* The last packet may still need more segments merged */
1886 	while (seg_res != 0) {
1887 		uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1888 					VIRTIO_MBUF_BURST_SZ);
1889 		uint16_t extra_idx = 0;
1890 
1891 		rcv_cnt = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts,
1892 				len, rcv_cnt);
1893 		if (unlikely(rcv_cnt == 0)) {
1894 			PMD_RX_LOG(ERR, "Not enough segments for packet.");
1895 			rte_pktmbuf_free(rx_pkts[nb_rx]);
1896 			rxvq->stats.errors++;
1897 			break;
1898 		}
1899 
1900 		while (extra_idx < rcv_cnt) {
1901 			rxm = rcv_pkts[extra_idx];
1902 
1903 			rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1904 			rxm->pkt_len = (uint32_t)(len[extra_idx]);
1905 			rxm->data_len = (uint16_t)(len[extra_idx]);
1906 
1907 			prev->next = rxm;
1908 			prev = rxm;
1909 			rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1910 			extra_idx += 1;
1911 		}
1912 		seg_res -= rcv_cnt;
1913 		if (!seg_res) {
1914 			virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1915 			nb_rx++;
1916 		}
1917 	}
1918 
1919 	rxvq->stats.packets += nb_rx;
1920 
1921 	/* Allocate new mbuf for the used descriptor */
1922 	if (likely(!virtqueue_full(vq))) {
1923 		/* free_cnt may include mrg descs */
1924 		uint16_t free_cnt = vq->vq_free_cnt;
1925 		struct rte_mbuf *new_pkts[free_cnt];
1926 
1927 		if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1928 			error = virtqueue_enqueue_recv_refill_packed(vq,
1929 					new_pkts, free_cnt);
1930 			if (unlikely(error)) {
1931 				for (i = 0; i < free_cnt; i++)
1932 					rte_pktmbuf_free(new_pkts[i]);
1933 			}
1934 			nb_enqueued += free_cnt;
1935 		} else {
1936 			struct rte_eth_dev *dev =
1937 				&rte_eth_devices[rxvq->port_id];
1938 			dev->data->rx_mbuf_alloc_failed += free_cnt;
1939 		}
1940 	}
1941 
1942 	if (likely(nb_enqueued)) {
1943 		if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1944 			virtqueue_notify(vq);
1945 			PMD_RX_LOG(DEBUG, "Notified");
1946 		}
1947 	}
1948 
1949 	return nb_rx;
1950 }
1951 
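/*
 * Tx burst prepare: validate offload requests, insert the VLAN tag in
 * software when requested, prepare L3/L4 checksums and fix up the
 * pseudo-header checksum of TSO packets.
 */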
1952 uint16_t
1953 virtio_xmit_pkts_prepare(void *tx_queue __rte_unused, struct rte_mbuf **tx_pkts,
1954 			uint16_t nb_pkts)
1955 {
1956 	uint16_t nb_tx;
1957 	int error;
1958 
1959 	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1960 		struct rte_mbuf *m = tx_pkts[nb_tx];
1961 
1962 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
1963 		error = rte_validate_tx_offload(m);
1964 		if (unlikely(error)) {
1965 			rte_errno = -error;
1966 			break;
1967 		}
1968 #endif
1969 
1970 		/* Do VLAN tag insertion */
1971 		if (unlikely(m->ol_flags & PKT_TX_VLAN_PKT)) {
1972 			error = rte_vlan_insert(&m);
1973 			/* rte_vlan_insert() may change pointer
1974 			 * even in the case of failure
1975 			 */
1976 			tx_pkts[nb_tx] = m;
1977 
1978 			if (unlikely(error)) {
1979 				rte_errno = -error;
1980 				break;
1981 			}
1982 		}
1983 
1984 		error = rte_net_intel_cksum_prepare(m);
1985 		if (unlikely(error)) {
1986 			rte_errno = -error;
1987 			break;
1988 		}
1989 
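		/* virtio expects the TCP pseudo-header checksum of a TSO
		 * packet to include the packet length, unlike the mbuf
		 * convention, so adjust it before handing the packet over.
		 */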
1990 		if (m->ol_flags & PKT_TX_TCP_SEG)
1991 			virtio_tso_fix_cksum(m);
1992 	}
1993 
1994 	return nb_tx;
1995 }
1996 
1997 uint16_t
1998 virtio_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts,
1999 			uint16_t nb_pkts)
2000 {
2001 	struct virtnet_tx *txvq = tx_queue;
2002 	struct virtqueue *vq = txvq->vq;
2003 	struct virtio_hw *hw = vq->hw;
2004 	uint16_t hdr_size = hw->vtnet_hdr_size;
2005 	uint16_t nb_tx = 0;
2006 	bool in_order = hw->use_inorder_tx;
2007 
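	/* Nothing may be transmitted while the device is stopped, except
	 * bursts injected by the driver itself (tx_pkts == hw->inject_pkts).
	 */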
2008 	if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2009 		return nb_tx;
2010 
2011 	if (unlikely(nb_pkts < 1))
2012 		return nb_pkts;
2013 
2014 	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2015 
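	/* If the free count cannot cover the whole burst, reclaim
	 * descriptors already processed by the device before enqueueing.
	 */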
2016 	if (nb_pkts > vq->vq_free_cnt)
2017 		virtio_xmit_cleanup_packed(vq, nb_pkts - vq->vq_free_cnt,
2018 					   in_order);
2019 
2020 	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
2021 		struct rte_mbuf *txm = tx_pkts[nb_tx];
2022 		int can_push = 0, slots, need;
2023 
2024 		/* optimize ring usage */
2025 		if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2026 		      vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2027 		    rte_mbuf_refcnt_read(txm) == 1 &&
2028 		    RTE_MBUF_DIRECT(txm) &&
2029 		    txm->nb_segs == 1 &&
2030 		    rte_pktmbuf_headroom(txm) >= hdr_size &&
2031 		    rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2032 			   __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
2033 			can_push = 1;
2034 
2035 		/* How many main ring entries are needed for this Tx?
2036 		 * any_layout => number of segments
2037 		 * default    => number of segments + 1
2038 		 */
2039 		slots = txm->nb_segs + !can_push;
2040 		need = slots - vq->vq_free_cnt;
2041 
2042 		/* A positive value means free vring descriptors are still needed */
2043 		if (unlikely(need > 0)) {
2044 			virtio_xmit_cleanup_packed(vq, need, in_order);
2045 			need = slots - vq->vq_free_cnt;
2046 			if (unlikely(need > 0)) {
2047 				PMD_TX_LOG(ERR,
2048 					   "No free tx descriptors to transmit");
2049 				break;
2050 			}
2051 		}
2052 
2053 		/* Enqueue Packet buffers */
2054 		if (can_push)
2055 			virtqueue_enqueue_xmit_packed_fast(txvq, txm, in_order);
2056 		else
2057 			virtqueue_enqueue_xmit_packed(txvq, txm, slots, 0,
2058 						      in_order);
2059 
2060 		virtio_update_packet_stats(&txvq->stats, txm);
2061 	}
2062 
2063 	txvq->stats.packets += nb_tx;
2064 
2065 	if (likely(nb_tx)) {
2066 		if (unlikely(virtqueue_kick_prepare_packed(vq))) {
2067 			virtqueue_notify(vq);
2068 			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2069 		}
2070 	}
2071 
2072 	return nb_tx;
2073 }
2074 
2075 uint16_t
2076 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
2077 {
2078 	struct virtnet_tx *txvq = tx_queue;
2079 	struct virtqueue *vq = txvq->vq;
2080 	struct virtio_hw *hw = vq->hw;
2081 	uint16_t hdr_size = hw->vtnet_hdr_size;
2082 	uint16_t nb_used, nb_tx = 0;
2083 
2084 	if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2085 		return nb_tx;
2086 
2087 	if (unlikely(nb_pkts < 1))
2088 		return nb_pkts;
2089 
2090 	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2091 	nb_used = VIRTQUEUE_NUSED(vq);
2092 
2093 	virtio_rmb(hw->weak_barriers);
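	/* Reclaim transmitted buffers once the used count exceeds the ring
	 * size minus the free threshold.
	 */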
2094 	if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2095 		virtio_xmit_cleanup(vq, nb_used);
2096 
2097 	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
2098 		struct rte_mbuf *txm = tx_pkts[nb_tx];
2099 		int can_push = 0, use_indirect = 0, slots, need;
2100 
2101 		/* optimize ring usage */
2102 		if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2103 		      vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2104 		    rte_mbuf_refcnt_read(txm) == 1 &&
2105 		    RTE_MBUF_DIRECT(txm) &&
2106 		    txm->nb_segs == 1 &&
2107 		    rte_pktmbuf_headroom(txm) >= hdr_size &&
2108 		    rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2109 				   __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
2110 			can_push = 1;
2111 		else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
2112 			 txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
2113 			use_indirect = 1;
2114 
2115 		/* How many main ring entries are needed for this Tx?
2116 		 * any_layout => number of segments
2117 		 * indirect   => 1
2118 		 * default    => number of segments + 1
2119 		 */
2120 		slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
2121 		need = slots - vq->vq_free_cnt;
2122 
2123 		/* A positive value means free vring descriptors are still needed */
2124 		if (unlikely(need > 0)) {
2125 			nb_used = VIRTQUEUE_NUSED(vq);
2126 			virtio_rmb(hw->weak_barriers);
2127 			need = RTE_MIN(need, (int)nb_used);
2128 
2129 			virtio_xmit_cleanup(vq, need);
2130 			need = slots - vq->vq_free_cnt;
2131 			if (unlikely(need > 0)) {
2132 				PMD_TX_LOG(ERR,
2133 					   "No free tx descriptors to transmit");
2134 				break;
2135 			}
2136 		}
2137 
2138 		/* Enqueue Packet buffers */
2139 		virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect,
2140 			can_push, 0);
2141 
2142 		virtio_update_packet_stats(&txvq->stats, txm);
2143 	}
2144 
2145 	txvq->stats.packets += nb_tx;
2146 
2147 	if (likely(nb_tx)) {
2148 		vq_update_avail_idx(vq);
2149 
2150 		if (unlikely(virtqueue_kick_prepare(vq))) {
2151 			virtqueue_notify(vq);
2152 			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2153 		}
2154 	}
2155 
2156 	return nb_tx;
2157 }
2158 
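/*
 * Try to free at least 'need' descriptors by reclaiming in-order
 * completions. Returns how many descriptors are still missing; zero or
 * a negative value means enough descriptors are now available.
 */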
2159 static __rte_always_inline int
2160 virtio_xmit_try_cleanup_inorder(struct virtqueue *vq, uint16_t need)
2161 {
2162 	uint16_t nb_used, nb_clean, nb_descs;
2163 	struct virtio_hw *hw = vq->hw;
2164 
2165 	nb_descs = vq->vq_free_cnt + need;
2166 	nb_used = VIRTQUEUE_NUSED(vq);
2167 	virtio_rmb(hw->weak_barriers);
2168 	nb_clean = RTE_MIN(need, (int)nb_used);
2169 
2170 	virtio_xmit_cleanup_inorder(vq, nb_clean);
2171 
2172 	return nb_descs - vq->vq_free_cnt;
2173 }
2174 
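/*
 * In-order Tx burst: single-segment packets that can carry the virtio-net
 * header in their headroom are batched and enqueued one descriptor each;
 * any other packet is enqueued on its own with a separate header slot.
 */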
2175 uint16_t
2176 virtio_xmit_pkts_inorder(void *tx_queue,
2177 			struct rte_mbuf **tx_pkts,
2178 			uint16_t nb_pkts)
2179 {
2180 	struct virtnet_tx *txvq = tx_queue;
2181 	struct virtqueue *vq = txvq->vq;
2182 	struct virtio_hw *hw = vq->hw;
2183 	uint16_t hdr_size = hw->vtnet_hdr_size;
2184 	uint16_t nb_used, nb_tx = 0, nb_inorder_pkts = 0;
2185 	struct rte_mbuf *inorder_pkts[nb_pkts];
2186 	int need;
2187 
2188 	if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2189 		return nb_tx;
2190 
2191 	if (unlikely(nb_pkts < 1))
2192 		return nb_pkts;
2193 
2194 	VIRTQUEUE_DUMP(vq);
2195 	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2196 	nb_used = VIRTQUEUE_NUSED(vq);
2197 
2198 	virtio_rmb(hw->weak_barriers);
2199 	if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2200 		virtio_xmit_cleanup_inorder(vq, nb_used);
2201 
2202 	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
2203 		struct rte_mbuf *txm = tx_pkts[nb_tx];
2204 		int slots;
2205 
2206 		/* optimize ring usage */
2207 		if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2208 		     vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2209 		     rte_mbuf_refcnt_read(txm) == 1 &&
2210 		     RTE_MBUF_DIRECT(txm) &&
2211 		     txm->nb_segs == 1 &&
2212 		     rte_pktmbuf_headroom(txm) >= hdr_size &&
2213 		     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2214 				__alignof__(struct virtio_net_hdr_mrg_rxbuf))) {
2215 			inorder_pkts[nb_inorder_pkts] = txm;
2216 			nb_inorder_pkts++;
2217 
2218 			continue;
2219 		}
2220 
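		/* This packet cannot take the batched fast path, so flush the
		 * pending batch first to keep descriptor order matching
		 * packet order.
		 */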
2221 		if (nb_inorder_pkts) {
2222 			need = nb_inorder_pkts - vq->vq_free_cnt;
2223 			if (unlikely(need > 0)) {
2224 				need = virtio_xmit_try_cleanup_inorder(vq,
2225 								       need);
2226 				if (unlikely(need > 0)) {
2227 					PMD_TX_LOG(ERR,
2228 						"No free tx descriptors to "
2229 						"transmit");
2230 					break;
2231 				}
2232 			}
2233 			virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2234 							nb_inorder_pkts);
2235 			nb_inorder_pkts = 0;
2236 		}
2237 
2238 		slots = txm->nb_segs + 1;
2239 		need = slots - vq->vq_free_cnt;
2240 		if (unlikely(need > 0)) {
2241 			need = virtio_xmit_try_cleanup_inorder(vq, slots);
2242 
2243 			if (unlikely(need > 0)) {
2244 				PMD_TX_LOG(ERR,
2245 					"No free tx descriptors to transmit");
2246 				break;
2247 			}
2248 		}
2249 		/* Enqueue Packet buffers */
2250 		virtqueue_enqueue_xmit(txvq, txm, slots, 0, 0, 1);
2251 
2252 		virtio_update_packet_stats(&txvq->stats, txm);
2253 	}
2254 
2255 	/* Transmit all inorder packets */
2256 	if (nb_inorder_pkts) {
2257 		need = nb_inorder_pkts - vq->vq_free_cnt;
2258 		if (unlikely(need > 0)) {
2259 			need = virtio_xmit_try_cleanup_inorder(vq,
2260 								  need);
2261 			if (unlikely(need > 0)) {
2262 				PMD_TX_LOG(ERR,
2263 					"No free tx descriptors to transmit");
2264 				nb_inorder_pkts = vq->vq_free_cnt;
2265 				nb_tx -= need;
2266 			}
2267 		}
2268 
2269 		virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2270 						nb_inorder_pkts);
2271 	}
2272 
2273 	txvq->stats.packets += nb_tx;
2274 
2275 	if (likely(nb_tx)) {
2276 		vq_update_avail_idx(vq);
2277 
2278 		if (unlikely(virtqueue_kick_prepare(vq))) {
2279 			virtqueue_notify(vq);
2280 			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2281 		}
2282 	}
2283 
2284 	VIRTQUEUE_DUMP(vq);
2285 
2286 	return nb_tx;
2287 }
2288