xref: /dpdk/drivers/net/virtio/virtio_rxtx.c (revision 8855839c5dbb2e81f9969a48e965633cc40cc7d4)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4 
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <errno.h>
10 
11 #include <rte_cycles.h>
12 #include <rte_memory.h>
13 #include <rte_branch_prediction.h>
14 #include <rte_mempool.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_ether.h>
18 #include <rte_ethdev_driver.h>
19 #include <rte_prefetch.h>
20 #include <rte_string_fns.h>
21 #include <rte_errno.h>
22 #include <rte_byteorder.h>
23 #include <rte_net.h>
24 #include <rte_ip.h>
25 #include <rte_udp.h>
26 #include <rte_tcp.h>
27 
28 #include "virtio_logs.h"
29 #include "virtio_ethdev.h"
30 #include "virtio_pci.h"
31 #include "virtqueue.h"
32 #include "virtio_rxtx.h"
33 #include "virtio_rxtx_simple.h"
34 
35 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
36 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
37 #else
38 #define VIRTIO_DUMP_PACKET(m, len) do { } while (0)
39 #endif
40 
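/* Return non-zero when at least "offset" used entries are pending on the rx queue. */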
41 int
42 virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
43 {
44 	struct virtnet_rx *rxvq = rxq;
45 	struct virtqueue *vq = rxvq->vq;
46 
47 	return VIRTQUEUE_NUSED(vq) >= offset;
48 }
49 
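/* Give "num" descriptors back to the virtqueue. With in-order usage the chain
 * layout is implicit, so only the free count and tail index are updated.
 */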
50 void
51 vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx, uint16_t num)
52 {
53 	vq->vq_free_cnt += num;
54 	vq->vq_desc_tail_idx = desc_idx & (vq->vq_nentries - 1);
55 }
56 
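/* Walk the descriptor chain starting at desc_idx, credit its descriptors back
 * to the free count and append the chain to the virtqueue free list.
 */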
57 void
58 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
59 {
60 	struct vring_desc *dp, *dp_tail;
61 	struct vq_desc_extra *dxp;
62 	uint16_t desc_idx_last = desc_idx;
63 
64 	dp  = &vq->vq_ring.desc[desc_idx];
65 	dxp = &vq->vq_descx[desc_idx];
66 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
67 	if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
68 		while (dp->flags & VRING_DESC_F_NEXT) {
69 			desc_idx_last = dp->next;
70 			dp = &vq->vq_ring.desc[dp->next];
71 		}
72 	}
73 	dxp->ndescs = 0;
74 
75 	/*
76 	 * We must append the existing free chain, if any, to the end of
77 	 * the newly freed chain. If the virtqueue was completely used,
78 	 * then the free-list tail would be VQ_RING_DESC_CHAIN_END.
79 	 */
80 	if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
81 		vq->vq_desc_head_idx = desc_idx;
82 	} else {
83 		dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
84 		dp_tail->next = desc_idx;
85 	}
86 
87 	vq->vq_desc_tail_idx = desc_idx_last;
88 	dp->next = VQ_RING_DESC_CHAIN_END;
89 }
90 
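/* Dequeue up to "num" received mbufs from the used ring, storing each packet
 * length in len[] and returning the consumed descriptor chains to the free list.
 */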
91 static uint16_t
92 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
93 			   uint32_t *len, uint16_t num)
94 {
95 	struct vring_used_elem *uep;
96 	struct rte_mbuf *cookie;
97 	uint16_t used_idx, desc_idx;
98 	uint16_t i;
99 
100 	/*  Caller does the check */
101 	for (i = 0; i < num ; i++) {
102 		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
103 		uep = &vq->vq_ring.used->ring[used_idx];
104 		desc_idx = (uint16_t) uep->id;
105 		len[i] = uep->len;
106 		cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
107 
108 		if (unlikely(cookie == NULL)) {
109 			PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
110 				vq->vq_used_cons_idx);
111 			break;
112 		}
113 
114 		rte_prefetch0(cookie);
115 		rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
116 		rx_pkts[i]  = cookie;
117 		vq->vq_used_cons_idx++;
118 		vq_ring_free_chain(vq, desc_idx);
119 		vq->vq_descx[desc_idx].cookie = NULL;
120 	}
121 
122 	return i;
123 }
124 
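/* In-order dequeue: the descriptor index equals the used-ring index, so the
 * consumed descriptors can be released in a single batch at the end.
 */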
125 static uint16_t
126 virtqueue_dequeue_rx_inorder(struct virtqueue *vq,
127 			struct rte_mbuf **rx_pkts,
128 			uint32_t *len,
129 			uint16_t num)
130 {
131 	struct vring_used_elem *uep;
132 	struct rte_mbuf *cookie;
133 	uint16_t used_idx = 0;
134 	uint16_t i;
135 
136 	if (unlikely(num == 0))
137 		return 0;
138 
139 	for (i = 0; i < num; i++) {
140 		used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
141 		/* Desc idx same as used idx */
142 		uep = &vq->vq_ring.used->ring[used_idx];
143 		len[i] = uep->len;
144 		cookie = (struct rte_mbuf *)vq->vq_descx[used_idx].cookie;
145 
146 		if (unlikely(cookie == NULL)) {
147 			PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
148 				vq->vq_used_cons_idx);
149 			break;
150 		}
151 
152 		rte_prefetch0(cookie);
153 		rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
154 		rx_pkts[i]  = cookie;
155 		vq->vq_used_cons_idx++;
156 		vq->vq_descx[used_idx].cookie = NULL;
157 	}
158 
159 	vq_ring_free_inorder(vq, used_idx, i);
160 	return i;
161 }
162 
163 #ifndef DEFAULT_TX_FREE_THRESH
164 #define DEFAULT_TX_FREE_THRESH 32
165 #endif
166 
167 /* Cleanup from completed transmits. */
168 static void
169 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
170 {
171 	uint16_t i, used_idx, desc_idx;
172 	for (i = 0; i < num; i++) {
173 		struct vring_used_elem *uep;
174 		struct vq_desc_extra *dxp;
175 
176 		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
177 		uep = &vq->vq_ring.used->ring[used_idx];
178 
179 		desc_idx = (uint16_t) uep->id;
180 		dxp = &vq->vq_descx[desc_idx];
181 		vq->vq_used_cons_idx++;
182 		vq_ring_free_chain(vq, desc_idx);
183 
184 		if (dxp->cookie != NULL) {
185 			rte_pktmbuf_free(dxp->cookie);
186 			dxp->cookie = NULL;
187 		}
188 	}
189 }
190 
191 /* Cleanup from completed inorder transmits. */
192 static void
193 virtio_xmit_cleanup_inorder(struct virtqueue *vq, uint16_t num)
194 {
195 	uint16_t i, used_idx, desc_idx = 0, last_idx;
196 	int16_t free_cnt = 0;
197 	struct vq_desc_extra *dxp = NULL;
198 
199 	if (unlikely(num == 0))
200 		return;
201 
202 	for (i = 0; i < num; i++) {
203 		struct vring_used_elem *uep;
204 
205 		used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
206 		uep = &vq->vq_ring.used->ring[used_idx];
207 		desc_idx = (uint16_t)uep->id;
208 
209 		dxp = &vq->vq_descx[desc_idx];
210 		vq->vq_used_cons_idx++;
211 
212 		if (dxp->cookie != NULL) {
213 			rte_pktmbuf_free(dxp->cookie);
214 			dxp->cookie = NULL;
215 		}
216 	}
217 
218 	last_idx = desc_idx + dxp->ndescs - 1;
219 	free_cnt = last_idx - vq->vq_desc_tail_idx;
220 	if (free_cnt <= 0)
221 		free_cnt += vq->vq_nentries;
222 
223 	vq_ring_free_inorder(vq, last_idx, free_cnt);
224 }
225 
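/* Refill the rx ring in order with "num" mbufs, one write-only descriptor per
 * mbuf, leaving room for the virtio-net header in front of the packet data.
 */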
226 static inline int
227 virtqueue_enqueue_refill_inorder(struct virtqueue *vq,
228 			struct rte_mbuf **cookies,
229 			uint16_t num)
230 {
231 	struct vq_desc_extra *dxp;
232 	struct virtio_hw *hw = vq->hw;
233 	struct vring_desc *start_dp;
234 	uint16_t head_idx, idx, i = 0;
235 
236 	if (unlikely(vq->vq_free_cnt == 0))
237 		return -ENOSPC;
238 	if (unlikely(vq->vq_free_cnt < num))
239 		return -EMSGSIZE;
240 
241 	head_idx = vq->vq_desc_head_idx & (vq->vq_nentries - 1);
242 	start_dp = vq->vq_ring.desc;
243 
244 	while (i < num) {
245 		idx = head_idx & (vq->vq_nentries - 1);
246 		dxp = &vq->vq_descx[idx];
247 		dxp->cookie = (void *)cookies[i];
248 		dxp->ndescs = 1;
249 
250 		start_dp[idx].addr =
251 				VIRTIO_MBUF_ADDR(cookies[i], vq) +
252 				RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
253 		start_dp[idx].len =
254 				cookies[i]->buf_len -
255 				RTE_PKTMBUF_HEADROOM +
256 				hw->vtnet_hdr_size;
257 		start_dp[idx].flags =  VRING_DESC_F_WRITE;
258 
259 		vq_update_avail_ring(vq, idx);
260 		head_idx++;
261 		i++;
262 	}
263 
264 	vq->vq_desc_head_idx += num;
265 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
266 	return 0;
267 }
268 
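/* Add a single mbuf to the rx ring as one write-only descriptor. */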
269 static inline int
270 virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
271 {
272 	struct vq_desc_extra *dxp;
273 	struct virtio_hw *hw = vq->hw;
274 	struct vring_desc *start_dp;
275 	uint16_t needed = 1;
276 	uint16_t head_idx, idx;
277 
278 	if (unlikely(vq->vq_free_cnt == 0))
279 		return -ENOSPC;
280 	if (unlikely(vq->vq_free_cnt < needed))
281 		return -EMSGSIZE;
282 
283 	head_idx = vq->vq_desc_head_idx;
284 	if (unlikely(head_idx >= vq->vq_nentries))
285 		return -EFAULT;
286 
287 	idx = head_idx;
288 	dxp = &vq->vq_descx[idx];
289 	dxp->cookie = (void *)cookie;
290 	dxp->ndescs = needed;
291 
292 	start_dp = vq->vq_ring.desc;
293 	start_dp[idx].addr =
294 		VIRTIO_MBUF_ADDR(cookie, vq) +
295 		RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
296 	start_dp[idx].len =
297 		cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
298 	start_dp[idx].flags =  VRING_DESC_F_WRITE;
299 	idx = start_dp[idx].next;
300 	vq->vq_desc_head_idx = idx;
301 	if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
302 		vq->vq_desc_tail_idx = idx;
303 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
304 	vq_update_avail_ring(vq, head_idx);
305 
306 	return 0;
307 }
308 
309 /* When doing TSO, the IP length is not included in the pseudo header
310  * checksum of the packet given to the PMD, but for virtio it is
311  * expected.
312  */
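/* In other words, the mbuf arrives with a pseudo-header checksum that covers
 * the addresses and protocol but not the L4 payload length; the length is
 * folded into the existing checksum below with one's complement addition,
 * e.g. (illustrative values only):
 *   prev_cksum = 0x1234, ip_paylen = 0x0028 (40 bytes)
 *   tmp = 0x1234 + 0x0028 = 0x125c  ->  new_cksum = 0x125c
 */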
313 static void
314 virtio_tso_fix_cksum(struct rte_mbuf *m)
315 {
316 	/* common case: header is not fragmented */
317 	if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
318 			m->l4_len)) {
319 		struct ipv4_hdr *iph;
320 		struct ipv6_hdr *ip6h;
321 		struct tcp_hdr *th;
322 		uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
323 		uint32_t tmp;
324 
325 		iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
326 		th = RTE_PTR_ADD(iph, m->l3_len);
327 		if ((iph->version_ihl >> 4) == 4) {
328 			iph->hdr_checksum = 0;
329 			iph->hdr_checksum = rte_ipv4_cksum(iph);
330 			ip_len = iph->total_length;
331 			ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
332 				m->l3_len);
333 		} else {
334 			ip6h = (struct ipv6_hdr *)iph;
335 			ip_paylen = ip6h->payload_len;
336 		}
337 
338 		/* calculate the new phdr checksum not including ip_paylen */
339 		prev_cksum = th->cksum;
340 		tmp = prev_cksum;
341 		tmp += ip_paylen;
342 		tmp = (tmp & 0xffff) + (tmp >> 16);
343 		new_cksum = tmp;
344 
345 		/* replace it in the packet */
346 		th->cksum = new_cksum;
347 	}
348 }
349 
350 
351 /* avoid the write operation when it is unnecessary, to lessen cache issues */
352 #define ASSIGN_UNLESS_EQUAL(var, val) do {	\
353 	if ((var) != (val))			\
354 		(var) = (val);			\
355 } while (0)
356 
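/* Fill the checksum and GSO fields of the virtio-net header from the mbuf
 * offload flags. When offload is disabled nothing is written here and the
 * caller is responsible for clearing the header.
 */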
357 static inline void
358 virtqueue_xmit_offload(struct virtio_net_hdr *hdr,
359 			struct rte_mbuf *cookie,
360 			bool offload)
361 {
362 	if (offload) {
363 		if (cookie->ol_flags & PKT_TX_TCP_SEG)
364 			cookie->ol_flags |= PKT_TX_TCP_CKSUM;
365 
366 		switch (cookie->ol_flags & PKT_TX_L4_MASK) {
367 		case PKT_TX_UDP_CKSUM:
368 			hdr->csum_start = cookie->l2_len + cookie->l3_len;
369 			hdr->csum_offset = offsetof(struct udp_hdr,
370 				dgram_cksum);
371 			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
372 			break;
373 
374 		case PKT_TX_TCP_CKSUM:
375 			hdr->csum_start = cookie->l2_len + cookie->l3_len;
376 			hdr->csum_offset = offsetof(struct tcp_hdr, cksum);
377 			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
378 			break;
379 
380 		default:
381 			ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
382 			ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
383 			ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
384 			break;
385 		}
386 
387 		/* TCP Segmentation Offload */
388 		if (cookie->ol_flags & PKT_TX_TCP_SEG) {
389 			virtio_tso_fix_cksum(cookie);
390 			hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
391 				VIRTIO_NET_HDR_GSO_TCPV6 :
392 				VIRTIO_NET_HDR_GSO_TCPV4;
393 			hdr->gso_size = cookie->tso_segsz;
394 			hdr->hdr_len =
395 				cookie->l2_len +
396 				cookie->l3_len +
397 				cookie->l4_len;
398 		} else {
399 			ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
400 			ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
401 			ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
402 		}
403 	}
404 }
405 
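/* Enqueue "num" single-segment packets for in-order transmission, prepending
 * the virtio-net header into each mbuf's headroom (one descriptor per packet).
 */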
406 static inline void
407 virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
408 			struct rte_mbuf **cookies,
409 			uint16_t num)
410 {
411 	struct vq_desc_extra *dxp;
412 	struct virtqueue *vq = txvq->vq;
413 	struct vring_desc *start_dp;
414 	struct virtio_net_hdr *hdr;
415 	uint16_t idx;
416 	uint16_t head_size = vq->hw->vtnet_hdr_size;
417 	uint16_t i = 0;
418 
419 	idx = vq->vq_desc_head_idx;
420 	start_dp = vq->vq_ring.desc;
421 
422 	while (i < num) {
423 		idx = idx & (vq->vq_nentries - 1);
424 		dxp = &vq->vq_descx[idx];
425 		dxp->cookie = (void *)cookies[i];
426 		dxp->ndescs = 1;
427 
428 		hdr = (struct virtio_net_hdr *)
429 			rte_pktmbuf_prepend(cookies[i], head_size);
430 		cookies[i]->pkt_len -= head_size;
431 
432 		/* if offload is disabled, the header is not zeroed below, so do it now */
433 		if (!vq->hw->has_tx_offload) {
434 			ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
435 			ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
436 			ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
437 			ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
438 			ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
439 			ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
440 		}
441 
442 		virtqueue_xmit_offload(hdr, cookies[i],
443 				vq->hw->has_tx_offload);
444 
445 		start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookies[i], vq);
446 		start_dp[idx].len   = cookies[i]->data_len;
447 		start_dp[idx].flags = 0;
448 
449 		vq_update_avail_ring(vq, idx);
450 
451 		idx++;
452 		i++;
453 	}
454 
455 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
456 	vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
457 }
458 
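/* Enqueue one packet for transmission. The virtio-net header goes either into
 * the mbuf headroom (can_push), into the reserved region referenced through an
 * indirect descriptor table (use_indirect), or into its own chained descriptor.
 */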
459 static inline void
460 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
461 			uint16_t needed, int use_indirect, int can_push,
462 			int in_order)
463 {
464 	struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
465 	struct vq_desc_extra *dxp;
466 	struct virtqueue *vq = txvq->vq;
467 	struct vring_desc *start_dp;
468 	uint16_t seg_num = cookie->nb_segs;
469 	uint16_t head_idx, idx;
470 	uint16_t head_size = vq->hw->vtnet_hdr_size;
471 	struct virtio_net_hdr *hdr;
472 
473 	head_idx = vq->vq_desc_head_idx;
474 	idx = head_idx;
475 	dxp = &vq->vq_descx[idx];
476 	dxp->cookie = (void *)cookie;
477 	dxp->ndescs = needed;
478 
479 	start_dp = vq->vq_ring.desc;
480 
481 	if (can_push) {
482 		/* prepend cannot fail, checked by caller */
483 		hdr = (struct virtio_net_hdr *)
484 			rte_pktmbuf_prepend(cookie, head_size);
485 		/* rte_pktmbuf_prepend() counts the hdr size in the pkt length,
486 		 * which is wrong. The subtraction below restores the correct pkt size.
487 		 */
488 		cookie->pkt_len -= head_size;
489 
490 		/* if offload is disabled, the header is not zeroed below, so do it now */
491 		if (!vq->hw->has_tx_offload) {
492 			ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
493 			ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
494 			ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
495 			ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
496 			ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
497 			ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
498 		}
499 	} else if (use_indirect) {
500 		/* setup tx ring slot to point to indirect
501 		 * descriptor list stored in reserved region.
502 		 *
503 		 * the first slot in indirect ring is already preset
504 		 * to point to the header in reserved region
505 		 */
506 		start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
507 			RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
508 		start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
509 		start_dp[idx].flags = VRING_DESC_F_INDIRECT;
510 		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
511 
512 		/* loop below will fill in rest of the indirect elements */
513 		start_dp = txr[idx].tx_indir;
514 		idx = 1;
515 	} else {
516 		/* setup first tx ring slot to point to header
517 		 * stored in reserved region.
518 		 */
519 		start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
520 			RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
521 		start_dp[idx].len   = vq->hw->vtnet_hdr_size;
522 		start_dp[idx].flags = VRING_DESC_F_NEXT;
523 		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
524 
525 		idx = start_dp[idx].next;
526 	}
527 
528 	virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
529 
530 	do {
531 		start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
532 		start_dp[idx].len   = cookie->data_len;
533 		start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
534 		idx = start_dp[idx].next;
535 	} while ((cookie = cookie->next) != NULL);
536 
537 	if (use_indirect)
538 		idx = vq->vq_ring.desc[head_idx].next;
539 
540 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
541 
542 	vq->vq_desc_head_idx = idx;
543 	vq_update_avail_ring(vq, head_idx);
544 
545 	if (!in_order) {
546 		if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
547 			vq->vq_desc_tail_idx = idx;
548 	}
549 }
550 
551 void
552 virtio_dev_cq_start(struct rte_eth_dev *dev)
553 {
554 	struct virtio_hw *hw = dev->data->dev_private;
555 
556 	if (hw->cvq && hw->cvq->vq) {
557 		rte_spinlock_init(&hw->cvq->lock);
558 		VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
559 	}
560 }
561 
562 int
563 virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
564 			uint16_t queue_idx,
565 			uint16_t nb_desc,
566 			unsigned int socket_id __rte_unused,
567 			const struct rte_eth_rxconf *rx_conf __rte_unused,
568 			struct rte_mempool *mp)
569 {
570 	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
571 	struct virtio_hw *hw = dev->data->dev_private;
572 	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
573 	struct virtnet_rx *rxvq;
574 
575 	PMD_INIT_FUNC_TRACE();
576 
577 	if (nb_desc == 0 || nb_desc > vq->vq_nentries)
578 		nb_desc = vq->vq_nentries;
579 	vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
580 
581 	rxvq = &vq->rxq;
582 	rxvq->queue_id = queue_idx;
583 	rxvq->mpool = mp;
584 	if (rxvq->mpool == NULL) {
585 		rte_exit(EXIT_FAILURE,
586 			"Cannot allocate mbufs for rx virtqueue");
587 	}
588 
589 	dev->data->rx_queues[queue_idx] = rxvq;
590 
591 	return 0;
592 }
593 
594 int
595 virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
596 {
597 	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
598 	struct virtio_hw *hw = dev->data->dev_private;
599 	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
600 	struct virtnet_rx *rxvq = &vq->rxq;
601 	struct rte_mbuf *m;
602 	uint16_t desc_idx;
603 	int error, nbufs, i;
604 
605 	PMD_INIT_FUNC_TRACE();
606 
607 	/* Allocate blank mbufs for each rx descriptor */
608 	nbufs = 0;
609 
610 	if (hw->use_simple_rx) {
611 		for (desc_idx = 0; desc_idx < vq->vq_nentries;
612 		     desc_idx++) {
613 			vq->vq_ring.avail->ring[desc_idx] = desc_idx;
614 			vq->vq_ring.desc[desc_idx].flags =
615 				VRING_DESC_F_WRITE;
616 		}
617 
618 		virtio_rxq_vec_setup(rxvq);
619 	}
620 
621 	memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
622 	for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
623 	     desc_idx++) {
624 		vq->sw_ring[vq->vq_nentries + desc_idx] =
625 			&rxvq->fake_mbuf;
626 	}
627 
628 	if (hw->use_simple_rx) {
629 		while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
630 			virtio_rxq_rearm_vec(rxvq);
631 			nbufs += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
632 		}
633 	} else if (hw->use_inorder_rx) {
634 		if ((!virtqueue_full(vq))) {
635 			uint16_t free_cnt = vq->vq_free_cnt;
636 			struct rte_mbuf *pkts[free_cnt];
637 
638 			if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, pkts,
639 				free_cnt)) {
640 				error = virtqueue_enqueue_refill_inorder(vq,
641 						pkts,
642 						free_cnt);
643 				if (unlikely(error)) {
644 					for (i = 0; i < free_cnt; i++)
645 						rte_pktmbuf_free(pkts[i]);
646 				}
647 			}
648 
649 			nbufs += free_cnt;
650 			vq_update_avail_idx(vq);
651 		}
652 	} else {
653 		while (!virtqueue_full(vq)) {
654 			m = rte_mbuf_raw_alloc(rxvq->mpool);
655 			if (m == NULL)
656 				break;
657 
658 			/* Enqueue allocated buffers */
659 			error = virtqueue_enqueue_recv_refill(vq, m);
660 			if (error) {
661 				rte_pktmbuf_free(m);
662 				break;
663 			}
664 			nbufs++;
665 		}
666 
667 		vq_update_avail_idx(vq);
668 	}
669 
670 	PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
671 
672 	VIRTQUEUE_DUMP(vq);
673 
674 	return 0;
675 }
676 
677 /*
678  * struct rte_eth_dev *dev: Used to update dev
679  * uint16_t nb_desc: Defaults to values read from config space
680  * unsigned int socket_id: Used to allocate memzone
681  * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
682  * uint16_t queue_idx: Just used as an index in dev txq list
683  */
684 int
685 virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
686 			uint16_t queue_idx,
687 			uint16_t nb_desc,
688 			unsigned int socket_id __rte_unused,
689 			const struct rte_eth_txconf *tx_conf)
690 {
691 	uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
692 	struct virtio_hw *hw = dev->data->dev_private;
693 	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
694 	struct virtnet_tx *txvq;
695 	uint16_t tx_free_thresh;
696 
697 	PMD_INIT_FUNC_TRACE();
698 
699 	if (nb_desc == 0 || nb_desc > vq->vq_nentries)
700 		nb_desc = vq->vq_nentries;
701 	vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
702 
703 	txvq = &vq->txq;
704 	txvq->queue_id = queue_idx;
705 
706 	tx_free_thresh = tx_conf->tx_free_thresh;
707 	if (tx_free_thresh == 0)
708 		tx_free_thresh =
709 			RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
710 
711 	if (tx_free_thresh >= (vq->vq_nentries - 3)) {
712 		RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
713 			"number of TX entries minus 3 (%u)."
714 			" (tx_free_thresh=%u port=%u queue=%u)\n",
715 			vq->vq_nentries - 3,
716 			tx_free_thresh, dev->data->port_id, queue_idx);
717 		return -EINVAL;
718 	}
719 
720 	vq->vq_free_thresh = tx_free_thresh;
721 
722 	dev->data->tx_queues[queue_idx] = txvq;
723 	return 0;
724 }
725 
726 int
727 virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
728 				uint16_t queue_idx)
729 {
730 	uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
731 	struct virtio_hw *hw = dev->data->dev_private;
732 	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
733 
734 	PMD_INIT_FUNC_TRACE();
735 
736 	if (hw->use_inorder_tx)
737 		vq->vq_ring.desc[vq->vq_nentries - 1].next = 0;
738 
739 	VIRTQUEUE_DUMP(vq);
740 
741 	return 0;
742 }
743 
744 static void
745 virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
746 {
747 	int error;
748 	/*
749 	 * Requeue the discarded mbuf. This should always be
750 	 * successful since it was just dequeued.
751 	 */
752 	error = virtqueue_enqueue_recv_refill(vq, m);
753 
754 	if (unlikely(error)) {
755 		RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf\n");
756 		rte_pktmbuf_free(m);
757 	}
758 }
759 
760 static void
761 virtio_discard_rxbuf_inorder(struct virtqueue *vq, struct rte_mbuf *m)
762 {
763 	int error;
764 
765 	error = virtqueue_enqueue_refill_inorder(vq, &m, 1);
766 	if (unlikely(error)) {
767 		RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf\n");
768 		rte_pktmbuf_free(m);
769 	}
770 }
771 
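/* Update the per-queue packet size histogram and multicast/broadcast counters. */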
772 static void
773 virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
774 {
775 	uint32_t s = mbuf->pkt_len;
776 	struct ether_addr *ea;
777 
778 	if (s == 64) {
779 		stats->size_bins[1]++;
780 	} else if (s > 64 && s < 1024) {
781 		uint32_t bin;
782 
783 		/* count zeros, and offset into correct bin */
784 		bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
785 		stats->size_bins[bin]++;
786 	} else {
787 		if (s < 64)
788 			stats->size_bins[0]++;
789 		else if (s < 1519)
790 			stats->size_bins[6]++;
791 		else if (s >= 1519)
792 			stats->size_bins[7]++;
793 	}
794 
795 	ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *);
796 	if (is_multicast_ether_addr(ea)) {
797 		if (is_broadcast_ether_addr(ea))
798 			stats->broadcast++;
799 		else
800 			stats->multicast++;
801 	}
802 }
803 
804 static inline void
805 virtio_rx_stats_updated(struct virtnet_rx *rxvq, struct rte_mbuf *m)
806 {
807 	VIRTIO_DUMP_PACKET(m, m->data_len);
808 
809 	rxvq->stats.bytes += m->pkt_len;
810 	virtio_update_packet_stats(&rxvq->stats, m);
811 }
812 
813 /* Optionally fill offload information in structure */
814 static int
815 virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
816 {
817 	struct rte_net_hdr_lens hdr_lens;
818 	uint32_t hdrlen, ptype;
819 	int l4_supported = 0;
820 
821 	/* nothing to do */
822 	if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
823 		return 0;
824 
825 	m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
826 
827 	ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
828 	m->packet_type = ptype;
829 	if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
830 	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
831 	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
832 		l4_supported = 1;
833 
834 	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
835 		hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
836 		if (hdr->csum_start <= hdrlen && l4_supported) {
837 			m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
838 		} else {
839 			/* Unknown proto or tunnel, do sw cksum. We can assume
840 			 * the cksum field is in the first segment since the
841 			 * buffers we provided to the host are large enough.
842 			 * In case of SCTP, this will be wrong since it's a CRC
843 			 * but there's nothing we can do.
844 			 */
845 			uint16_t csum = 0, off;
846 
847 			rte_raw_cksum_mbuf(m, hdr->csum_start,
848 				rte_pktmbuf_pkt_len(m) - hdr->csum_start,
849 				&csum);
850 			if (likely(csum != 0xffff))
851 				csum = ~csum;
852 			off = hdr->csum_offset + hdr->csum_start;
853 			if (rte_pktmbuf_data_len(m) >= off + 1)
854 				*rte_pktmbuf_mtod_offset(m, uint16_t *,
855 					off) = csum;
856 		}
857 	} else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
858 		m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
859 	}
860 
861 	/* GSO request, save required information in mbuf */
862 	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
863 		/* Check unsupported modes */
864 		if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
865 		    (hdr->gso_size == 0)) {
866 			return -EINVAL;
867 		}
868 
869 		/* Update mss lengths in mbuf */
870 		m->tso_segsz = hdr->gso_size;
871 		switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
872 		case VIRTIO_NET_HDR_GSO_TCPV4:
873 		case VIRTIO_NET_HDR_GSO_TCPV6:
874 			m->ol_flags |= PKT_RX_LRO |
875 				PKT_RX_L4_CKSUM_NONE;
876 			break;
877 		default:
878 			return -EINVAL;
879 		}
880 	}
881 
882 	return 0;
883 }
884 
885 #define VIRTIO_MBUF_BURST_SZ 64
886 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
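/* Receive burst used when mergeable rx buffers are not negotiated: each packet
 * fits in a single descriptor.
 */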
887 uint16_t
888 virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
889 {
890 	struct virtnet_rx *rxvq = rx_queue;
891 	struct virtqueue *vq = rxvq->vq;
892 	struct virtio_hw *hw = vq->hw;
893 	struct rte_mbuf *rxm, *new_mbuf;
894 	uint16_t nb_used, num, nb_rx;
895 	uint32_t len[VIRTIO_MBUF_BURST_SZ];
896 	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
897 	int error;
898 	uint32_t i, nb_enqueued;
899 	uint32_t hdr_size;
900 	struct virtio_net_hdr *hdr;
901 
902 	nb_rx = 0;
903 	if (unlikely(hw->started == 0))
904 		return nb_rx;
905 
906 	nb_used = VIRTQUEUE_NUSED(vq);
907 
908 	virtio_rmb();
909 
910 	num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
911 	if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
912 		num = VIRTIO_MBUF_BURST_SZ;
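	/* Trim the burst so that the consumer index ends on a descriptor
	 * cache-line boundary of the used ring.
	 */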
913 	if (likely(num > DESC_PER_CACHELINE))
914 		num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
915 
916 	num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
917 	PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
918 
919 	nb_enqueued = 0;
920 	hdr_size = hw->vtnet_hdr_size;
921 
922 	for (i = 0; i < num ; i++) {
923 		rxm = rcv_pkts[i];
924 
925 		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
926 
927 		if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
928 			PMD_RX_LOG(ERR, "Packet drop");
929 			nb_enqueued++;
930 			virtio_discard_rxbuf(vq, rxm);
931 			rxvq->stats.errors++;
932 			continue;
933 		}
934 
935 		rxm->port = rxvq->port_id;
936 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
937 		rxm->ol_flags = 0;
938 		rxm->vlan_tci = 0;
939 
940 		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
941 		rxm->data_len = (uint16_t)(len[i] - hdr_size);
942 
943 		hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
944 			RTE_PKTMBUF_HEADROOM - hdr_size);
945 
946 		if (hw->vlan_strip)
947 			rte_vlan_strip(rxm);
948 
949 		if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
950 			virtio_discard_rxbuf(vq, rxm);
951 			rxvq->stats.errors++;
952 			continue;
953 		}
954 
955 		virtio_rx_stats_updated(rxvq, rxm);
956 
957 		rx_pkts[nb_rx++] = rxm;
958 	}
959 
960 	rxvq->stats.packets += nb_rx;
961 
962 	/* Allocate new mbufs for the used descriptors */
963 	while (likely(!virtqueue_full(vq))) {
964 		new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
965 		if (unlikely(new_mbuf == NULL)) {
966 			struct rte_eth_dev *dev
967 				= &rte_eth_devices[rxvq->port_id];
968 			dev->data->rx_mbuf_alloc_failed++;
969 			break;
970 		}
971 		error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
972 		if (unlikely(error)) {
973 			rte_pktmbuf_free(new_mbuf);
974 			break;
975 		}
976 		nb_enqueued++;
977 	}
978 
979 	if (likely(nb_enqueued)) {
980 		vq_update_avail_idx(vq);
981 
982 		if (unlikely(virtqueue_kick_prepare(vq))) {
983 			virtqueue_notify(vq);
984 			PMD_RX_LOG(DEBUG, "Notified");
985 		}
986 	}
987 
988 	return nb_rx;
989 }
990 
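/* Receive burst for mergeable rx buffers in in-order mode: buffers are
 * consumed sequentially and chained into one mbuf per num_buffers header field.
 */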
991 uint16_t
992 virtio_recv_mergeable_pkts_inorder(void *rx_queue,
993 			struct rte_mbuf **rx_pkts,
994 			uint16_t nb_pkts)
995 {
996 	struct virtnet_rx *rxvq = rx_queue;
997 	struct virtqueue *vq = rxvq->vq;
998 	struct virtio_hw *hw = vq->hw;
999 	struct rte_mbuf *rxm;
1000 	struct rte_mbuf *prev;
1001 	uint16_t nb_used, num, nb_rx;
1002 	uint32_t len[VIRTIO_MBUF_BURST_SZ];
1003 	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1004 	int error;
1005 	uint32_t nb_enqueued;
1006 	uint32_t seg_num;
1007 	uint32_t seg_res;
1008 	uint32_t hdr_size;
1009 	int32_t i;
1010 
1011 	nb_rx = 0;
1012 	if (unlikely(hw->started == 0))
1013 		return nb_rx;
1014 
1015 	nb_used = VIRTQUEUE_NUSED(vq);
1016 	nb_used = RTE_MIN(nb_used, nb_pkts);
1017 	nb_used = RTE_MIN(nb_used, VIRTIO_MBUF_BURST_SZ);
1018 
1019 	virtio_rmb();
1020 
1021 	PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1022 
1023 	nb_enqueued = 0;
1024 	seg_num = 1;
1025 	seg_res = 0;
1026 	hdr_size = hw->vtnet_hdr_size;
1027 
1028 	num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len, nb_used);
1029 
1030 	for (i = 0; i < num; i++) {
1031 		struct virtio_net_hdr_mrg_rxbuf *header;
1032 
1033 		PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1034 		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1035 
1036 		rxm = rcv_pkts[i];
1037 
1038 		if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1039 			PMD_RX_LOG(ERR, "Packet drop");
1040 			nb_enqueued++;
1041 			virtio_discard_rxbuf_inorder(vq, rxm);
1042 			rxvq->stats.errors++;
1043 			continue;
1044 		}
1045 
1046 		header = (struct virtio_net_hdr_mrg_rxbuf *)
1047 			 ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1048 			 - hdr_size);
1049 		seg_num = header->num_buffers;
1050 
1051 		if (seg_num == 0)
1052 			seg_num = 1;
1053 
1054 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
1055 		rxm->nb_segs = seg_num;
1056 		rxm->ol_flags = 0;
1057 		rxm->vlan_tci = 0;
1058 		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1059 		rxm->data_len = (uint16_t)(len[i] - hdr_size);
1060 
1061 		rxm->port = rxvq->port_id;
1062 
1063 		rx_pkts[nb_rx] = rxm;
1064 		prev = rxm;
1065 
1066 		if (vq->hw->has_rx_offload &&
1067 				virtio_rx_offload(rxm, &header->hdr) < 0) {
1068 			virtio_discard_rxbuf_inorder(vq, rxm);
1069 			rxvq->stats.errors++;
1070 			continue;
1071 		}
1072 
1073 		if (hw->vlan_strip)
1074 			rte_vlan_strip(rx_pkts[nb_rx]);
1075 
1076 		seg_res = seg_num - 1;
1077 
1078 		/* Merge remaining segments */
1079 		while (seg_res != 0 && i < (num - 1)) {
1080 			i++;
1081 
1082 			rxm = rcv_pkts[i];
1083 			rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1084 			rxm->pkt_len = (uint32_t)(len[i]);
1085 			rxm->data_len = (uint16_t)(len[i]);
1086 
1087 			rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1088 			rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1089 
1090 			if (prev)
1091 				prev->next = rxm;
1092 
1093 			prev = rxm;
1094 			seg_res -= 1;
1095 		}
1096 
1097 		if (!seg_res) {
1098 			virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1099 			nb_rx++;
1100 		}
1101 	}
1102 
1103 	/* The last packet may still need its remaining segments merged */
1104 	while (seg_res != 0) {
1105 		uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1106 					VIRTIO_MBUF_BURST_SZ);
1107 
1108 		prev = rcv_pkts[nb_rx];
1109 		if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1110 			num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len,
1111 							   rcv_cnt);
1112 			uint16_t extra_idx = 0;
1113 
1114 			rcv_cnt = num;
1115 			while (extra_idx < rcv_cnt) {
1116 				rxm = rcv_pkts[extra_idx];
1117 				rxm->data_off =
1118 					RTE_PKTMBUF_HEADROOM - hdr_size;
1119 				rxm->pkt_len = (uint32_t)(len[extra_idx]);
1120 				rxm->data_len = (uint16_t)(len[extra_idx]);
1121 				prev->next = rxm;
1122 				prev = rxm;
1123 				rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1124 				rx_pkts[nb_rx]->data_len += len[extra_idx];
1125 				extra_idx += 1;
1126 			}
1127 			seg_res -= rcv_cnt;
1128 
1129 			if (!seg_res) {
1130 				virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1131 				nb_rx++;
1132 			}
1133 		} else {
1134 			PMD_RX_LOG(ERR,
1135 					"No enough segments for packet.");
1136 			virtio_discard_rxbuf_inorder(vq, prev);
1137 			rxvq->stats.errors++;
1138 			break;
1139 		}
1140 	}
1141 
1142 	rxvq->stats.packets += nb_rx;
1143 
1144 	/* Allocate new mbufs for the used descriptors */
1145 
1146 	if (likely(!virtqueue_full(vq))) {
1147 		/* free_cnt may include mrg descs */
1148 		uint16_t free_cnt = vq->vq_free_cnt;
1149 		struct rte_mbuf *new_pkts[free_cnt];
1150 
1151 		if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1152 			error = virtqueue_enqueue_refill_inorder(vq, new_pkts,
1153 					free_cnt);
1154 			if (unlikely(error)) {
1155 				for (i = 0; i < free_cnt; i++)
1156 					rte_pktmbuf_free(new_pkts[i]);
1157 			}
1158 			nb_enqueued += free_cnt;
1159 		} else {
1160 			struct rte_eth_dev *dev =
1161 				&rte_eth_devices[rxvq->port_id];
1162 			dev->data->rx_mbuf_alloc_failed += free_cnt;
1163 		}
1164 	}
1165 
1166 	if (likely(nb_enqueued)) {
1167 		vq_update_avail_idx(vq);
1168 
1169 		if (unlikely(virtqueue_kick_prepare(vq))) {
1170 			virtqueue_notify(vq);
1171 			PMD_RX_LOG(DEBUG, "Notified");
1172 		}
1173 	}
1174 
1175 	return nb_rx;
1176 }
1177 
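/* Receive burst for mergeable rx buffers: a packet may span several
 * descriptors, as indicated by num_buffers in its virtio-net header.
 */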
1178 uint16_t
1179 virtio_recv_mergeable_pkts(void *rx_queue,
1180 			struct rte_mbuf **rx_pkts,
1181 			uint16_t nb_pkts)
1182 {
1183 	struct virtnet_rx *rxvq = rx_queue;
1184 	struct virtqueue *vq = rxvq->vq;
1185 	struct virtio_hw *hw = vq->hw;
1186 	struct rte_mbuf *rxm, *new_mbuf;
1187 	uint16_t nb_used, num, nb_rx;
1188 	uint32_t len[VIRTIO_MBUF_BURST_SZ];
1189 	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1190 	struct rte_mbuf *prev;
1191 	int error;
1192 	uint32_t i, nb_enqueued;
1193 	uint32_t seg_num;
1194 	uint16_t extra_idx;
1195 	uint32_t seg_res;
1196 	uint32_t hdr_size;
1197 
1198 	nb_rx = 0;
1199 	if (unlikely(hw->started == 0))
1200 		return nb_rx;
1201 
1202 	nb_used = VIRTQUEUE_NUSED(vq);
1203 
1204 	virtio_rmb();
1205 
1206 	PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1207 
1208 	i = 0;
1209 	nb_enqueued = 0;
1210 	seg_num = 0;
1211 	extra_idx = 0;
1212 	seg_res = 0;
1213 	hdr_size = hw->vtnet_hdr_size;
1214 
1215 	while (i < nb_used) {
1216 		struct virtio_net_hdr_mrg_rxbuf *header;
1217 
1218 		if (nb_rx == nb_pkts)
1219 			break;
1220 
1221 		num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, 1);
1222 		if (num != 1)
1223 			continue;
1224 
1225 		i++;
1226 
1227 		PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1228 		PMD_RX_LOG(DEBUG, "packet len:%d", len[0]);
1229 
1230 		rxm = rcv_pkts[0];
1231 
1232 		if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) {
1233 			PMD_RX_LOG(ERR, "Packet drop");
1234 			nb_enqueued++;
1235 			virtio_discard_rxbuf(vq, rxm);
1236 			rxvq->stats.errors++;
1237 			continue;
1238 		}
1239 
1240 		header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr +
1241 			RTE_PKTMBUF_HEADROOM - hdr_size);
1242 		seg_num = header->num_buffers;
1243 
1244 		if (seg_num == 0)
1245 			seg_num = 1;
1246 
1247 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
1248 		rxm->nb_segs = seg_num;
1249 		rxm->ol_flags = 0;
1250 		rxm->vlan_tci = 0;
1251 		rxm->pkt_len = (uint32_t)(len[0] - hdr_size);
1252 		rxm->data_len = (uint16_t)(len[0] - hdr_size);
1253 
1254 		rxm->port = rxvq->port_id;
1255 		rx_pkts[nb_rx] = rxm;
1256 		prev = rxm;
1257 
1258 		if (hw->has_rx_offload &&
1259 				virtio_rx_offload(rxm, &header->hdr) < 0) {
1260 			virtio_discard_rxbuf(vq, rxm);
1261 			rxvq->stats.errors++;
1262 			continue;
1263 		}
1264 
1265 		seg_res = seg_num - 1;
1266 
1267 		while (seg_res != 0) {
1268 			/*
1269 			 * Get extra segments for current uncompleted packet.
1270 			 */
1271 			uint16_t  rcv_cnt =
1272 				RTE_MIN(seg_res, RTE_DIM(rcv_pkts));
1273 			if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1274 				uint32_t rx_num =
1275 					virtqueue_dequeue_burst_rx(vq,
1276 					rcv_pkts, len, rcv_cnt);
1277 				i += rx_num;
1278 				rcv_cnt = rx_num;
1279 			} else {
1280 				PMD_RX_LOG(ERR,
1281 					   "No enough segments for packet.");
1282 				nb_enqueued++;
1283 				virtio_discard_rxbuf(vq, rxm);
1284 				rxvq->stats.errors++;
1285 				break;
1286 			}
1287 
1288 			extra_idx = 0;
1289 
1290 			while (extra_idx < rcv_cnt) {
1291 				rxm = rcv_pkts[extra_idx];
1292 
1293 				rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1294 				rxm->pkt_len = (uint32_t)(len[extra_idx]);
1295 				rxm->data_len = (uint16_t)(len[extra_idx]);
1296 
1297 				if (prev)
1298 					prev->next = rxm;
1299 
1300 				prev = rxm;
1301 				rx_pkts[nb_rx]->pkt_len += rxm->pkt_len;
1302 				extra_idx++;
1303 			}
1304 			seg_res -= rcv_cnt;
1305 		}
1306 
1307 		if (hw->vlan_strip)
1308 			rte_vlan_strip(rx_pkts[nb_rx]);
1309 
1310 		VIRTIO_DUMP_PACKET(rx_pkts[nb_rx],
1311 			rx_pkts[nb_rx]->data_len);
1312 
1313 		rxvq->stats.bytes += rx_pkts[nb_rx]->pkt_len;
1314 		virtio_update_packet_stats(&rxvq->stats, rx_pkts[nb_rx]);
1315 		nb_rx++;
1316 	}
1317 
1318 	rxvq->stats.packets += nb_rx;
1319 
1320 	/* Allocate new mbufs for the used descriptors */
1321 	while (likely(!virtqueue_full(vq))) {
1322 		new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
1323 		if (unlikely(new_mbuf == NULL)) {
1324 			struct rte_eth_dev *dev
1325 				= &rte_eth_devices[rxvq->port_id];
1326 			dev->data->rx_mbuf_alloc_failed++;
1327 			break;
1328 		}
1329 		error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
1330 		if (unlikely(error)) {
1331 			rte_pktmbuf_free(new_mbuf);
1332 			break;
1333 		}
1334 		nb_enqueued++;
1335 	}
1336 
1337 	if (likely(nb_enqueued)) {
1338 		vq_update_avail_idx(vq);
1339 
1340 		if (unlikely(virtqueue_kick_prepare(vq))) {
1341 			virtqueue_notify(vq);
1342 			PMD_RX_LOG(DEBUG, "Notified");
1343 		}
1344 	}
1345 
1346 	return nb_rx;
1347 }
1348 
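/* Default transmit burst. Completed descriptors may be reported back in any
 * order, so cleanup relies on the id field of each used ring entry.
 */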
1349 uint16_t
1350 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1351 {
1352 	struct virtnet_tx *txvq = tx_queue;
1353 	struct virtqueue *vq = txvq->vq;
1354 	struct virtio_hw *hw = vq->hw;
1355 	uint16_t hdr_size = hw->vtnet_hdr_size;
1356 	uint16_t nb_used, nb_tx = 0;
1357 	int error;
1358 
1359 	if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
1360 		return nb_tx;
1361 
1362 	if (unlikely(nb_pkts < 1))
1363 		return nb_pkts;
1364 
1365 	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
1366 	nb_used = VIRTQUEUE_NUSED(vq);
1367 
1368 	virtio_rmb();
1369 	if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
1370 		virtio_xmit_cleanup(vq, nb_used);
1371 
1372 	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1373 		struct rte_mbuf *txm = tx_pkts[nb_tx];
1374 		int can_push = 0, use_indirect = 0, slots, need;
1375 
1376 		/* Do VLAN tag insertion */
1377 		if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
1378 			error = rte_vlan_insert(&txm);
1379 			if (unlikely(error)) {
1380 				rte_pktmbuf_free(txm);
1381 				continue;
1382 			}
1383 		}
1384 
1385 		/* optimize ring usage */
1386 		if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
1387 		      vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
1388 		    rte_mbuf_refcnt_read(txm) == 1 &&
1389 		    RTE_MBUF_DIRECT(txm) &&
1390 		    txm->nb_segs == 1 &&
1391 		    rte_pktmbuf_headroom(txm) >= hdr_size &&
1392 		    rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
1393 				   __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
1394 			can_push = 1;
1395 		else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
1396 			 txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
1397 			use_indirect = 1;
1398 
1399 		/* How many main ring entries are needed for this Tx?
1400 		 * any_layout => number of segments
1401 		 * indirect   => 1
1402 		 * default    => number of segments + 1
1403 		 */
1404 		slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
1405 		need = slots - vq->vq_free_cnt;
1406 
1407 		/* A positive value indicates we need to free vring descriptors */
1408 		if (unlikely(need > 0)) {
1409 			nb_used = VIRTQUEUE_NUSED(vq);
1410 			virtio_rmb();
1411 			need = RTE_MIN(need, (int)nb_used);
1412 
1413 			virtio_xmit_cleanup(vq, need);
1414 			need = slots - vq->vq_free_cnt;
1415 			if (unlikely(need > 0)) {
1416 				PMD_TX_LOG(ERR,
1417 					   "No free tx descriptors to transmit");
1418 				break;
1419 			}
1420 		}
1421 
1422 		/* Enqueue Packet buffers */
1423 		virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect,
1424 			can_push, 0);
1425 
1426 		txvq->stats.bytes += txm->pkt_len;
1427 		virtio_update_packet_stats(&txvq->stats, txm);
1428 	}
1429 
1430 	txvq->stats.packets += nb_tx;
1431 
1432 	if (likely(nb_tx)) {
1433 		vq_update_avail_idx(vq);
1434 
1435 		if (unlikely(virtqueue_kick_prepare(vq))) {
1436 			virtqueue_notify(vq);
1437 			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
1438 		}
1439 	}
1440 
1441 	return nb_tx;
1442 }
1443 
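/* Transmit burst for in-order mode: single-segment packets that can carry the
 * header in their headroom are batched through the in-order enqueue, others
 * fall back to the generic enqueue path.
 */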
1444 uint16_t
1445 virtio_xmit_pkts_inorder(void *tx_queue,
1446 			struct rte_mbuf **tx_pkts,
1447 			uint16_t nb_pkts)
1448 {
1449 	struct virtnet_tx *txvq = tx_queue;
1450 	struct virtqueue *vq = txvq->vq;
1451 	struct virtio_hw *hw = vq->hw;
1452 	uint16_t hdr_size = hw->vtnet_hdr_size;
1453 	uint16_t nb_used, nb_avail, nb_tx = 0, nb_inorder_pkts = 0;
1454 	struct rte_mbuf *inorder_pkts[nb_pkts];
1455 	int error;
1456 
1457 	if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
1458 		return nb_tx;
1459 
1460 	if (unlikely(nb_pkts < 1))
1461 		return nb_pkts;
1462 
1463 	VIRTQUEUE_DUMP(vq);
1464 	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
1465 	nb_used = VIRTQUEUE_NUSED(vq);
1466 
1467 	virtio_rmb();
1468 	if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
1469 		virtio_xmit_cleanup_inorder(vq, nb_used);
1470 
1471 	if (unlikely(!vq->vq_free_cnt))
1472 		virtio_xmit_cleanup_inorder(vq, nb_used);
1473 
1474 	nb_avail = RTE_MIN(vq->vq_free_cnt, nb_pkts);
1475 
1476 	for (nb_tx = 0; nb_tx < nb_avail; nb_tx++) {
1477 		struct rte_mbuf *txm = tx_pkts[nb_tx];
1478 		int slots, need;
1479 
1480 		/* Do VLAN tag insertion */
1481 		if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
1482 			error = rte_vlan_insert(&txm);
1483 			if (unlikely(error)) {
1484 				rte_pktmbuf_free(txm);
1485 				continue;
1486 			}
1487 		}
1488 
1489 		/* optimize ring usage */
1490 		if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
1491 		     vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
1492 		     rte_mbuf_refcnt_read(txm) == 1 &&
1493 		     RTE_MBUF_DIRECT(txm) &&
1494 		     txm->nb_segs == 1 &&
1495 		     rte_pktmbuf_headroom(txm) >= hdr_size &&
1496 		     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
1497 				__alignof__(struct virtio_net_hdr_mrg_rxbuf))) {
1498 			inorder_pkts[nb_inorder_pkts] = txm;
1499 			nb_inorder_pkts++;
1500 
1501 			txvq->stats.bytes += txm->pkt_len;
1502 			virtio_update_packet_stats(&txvq->stats, txm);
1503 			continue;
1504 		}
1505 
1506 		if (nb_inorder_pkts) {
1507 			virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
1508 							nb_inorder_pkts);
1509 			nb_inorder_pkts = 0;
1510 		}
1511 
1512 		slots = txm->nb_segs + 1;
1513 		need = slots - vq->vq_free_cnt;
1514 		if (unlikely(need > 0)) {
1515 			nb_used = VIRTQUEUE_NUSED(vq);
1516 			virtio_rmb();
1517 			need = RTE_MIN(need, (int)nb_used);
1518 
1519 			virtio_xmit_cleanup_inorder(vq, need);
1520 
1521 			need = slots - vq->vq_free_cnt;
1522 
1523 			if (unlikely(need > 0)) {
1524 				PMD_TX_LOG(ERR,
1525 					"No free tx descriptors to transmit");
1526 				break;
1527 			}
1528 		}
1529 		/* Enqueue Packet buffers */
1530 		virtqueue_enqueue_xmit(txvq, txm, slots, 0, 0, 1);
1531 
1532 		txvq->stats.bytes += txm->pkt_len;
1533 		virtio_update_packet_stats(&txvq->stats, txm);
1534 	}
1535 
1536 	/* Transmit all inorder packets */
1537 	if (nb_inorder_pkts)
1538 		virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
1539 						nb_inorder_pkts);
1540 
1541 	txvq->stats.packets += nb_tx;
1542 
1543 	if (likely(nb_tx)) {
1544 		vq_update_avail_idx(vq);
1545 
1546 		if (unlikely(virtqueue_kick_prepare(vq))) {
1547 			virtqueue_notify(vq);
1548 			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
1549 		}
1550 	}
1551 
1552 	VIRTQUEUE_DUMP(vq);
1553 
1554 	return nb_tx;
1555 }
1556