xref: /dpdk/drivers/net/virtio/virtio_rxtx.c (revision 57f818963d809df7b2800b60ac98fc6f68a4e414)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4 
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <errno.h>
10 
11 #include <rte_cycles.h>
12 #include <rte_memory.h>
13 #include <rte_branch_prediction.h>
14 #include <rte_mempool.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_ether.h>
18 #include <rte_ethdev_driver.h>
19 #include <rte_prefetch.h>
20 #include <rte_string_fns.h>
21 #include <rte_errno.h>
22 #include <rte_byteorder.h>
23 #include <rte_net.h>
24 #include <rte_ip.h>
25 #include <rte_udp.h>
26 #include <rte_tcp.h>
27 
28 #include "virtio_logs.h"
29 #include "virtio_ethdev.h"
30 #include "virtio_pci.h"
31 #include "virtqueue.h"
32 #include "virtio_rxtx.h"
33 #include "virtio_rxtx_simple.h"
34 
35 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
36 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
37 #else
38 #define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
39 #endif
40 
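/*
 * Return non-zero when at least @offset used entries are pending on the
 * RX virtqueue.
 */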
41 int
42 virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
43 {
44 	struct virtnet_rx *rxvq = rxq;
45 	struct virtqueue *vq = rxvq->vq;
46 
47 	return VIRTQUEUE_NUSED(vq) >= offset;
48 }
49 
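/*
 * Return @num descriptors to the free pool of an in-order virtqueue:
 * no chain walk is needed, only the free count and tail index move.
 */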
50 void
51 vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx, uint16_t num)
52 {
53 	vq->vq_free_cnt += num;
54 	vq->vq_desc_tail_idx = desc_idx & (vq->vq_nentries - 1);
55 }
56 
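/*
 * Free the descriptor chain that starts at @desc_idx: credit its
 * descriptors back to vq_free_cnt, walk to the last descriptor of the
 * chain and splice the whole chain onto the tail of the free list.
 */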
57 void
58 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
59 {
60 	struct vring_desc *dp, *dp_tail;
61 	struct vq_desc_extra *dxp;
62 	uint16_t desc_idx_last = desc_idx;
63 
64 	dp  = &vq->vq_ring.desc[desc_idx];
65 	dxp = &vq->vq_descx[desc_idx];
66 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
67 	if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
68 		while (dp->flags & VRING_DESC_F_NEXT) {
69 			desc_idx_last = dp->next;
70 			dp = &vq->vq_ring.desc[dp->next];
71 		}
72 	}
73 	dxp->ndescs = 0;
74 
75 	/*
76 	 * We must append the existing free chain, if any, to the end of the
77 	 * newly freed chain. If the virtqueue was completely used, then the
78 	 * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
79 	 */
80 	if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
81 		vq->vq_desc_head_idx = desc_idx;
82 	} else {
83 		dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
84 		dp_tail->next = desc_idx;
85 	}
86 
87 	vq->vq_desc_tail_idx = desc_idx_last;
88 	dp->next = VQ_RING_DESC_CHAIN_END;
89 }
90 
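/*
 * Dequeue up to @num used RX buffers: copy the mbuf cookies into
 * @rx_pkts, record the used lengths in @len and recycle the descriptor
 * chains.  Returns the number of buffers actually dequeued.
 */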
91 static uint16_t
92 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
93 			   uint32_t *len, uint16_t num)
94 {
95 	struct vring_used_elem *uep;
96 	struct rte_mbuf *cookie;
97 	uint16_t used_idx, desc_idx;
98 	uint16_t i;
99 
100 	/* Caller does the check */
101 	for (i = 0; i < num ; i++) {
102 		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
103 		uep = &vq->vq_ring.used->ring[used_idx];
104 		desc_idx = (uint16_t) uep->id;
105 		len[i] = uep->len;
106 		cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
107 
108 		if (unlikely(cookie == NULL)) {
109 			PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
110 				vq->vq_used_cons_idx);
111 			break;
112 		}
113 
114 		rte_prefetch0(cookie);
115 		rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
116 		rx_pkts[i]  = cookie;
117 		vq->vq_used_cons_idx++;
118 		vq_ring_free_chain(vq, desc_idx);
119 		vq->vq_descx[desc_idx].cookie = NULL;
120 	}
121 
122 	return i;
123 }
124 
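/*
 * In-order variant of the RX dequeue: the used ring index equals the
 * descriptor index, so the descriptors are released in one batch with
 * vq_ring_free_inorder() after the loop.
 */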
125 static uint16_t
126 virtqueue_dequeue_rx_inorder(struct virtqueue *vq,
127 			struct rte_mbuf **rx_pkts,
128 			uint32_t *len,
129 			uint16_t num)
130 {
131 	struct vring_used_elem *uep;
132 	struct rte_mbuf *cookie;
133 	uint16_t used_idx = 0;
134 	uint16_t i;
135 
136 	if (unlikely(num == 0))
137 		return 0;
138 
139 	for (i = 0; i < num; i++) {
140 		used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
141 		/* Desc idx same as used idx */
142 		uep = &vq->vq_ring.used->ring[used_idx];
143 		len[i] = uep->len;
144 		cookie = (struct rte_mbuf *)vq->vq_descx[used_idx].cookie;
145 
146 		if (unlikely(cookie == NULL)) {
147 			PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
148 				vq->vq_used_cons_idx);
149 			break;
150 		}
151 
152 		rte_prefetch0(cookie);
153 		rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
154 		rx_pkts[i]  = cookie;
155 		vq->vq_used_cons_idx++;
156 		vq->vq_descx[used_idx].cookie = NULL;
157 	}
158 
159 	vq_ring_free_inorder(vq, used_idx, i);
160 	return i;
161 }
162 
163 #ifndef DEFAULT_TX_FREE_THRESH
164 #define DEFAULT_TX_FREE_THRESH 32
165 #endif
166 
167 /* Cleanup from completed transmits. */
168 static void
169 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
170 {
171 	uint16_t i, used_idx, desc_idx;
172 	for (i = 0; i < num; i++) {
173 		struct vring_used_elem *uep;
174 		struct vq_desc_extra *dxp;
175 
176 		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
177 		uep = &vq->vq_ring.used->ring[used_idx];
178 
179 		desc_idx = (uint16_t) uep->id;
180 		dxp = &vq->vq_descx[desc_idx];
181 		vq->vq_used_cons_idx++;
182 		vq_ring_free_chain(vq, desc_idx);
183 
184 		if (dxp->cookie != NULL) {
185 			rte_pktmbuf_free(dxp->cookie);
186 			dxp->cookie = NULL;
187 		}
188 	}
189 }
190 
191 /* Cleanup from completed inorder transmits. */
192 static void
193 virtio_xmit_cleanup_inorder(struct virtqueue *vq, uint16_t num)
194 {
195 	uint16_t i, used_idx, desc_idx = 0, last_idx;
196 	int16_t free_cnt = 0;
197 	struct vq_desc_extra *dxp = NULL;
198 
199 	if (unlikely(num == 0))
200 		return;
201 
202 	for (i = 0; i < num; i++) {
203 		struct vring_used_elem *uep;
204 
205 		used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
206 		uep = &vq->vq_ring.used->ring[used_idx];
207 		desc_idx = (uint16_t)uep->id;
208 
209 		dxp = &vq->vq_descx[desc_idx];
210 		vq->vq_used_cons_idx++;
211 
212 		if (dxp->cookie != NULL) {
213 			rte_pktmbuf_free(dxp->cookie);
214 			dxp->cookie = NULL;
215 		}
216 	}
217 
218 	last_idx = desc_idx + dxp->ndescs - 1;
219 	free_cnt = last_idx - vq->vq_desc_tail_idx;
220 	if (free_cnt <= 0)
221 		free_cnt += vq->vq_nentries;
222 
223 	vq_ring_free_inorder(vq, last_idx, free_cnt);
224 }
225 
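/*
 * Refill an in-order RX virtqueue with @num mbufs, one descriptor per
 * buffer, with room left in front of the packet data for the
 * virtio-net header.
 */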
226 static inline int
227 virtqueue_enqueue_refill_inorder(struct virtqueue *vq,
228 			struct rte_mbuf **cookies,
229 			uint16_t num)
230 {
231 	struct vq_desc_extra *dxp;
232 	struct virtio_hw *hw = vq->hw;
233 	struct vring_desc *start_dp;
234 	uint16_t head_idx, idx, i = 0;
235 
236 	if (unlikely(vq->vq_free_cnt == 0))
237 		return -ENOSPC;
238 	if (unlikely(vq->vq_free_cnt < num))
239 		return -EMSGSIZE;
240 
241 	head_idx = vq->vq_desc_head_idx & (vq->vq_nentries - 1);
242 	start_dp = vq->vq_ring.desc;
243 
244 	while (i < num) {
245 		idx = head_idx & (vq->vq_nentries - 1);
246 		dxp = &vq->vq_descx[idx];
247 		dxp->cookie = (void *)cookies[i];
248 		dxp->ndescs = 1;
249 
250 		start_dp[idx].addr =
251 				VIRTIO_MBUF_ADDR(cookies[i], vq) +
252 				RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
253 		start_dp[idx].len =
254 				cookies[i]->buf_len -
255 				RTE_PKTMBUF_HEADROOM +
256 				hw->vtnet_hdr_size;
257 		start_dp[idx].flags =  VRING_DESC_F_WRITE;
258 
259 		vq_update_avail_ring(vq, idx);
260 		head_idx++;
261 		i++;
262 	}
263 
264 	vq->vq_desc_head_idx += num;
265 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
266 	return 0;
267 }
268 
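/*
 * Post a single mbuf on the RX virtqueue; the descriptor points at the
 * buffer with space reserved in the headroom for the virtio-net header.
 */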
269 static inline int
270 virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
271 {
272 	struct vq_desc_extra *dxp;
273 	struct virtio_hw *hw = vq->hw;
274 	struct vring_desc *start_dp;
275 	uint16_t needed = 1;
276 	uint16_t head_idx, idx;
277 
278 	if (unlikely(vq->vq_free_cnt == 0))
279 		return -ENOSPC;
280 	if (unlikely(vq->vq_free_cnt < needed))
281 		return -EMSGSIZE;
282 
283 	head_idx = vq->vq_desc_head_idx;
284 	if (unlikely(head_idx >= vq->vq_nentries))
285 		return -EFAULT;
286 
287 	idx = head_idx;
288 	dxp = &vq->vq_descx[idx];
289 	dxp->cookie = (void *)cookie;
290 	dxp->ndescs = needed;
291 
292 	start_dp = vq->vq_ring.desc;
293 	start_dp[idx].addr =
294 		VIRTIO_MBUF_ADDR(cookie, vq) +
295 		RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
296 	start_dp[idx].len =
297 		cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
298 	start_dp[idx].flags =  VRING_DESC_F_WRITE;
299 	idx = start_dp[idx].next;
300 	vq->vq_desc_head_idx = idx;
301 	if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
302 		vq->vq_desc_tail_idx = idx;
303 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
304 	vq_update_avail_ring(vq, head_idx);
305 
306 	return 0;
307 }
308 
309 /* When doing TSO, the IP length is not included in the pseudo header
310  * checksum of the packet given to the PMD, but virtio expects it to
311  * be included.
312  */
313 static void
314 virtio_tso_fix_cksum(struct rte_mbuf *m)
315 {
316 	/* common case: header is not fragmented */
317 	if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
318 			m->l4_len)) {
319 		struct ipv4_hdr *iph;
320 		struct ipv6_hdr *ip6h;
321 		struct tcp_hdr *th;
322 		uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
323 		uint32_t tmp;
324 
325 		iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
326 		th = RTE_PTR_ADD(iph, m->l3_len);
327 		if ((iph->version_ihl >> 4) == 4) {
328 			iph->hdr_checksum = 0;
329 			iph->hdr_checksum = rte_ipv4_cksum(iph);
330 			ip_len = iph->total_length;
331 			ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
332 				m->l3_len);
333 		} else {
334 			ip6h = (struct ipv6_hdr *)iph;
335 			ip_paylen = ip6h->payload_len;
336 		}
337 
338 		/* calculate the new phdr checksum not including ip_paylen */
339 		prev_cksum = th->cksum;
340 		tmp = prev_cksum;
341 		tmp += ip_paylen;
342 		tmp = (tmp & 0xffff) + (tmp >> 16);
343 		new_cksum = tmp;
344 
345 		/* replace it in the packet */
346 		th->cksum = new_cksum;
347 	}
348 }
349 
350 static inline int
351 tx_offload_enabled(struct virtio_hw *hw)
352 {
353 	return vtpci_with_feature(hw, VIRTIO_NET_F_CSUM) ||
354 		vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) ||
355 		vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO6);
356 }
357 
358 /* avoid unnecessary write operations, to lessen cache issues */
359 #define ASSIGN_UNLESS_EQUAL(var, val) do {	\
360 	if ((var) != (val))			\
361 		(var) = (val);			\
362 } while (0)
363 
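/*
 * Translate the mbuf TX offload flags (L4 checksum, TSO) into the
 * virtio-net header fields.  When a packet requests no checksum or no
 * TSO, the corresponding fields are reset to zero (skipping the write
 * if they already are) so a reused header does not carry stale values.
 */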
364 static inline void
365 virtqueue_xmit_offload(struct virtio_net_hdr *hdr,
366 			struct rte_mbuf *cookie,
367 			int offload)
368 {
369 	if (offload) {
370 		if (cookie->ol_flags & PKT_TX_TCP_SEG)
371 			cookie->ol_flags |= PKT_TX_TCP_CKSUM;
372 
373 		switch (cookie->ol_flags & PKT_TX_L4_MASK) {
374 		case PKT_TX_UDP_CKSUM:
375 			hdr->csum_start = cookie->l2_len + cookie->l3_len;
376 			hdr->csum_offset = offsetof(struct udp_hdr,
377 				dgram_cksum);
378 			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
379 			break;
380 
381 		case PKT_TX_TCP_CKSUM:
382 			hdr->csum_start = cookie->l2_len + cookie->l3_len;
383 			hdr->csum_offset = offsetof(struct tcp_hdr, cksum);
384 			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
385 			break;
386 
387 		default:
388 			ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
389 			ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
390 			ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
391 			break;
392 		}
393 
394 		/* TCP Segmentation Offload */
395 		if (cookie->ol_flags & PKT_TX_TCP_SEG) {
396 			virtio_tso_fix_cksum(cookie);
397 			hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
398 				VIRTIO_NET_HDR_GSO_TCPV6 :
399 				VIRTIO_NET_HDR_GSO_TCPV4;
400 			hdr->gso_size = cookie->tso_segsz;
401 			hdr->hdr_len =
402 				cookie->l2_len +
403 				cookie->l3_len +
404 				cookie->l4_len;
405 		} else {
406 			ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
407 			ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
408 			ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
409 		}
410 	}
411 }
412 
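/*
 * Enqueue @num single-segment packets on an in-order TX virtqueue.
 * Each virtio-net header is prepended into the mbuf headroom, so one
 * descriptor per packet is enough.
 */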
413 static inline void
414 virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
415 			struct rte_mbuf **cookies,
416 			uint16_t num)
417 {
418 	struct vq_desc_extra *dxp;
419 	struct virtqueue *vq = txvq->vq;
420 	struct vring_desc *start_dp;
421 	struct virtio_net_hdr *hdr;
422 	uint16_t idx;
423 	uint16_t head_size = vq->hw->vtnet_hdr_size;
424 	int offload;
425 	uint16_t i = 0;
426 
427 	idx = vq->vq_desc_head_idx;
428 	start_dp = vq->vq_ring.desc;
429 
430 	offload = tx_offload_enabled(vq->hw);
431 
432 	while (i < num) {
433 		idx = idx & (vq->vq_nentries - 1);
434 		dxp = &vq->vq_descx[idx];
435 		dxp->cookie = (void *)cookies[i];
436 		dxp->ndescs = 1;
437 
438 		hdr = (struct virtio_net_hdr *)
439 			rte_pktmbuf_prepend(cookies[i], head_size);
440 		cookies[i]->pkt_len -= head_size;
441 
442 		/* if offload is disabled, the header is not zeroed below, so do it now */
443 		if (offload == 0) {
444 			ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
445 			ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
446 			ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
447 			ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
448 			ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
449 			ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
450 		}
451 
452 		virtqueue_xmit_offload(hdr, cookies[i], offload);
453 
454 		start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookies[i], vq);
455 		start_dp[idx].len   = cookies[i]->data_len;
456 		start_dp[idx].flags = 0;
457 
458 		vq_update_avail_ring(vq, idx);
459 
460 		idx++;
461 		i++;
462 	}
463 
464 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
465 	vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
466 }
467 
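/*
 * General TX enqueue.  Depending on the negotiated features, the
 * virtio-net header is either pushed into the mbuf headroom
 * (@can_push), placed in the reserved region referenced by an indirect
 * descriptor table (@use_indirect), or given its own descriptor
 * chained in front of the packet segments.
 */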
468 static inline void
469 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
470 			uint16_t needed, int use_indirect, int can_push,
471 			int in_order)
472 {
473 	struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
474 	struct vq_desc_extra *dxp;
475 	struct virtqueue *vq = txvq->vq;
476 	struct vring_desc *start_dp;
477 	uint16_t seg_num = cookie->nb_segs;
478 	uint16_t head_idx, idx;
479 	uint16_t head_size = vq->hw->vtnet_hdr_size;
480 	struct virtio_net_hdr *hdr;
481 	int offload;
482 
483 	offload = tx_offload_enabled(vq->hw);
484 
485 	head_idx = vq->vq_desc_head_idx;
486 	idx = head_idx;
487 	dxp = &vq->vq_descx[idx];
488 	dxp->cookie = (void *)cookie;
489 	dxp->ndescs = needed;
490 
491 	start_dp = vq->vq_ring.desc;
492 
493 	if (can_push) {
494 		/* prepend cannot fail, checked by caller */
495 		hdr = (struct virtio_net_hdr *)
496 			rte_pktmbuf_prepend(cookie, head_size);
497 		/* rte_pktmbuf_prepend() adds the hdr size to the pkt length, which is
498 		 * not wanted here; the subtraction below restores the correct pkt size.
499 		 */
500 		cookie->pkt_len -= head_size;
501 
502 		/* if offload is disabled, the header is not zeroed below, so do it now */
503 		if (offload == 0) {
504 			ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
505 			ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
506 			ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
507 			ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
508 			ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
509 			ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
510 		}
511 	} else if (use_indirect) {
512 		/* setup tx ring slot to point to indirect
513 		 * descriptor list stored in reserved region.
514 		 *
515 		 * the first slot in indirect ring is already preset
516 		 * to point to the header in reserved region
517 		 */
518 		start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
519 			RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
520 		start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
521 		start_dp[idx].flags = VRING_DESC_F_INDIRECT;
522 		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
523 
524 		/* loop below will fill in rest of the indirect elements */
525 		start_dp = txr[idx].tx_indir;
526 		idx = 1;
527 	} else {
528 		/* setup first tx ring slot to point to header
529 		 * stored in reserved region.
530 		 */
531 		start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
532 			RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
533 		start_dp[idx].len   = vq->hw->vtnet_hdr_size;
534 		start_dp[idx].flags = VRING_DESC_F_NEXT;
535 		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
536 
537 		idx = start_dp[idx].next;
538 	}
539 
540 	virtqueue_xmit_offload(hdr, cookie, offload);
541 
542 	do {
543 		start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
544 		start_dp[idx].len   = cookie->data_len;
545 		start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
546 		idx = start_dp[idx].next;
547 	} while ((cookie = cookie->next) != NULL);
548 
549 	if (use_indirect)
550 		idx = vq->vq_ring.desc[head_idx].next;
551 
552 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
553 
554 	vq->vq_desc_head_idx = idx;
555 	vq_update_avail_ring(vq, head_idx);
556 
557 	if (!in_order) {
558 		if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
559 			vq->vq_desc_tail_idx = idx;
560 	}
561 }
562 
563 void
564 virtio_dev_cq_start(struct rte_eth_dev *dev)
565 {
566 	struct virtio_hw *hw = dev->data->dev_private;
567 
568 	if (hw->cvq && hw->cvq->vq) {
569 		rte_spinlock_init(&hw->cvq->lock);
570 		VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
571 	}
572 }
573 
574 int
575 virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
576 			uint16_t queue_idx,
577 			uint16_t nb_desc,
578 			unsigned int socket_id __rte_unused,
579 			const struct rte_eth_rxconf *rx_conf __rte_unused,
580 			struct rte_mempool *mp)
581 {
582 	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
583 	struct virtio_hw *hw = dev->data->dev_private;
584 	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
585 	struct virtnet_rx *rxvq;
586 
587 	PMD_INIT_FUNC_TRACE();
588 
589 	if (nb_desc == 0 || nb_desc > vq->vq_nentries)
590 		nb_desc = vq->vq_nentries;
591 	vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
592 
593 	rxvq = &vq->rxq;
594 	rxvq->queue_id = queue_idx;
595 	rxvq->mpool = mp;
596 	if (rxvq->mpool == NULL) {
597 		rte_exit(EXIT_FAILURE,
598 			"Cannot allocate mbufs for rx virtqueue");
599 	}
600 
601 	dev->data->rx_queues[queue_idx] = rxvq;
602 
603 	return 0;
604 }
605 
606 int
607 virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
608 {
609 	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
610 	struct virtio_hw *hw = dev->data->dev_private;
611 	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
612 	struct virtnet_rx *rxvq = &vq->rxq;
613 	struct rte_mbuf *m;
614 	uint16_t desc_idx;
615 	int error, nbufs, i;
616 
617 	PMD_INIT_FUNC_TRACE();
618 
619 	/* Allocate blank mbufs for each rx descriptor */
620 	nbufs = 0;
621 
622 	if (hw->use_simple_rx) {
623 		for (desc_idx = 0; desc_idx < vq->vq_nentries;
624 		     desc_idx++) {
625 			vq->vq_ring.avail->ring[desc_idx] = desc_idx;
626 			vq->vq_ring.desc[desc_idx].flags =
627 				VRING_DESC_F_WRITE;
628 		}
629 
630 		virtio_rxq_vec_setup(rxvq);
631 	}
632 
633 	memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
634 	for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
635 	     desc_idx++) {
636 		vq->sw_ring[vq->vq_nentries + desc_idx] =
637 			&rxvq->fake_mbuf;
638 	}
639 
640 	if (hw->use_simple_rx) {
641 		while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
642 			virtio_rxq_rearm_vec(rxvq);
643 			nbufs += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
644 		}
645 	} else if (hw->use_inorder_rx) {
646 		if ((!virtqueue_full(vq))) {
647 			uint16_t free_cnt = vq->vq_free_cnt;
648 			struct rte_mbuf *pkts[free_cnt];
649 
650 			if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, pkts,
651 				free_cnt)) {
652 				error = virtqueue_enqueue_refill_inorder(vq,
653 						pkts,
654 						free_cnt);
655 				if (unlikely(error)) {
656 					for (i = 0; i < free_cnt; i++)
657 						rte_pktmbuf_free(pkts[i]);
658 				}
659 			}
660 
661 			nbufs += free_cnt;
662 			vq_update_avail_idx(vq);
663 		}
664 	} else {
665 		while (!virtqueue_full(vq)) {
666 			m = rte_mbuf_raw_alloc(rxvq->mpool);
667 			if (m == NULL)
668 				break;
669 
670 			/* Enqueue allocated buffers */
671 			error = virtqueue_enqueue_recv_refill(vq, m);
672 			if (error) {
673 				rte_pktmbuf_free(m);
674 				break;
675 			}
676 			nbufs++;
677 		}
678 
679 		vq_update_avail_idx(vq);
680 	}
681 
682 	PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
683 
684 	VIRTQUEUE_DUMP(vq);
685 
686 	return 0;
687 }
688 
689 /*
690  * struct rte_eth_dev *dev: Used to update dev
691  * uint16_t nb_desc: Defaults to values read from config space
692  * unsigned int socket_id: Used to allocate memzone
693  * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
694  * uint16_t queue_idx: Just used as an index in dev txq list
695  */
696 int
697 virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
698 			uint16_t queue_idx,
699 			uint16_t nb_desc,
700 			unsigned int socket_id __rte_unused,
701 			const struct rte_eth_txconf *tx_conf)
702 {
703 	uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
704 	struct virtio_hw *hw = dev->data->dev_private;
705 	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
706 	struct virtnet_tx *txvq;
707 	uint16_t tx_free_thresh;
708 
709 	PMD_INIT_FUNC_TRACE();
710 
711 	if (nb_desc == 0 || nb_desc > vq->vq_nentries)
712 		nb_desc = vq->vq_nentries;
713 	vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
714 
715 	txvq = &vq->txq;
716 	txvq->queue_id = queue_idx;
717 
718 	tx_free_thresh = tx_conf->tx_free_thresh;
719 	if (tx_free_thresh == 0)
720 		tx_free_thresh =
721 			RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
722 
723 	if (tx_free_thresh >= (vq->vq_nentries - 3)) {
724 		RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
725 			"number of TX entries minus 3 (%u)."
726 			" (tx_free_thresh=%u port=%u queue=%u)\n",
727 			vq->vq_nentries - 3,
728 			tx_free_thresh, dev->data->port_id, queue_idx);
729 		return -EINVAL;
730 	}
731 
732 	vq->vq_free_thresh = tx_free_thresh;
733 
734 	dev->data->tx_queues[queue_idx] = txvq;
735 	return 0;
736 }
737 
738 int
739 virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
740 				uint16_t queue_idx)
741 {
742 	uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
743 	struct virtio_hw *hw = dev->data->dev_private;
744 	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
745 
746 	PMD_INIT_FUNC_TRACE();
747 
748 	if (hw->use_inorder_tx)
749 		vq->vq_ring.desc[vq->vq_nentries - 1].next = 0;
750 
751 	VIRTQUEUE_DUMP(vq);
752 
753 	return 0;
754 }
755 
756 static void
757 virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
758 {
759 	int error;
760 	/*
761 	 * Requeue the discarded mbuf. This should always be
762 	 * successful since it was just dequeued.
763 	 */
764 	error = virtqueue_enqueue_recv_refill(vq, m);
765 
766 	if (unlikely(error)) {
767 		RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf\n");
768 		rte_pktmbuf_free(m);
769 	}
770 }
771 
772 static void
773 virtio_discard_rxbuf_inorder(struct virtqueue *vq, struct rte_mbuf *m)
774 {
775 	int error;
776 
777 	error = virtqueue_enqueue_refill_inorder(vq, &m, 1);
778 	if (unlikely(error)) {
779 		RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf\n");
780 		rte_pktmbuf_free(m);
781 	}
782 }
783 
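/*
 * Account one packet in the per-queue statistics: size histogram bins
 * plus multicast/broadcast counters based on the destination MAC.
 */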
784 static void
785 virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
786 {
787 	uint32_t s = mbuf->pkt_len;
788 	struct ether_addr *ea;
789 
790 	if (s == 64) {
791 		stats->size_bins[1]++;
792 	} else if (s > 64 && s < 1024) {
793 		uint32_t bin;
794 
795 		/* count leading zeros to offset into the correct bin */
796 		bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
797 		stats->size_bins[bin]++;
798 	} else {
799 		if (s < 64)
800 			stats->size_bins[0]++;
801 		else if (s < 1519)
802 			stats->size_bins[6]++;
803 		else if (s >= 1519)
804 			stats->size_bins[7]++;
805 	}
806 
807 	ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *);
808 	if (is_multicast_ether_addr(ea)) {
809 		if (is_broadcast_ether_addr(ea))
810 			stats->broadcast++;
811 		else
812 			stats->multicast++;
813 	}
814 }
815 
816 static inline void
817 virtio_rx_stats_updated(struct virtnet_rx *rxvq, struct rte_mbuf *m)
818 {
819 	VIRTIO_DUMP_PACKET(m, m->data_len);
820 
821 	rxvq->stats.bytes += m->pkt_len;
822 	virtio_update_packet_stats(&rxvq->stats, m);
823 }
824 
825 /* Optionally fill offload information in structure */
826 static int
827 virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
828 {
829 	struct rte_net_hdr_lens hdr_lens;
830 	uint32_t hdrlen, ptype;
831 	int l4_supported = 0;
832 
833 	/* nothing to do */
834 	if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
835 		return 0;
836 
837 	m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
838 
839 	ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
840 	m->packet_type = ptype;
841 	if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
842 	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
843 	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
844 		l4_supported = 1;
845 
846 	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
847 		hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
848 		if (hdr->csum_start <= hdrlen && l4_supported) {
849 			m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
850 		} else {
851 			/* Unknown proto or tunnel, do sw cksum. We can assume
852 			 * the cksum field is in the first segment since the
853 			 * buffers we provided to the host are large enough.
854 			 * In case of SCTP, this will be wrong since it's a CRC
855 			 * but there's nothing we can do.
856 			 */
857 			uint16_t csum = 0, off;
858 
859 			rte_raw_cksum_mbuf(m, hdr->csum_start,
860 				rte_pktmbuf_pkt_len(m) - hdr->csum_start,
861 				&csum);
862 			if (likely(csum != 0xffff))
863 				csum = ~csum;
864 			off = hdr->csum_offset + hdr->csum_start;
865 			if (rte_pktmbuf_data_len(m) >= off + 1)
866 				*rte_pktmbuf_mtod_offset(m, uint16_t *,
867 					off) = csum;
868 		}
869 	} else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
870 		m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
871 	}
872 
873 	/* GSO request, save required information in mbuf */
874 	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
875 		/* Check unsupported modes */
876 		if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
877 		    (hdr->gso_size == 0)) {
878 			return -EINVAL;
879 		}
880 
881 		/* Update mss length in mbuf */
882 		m->tso_segsz = hdr->gso_size;
883 		switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
884 		case VIRTIO_NET_HDR_GSO_TCPV4:
885 		case VIRTIO_NET_HDR_GSO_TCPV6:
886 			m->ol_flags |= PKT_RX_LRO |
887 				PKT_RX_L4_CKSUM_NONE;
888 			break;
889 		default:
890 			return -EINVAL;
891 		}
892 	}
893 
894 	return 0;
895 }
896 
897 static inline int
898 rx_offload_enabled(struct virtio_hw *hw)
899 {
900 	return vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) ||
901 		vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
902 		vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6);
903 }
904 
905 #define VIRTIO_MBUF_BURST_SZ 64
906 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
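/*
 * Burst RX for the split ring without mergeable buffers: each used
 * descriptor carries one complete packet.  Used buffers are handed to
 * the application and the ring is refilled with fresh mbufs before
 * notifying the host if needed.
 */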
907 uint16_t
908 virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
909 {
910 	struct virtnet_rx *rxvq = rx_queue;
911 	struct virtqueue *vq = rxvq->vq;
912 	struct virtio_hw *hw = vq->hw;
913 	struct rte_mbuf *rxm, *new_mbuf;
914 	uint16_t nb_used, num, nb_rx;
915 	uint32_t len[VIRTIO_MBUF_BURST_SZ];
916 	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
917 	int error;
918 	uint32_t i, nb_enqueued;
919 	uint32_t hdr_size;
920 	int offload;
921 	struct virtio_net_hdr *hdr;
922 
923 	nb_rx = 0;
924 	if (unlikely(hw->started == 0))
925 		return nb_rx;
926 
927 	nb_used = VIRTQUEUE_NUSED(vq);
928 
929 	virtio_rmb();
930 
931 	num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
932 	if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
933 		num = VIRTIO_MBUF_BURST_SZ;
934 	if (likely(num > DESC_PER_CACHELINE))
935 		num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
936 
937 	num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
938 	PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
939 
940 	nb_enqueued = 0;
941 	hdr_size = hw->vtnet_hdr_size;
942 	offload = rx_offload_enabled(hw);
943 
944 	for (i = 0; i < num ; i++) {
945 		rxm = rcv_pkts[i];
946 
947 		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
948 
949 		if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
950 			PMD_RX_LOG(ERR, "Packet drop");
951 			nb_enqueued++;
952 			virtio_discard_rxbuf(vq, rxm);
953 			rxvq->stats.errors++;
954 			continue;
955 		}
956 
957 		rxm->port = rxvq->port_id;
958 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
959 		rxm->ol_flags = 0;
960 		rxm->vlan_tci = 0;
961 
962 		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
963 		rxm->data_len = (uint16_t)(len[i] - hdr_size);
964 
965 		hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
966 			RTE_PKTMBUF_HEADROOM - hdr_size);
967 
968 		if (hw->vlan_strip)
969 			rte_vlan_strip(rxm);
970 
971 		if (offload && virtio_rx_offload(rxm, hdr) < 0) {
972 			virtio_discard_rxbuf(vq, rxm);
973 			rxvq->stats.errors++;
974 			continue;
975 		}
976 
977 		virtio_rx_stats_updated(rxvq, rxm);
978 
979 		rx_pkts[nb_rx++] = rxm;
980 	}
981 
982 	rxvq->stats.packets += nb_rx;
983 
984 	/* Allocate new mbuf for the used descriptor */
985 	error = ENOSPC;
986 	while (likely(!virtqueue_full(vq))) {
987 		new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
988 		if (unlikely(new_mbuf == NULL)) {
989 			struct rte_eth_dev *dev
990 				= &rte_eth_devices[rxvq->port_id];
991 			dev->data->rx_mbuf_alloc_failed++;
992 			break;
993 		}
994 		error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
995 		if (unlikely(error)) {
996 			rte_pktmbuf_free(new_mbuf);
997 			break;
998 		}
999 		nb_enqueued++;
1000 	}
1001 
1002 	if (likely(nb_enqueued)) {
1003 		vq_update_avail_idx(vq);
1004 
1005 		if (unlikely(virtqueue_kick_prepare(vq))) {
1006 			virtqueue_notify(vq);
1007 			PMD_RX_LOG(DEBUG, "Notified");
1008 		}
1009 	}
1010 
1011 	return nb_rx;
1012 }
1013 
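/*
 * Burst RX for mergeable buffers on an in-order virtqueue: the header
 * of the first buffer announces how many descriptors the packet spans,
 * and the extra segments are chained onto the head mbuf.
 */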
1014 uint16_t
1015 virtio_recv_mergeable_pkts_inorder(void *rx_queue,
1016 			struct rte_mbuf **rx_pkts,
1017 			uint16_t nb_pkts)
1018 {
1019 	struct virtnet_rx *rxvq = rx_queue;
1020 	struct virtqueue *vq = rxvq->vq;
1021 	struct virtio_hw *hw = vq->hw;
1022 	struct rte_mbuf *rxm;
1023 	struct rte_mbuf *prev;
1024 	uint16_t nb_used, num, nb_rx;
1025 	uint32_t len[VIRTIO_MBUF_BURST_SZ];
1026 	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1027 	int error;
1028 	uint32_t nb_enqueued;
1029 	uint32_t seg_num;
1030 	uint32_t seg_res;
1031 	uint32_t hdr_size;
1032 	int32_t i;
1033 	int offload;
1034 
1035 	nb_rx = 0;
1036 	if (unlikely(hw->started == 0))
1037 		return nb_rx;
1038 
1039 	nb_used = VIRTQUEUE_NUSED(vq);
1040 	nb_used = RTE_MIN(nb_used, nb_pkts);
1041 	nb_used = RTE_MIN(nb_used, VIRTIO_MBUF_BURST_SZ);
1042 
1043 	virtio_rmb();
1044 
1045 	PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1046 
1047 	nb_enqueued = 0;
1048 	seg_num = 1;
1049 	seg_res = 0;
1050 	hdr_size = hw->vtnet_hdr_size;
1051 	offload = rx_offload_enabled(hw);
1052 
1053 	num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len, nb_used);
1054 
1055 	for (i = 0; i < num; i++) {
1056 		struct virtio_net_hdr_mrg_rxbuf *header;
1057 
1058 		PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1059 		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1060 
1061 		rxm = rcv_pkts[i];
1062 
1063 		if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1064 			PMD_RX_LOG(ERR, "Packet drop");
1065 			nb_enqueued++;
1066 			virtio_discard_rxbuf_inorder(vq, rxm);
1067 			rxvq->stats.errors++;
1068 			continue;
1069 		}
1070 
1071 		header = (struct virtio_net_hdr_mrg_rxbuf *)
1072 			 ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1073 			 - hdr_size);
1074 		seg_num = header->num_buffers;
1075 
1076 		if (seg_num == 0)
1077 			seg_num = 1;
1078 
1079 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
1080 		rxm->nb_segs = seg_num;
1081 		rxm->ol_flags = 0;
1082 		rxm->vlan_tci = 0;
1083 		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1084 		rxm->data_len = (uint16_t)(len[i] - hdr_size);
1085 
1086 		rxm->port = rxvq->port_id;
1087 
1088 		rx_pkts[nb_rx] = rxm;
1089 		prev = rxm;
1090 
1091 		if (offload && virtio_rx_offload(rxm, &header->hdr) < 0) {
1092 			virtio_discard_rxbuf_inorder(vq, rxm);
1093 			rxvq->stats.errors++;
1094 			continue;
1095 		}
1096 
1097 		if (hw->vlan_strip)
1098 			rte_vlan_strip(rx_pkts[nb_rx]);
1099 
1100 		seg_res = seg_num - 1;
1101 
1102 		/* Merge remaining segments */
1103 		while (seg_res != 0 && i < (num - 1)) {
1104 			i++;
1105 
1106 			rxm = rcv_pkts[i];
1107 			rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1108 			rxm->pkt_len = (uint32_t)(len[i]);
1109 			rxm->data_len = (uint16_t)(len[i]);
1110 
1111 			rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1112 			rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1113 
1114 			if (prev)
1115 				prev->next = rxm;
1116 
1117 			prev = rxm;
1118 			seg_res -= 1;
1119 		}
1120 
1121 		if (!seg_res) {
1122 			virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1123 			nb_rx++;
1124 		}
1125 	}
1126 
1127 	/* The last packet may still need remaining segments merged */
1128 	while (seg_res != 0) {
1129 		uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1130 					VIRTIO_MBUF_BURST_SZ);
1131 
1132 		prev = rcv_pkts[nb_rx];
1133 		if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1134 			num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len,
1135 							   rcv_cnt);
1136 			uint16_t extra_idx = 0;
1137 
1138 			rcv_cnt = num;
1139 			while (extra_idx < rcv_cnt) {
1140 				rxm = rcv_pkts[extra_idx];
1141 				rxm->data_off =
1142 					RTE_PKTMBUF_HEADROOM - hdr_size;
1143 				rxm->pkt_len = (uint32_t)(len[extra_idx]);
1144 				rxm->data_len = (uint16_t)(len[extra_idx]);
1145 				prev->next = rxm;
1146 				prev = rxm;
1147 				rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1148 				rx_pkts[nb_rx]->data_len += len[extra_idx];
1149 				extra_idx += 1;
1150 			}
1151 			seg_res -= rcv_cnt;
1152 
1153 			if (!seg_res) {
1154 				virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1155 				nb_rx++;
1156 			}
1157 		} else {
1158 			PMD_RX_LOG(ERR,
1159 					"Not enough segments for packet.");
1160 			virtio_discard_rxbuf_inorder(vq, prev);
1161 			rxvq->stats.errors++;
1162 			break;
1163 		}
1164 	}
1165 
1166 	rxvq->stats.packets += nb_rx;
1167 
1168 	/* Allocate new mbuf for the used descriptor */
1169 
1170 	if (likely(!virtqueue_full(vq))) {
1171 		/* free_cnt may include mrg descs */
1172 		uint16_t free_cnt = vq->vq_free_cnt;
1173 		struct rte_mbuf *new_pkts[free_cnt];
1174 
1175 		if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1176 			error = virtqueue_enqueue_refill_inorder(vq, new_pkts,
1177 					free_cnt);
1178 			if (unlikely(error)) {
1179 				for (i = 0; i < free_cnt; i++)
1180 					rte_pktmbuf_free(new_pkts[i]);
1181 			}
1182 			nb_enqueued += free_cnt;
1183 		} else {
1184 			struct rte_eth_dev *dev =
1185 				&rte_eth_devices[rxvq->port_id];
1186 			dev->data->rx_mbuf_alloc_failed += free_cnt;
1187 		}
1188 	}
1189 
1190 	if (likely(nb_enqueued)) {
1191 		vq_update_avail_idx(vq);
1192 
1193 		if (unlikely(virtqueue_kick_prepare(vq))) {
1194 			virtqueue_notify(vq);
1195 			PMD_RX_LOG(DEBUG, "Notified");
1196 		}
1197 	}
1198 
1199 	return nb_rx;
1200 }
1201 
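/*
 * Burst RX for mergeable buffers on the regular split ring: a packet
 * may span several descriptors, which are dequeued one head at a time
 * with the remaining segments fetched and chained as needed.
 */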
1202 uint16_t
1203 virtio_recv_mergeable_pkts(void *rx_queue,
1204 			struct rte_mbuf **rx_pkts,
1205 			uint16_t nb_pkts)
1206 {
1207 	struct virtnet_rx *rxvq = rx_queue;
1208 	struct virtqueue *vq = rxvq->vq;
1209 	struct virtio_hw *hw = vq->hw;
1210 	struct rte_mbuf *rxm, *new_mbuf;
1211 	uint16_t nb_used, num, nb_rx;
1212 	uint32_t len[VIRTIO_MBUF_BURST_SZ];
1213 	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1214 	struct rte_mbuf *prev;
1215 	int error;
1216 	uint32_t i, nb_enqueued;
1217 	uint32_t seg_num;
1218 	uint16_t extra_idx;
1219 	uint32_t seg_res;
1220 	uint32_t hdr_size;
1221 	int offload;
1222 
1223 	nb_rx = 0;
1224 	if (unlikely(hw->started == 0))
1225 		return nb_rx;
1226 
1227 	nb_used = VIRTQUEUE_NUSED(vq);
1228 
1229 	virtio_rmb();
1230 
1231 	PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1232 
1233 	i = 0;
1234 	nb_enqueued = 0;
1235 	seg_num = 0;
1236 	extra_idx = 0;
1237 	seg_res = 0;
1238 	hdr_size = hw->vtnet_hdr_size;
1239 	offload = rx_offload_enabled(hw);
1240 
1241 	while (i < nb_used) {
1242 		struct virtio_net_hdr_mrg_rxbuf *header;
1243 
1244 		if (nb_rx == nb_pkts)
1245 			break;
1246 
1247 		num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, 1);
1248 		if (num != 1)
1249 			continue;
1250 
1251 		i++;
1252 
1253 		PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1254 		PMD_RX_LOG(DEBUG, "packet len:%d", len[0]);
1255 
1256 		rxm = rcv_pkts[0];
1257 
1258 		if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) {
1259 			PMD_RX_LOG(ERR, "Packet drop");
1260 			nb_enqueued++;
1261 			virtio_discard_rxbuf(vq, rxm);
1262 			rxvq->stats.errors++;
1263 			continue;
1264 		}
1265 
1266 		header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr +
1267 			RTE_PKTMBUF_HEADROOM - hdr_size);
1268 		seg_num = header->num_buffers;
1269 
1270 		if (seg_num == 0)
1271 			seg_num = 1;
1272 
1273 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
1274 		rxm->nb_segs = seg_num;
1275 		rxm->ol_flags = 0;
1276 		rxm->vlan_tci = 0;
1277 		rxm->pkt_len = (uint32_t)(len[0] - hdr_size);
1278 		rxm->data_len = (uint16_t)(len[0] - hdr_size);
1279 
1280 		rxm->port = rxvq->port_id;
1281 		rx_pkts[nb_rx] = rxm;
1282 		prev = rxm;
1283 
1284 		if (offload && virtio_rx_offload(rxm, &header->hdr) < 0) {
1285 			virtio_discard_rxbuf(vq, rxm);
1286 			rxvq->stats.errors++;
1287 			continue;
1288 		}
1289 
1290 		seg_res = seg_num - 1;
1291 
1292 		while (seg_res != 0) {
1293 			/*
1294 			 * Get extra segments for current uncompleted packet.
1295 			 */
1296 			uint16_t  rcv_cnt =
1297 				RTE_MIN(seg_res, RTE_DIM(rcv_pkts));
1298 			if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1299 				uint32_t rx_num =
1300 					virtqueue_dequeue_burst_rx(vq,
1301 					rcv_pkts, len, rcv_cnt);
1302 				i += rx_num;
1303 				rcv_cnt = rx_num;
1304 			} else {
1305 				PMD_RX_LOG(ERR,
1306 					   "Not enough segments for packet.");
1307 				nb_enqueued++;
1308 				virtio_discard_rxbuf(vq, rxm);
1309 				rxvq->stats.errors++;
1310 				break;
1311 			}
1312 
1313 			extra_idx = 0;
1314 
1315 			while (extra_idx < rcv_cnt) {
1316 				rxm = rcv_pkts[extra_idx];
1317 
1318 				rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1319 				rxm->pkt_len = (uint32_t)(len[extra_idx]);
1320 				rxm->data_len = (uint16_t)(len[extra_idx]);
1321 
1322 				if (prev)
1323 					prev->next = rxm;
1324 
1325 				prev = rxm;
1326 				rx_pkts[nb_rx]->pkt_len += rxm->pkt_len;
1327 				extra_idx++;
1328 			}
1329 			seg_res -= rcv_cnt;
1330 		}
1331 
1332 		if (hw->vlan_strip)
1333 			rte_vlan_strip(rx_pkts[nb_rx]);
1334 
1335 		VIRTIO_DUMP_PACKET(rx_pkts[nb_rx],
1336 			rx_pkts[nb_rx]->data_len);
1337 
1338 		rxvq->stats.bytes += rx_pkts[nb_rx]->pkt_len;
1339 		virtio_update_packet_stats(&rxvq->stats, rx_pkts[nb_rx]);
1340 		nb_rx++;
1341 	}
1342 
1343 	rxvq->stats.packets += nb_rx;
1344 
1345 	/* Allocate new mbuf for the used descriptor */
1346 	error = ENOSPC;
1347 	while (likely(!virtqueue_full(vq))) {
1348 		new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
1349 		if (unlikely(new_mbuf == NULL)) {
1350 			struct rte_eth_dev *dev
1351 				= &rte_eth_devices[rxvq->port_id];
1352 			dev->data->rx_mbuf_alloc_failed++;
1353 			break;
1354 		}
1355 		error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
1356 		if (unlikely(error)) {
1357 			rte_pktmbuf_free(new_mbuf);
1358 			break;
1359 		}
1360 		nb_enqueued++;
1361 	}
1362 
1363 	if (likely(nb_enqueued)) {
1364 		vq_update_avail_idx(vq);
1365 
1366 		if (unlikely(virtqueue_kick_prepare(vq))) {
1367 			virtqueue_notify(vq);
1368 			PMD_RX_LOG(DEBUG, "Notified");
1369 		}
1370 	}
1371 
1372 	return nb_rx;
1373 }
1374 
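/*
 * Burst TX on the split ring: completed transmits are reclaimed once
 * enough of the ring has been consumed, VLAN tags are inserted in
 * software when requested, and each packet is enqueued using the most
 * efficient layout available (pushed header, indirect or chained
 * descriptors).
 */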
1375 uint16_t
1376 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1377 {
1378 	struct virtnet_tx *txvq = tx_queue;
1379 	struct virtqueue *vq = txvq->vq;
1380 	struct virtio_hw *hw = vq->hw;
1381 	uint16_t hdr_size = hw->vtnet_hdr_size;
1382 	uint16_t nb_used, nb_tx = 0;
1383 	int error;
1384 
1385 	if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
1386 		return nb_tx;
1387 
1388 	if (unlikely(nb_pkts < 1))
1389 		return nb_pkts;
1390 
1391 	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
1392 	nb_used = VIRTQUEUE_NUSED(vq);
1393 
1394 	virtio_rmb();
1395 	if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
1396 		virtio_xmit_cleanup(vq, nb_used);
1397 
1398 	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1399 		struct rte_mbuf *txm = tx_pkts[nb_tx];
1400 		int can_push = 0, use_indirect = 0, slots, need;
1401 
1402 		/* Do VLAN tag insertion */
1403 		if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
1404 			error = rte_vlan_insert(&txm);
1405 			if (unlikely(error)) {
1406 				rte_pktmbuf_free(txm);
1407 				continue;
1408 			}
1409 		}
1410 
1411 		/* optimize ring usage */
1412 		if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
1413 		      vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
1414 		    rte_mbuf_refcnt_read(txm) == 1 &&
1415 		    RTE_MBUF_DIRECT(txm) &&
1416 		    txm->nb_segs == 1 &&
1417 		    rte_pktmbuf_headroom(txm) >= hdr_size &&
1418 		    rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
1419 				   __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
1420 			can_push = 1;
1421 		else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
1422 			 txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
1423 			use_indirect = 1;
1424 
1425 		/* How many main ring entries are needed for this Tx?
1426 		 * any_layout => number of segments
1427 		 * indirect   => 1
1428 		 * default    => number of segments + 1
1429 		 */
1430 		slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
1431 		need = slots - vq->vq_free_cnt;
1432 
1433 		/* A positive value indicates we need to free vring descriptors */
1434 		if (unlikely(need > 0)) {
1435 			nb_used = VIRTQUEUE_NUSED(vq);
1436 			virtio_rmb();
1437 			need = RTE_MIN(need, (int)nb_used);
1438 
1439 			virtio_xmit_cleanup(vq, need);
1440 			need = slots - vq->vq_free_cnt;
1441 			if (unlikely(need > 0)) {
1442 				PMD_TX_LOG(ERR,
1443 					   "No free tx descriptors to transmit");
1444 				break;
1445 			}
1446 		}
1447 
1448 		/* Enqueue Packet buffers */
1449 		virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect,
1450 			can_push, 0);
1451 
1452 		txvq->stats.bytes += txm->pkt_len;
1453 		virtio_update_packet_stats(&txvq->stats, txm);
1454 	}
1455 
1456 	txvq->stats.packets += nb_tx;
1457 
1458 	if (likely(nb_tx)) {
1459 		vq_update_avail_idx(vq);
1460 
1461 		if (unlikely(virtqueue_kick_prepare(vq))) {
1462 			virtqueue_notify(vq);
1463 			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
1464 		}
1465 	}
1466 
1467 	return nb_tx;
1468 }
1469 
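/*
 * Burst TX for in-order virtqueues: packets that fit the pushed-header
 * fast path are batched and enqueued one descriptor each via
 * virtqueue_enqueue_xmit_inorder(); all others fall back to the
 * regular chained enqueue.
 */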
1470 uint16_t
1471 virtio_xmit_pkts_inorder(void *tx_queue,
1472 			struct rte_mbuf **tx_pkts,
1473 			uint16_t nb_pkts)
1474 {
1475 	struct virtnet_tx *txvq = tx_queue;
1476 	struct virtqueue *vq = txvq->vq;
1477 	struct virtio_hw *hw = vq->hw;
1478 	uint16_t hdr_size = hw->vtnet_hdr_size;
1479 	uint16_t nb_used, nb_avail, nb_tx = 0, nb_inorder_pkts = 0;
1480 	struct rte_mbuf *inorder_pkts[nb_pkts];
1481 	int error;
1482 
1483 	if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
1484 		return nb_tx;
1485 
1486 	if (unlikely(nb_pkts < 1))
1487 		return nb_pkts;
1488 
1489 	VIRTQUEUE_DUMP(vq);
1490 	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
1491 	nb_used = VIRTQUEUE_NUSED(vq);
1492 
1493 	virtio_rmb();
1494 	if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
1495 		virtio_xmit_cleanup_inorder(vq, nb_used);
1496 
1497 	if (unlikely(!vq->vq_free_cnt))
1498 		virtio_xmit_cleanup_inorder(vq, nb_used);
1499 
1500 	nb_avail = RTE_MIN(vq->vq_free_cnt, nb_pkts);
1501 
1502 	for (nb_tx = 0; nb_tx < nb_avail; nb_tx++) {
1503 		struct rte_mbuf *txm = tx_pkts[nb_tx];
1504 		int slots, need;
1505 
1506 		/* Do VLAN tag insertion */
1507 		if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
1508 			error = rte_vlan_insert(&txm);
1509 			if (unlikely(error)) {
1510 				rte_pktmbuf_free(txm);
1511 				continue;
1512 			}
1513 		}
1514 
1515 		/* optimize ring usage */
1516 		if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
1517 		     vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
1518 		     rte_mbuf_refcnt_read(txm) == 1 &&
1519 		     RTE_MBUF_DIRECT(txm) &&
1520 		     txm->nb_segs == 1 &&
1521 		     rte_pktmbuf_headroom(txm) >= hdr_size &&
1522 		     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
1523 				__alignof__(struct virtio_net_hdr_mrg_rxbuf))) {
1524 			inorder_pkts[nb_inorder_pkts] = txm;
1525 			nb_inorder_pkts++;
1526 
1527 			txvq->stats.bytes += txm->pkt_len;
1528 			virtio_update_packet_stats(&txvq->stats, txm);
1529 			continue;
1530 		}
1531 
1532 		if (nb_inorder_pkts) {
1533 			virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
1534 							nb_inorder_pkts);
1535 			nb_inorder_pkts = 0;
1536 		}
1537 
1538 		slots = txm->nb_segs + 1;
1539 		need = slots - vq->vq_free_cnt;
1540 		if (unlikely(need > 0)) {
1541 			nb_used = VIRTQUEUE_NUSED(vq);
1542 			virtio_rmb();
1543 			need = RTE_MIN(need, (int)nb_used);
1544 
1545 			virtio_xmit_cleanup_inorder(vq, need);
1546 
1547 			need = slots - vq->vq_free_cnt;
1548 
1549 			if (unlikely(need > 0)) {
1550 				PMD_TX_LOG(ERR,
1551 					"No free tx descriptors to transmit");
1552 				break;
1553 			}
1554 		}
1555 		/* Enqueue Packet buffers */
1556 		virtqueue_enqueue_xmit(txvq, txm, slots, 0, 0, 1);
1557 
1558 		txvq->stats.bytes += txm->pkt_len;
1559 		virtio_update_packet_stats(&txvq->stats, txm);
1560 	}
1561 
1562 	/* Transmit all inorder packets */
1563 	if (nb_inorder_pkts)
1564 		virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
1565 						nb_inorder_pkts);
1566 
1567 	txvq->stats.packets += nb_tx;
1568 
1569 	if (likely(nb_tx)) {
1570 		vq_update_avail_idx(vq);
1571 
1572 		if (unlikely(virtqueue_kick_prepare(vq))) {
1573 			virtqueue_notify(vq);
1574 			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
1575 		}
1576 	}
1577 
1578 	VIRTQUEUE_DUMP(vq);
1579 
1580 	return nb_tx;
1581 }
1582