1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include <stdint.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <errno.h>
39 
40 #include <rte_cycles.h>
41 #include <rte_memory.h>
42 #include <rte_memzone.h>
43 #include <rte_branch_prediction.h>
44 #include <rte_mempool.h>
45 #include <rte_malloc.h>
46 #include <rte_mbuf.h>
47 #include <rte_ether.h>
48 #include <rte_ethdev.h>
49 #include <rte_prefetch.h>
50 #include <rte_string_fns.h>
51 #include <rte_errno.h>
52 #include <rte_byteorder.h>
53 #include <rte_cpuflags.h>
54 #include <rte_net.h>
55 #include <rte_ip.h>
56 
57 #include "virtio_logs.h"
58 #include "virtio_ethdev.h"
59 #include "virtio_pci.h"
60 #include "virtqueue.h"
61 #include "virtio_rxtx.h"
62 
63 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
64 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
65 #else
66 #define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
67 #endif
68 
69 
70 #define VIRTIO_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \
71 	ETH_TXQ_FLAGS_NOOFFLOADS)
72 
73 static void
74 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
75 {
76 	struct vring_desc *dp, *dp_tail;
77 	struct vq_desc_extra *dxp;
78 	uint16_t desc_idx_last = desc_idx;
79 
80 	dp  = &vq->vq_ring.desc[desc_idx];
81 	dxp = &vq->vq_descx[desc_idx];
82 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
83 	if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
84 		while (dp->flags & VRING_DESC_F_NEXT) {
85 			desc_idx_last = dp->next;
86 			dp = &vq->vq_ring.desc[dp->next];
87 		}
88 	}
89 	dxp->ndescs = 0;
90 
91 	/*
92 	 * We must append the existing free chain, if any, to the end of
93 	 * the newly freed chain. If the virtqueue was completely used,
94 	 * the free list head and tail are both VQ_RING_DESC_CHAIN_END.
95 	 */
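	/*
	 * Example (for illustration): if the free list is 9 -> 10 (tail 10)
	 * and the chain 5 -> 6 is freed, the code below links 10 -> 5 and
	 * the new tail becomes 6, giving 9 -> 10 -> 5 -> 6.
	 */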
96 	if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
97 		vq->vq_desc_head_idx = desc_idx;
98 	} else {
99 		dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
100 		dp_tail->next = desc_idx;
101 	}
102 
103 	vq->vq_desc_tail_idx = desc_idx_last;
104 	dp->next = VQ_RING_DESC_CHAIN_END;
105 }
106 
107 static uint16_t
108 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
109 			   uint32_t *len, uint16_t num)
110 {
111 	struct vring_used_elem *uep;
112 	struct rte_mbuf *cookie;
113 	uint16_t used_idx, desc_idx;
114 	uint16_t i;
115 
116 	/* Caller ensures num does not exceed the number of used entries */
117 	for (i = 0; i < num ; i++) {
118 		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
119 		uep = &vq->vq_ring.used->ring[used_idx];
120 		desc_idx = (uint16_t) uep->id;
121 		len[i] = uep->len;
122 		cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
123 
124 		if (unlikely(cookie == NULL)) {
125 			PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
126 				vq->vq_used_cons_idx);
127 			break;
128 		}
129 
130 		rte_prefetch0(cookie);
131 		rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
132 		rx_pkts[i]  = cookie;
133 		vq->vq_used_cons_idx++;
134 		vq_ring_free_chain(vq, desc_idx);
135 		vq->vq_descx[desc_idx].cookie = NULL;
136 	}
137 
138 	return i;
139 }
140 
141 #ifndef DEFAULT_TX_FREE_THRESH
142 #define DEFAULT_TX_FREE_THRESH 32
143 #endif
144 
145 /* Cleanup from completed transmits. */
146 static void
147 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
148 {
149 	uint16_t i, used_idx, desc_idx;
150 	for (i = 0; i < num; i++) {
151 		struct vring_used_elem *uep;
152 		struct vq_desc_extra *dxp;
153 
154 		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
155 		uep = &vq->vq_ring.used->ring[used_idx];
156 
157 		desc_idx = (uint16_t) uep->id;
158 		dxp = &vq->vq_descx[desc_idx];
159 		vq->vq_used_cons_idx++;
160 		vq_ring_free_chain(vq, desc_idx);
161 
162 		if (dxp->cookie != NULL) {
163 			rte_pktmbuf_free(dxp->cookie);
164 			dxp->cookie = NULL;
165 		}
166 	}
167 }
168 
169 
170 static inline int
171 virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
172 {
173 	struct vq_desc_extra *dxp;
174 	struct virtio_hw *hw = vq->hw;
175 	struct vring_desc *start_dp;
176 	uint16_t needed = 1;
177 	uint16_t head_idx, idx;
178 
179 	if (unlikely(vq->vq_free_cnt == 0))
180 		return -ENOSPC;
181 	if (unlikely(vq->vq_free_cnt < needed))
182 		return -EMSGSIZE;
183 
184 	head_idx = vq->vq_desc_head_idx;
185 	if (unlikely(head_idx >= vq->vq_nentries))
186 		return -EFAULT;
187 
188 	idx = head_idx;
189 	dxp = &vq->vq_descx[idx];
190 	dxp->cookie = (void *)cookie;
191 	dxp->ndescs = needed;
192 
193 	start_dp = vq->vq_ring.desc;
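	/* Start the buffer vtnet_hdr_size bytes before the default data
	 * offset, so the host writes the virtio-net header into the mbuf
	 * headroom immediately ahead of the packet data.
	 */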
194 	start_dp[idx].addr =
195 		VIRTIO_MBUF_ADDR(cookie, vq) +
196 		RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
197 	start_dp[idx].len =
198 		cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
199 	start_dp[idx].flags =  VRING_DESC_F_WRITE;
200 	idx = start_dp[idx].next;
201 	vq->vq_desc_head_idx = idx;
202 	if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
203 		vq->vq_desc_tail_idx = idx;
204 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
205 	vq_update_avail_ring(vq, head_idx);
206 
207 	return 0;
208 }
209 
210 static inline void
211 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
212 		       uint16_t needed, int use_indirect, int can_push)
213 {
214 	struct vq_desc_extra *dxp;
215 	struct virtqueue *vq = txvq->vq;
216 	struct vring_desc *start_dp;
217 	uint16_t seg_num = cookie->nb_segs;
218 	uint16_t head_idx, idx;
219 	uint16_t head_size = vq->hw->vtnet_hdr_size;
220 	unsigned long offs;
221 
222 	head_idx = vq->vq_desc_head_idx;
223 	idx = head_idx;
224 	dxp = &vq->vq_descx[idx];
225 	dxp->cookie = (void *)cookie;
226 	dxp->ndescs = needed;
227 
228 	start_dp = vq->vq_ring.desc;
229 
230 	if (can_push) {
231 		/* prepend a zeroed transmit header (no offloads) */
232 		void *hdr = rte_pktmbuf_prepend(cookie, head_size);
233 
234 		memset(hdr, 0, head_size);
235 	} else if (use_indirect) {
236 		/* setup tx ring slot to point to indirect
237 		 * descriptor list stored in reserved region.
238 		 *
239 		 * the first slot in indirect ring is already preset
240 		 * to point to the header in reserved region
241 		 */
242 		struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
243 
244 		offs = idx * sizeof(struct virtio_tx_region)
245 			+ offsetof(struct virtio_tx_region, tx_indir);
246 
247 		start_dp[idx].addr  = txvq->virtio_net_hdr_mem + offs;
248 		start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
249 		start_dp[idx].flags = VRING_DESC_F_INDIRECT;
250 
251 		/* loop below will fill in rest of the indirect elements */
252 		start_dp = txr[idx].tx_indir;
253 		idx = 1;
254 	} else {
255 		/* setup first tx ring slot to point to header
256 		 * stored in reserved region.
257 		 */
258 		offs = idx * sizeof(struct virtio_tx_region)
259 			+ offsetof(struct virtio_tx_region, tx_hdr);
260 
261 		start_dp[idx].addr  = txvq->virtio_net_hdr_mem + offs;
262 		start_dp[idx].len   = vq->hw->vtnet_hdr_size;
263 		start_dp[idx].flags = VRING_DESC_F_NEXT;
264 		idx = start_dp[idx].next;
265 	}
266 
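	/* Fill one data descriptor per mbuf segment, chaining them with
	 * VRING_DESC_F_NEXT; the last segment ends the chain.
	 */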
267 	do {
268 		start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
269 		start_dp[idx].len   = cookie->data_len;
270 		start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
271 		idx = start_dp[idx].next;
272 	} while ((cookie = cookie->next) != NULL);
273 
274 	if (use_indirect)
275 		idx = vq->vq_ring.desc[head_idx].next;
276 
277 	vq->vq_desc_head_idx = idx;
278 	if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
279 		vq->vq_desc_tail_idx = idx;
280 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
281 	vq_update_avail_ring(vq, head_idx);
282 }
283 
284 static void
285 virtio_dev_vring_start(struct virtqueue *vq)
286 {
287 	int size = vq->vq_nentries;
288 	struct vring *vr = &vq->vq_ring;
289 	uint8_t *ring_mem = vq->vq_ring_virt_mem;
290 
291 	PMD_INIT_FUNC_TRACE();
292 
293 	/*
294 	 * Reinitialise since virtio port might have been stopped and restarted
295 	 */
296 	memset(vq->vq_ring_virt_mem, 0, vq->vq_ring_size);
297 	vring_init(vr, size, ring_mem, VIRTIO_PCI_VRING_ALIGN);
298 	vq->vq_used_cons_idx = 0;
299 	vq->vq_desc_head_idx = 0;
300 	vq->vq_avail_idx = 0;
301 	vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1);
302 	vq->vq_free_cnt = vq->vq_nentries;
303 	memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);
304 
305 	vring_desc_init(vr->desc, size);
306 
307 	/*
308 	 * Disable the device (host) from interrupting the guest
309 	 */
310 	virtqueue_disable_intr(vq);
311 }
312 
313 void
314 virtio_dev_cq_start(struct rte_eth_dev *dev)
315 {
316 	struct virtio_hw *hw = dev->data->dev_private;
317 
318 	if (hw->cvq && hw->cvq->vq) {
319 		virtio_dev_vring_start(hw->cvq->vq);
320 		VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
321 	}
322 }
323 
324 void
325 virtio_dev_rxtx_start(struct rte_eth_dev *dev)
326 {
327 	/*
328 	 * Start receive and transmit vrings
329 	 * -	Setup vring structure for all queues
330 	 * -	Initialize descriptor for the rx vring
331 	 * -	Allocate blank mbufs for each rx descriptor
332 	 *
333 	 */
334 	uint16_t i;
335 	uint16_t desc_idx;
336 	struct virtio_hw *hw = dev->data->dev_private;
337 
338 	PMD_INIT_FUNC_TRACE();
339 
340 	/* Start rx vring. */
341 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
342 		struct virtnet_rx *rxvq = dev->data->rx_queues[i];
343 		struct virtqueue *vq = rxvq->vq;
344 		int error, nbufs;
345 		struct rte_mbuf *m;
346 
347 		virtio_dev_vring_start(vq);
348 		if (rxvq->mpool == NULL) {
349 			rte_exit(EXIT_FAILURE,
350 				"Cannot allocate mbufs for rx virtqueue");
351 		}
352 
353 		/* Allocate blank mbufs for each rx descriptor */
354 		nbufs = 0;
355 		error = ENOSPC;
356 
357 		if (hw->use_simple_rxtx) {
358 			for (desc_idx = 0; desc_idx < vq->vq_nentries;
359 			     desc_idx++) {
360 				vq->vq_ring.avail->ring[desc_idx] = desc_idx;
361 				vq->vq_ring.desc[desc_idx].flags =
362 					VRING_DESC_F_WRITE;
363 			}
364 		}
365 
366 		memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
367 		for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
368 		     desc_idx++) {
369 			vq->sw_ring[vq->vq_nentries + desc_idx] =
370 				&rxvq->fake_mbuf;
371 		}
372 
373 		while (!virtqueue_full(vq)) {
374 			m = rte_mbuf_raw_alloc(rxvq->mpool);
375 			if (m == NULL)
376 				break;
377 
378 			/******************************************
379 			*         Enqueue allocated buffers        *
380 			*******************************************/
381 			if (hw->use_simple_rxtx)
382 				error = virtqueue_enqueue_recv_refill_simple(vq, m);
383 			else
384 				error = virtqueue_enqueue_recv_refill(vq, m);
385 
386 			if (error) {
387 				rte_pktmbuf_free(m);
388 				break;
389 			}
390 			nbufs++;
391 		}
392 
393 		vq_update_avail_idx(vq);
394 
395 		PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
396 
397 		VIRTQUEUE_DUMP(vq);
398 	}
399 
400 	/* Start tx vring. */
401 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
402 		struct virtnet_tx *txvq = dev->data->tx_queues[i];
403 		struct virtqueue *vq = txvq->vq;
404 
405 		virtio_dev_vring_start(vq);
406 		if (hw->use_simple_rxtx) {
407 			uint16_t mid_idx  = vq->vq_nentries >> 1;
408 
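			/* For the simple TX path, the second half of the ring
			 * holds fixed virtio-net header descriptors, each
			 * chained to a data descriptor in the first half.
			 */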
409 			for (desc_idx = 0; desc_idx < mid_idx; desc_idx++) {
410 				vq->vq_ring.avail->ring[desc_idx] =
411 					desc_idx + mid_idx;
412 				vq->vq_ring.desc[desc_idx + mid_idx].next =
413 					desc_idx;
414 				vq->vq_ring.desc[desc_idx + mid_idx].addr =
415 					txvq->virtio_net_hdr_mem +
416 					offsetof(struct virtio_tx_region, tx_hdr);
417 				vq->vq_ring.desc[desc_idx + mid_idx].len =
418 					vq->hw->vtnet_hdr_size;
419 				vq->vq_ring.desc[desc_idx + mid_idx].flags =
420 					VRING_DESC_F_NEXT;
421 				vq->vq_ring.desc[desc_idx].flags = 0;
422 			}
423 			for (desc_idx = mid_idx; desc_idx < vq->vq_nentries;
424 			     desc_idx++)
425 				vq->vq_ring.avail->ring[desc_idx] = desc_idx;
426 		}
427 
428 		VIRTQUEUE_DUMP(vq);
429 	}
430 }
431 
432 int
433 virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
434 			uint16_t queue_idx,
435 			uint16_t nb_desc,
436 			unsigned int socket_id,
437 			__rte_unused const struct rte_eth_rxconf *rx_conf,
438 			struct rte_mempool *mp)
439 {
440 	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
441 	struct virtnet_rx *rxvq;
442 	int ret;
443 
444 	PMD_INIT_FUNC_TRACE();
445 	ret = virtio_dev_queue_setup(dev, VTNET_RQ, queue_idx, vtpci_queue_idx,
446 			nb_desc, socket_id, (void **)&rxvq);
447 	if (ret < 0) {
448 		PMD_INIT_LOG(ERR, "rvq initialization failed");
449 		return ret;
450 	}
451 
452 	/* Use the mempool provided by the caller for rx mbuf allocation */
453 	rxvq->mpool = mp;
454 
455 	dev->data->rx_queues[queue_idx] = rxvq;
456 
457 	virtio_rxq_vec_setup(rxvq);
458 
459 	return 0;
460 }
461 
462 void
463 virtio_dev_rx_queue_release(void *rxq)
464 {
465 	struct virtnet_rx *rxvq = rxq;
466 	struct virtqueue *vq;
467 	const struct rte_memzone *mz;
468 
469 	if (rxvq == NULL)
470 		return;
471 
472 	/*
473 	 * rxvq is freed when vq is freed. Since the memzone must be freed
474 	 * after the queue is deleted, save the mz pointer first.
475 	 */
476 	vq = rxvq->vq;
477 	mz = rxvq->mz;
478 
479 	virtio_dev_queue_release(vq);
480 	rte_memzone_free(mz);
481 }
482 
483 static void
484 virtio_update_rxtx_handler(struct rte_eth_dev *dev,
485 			   const struct rte_eth_txconf *tx_conf)
486 {
487 	uint8_t use_simple_rxtx = 0;
488 	struct virtio_hw *hw = dev->data->dev_private;
489 
490 #if defined RTE_ARCH_X86
491 	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE3))
492 		use_simple_rxtx = 1;
493 #elif defined RTE_ARCH_ARM64 || defined CONFIG_RTE_ARCH_ARM
494 	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON))
495 		use_simple_rxtx = 1;
496 #endif
497 	/* Use simple rx/tx func if single segment and no offloads */
498 	if (use_simple_rxtx &&
499 	    (tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS &&
500 	    !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
501 		PMD_INIT_LOG(INFO, "Using simple rx/tx path");
502 		dev->tx_pkt_burst = virtio_xmit_pkts_simple;
503 		dev->rx_pkt_burst = virtio_recv_pkts_vec;
504 		hw->use_simple_rxtx = use_simple_rxtx;
505 	}
506 }
507 
508 /*
509  * struct rte_eth_dev *dev: device to update
510  * uint16_t queue_idx: index into the device tx queue list
511  * uint16_t nb_desc: defaults to values read from config space
512  * unsigned int socket_id: socket on which to allocate the memzone
513  * const struct rte_eth_txconf *tx_conf: used to set up the tx engine
514  */
515 int
516 virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
517 			uint16_t queue_idx,
518 			uint16_t nb_desc,
519 			unsigned int socket_id,
520 			const struct rte_eth_txconf *tx_conf)
521 {
522 	uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
523 	struct virtnet_tx *txvq;
524 	struct virtqueue *vq;
525 	uint16_t tx_free_thresh;
526 	int ret;
527 
528 	PMD_INIT_FUNC_TRACE();
529 
530 	if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMS)
531 	    != ETH_TXQ_FLAGS_NOXSUMS) {
532 		PMD_INIT_LOG(ERR, "TX checksum offload not supported");
533 		return -EINVAL;
534 	}
535 
536 	virtio_update_rxtx_handler(dev, tx_conf);
537 
538 	ret = virtio_dev_queue_setup(dev, VTNET_TQ, queue_idx, vtpci_queue_idx,
539 			nb_desc, socket_id, (void **)&txvq);
540 	if (ret < 0) {
541 		PMD_INIT_LOG(ERR, "tvq initialization failed");
542 		return ret;
543 	}
544 	vq = txvq->vq;
545 
546 	tx_free_thresh = tx_conf->tx_free_thresh;
547 	if (tx_free_thresh == 0)
548 		tx_free_thresh =
549 			RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
550 
551 	if (tx_free_thresh >= (vq->vq_nentries - 3)) {
552 		RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
553 			"number of TX entries minus 3 (%u)."
554 			" (tx_free_thresh=%u port=%u queue=%u)\n",
555 			vq->vq_nentries - 3,
556 			tx_free_thresh, dev->data->port_id, queue_idx);
557 		return -EINVAL;
558 	}
559 
560 	vq->vq_free_thresh = tx_free_thresh;
561 
562 	dev->data->tx_queues[queue_idx] = txvq;
563 	return 0;
564 }
565 
566 void
567 virtio_dev_tx_queue_release(void *txq)
568 {
569 	struct virtnet_tx *txvq = txq;
570 	struct virtqueue *vq;
571 	const struct rte_memzone *mz;
572 	const struct rte_memzone *hdr_mz;
573 
574 	if (txvq == NULL)
575 		return;
576 
577 	/*
578 	 * txvq is freed when vq is freed. Since the memzones must be freed
579 	 * after the queue is deleted, save the mz pointers first.
580 	 */
581 	vq = txvq->vq;
582 	mz = txvq->mz;
583 	hdr_mz = txvq->virtio_net_hdr_mz;
584 
585 	virtio_dev_queue_release(vq);
586 	rte_memzone_free(mz);
587 	rte_memzone_free(hdr_mz);
588 }
589 
590 static void
591 virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
592 {
593 	int error;
594 	/*
595 	 * Requeue the discarded mbuf. This should always be
596 	 * successful since it was just dequeued.
597 	 */
598 	error = virtqueue_enqueue_recv_refill(vq, m);
599 	if (unlikely(error)) {
600 		RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf\n");
601 		rte_pktmbuf_free(m);
602 	}
603 }
604 
605 static void
606 virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
607 {
608 	uint32_t s = mbuf->pkt_len;
609 	struct ether_addr *ea;
610 
611 	if (s == 64) {
612 		stats->size_bins[1]++;
613 	} else if (s > 64 && s < 1024) {
614 		uint32_t bin;
615 
616 		/* count leading zeros to index the correct power-of-two bin */
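		/* e.g. s = 300: 32 - clz(300) - 5 = 32 - 23 - 5 = 4, so it
		 * lands in size_bins[4]; bins 2..5 thus cover 65..127,
		 * 128..255, 256..511 and 512..1023 bytes.
		 */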
617 		bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
618 		stats->size_bins[bin]++;
619 	} else {
620 		if (s < 64)
621 			stats->size_bins[0]++;
622 		else if (s < 1519)
623 			stats->size_bins[6]++;
624 		else if (s >= 1519)
625 			stats->size_bins[7]++;
626 	}
627 
628 	ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *);
629 	if (is_multicast_ether_addr(ea)) {
630 		if (is_broadcast_ether_addr(ea))
631 			stats->broadcast++;
632 		else
633 			stats->multicast++;
634 	}
635 }
636 
637 /* Parse the virtio-net header and optionally fill Rx offload info in the mbuf */
638 static int
639 virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
640 {
641 	struct rte_net_hdr_lens hdr_lens;
642 	uint32_t hdrlen, ptype;
643 	int l4_supported = 0;
644 
645 	/* nothing to do */
646 	if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
647 		return 0;
648 
649 	m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
650 
651 	ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
652 	m->packet_type = ptype;
653 	if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
654 	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
655 	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
656 		l4_supported = 1;
657 
658 	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
659 		hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
660 		if (hdr->csum_start <= hdrlen && l4_supported) {
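			/* The host asked us to complete the checksum, so the
			 * value currently in the packet is not finished; flag
			 * it as not computed rather than bad.
			 */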
661 			m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
662 		} else {
663 			/* Unknown proto or tunnel, do sw cksum. We can assume
664 			 * the cksum field is in the first segment since the
665 			 * buffers we provided to the host are large enough.
666 			 * In case of SCTP, this will be wrong since it's a CRC
667 			 * but there's nothing we can do.
668 			 */
669 			uint16_t csum, off;
670 
671 			rte_raw_cksum_mbuf(m, hdr->csum_start,
672 				rte_pktmbuf_pkt_len(m) - hdr->csum_start,
673 				&csum);
674 			if (likely(csum != 0xffff))
675 				csum = ~csum;
676 			off = hdr->csum_offset + hdr->csum_start;
677 			if (rte_pktmbuf_data_len(m) >= off + 1)
678 				*rte_pktmbuf_mtod_offset(m, uint16_t *,
679 					off) = csum;
680 		}
681 	} else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
682 		m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
683 	}
684 
685 	return 0;
686 }
687 
688 static inline int
689 rx_offload_enabled(struct virtio_hw *hw)
690 {
691 	return vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM);
692 }
693 
694 #define VIRTIO_MBUF_BURST_SZ 64
695 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
696 uint16_t
697 virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
698 {
699 	struct virtnet_rx *rxvq = rx_queue;
700 	struct virtqueue *vq = rxvq->vq;
701 	struct virtio_hw *hw;
702 	struct rte_mbuf *rxm, *new_mbuf;
703 	uint16_t nb_used, num, nb_rx;
704 	uint32_t len[VIRTIO_MBUF_BURST_SZ];
705 	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
706 	int error;
707 	uint32_t i, nb_enqueued;
708 	uint32_t hdr_size;
709 	int offload;
710 	struct virtio_net_hdr *hdr;
711 
712 	nb_used = VIRTQUEUE_NUSED(vq);
713 
714 	virtio_rmb();
715 
716 	num = (uint16_t)(likely(nb_used <= nb_pkts) ? nb_used : nb_pkts);
717 	num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ? num : VIRTIO_MBUF_BURST_SZ);
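	/* Trim the burst so that used ring consumption stops on a
	 * descriptor cache-line boundary.
	 */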
718 	if (likely(num > DESC_PER_CACHELINE))
719 		num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
720 
721 	num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
722 	PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
723 
724 	hw = vq->hw;
725 	nb_rx = 0;
726 	nb_enqueued = 0;
727 	hdr_size = hw->vtnet_hdr_size;
728 	offload = rx_offload_enabled(hw);
729 
730 	for (i = 0; i < num ; i++) {
731 		rxm = rcv_pkts[i];
732 
733 		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
734 
735 		if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
736 			PMD_RX_LOG(ERR, "Packet drop");
737 			nb_enqueued++;
738 			virtio_discard_rxbuf(vq, rxm);
739 			rxvq->stats.errors++;
740 			continue;
741 		}
742 
743 		rxm->port = rxvq->port_id;
744 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
745 		rxm->ol_flags = 0;
746 		rxm->vlan_tci = 0;
747 
748 		rxm->nb_segs = 1;
749 		rxm->next = NULL;
750 		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
751 		rxm->data_len = (uint16_t)(len[i] - hdr_size);
752 
753 		hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
754 			RTE_PKTMBUF_HEADROOM - hdr_size);
755 
756 		if (hw->vlan_strip)
757 			rte_vlan_strip(rxm);
758 
759 		if (offload && virtio_rx_offload(rxm, hdr) < 0) {
760 			virtio_discard_rxbuf(vq, rxm);
761 			rxvq->stats.errors++;
762 			continue;
763 		}
764 
765 		VIRTIO_DUMP_PACKET(rxm, rxm->data_len);
766 
767 		rx_pkts[nb_rx++] = rxm;
768 
769 		rxvq->stats.bytes += rx_pkts[nb_rx - 1]->pkt_len;
770 		virtio_update_packet_stats(&rxvq->stats, rxm);
771 	}
772 
773 	rxvq->stats.packets += nb_rx;
774 
775 	/* Allocate new mbufs for the used descriptors */
776 	error = ENOSPC;
777 	while (likely(!virtqueue_full(vq))) {
778 		new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
779 		if (unlikely(new_mbuf == NULL)) {
780 			struct rte_eth_dev *dev
781 				= &rte_eth_devices[rxvq->port_id];
782 			dev->data->rx_mbuf_alloc_failed++;
783 			break;
784 		}
785 		error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
786 		if (unlikely(error)) {
787 			rte_pktmbuf_free(new_mbuf);
788 			break;
789 		}
790 		nb_enqueued++;
791 	}
792 
793 	if (likely(nb_enqueued)) {
794 		vq_update_avail_idx(vq);
795 
796 		if (unlikely(virtqueue_kick_prepare(vq))) {
797 			virtqueue_notify(vq);
798 			PMD_RX_LOG(DEBUG, "Notified");
799 		}
800 	}
801 
802 	return nb_rx;
803 }
804 
805 uint16_t
806 virtio_recv_mergeable_pkts(void *rx_queue,
807 			struct rte_mbuf **rx_pkts,
808 			uint16_t nb_pkts)
809 {
810 	struct virtnet_rx *rxvq = rx_queue;
811 	struct virtqueue *vq = rxvq->vq;
812 	struct virtio_hw *hw;
813 	struct rte_mbuf *rxm, *new_mbuf;
814 	uint16_t nb_used, num, nb_rx;
815 	uint32_t len[VIRTIO_MBUF_BURST_SZ];
816 	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
817 	struct rte_mbuf *prev;
818 	int error;
819 	uint32_t i, nb_enqueued;
820 	uint32_t seg_num;
821 	uint16_t extra_idx;
822 	uint32_t seg_res;
823 	uint32_t hdr_size;
824 	int offload;
825 
826 	nb_used = VIRTQUEUE_NUSED(vq);
827 
828 	virtio_rmb();
829 
830 	PMD_RX_LOG(DEBUG, "used:%d", nb_used);
831 
832 	hw = vq->hw;
833 	nb_rx = 0;
834 	i = 0;
835 	nb_enqueued = 0;
836 	seg_num = 0;
837 	extra_idx = 0;
838 	seg_res = 0;
839 	hdr_size = hw->vtnet_hdr_size;
840 	offload = rx_offload_enabled(hw);
841 
842 	while (i < nb_used) {
843 		struct virtio_net_hdr_mrg_rxbuf *header;
844 
845 		if (nb_rx == nb_pkts)
846 			break;
847 
848 		num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, 1);
849 		if (num != 1)
850 			continue;
851 
852 		i++;
853 
854 		PMD_RX_LOG(DEBUG, "dequeue:%d", num);
855 		PMD_RX_LOG(DEBUG, "packet len:%d", len[0]);
856 
857 		rxm = rcv_pkts[0];
858 
859 		if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) {
860 			PMD_RX_LOG(ERR, "Packet drop");
861 			nb_enqueued++;
862 			virtio_discard_rxbuf(vq, rxm);
863 			rxvq->stats.errors++;
864 			continue;
865 		}
866 
867 		header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr +
868 			RTE_PKTMBUF_HEADROOM - hdr_size);
869 		seg_num = header->num_buffers;
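		/* With mergeable Rx buffers the host may spread one packet
		 * over several descriptors; num_buffers is the descriptor
		 * count used for this packet.
		 */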
870 
871 		if (seg_num == 0)
872 			seg_num = 1;
873 
874 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
875 		rxm->nb_segs = seg_num;
876 		rxm->next = NULL;
877 		rxm->ol_flags = 0;
878 		rxm->vlan_tci = 0;
879 		rxm->pkt_len = (uint32_t)(len[0] - hdr_size);
880 		rxm->data_len = (uint16_t)(len[0] - hdr_size);
881 
882 		rxm->port = rxvq->port_id;
883 		rx_pkts[nb_rx] = rxm;
884 		prev = rxm;
885 
886 		if (offload && virtio_rx_offload(rxm, &header->hdr) < 0) {
887 			virtio_discard_rxbuf(vq, rxm);
888 			rxvq->stats.errors++;
889 			continue;
890 		}
891 
892 		seg_res = seg_num - 1;
893 
894 		while (seg_res != 0) {
895 			/*
896 			 * Get extra segments for current uncompleted packet.
897 			 */
898 			uint16_t  rcv_cnt =
899 				RTE_MIN(seg_res, RTE_DIM(rcv_pkts));
900 			if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
901 				uint32_t rx_num =
902 					virtqueue_dequeue_burst_rx(vq,
903 					rcv_pkts, len, rcv_cnt);
904 				i += rx_num;
905 				rcv_cnt = rx_num;
906 			} else {
907 				PMD_RX_LOG(ERR,
908 					   "No enough segments for packet.");
909 				nb_enqueued++;
910 				virtio_discard_rxbuf(vq, rxm);
911 				rxvq->stats.errors++;
912 				break;
913 			}
914 
915 			extra_idx = 0;
916 
917 			while (extra_idx < rcv_cnt) {
918 				rxm = rcv_pkts[extra_idx];
919 
920 				rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
921 				rxm->next = NULL;
922 				rxm->pkt_len = (uint32_t)(len[extra_idx]);
923 				rxm->data_len = (uint16_t)(len[extra_idx]);
924 
925 				if (prev)
926 					prev->next = rxm;
927 
928 				prev = rxm;
929 				rx_pkts[nb_rx]->pkt_len += rxm->pkt_len;
930 				extra_idx++;
931 			}
932 			seg_res -= rcv_cnt;
933 		}
934 
935 		if (hw->vlan_strip)
936 			rte_vlan_strip(rx_pkts[nb_rx]);
937 
938 		VIRTIO_DUMP_PACKET(rx_pkts[nb_rx],
939 			rx_pkts[nb_rx]->data_len);
940 
941 		rxvq->stats.bytes += rx_pkts[nb_rx]->pkt_len;
942 		virtio_update_packet_stats(&rxvq->stats, rx_pkts[nb_rx]);
943 		nb_rx++;
944 	}
945 
946 	rxvq->stats.packets += nb_rx;
947 
948 	/* Allocate new mbufs for the used descriptors */
949 	error = ENOSPC;
950 	while (likely(!virtqueue_full(vq))) {
951 		new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
952 		if (unlikely(new_mbuf == NULL)) {
953 			struct rte_eth_dev *dev
954 				= &rte_eth_devices[rxvq->port_id];
955 			dev->data->rx_mbuf_alloc_failed++;
956 			break;
957 		}
958 		error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
959 		if (unlikely(error)) {
960 			rte_pktmbuf_free(new_mbuf);
961 			break;
962 		}
963 		nb_enqueued++;
964 	}
965 
966 	if (likely(nb_enqueued)) {
967 		vq_update_avail_idx(vq);
968 
969 		if (unlikely(virtqueue_kick_prepare(vq))) {
970 			virtqueue_notify(vq);
971 			PMD_RX_LOG(DEBUG, "Notified");
972 		}
973 	}
974 
975 	return nb_rx;
976 }
977 
978 uint16_t
979 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
980 {
981 	struct virtnet_tx *txvq = tx_queue;
982 	struct virtqueue *vq = txvq->vq;
983 	struct virtio_hw *hw = vq->hw;
984 	uint16_t hdr_size = hw->vtnet_hdr_size;
985 	uint16_t nb_used, nb_tx;
986 	int error;
987 
988 	if (unlikely(nb_pkts < 1))
989 		return nb_pkts;
990 
991 	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
992 	nb_used = VIRTQUEUE_NUSED(vq);
993 
994 	virtio_rmb();
995 	if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
996 		virtio_xmit_cleanup(vq, nb_used);
997 
998 	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
999 		struct rte_mbuf *txm = tx_pkts[nb_tx];
1000 		int can_push = 0, use_indirect = 0, slots, need;
1001 
1002 		/* Do VLAN tag insertion */
1003 		if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
1004 			error = rte_vlan_insert(&txm);
1005 			if (unlikely(error)) {
1006 				rte_pktmbuf_free(txm);
1007 				continue;
1008 			}
1009 		}
1010 
1011 		/* Optimize ring usage: push the header into the mbuf headroom if possible, else prefer indirect descriptors */
1012 		if (vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) &&
1013 		    rte_mbuf_refcnt_read(txm) == 1 &&
1014 		    RTE_MBUF_DIRECT(txm) &&
1015 		    txm->nb_segs == 1 &&
1016 		    rte_pktmbuf_headroom(txm) >= hdr_size &&
1017 		    rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
1018 				   __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
1019 			can_push = 1;
1020 		else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
1021 			 txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
1022 			use_indirect = 1;
1023 
1024 		/* How many main ring entries are needed for this Tx?
1025 		 * any_layout => number of segments
1026 		 * indirect   => 1
1027 		 * default    => number of segments + 1
1028 		 */
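		/* e.g. a 3-segment mbuf takes 1 slot with indirect
		 * descriptors, 3 slots when the header fits in the mbuf
		 * headroom (can_push) and 4 slots otherwise.
		 */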
1029 		slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
1030 		need = slots - vq->vq_free_cnt;
1031 
1032 		/* A positive value means more free vring descriptors are needed */
1033 		if (unlikely(need > 0)) {
1034 			nb_used = VIRTQUEUE_NUSED(vq);
1035 			virtio_rmb();
1036 			need = RTE_MIN(need, (int)nb_used);
1037 
1038 			virtio_xmit_cleanup(vq, need);
1039 			need = slots - vq->vq_free_cnt;
1040 			if (unlikely(need > 0)) {
1041 				PMD_TX_LOG(ERR,
1042 					   "No free tx descriptors to transmit");
1043 				break;
1044 			}
1045 		}
1046 
1047 		/* Enqueue Packet buffers */
1048 		virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect, can_push);
1049 
1050 		txvq->stats.bytes += txm->pkt_len;
1051 		virtio_update_packet_stats(&txvq->stats, txm);
1052 	}
1053 
1054 	txvq->stats.packets += nb_tx;
1055 
1056 	if (likely(nb_tx)) {
1057 		vq_update_avail_idx(vq);
1058 
1059 		if (unlikely(virtqueue_kick_prepare(vq))) {
1060 			virtqueue_notify(vq);
1061 			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
1062 		}
1063 	}
1064 
1065 	return nb_tx;
1066 }
1067