xref: /dpdk/drivers/net/virtio/virtio_rxtx.c (revision ceb1ccd5d50c1a89ba8bdd97cc199e7f07422b98)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include <stdint.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <errno.h>
39 
40 #include <rte_cycles.h>
41 #include <rte_memory.h>
42 #include <rte_memzone.h>
43 #include <rte_branch_prediction.h>
44 #include <rte_mempool.h>
45 #include <rte_malloc.h>
46 #include <rte_mbuf.h>
47 #include <rte_ether.h>
48 #include <rte_ethdev.h>
49 #include <rte_prefetch.h>
50 #include <rte_string_fns.h>
51 #include <rte_errno.h>
52 #include <rte_byteorder.h>
53 
54 #include "virtio_logs.h"
55 #include "virtio_ethdev.h"
56 #include "virtio_pci.h"
57 #include "virtqueue.h"
58 #include "virtio_rxtx.h"
59 
60 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
61 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
62 #else
63 #define VIRTIO_DUMP_PACKET(m, len) do { } while (0)
64 #endif
65 
66 
67 #define VIRTIO_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \
68 	ETH_TXQ_FLAGS_NOOFFLOADS)
69 
70 #ifdef RTE_MACHINE_CPUFLAG_SSSE3
71 static int use_simple_rxtx;
72 #endif
73 
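/*
 * Return the descriptor chain starting at desc_idx to the virtqueue free
 * list and credit the freed slots back to vq_free_cnt.
 */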
74 static void
75 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
76 {
77 	struct vring_desc *dp, *dp_tail;
78 	struct vq_desc_extra *dxp;
79 	uint16_t desc_idx_last = desc_idx;
80 
81 	dp  = &vq->vq_ring.desc[desc_idx];
82 	dxp = &vq->vq_descx[desc_idx];
83 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
84 	if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
85 		while (dp->flags & VRING_DESC_F_NEXT) {
86 			desc_idx_last = dp->next;
87 			dp = &vq->vq_ring.desc[dp->next];
88 		}
89 	}
90 	dxp->ndescs = 0;
91 
92 	/*
93 	 * We must append the existing free chain, if any, to the end of
94 	 * the newly freed chain. If the virtqueue was completely used,
95 	 * then head would be VQ_RING_DESC_CHAIN_END.
96 	 */
97 	if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
98 		vq->vq_desc_head_idx = desc_idx;
99 	} else {
100 		dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
101 		dp_tail->next = desc_idx;
102 	}
103 
104 	vq->vq_desc_tail_idx = desc_idx_last;
105 	dp->next = VQ_RING_DESC_CHAIN_END;
106 }
107 
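/*
 * Dequeue up to num used rx descriptors: the associated mbufs are returned
 * in rx_pkts and their buffer lengths in len. Returns how many were dequeued.
 */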
108 static uint16_t
109 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
110 			   uint32_t *len, uint16_t num)
111 {
112 	struct vring_used_elem *uep;
113 	struct rte_mbuf *cookie;
114 	uint16_t used_idx, desc_idx;
115 	uint16_t i;
116 
117 	/* The caller ensures num does not exceed the number of used entries. */
118 	for (i = 0; i < num ; i++) {
119 		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
120 		uep = &vq->vq_ring.used->ring[used_idx];
121 		desc_idx = (uint16_t) uep->id;
122 		len[i] = uep->len;
123 		cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
124 
125 		if (unlikely(cookie == NULL)) {
126 			PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u\n",
127 				vq->vq_used_cons_idx);
128 			break;
129 		}
130 
131 		rte_prefetch0(cookie);
132 		rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
133 		rx_pkts[i]  = cookie;
134 		vq->vq_used_cons_idx++;
135 		vq_ring_free_chain(vq, desc_idx);
136 		vq->vq_descx[desc_idx].cookie = NULL;
137 	}
138 
139 	return i;
140 }
141 
142 #ifndef DEFAULT_TX_FREE_THRESH
143 #define DEFAULT_TX_FREE_THRESH 32
144 #endif
145 
146 /* Cleanup from completed transmits. */
147 static void
148 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
149 {
150 	uint16_t i, used_idx, desc_idx;
151 	for (i = 0; i < num; i++) {
152 		struct vring_used_elem *uep;
153 		struct vq_desc_extra *dxp;
154 
155 		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
156 		uep = &vq->vq_ring.used->ring[used_idx];
157 
158 		desc_idx = (uint16_t) uep->id;
159 		dxp = &vq->vq_descx[desc_idx];
160 		vq->vq_used_cons_idx++;
161 		vq_ring_free_chain(vq, desc_idx);
162 
163 		if (dxp->cookie != NULL) {
164 			rte_pktmbuf_free(dxp->cookie);
165 			dxp->cookie = NULL;
166 		}
167 	}
168 }
169 
170 
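/*
 * Post a single mbuf as a receive buffer. The descriptor covers the mbuf
 * data room plus space for the virtio-net header in front of the packet.
 */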
171 static inline int
172 virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
173 {
174 	struct vq_desc_extra *dxp;
175 	struct virtio_hw *hw = vq->hw;
176 	struct vring_desc *start_dp;
177 	uint16_t needed = 1;
178 	uint16_t head_idx, idx;
179 
180 	if (unlikely(vq->vq_free_cnt == 0))
181 		return -ENOSPC;
182 	if (unlikely(vq->vq_free_cnt < needed))
183 		return -EMSGSIZE;
184 
185 	head_idx = vq->vq_desc_head_idx;
186 	if (unlikely(head_idx >= vq->vq_nentries))
187 		return -EFAULT;
188 
189 	idx = head_idx;
190 	dxp = &vq->vq_descx[idx];
191 	dxp->cookie = (void *)cookie;
192 	dxp->ndescs = needed;
193 
194 	start_dp = vq->vq_ring.desc;
195 	start_dp[idx].addr =
196 		(uint64_t)(cookie->buf_physaddr + RTE_PKTMBUF_HEADROOM
197 		- hw->vtnet_hdr_size);
198 	start_dp[idx].len =
199 		cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
200 	start_dp[idx].flags = VRING_DESC_F_WRITE;
201 	idx = start_dp[idx].next;
202 	vq->vq_desc_head_idx = idx;
203 	if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
204 		vq->vq_desc_tail_idx = idx;
205 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
206 	vq_update_avail_ring(vq, head_idx);
207 
208 	return 0;
209 }
210 
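/*
 * Enqueue one packet for transmission using whichever descriptor layout
 * the caller selected: header pushed into the mbuf headroom (can_push),
 * a single indirect descriptor (use_indirect), or a separate header
 * descriptor followed by one descriptor per data segment.
 */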
211 static inline void
212 virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie,
213 		       uint16_t needed, int use_indirect, int can_push)
214 {
215 	struct vq_desc_extra *dxp;
216 	struct vring_desc *start_dp;
217 	uint16_t seg_num = cookie->nb_segs;
218 	uint16_t head_idx, idx;
219 	uint16_t head_size = txvq->hw->vtnet_hdr_size;
220 	unsigned long offs;
221 
222 	head_idx = txvq->vq_desc_head_idx;
223 	idx = head_idx;
224 	dxp = &txvq->vq_descx[idx];
225 	dxp->cookie = (void *)cookie;
226 	dxp->ndescs = needed;
227 
228 	start_dp = txvq->vq_ring.desc;
229 
230 	if (can_push) {
231 		/* prepend a zeroed transmit header (no offloads) */
232 		void *hdr = rte_pktmbuf_prepend(cookie, head_size);
233 
234 		memset(hdr, 0, head_size);
235 	} else if (use_indirect) {
236 		/* set up the tx ring slot to point to the indirect
237 		 * descriptor list stored in the reserved region.
238 		 *
239 		 * The first slot in the indirect ring is already preset
240 		 * to point to the header in the reserved region.
241 		 */
242 		struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
243 
244 		offs = idx * sizeof(struct virtio_tx_region)
245 			+ offsetof(struct virtio_tx_region, tx_indir);
246 
247 		start_dp[idx].addr  = txvq->virtio_net_hdr_mem + offs;
248 		start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
249 		start_dp[idx].flags = VRING_DESC_F_INDIRECT;
250 
251 		/* the loop below fills in the rest of the indirect descriptors */
252 		start_dp = txr[idx].tx_indir;
253 		idx = 1;
254 	} else {
255 		/* set up the first tx ring slot to point to the header
256 		 * stored in the reserved region.
257 		 */
258 		offs = idx * sizeof(struct virtio_tx_region)
259 			+ offsetof(struct virtio_tx_region, tx_hdr);
260 
261 		start_dp[idx].addr  = txvq->virtio_net_hdr_mem + offs;
262 		start_dp[idx].len   = txvq->hw->vtnet_hdr_size;
263 		start_dp[idx].flags = VRING_DESC_F_NEXT;
264 		idx = start_dp[idx].next;
265 	}
266 
267 	do {
268 		start_dp[idx].addr  = rte_mbuf_data_dma_addr(cookie);
269 		start_dp[idx].len   = cookie->data_len;
270 		start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
271 		idx = start_dp[idx].next;
272 	} while ((cookie = cookie->next) != NULL);
273 
274 	start_dp[idx].flags &= ~VRING_DESC_F_NEXT;
275 
276 	if (use_indirect)
277 		idx = txvq->vq_ring.desc[head_idx].next;
278 
279 	txvq->vq_desc_head_idx = idx;
280 	if (txvq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
281 		txvq->vq_desc_tail_idx = idx;
282 	txvq->vq_free_cnt = (uint16_t)(txvq->vq_free_cnt - needed);
283 	vq_update_avail_ring(txvq, head_idx);
284 }
285 
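/* Allocate a raw mbuf from the rx mempool; packet fields are not reset. */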
286 static inline struct rte_mbuf *
287 rte_rxmbuf_alloc(struct rte_mempool *mp)
288 {
289 	struct rte_mbuf *m;
290 
291 	m = __rte_mbuf_raw_alloc(mp);
292 	__rte_mbuf_sanity_check_raw(m, 0);
293 
294 	return m;
295 }
296 
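/*
 * (Re)initialize the vring of a queue and, for rx queues, fill it with
 * newly allocated mbufs; for tx queues on the simple path, pre-link the
 * header descriptors.
 */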
297 static void
298 virtio_dev_vring_start(struct virtqueue *vq, int queue_type)
299 {
300 	struct rte_mbuf *m;
301 	int i, nbufs, error, size = vq->vq_nentries;
302 	struct vring *vr = &vq->vq_ring;
303 	uint8_t *ring_mem = vq->vq_ring_virt_mem;
304 
305 	PMD_INIT_FUNC_TRACE();
306 
307 	/*
308 	 * Reinitialise since virtio port might have been stopped and restarted
309 	 */
310 	memset(vq->vq_ring_virt_mem, 0, vq->vq_ring_size);
311 	vring_init(vr, size, ring_mem, VIRTIO_PCI_VRING_ALIGN);
312 	vq->vq_used_cons_idx = 0;
313 	vq->vq_desc_head_idx = 0;
314 	vq->vq_avail_idx = 0;
315 	vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1);
316 	vq->vq_free_cnt = vq->vq_nentries;
317 	memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);
318 
319 	vring_desc_init(vr->desc, size);
320 
321 	/*
322 	 * Disable the device (host) from interrupting the guest
323 	 */
324 	virtqueue_disable_intr(vq);
325 
326 	/* Only rx virtqueue needs mbufs to be allocated at initialization */
327 	if (queue_type == VTNET_RQ) {
328 		if (vq->mpool == NULL)
329 			rte_exit(EXIT_FAILURE,
330 			"Cannot allocate initial mbufs for rx virtqueue");
331 
332 		/* Allocate blank mbufs for each rx descriptor */
333 		nbufs = 0;
334 		error = ENOSPC;
335 
336 #ifdef RTE_MACHINE_CPUFLAG_SSSE3
337 		if (use_simple_rxtx)
338 			for (i = 0; i < vq->vq_nentries; i++) {
339 				vq->vq_ring.avail->ring[i] = i;
340 				vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE;
341 			}
342 #endif
343 		memset(&vq->fake_mbuf, 0, sizeof(vq->fake_mbuf));
344 		for (i = 0; i < RTE_PMD_VIRTIO_RX_MAX_BURST; i++)
345 			vq->sw_ring[vq->vq_nentries + i] = &vq->fake_mbuf;
346 
347 		while (!virtqueue_full(vq)) {
348 			m = rte_rxmbuf_alloc(vq->mpool);
349 			if (m == NULL)
350 				break;
351 
352 			/******************************************
353 			*         Enqueue allocated buffers        *
354 			*******************************************/
355 #ifdef RTE_MACHINE_CPUFLAG_SSSE3
356 			if (use_simple_rxtx)
357 				error = virtqueue_enqueue_recv_refill_simple(vq, m);
358 			else
359 #endif
360 				error = virtqueue_enqueue_recv_refill(vq, m);
361 			if (error) {
362 				rte_pktmbuf_free(m);
363 				break;
364 			}
365 			nbufs++;
366 		}
367 
368 		vq_update_avail_idx(vq);
369 
370 		PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
371 	} else if (queue_type == VTNET_TQ) {
372 #ifdef RTE_MACHINE_CPUFLAG_SSSE3
373 		if (use_simple_rxtx) {
374 			int mid_idx  = vq->vq_nentries >> 1;
375 			for (i = 0; i < mid_idx; i++) {
376 				vq->vq_ring.avail->ring[i] = i + mid_idx;
377 				vq->vq_ring.desc[i + mid_idx].next = i;
378 				vq->vq_ring.desc[i + mid_idx].addr =
379 					vq->virtio_net_hdr_mem +
380 						i * vq->hw->vtnet_hdr_size;
381 				vq->vq_ring.desc[i + mid_idx].len =
382 					vq->hw->vtnet_hdr_size;
383 				vq->vq_ring.desc[i + mid_idx].flags =
384 					VRING_DESC_F_NEXT;
385 				vq->vq_ring.desc[i].flags = 0;
386 			}
387 			for (i = mid_idx; i < vq->vq_nentries; i++)
388 				vq->vq_ring.avail->ring[i] = i;
389 		}
390 #endif
391 	}
392 }
393 
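/* Start the control virtqueue, if the device provides one. */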
394 void
395 virtio_dev_cq_start(struct rte_eth_dev *dev)
396 {
397 	struct virtio_hw *hw = dev->data->dev_private;
398 
399 	if (hw->cvq) {
400 		virtio_dev_vring_start(hw->cvq, VTNET_CQ);
401 		VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq);
402 	}
403 }
404 
405 void
406 virtio_dev_rxtx_start(struct rte_eth_dev *dev)
407 {
408 	/*
409 	 * Start receive and transmit vrings
410 	 * -	Setup vring structure for all queues
411 	 * -	Initialize descriptor for the rx vring
412 	 * -	Allocate blank mbufs for each rx descriptor
413 	 *
414 	 */
415 	int i;
416 
417 	PMD_INIT_FUNC_TRACE();
418 
419 	/* Start rx vring. */
420 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
421 		virtio_dev_vring_start(dev->data->rx_queues[i], VTNET_RQ);
422 		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]);
423 	}
424 
425 	/* Start tx vring. */
426 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
427 		virtio_dev_vring_start(dev->data->tx_queues[i], VTNET_TQ);
428 		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]);
429 	}
430 }
431 
432 int
433 virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
434 			uint16_t queue_idx,
435 			uint16_t nb_desc,
436 			unsigned int socket_id,
437 			__rte_unused const struct rte_eth_rxconf *rx_conf,
438 			struct rte_mempool *mp)
439 {
440 	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
441 	struct virtqueue *vq;
442 	int ret;
443 
444 	PMD_INIT_FUNC_TRACE();
445 	ret = virtio_dev_queue_setup(dev, VTNET_RQ, queue_idx, vtpci_queue_idx,
446 			nb_desc, socket_id, &vq);
447 	if (ret < 0) {
448 		PMD_INIT_LOG(ERR, "rvq initialization failed");
449 		return ret;
450 	}
451 
452 	/* Use the mempool supplied by the caller for rx mbuf allocation */
453 	vq->mpool = mp;
454 
455 	dev->data->rx_queues[queue_idx] = vq;
456 
457 #ifdef RTE_MACHINE_CPUFLAG_SSSE3
458 	virtio_rxq_vec_setup(vq);
459 #endif
460 
461 	return 0;
462 }
463 
464 void
465 virtio_dev_rx_queue_release(void *rxq)
466 {
467 	virtio_dev_queue_release(rxq);
468 }
469 
470 /*
471  * struct rte_eth_dev *dev: Used to update dev
472  * uint16_t queue_idx: Used as an index into the dev txq list
473  * uint16_t nb_desc: Defaults to values read from config space
474  * unsigned int socket_id: Used to allocate memzone
475  * const struct rte_eth_txconf *tx_conf: Used to set up the tx engine
476  */
477 int
478 virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
479 			uint16_t queue_idx,
480 			uint16_t nb_desc,
481 			unsigned int socket_id,
482 			const struct rte_eth_txconf *tx_conf)
483 {
484 	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
485 
486 #ifdef RTE_MACHINE_CPUFLAG_SSSE3
487 	struct virtio_hw *hw = dev->data->dev_private;
488 #endif
489 	struct virtqueue *vq;
490 	uint16_t tx_free_thresh;
491 	int ret;
492 
493 	PMD_INIT_FUNC_TRACE();
494 
495 	if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMS)
496 	    != ETH_TXQ_FLAGS_NOXSUMS) {
497 		PMD_INIT_LOG(ERR, "TX checksum offload not supported\n");
498 		return -EINVAL;
499 	}
500 
501 #ifdef RTE_MACHINE_CPUFLAG_SSSE3
502 	/* Use simple rx/tx func if single segment and no offloads */
503 	if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS &&
504 	     !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
505 		PMD_INIT_LOG(INFO, "Using simple rx/tx path");
506 		dev->tx_pkt_burst = virtio_xmit_pkts_simple;
507 		dev->rx_pkt_burst = virtio_recv_pkts_vec;
508 		use_simple_rxtx = 1;
509 	}
510 #endif
511 
512 	ret = virtio_dev_queue_setup(dev, VTNET_TQ, queue_idx, vtpci_queue_idx,
513 			nb_desc, socket_id, &vq);
514 	if (ret < 0) {
515 		PMD_INIT_LOG(ERR, "rvq initialization failed");
516 		PMD_INIT_LOG(ERR, "tvq initialization failed");
517 	}
518 
519 	tx_free_thresh = tx_conf->tx_free_thresh;
520 	if (tx_free_thresh == 0)
521 		tx_free_thresh =
522 			RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
523 
524 	if (tx_free_thresh >= (vq->vq_nentries - 3)) {
525 		RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
526 			"number of TX entries minus 3 (%u)."
527 			" (tx_free_thresh=%u port=%u queue=%u)\n",
528 			vq->vq_nentries - 3,
529 			tx_free_thresh, dev->data->port_id, queue_idx);
530 		return -EINVAL;
531 	}
532 
533 	vq->vq_free_thresh = tx_free_thresh;
534 
535 	dev->data->tx_queues[queue_idx] = vq;
536 	return 0;
537 }
538 
539 void
540 virtio_dev_tx_queue_release(void *txq)
541 {
542 	virtio_dev_queue_release(txq);
543 }
544 
545 static void
546 virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
547 {
548 	int error;
549 	/*
550 	 * Requeue the discarded mbuf. This should always be
551 	 * successful since it was just dequeued.
552 	 */
553 	error = virtqueue_enqueue_recv_refill(vq, m);
554 	if (unlikely(error)) {
555 		RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf\n");
556 		rte_pktmbuf_free(m);
557 	}
558 }
559 
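/* Update the per-queue packet-size histogram and multicast/broadcast counters. */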
560 static void
561 virtio_update_packet_stats(struct virtqueue *vq, struct rte_mbuf *mbuf)
562 {
563 	uint32_t s = mbuf->pkt_len;
564 	struct ether_addr *ea;
565 
566 	if (s == 64) {
567 		vq->size_bins[1]++;
568 	} else if (s > 64 && s < 1024) {
569 		uint32_t bin;
570 
571 		/* count leading zeros to index the correct power-of-two bin */
572 		bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
573 		vq->size_bins[bin]++;
574 	} else {
575 		if (s < 64)
576 			vq->size_bins[0]++;
577 		else if (s < 1519)
578 			vq->size_bins[6]++;
579 		else if (s >= 1519)
580 			vq->size_bins[7]++;
581 	}
582 
583 	ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *);
584 	if (is_multicast_ether_addr(ea)) {
585 		if (is_broadcast_ether_addr(ea))
586 			vq->broadcast++;
587 		else
588 			vq->multicast++;
589 	}
590 }
591 
592 #define VIRTIO_MBUF_BURST_SZ 64
593 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
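/*
 * Receive burst on the non-mergeable path: drain used descriptors, set up
 * the mbufs (lengths exclude the virtio-net header), then refill the ring
 * and notify the host if it expects a kick.
 */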
594 uint16_t
595 virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
596 {
597 	struct virtqueue *rxvq = rx_queue;
598 	struct virtio_hw *hw;
599 	struct rte_mbuf *rxm, *new_mbuf;
600 	uint16_t nb_used, num, nb_rx;
601 	uint32_t len[VIRTIO_MBUF_BURST_SZ];
602 	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
603 	int error;
604 	uint32_t i, nb_enqueued;
605 	uint32_t hdr_size;
606 
607 	nb_used = VIRTQUEUE_NUSED(rxvq);
608 
609 	virtio_rmb();
610 
611 	num = (uint16_t)(likely(nb_used <= nb_pkts) ? nb_used : nb_pkts);
612 	num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ? num : VIRTIO_MBUF_BURST_SZ);
613 	if (likely(num > DESC_PER_CACHELINE))
614 		num = num - ((rxvq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
615 
616 	num = virtqueue_dequeue_burst_rx(rxvq, rcv_pkts, len, num);
617 	PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
618 
619 	hw = rxvq->hw;
620 	nb_rx = 0;
621 	nb_enqueued = 0;
622 	hdr_size = hw->vtnet_hdr_size;
623 
624 	for (i = 0; i < num ; i++) {
625 		rxm = rcv_pkts[i];
626 
627 		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
628 
629 		if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
630 			PMD_RX_LOG(ERR, "Packet drop");
631 			nb_enqueued++;
632 			virtio_discard_rxbuf(rxvq, rxm);
633 			rxvq->errors++;
634 			continue;
635 		}
636 
637 		rxm->port = rxvq->port_id;
638 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
639 		rxm->ol_flags = 0;
640 		rxm->vlan_tci = 0;
641 
642 		rxm->nb_segs = 1;
643 		rxm->next = NULL;
644 		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
645 		rxm->data_len = (uint16_t)(len[i] - hdr_size);
646 
647 		if (hw->vlan_strip)
648 			rte_vlan_strip(rxm);
649 
650 		VIRTIO_DUMP_PACKET(rxm, rxm->data_len);
651 
652 		rx_pkts[nb_rx++] = rxm;
653 
654 		rxvq->bytes += rx_pkts[nb_rx - 1]->pkt_len;
655 		virtio_update_packet_stats(rxvq, rxm);
656 	}
657 
658 	rxvq->packets += nb_rx;
659 
660 	/* Allocate new mbuf for the used descriptor */
661 	error = ENOSPC;
662 	while (likely(!virtqueue_full(rxvq))) {
663 		new_mbuf = rte_rxmbuf_alloc(rxvq->mpool);
664 		if (unlikely(new_mbuf == NULL)) {
665 			struct rte_eth_dev *dev
666 				= &rte_eth_devices[rxvq->port_id];
667 			dev->data->rx_mbuf_alloc_failed++;
668 			break;
669 		}
670 		error = virtqueue_enqueue_recv_refill(rxvq, new_mbuf);
671 		if (unlikely(error)) {
672 			rte_pktmbuf_free(new_mbuf);
673 			break;
674 		}
675 		nb_enqueued++;
676 	}
677 
678 	if (likely(nb_enqueued)) {
679 		vq_update_avail_idx(rxvq);
680 
681 		if (unlikely(virtqueue_kick_prepare(rxvq))) {
682 			virtqueue_notify(rxvq);
683 			PMD_RX_LOG(DEBUG, "Notified");
684 		}
685 	}
686 
687 	return nb_rx;
688 }
689 
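/*
 * Receive burst on the mergeable-buffer path: a packet may span several
 * descriptors (num_buffers in the virtio-net header), which are chained
 * into one multi-segment mbuf.
 */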
690 uint16_t
691 virtio_recv_mergeable_pkts(void *rx_queue,
692 			struct rte_mbuf **rx_pkts,
693 			uint16_t nb_pkts)
694 {
695 	struct virtqueue *rxvq = rx_queue;
696 	struct virtio_hw *hw;
697 	struct rte_mbuf *rxm, *new_mbuf;
698 	uint16_t nb_used, num, nb_rx;
699 	uint32_t len[VIRTIO_MBUF_BURST_SZ];
700 	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
701 	struct rte_mbuf *prev;
702 	int error;
703 	uint32_t i, nb_enqueued;
704 	uint32_t seg_num;
705 	uint16_t extra_idx;
706 	uint32_t seg_res;
707 	uint32_t hdr_size;
708 
709 	nb_used = VIRTQUEUE_NUSED(rxvq);
710 
711 	virtio_rmb();
712 
713 	PMD_RX_LOG(DEBUG, "used:%d\n", nb_used);
714 
715 	hw = rxvq->hw;
716 	nb_rx = 0;
717 	i = 0;
718 	nb_enqueued = 0;
719 	seg_num = 0;
720 	extra_idx = 0;
721 	seg_res = 0;
722 	hdr_size = hw->vtnet_hdr_size;
723 
724 	while (i < nb_used) {
725 		struct virtio_net_hdr_mrg_rxbuf *header;
726 
727 		if (nb_rx == nb_pkts)
728 			break;
729 
730 		num = virtqueue_dequeue_burst_rx(rxvq, rcv_pkts, len, 1);
731 		if (num != 1)
732 			continue;
733 
734 		i++;
735 
736 		PMD_RX_LOG(DEBUG, "dequeue:%d\n", num);
737 		PMD_RX_LOG(DEBUG, "packet len:%d\n", len[0]);
738 
739 		rxm = rcv_pkts[0];
740 
741 		if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) {
742 			PMD_RX_LOG(ERR, "Packet drop\n");
743 			nb_enqueued++;
744 			virtio_discard_rxbuf(rxvq, rxm);
745 			rxvq->errors++;
746 			continue;
747 		}
748 
749 		header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr +
750 			RTE_PKTMBUF_HEADROOM - hdr_size);
751 		seg_num = header->num_buffers;
752 
753 		if (seg_num == 0)
754 			seg_num = 1;
755 
756 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
757 		rxm->nb_segs = seg_num;
758 		rxm->next = NULL;
759 		rxm->ol_flags = 0;
760 		rxm->vlan_tci = 0;
761 		rxm->pkt_len = (uint32_t)(len[0] - hdr_size);
762 		rxm->data_len = (uint16_t)(len[0] - hdr_size);
763 
764 		rxm->port = rxvq->port_id;
765 		rx_pkts[nb_rx] = rxm;
766 		prev = rxm;
767 
768 		seg_res = seg_num - 1;
769 
770 		while (seg_res != 0) {
771 			/*
772 			 * Get extra segments for the current, still incomplete packet.
773 			 */
774 			uint16_t  rcv_cnt =
775 				RTE_MIN(seg_res, RTE_DIM(rcv_pkts));
776 			if (likely(VIRTQUEUE_NUSED(rxvq) >= rcv_cnt)) {
777 				uint32_t rx_num =
778 					virtqueue_dequeue_burst_rx(rxvq,
779 					rcv_pkts, len, rcv_cnt);
780 				i += rx_num;
781 				rcv_cnt = rx_num;
782 			} else {
783 				PMD_RX_LOG(ERR,
784 					"Not enough segments for the packet.");
785 				nb_enqueued++;
786 				virtio_discard_rxbuf(rxvq, rxm);
787 				rxvq->errors++;
788 				break;
789 			}
790 
791 			extra_idx = 0;
792 
793 			while (extra_idx < rcv_cnt) {
794 				rxm = rcv_pkts[extra_idx];
795 
796 				rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
797 				rxm->next = NULL;
798 				rxm->pkt_len = (uint32_t)(len[extra_idx]);
799 				rxm->data_len = (uint16_t)(len[extra_idx]);
800 
801 				if (prev)
802 					prev->next = rxm;
803 
804 				prev = rxm;
805 				rx_pkts[nb_rx]->pkt_len += rxm->pkt_len;
806 				extra_idx++;
807 			}
808 			seg_res -= rcv_cnt;
809 		}
810 
811 		if (hw->vlan_strip)
812 			rte_vlan_strip(rx_pkts[nb_rx]);
813 
814 		VIRTIO_DUMP_PACKET(rx_pkts[nb_rx],
815 			rx_pkts[nb_rx]->data_len);
816 
817 		rxvq->bytes += rx_pkts[nb_rx]->pkt_len;
818 		virtio_update_packet_stats(rxvq, rx_pkts[nb_rx]);
819 		nb_rx++;
820 	}
821 
822 	rxvq->packets += nb_rx;
823 
824 	/* Allocate new mbuf for the used descriptor */
825 	error = ENOSPC;
826 	while (likely(!virtqueue_full(rxvq))) {
827 		new_mbuf = rte_rxmbuf_alloc(rxvq->mpool);
828 		if (unlikely(new_mbuf == NULL)) {
829 			struct rte_eth_dev *dev
830 				= &rte_eth_devices[rxvq->port_id];
831 			dev->data->rx_mbuf_alloc_failed++;
832 			break;
833 		}
834 		error = virtqueue_enqueue_recv_refill(rxvq, new_mbuf);
835 		if (unlikely(error)) {
836 			rte_pktmbuf_free(new_mbuf);
837 			break;
838 		}
839 		nb_enqueued++;
840 	}
841 
842 	if (likely(nb_enqueued)) {
843 		vq_update_avail_idx(rxvq);
844 
845 		if (unlikely(virtqueue_kick_prepare(rxvq))) {
846 			virtqueue_notify(rxvq);
847 			PMD_RX_LOG(DEBUG, "Notified");
848 		}
849 	}
850 
851 	return nb_rx;
852 }
853 
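/*
 * Transmit burst: reclaim completed descriptors when the queue runs low,
 * enqueue each packet with the cheapest available layout and kick the
 * host if it expects a notification.
 */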
854 uint16_t
855 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
856 {
857 	struct virtqueue *txvq = tx_queue;
858 	struct virtio_hw *hw = txvq->hw;
859 	uint16_t hdr_size = hw->vtnet_hdr_size;
860 	uint16_t nb_used, nb_tx;
861 	int error;
862 
863 	if (unlikely(nb_pkts < 1))
864 		return nb_pkts;
865 
866 	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
867 	nb_used = VIRTQUEUE_NUSED(txvq);
868 
869 	virtio_rmb();
870 	if (likely(nb_used > txvq->vq_nentries - txvq->vq_free_thresh))
871 		virtio_xmit_cleanup(txvq, nb_used);
872 
873 	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
874 		struct rte_mbuf *txm = tx_pkts[nb_tx];
875 		int can_push = 0, use_indirect = 0, slots, need;
876 
877 		/* Do VLAN tag insertion */
878 		if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
879 			error = rte_vlan_insert(&txm);
880 			if (unlikely(error)) {
881 				rte_pktmbuf_free(txm);
882 				continue;
883 			}
884 		}
885 
886 		/* optimize ring usage */
887 		if (vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) &&
888 		    rte_mbuf_refcnt_read(txm) == 1 &&
889 		    txm->nb_segs == 1 &&
890 		    rte_pktmbuf_headroom(txm) >= hdr_size &&
891 		    rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
892 				   __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
893 			can_push = 1;
894 		else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
895 			 txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
896 			use_indirect = 1;
897 
898 		/* How many main ring entries are needed for this Tx?
899 		 * any_layout => number of segments
900 		 * indirect   => 1
901 		 * default    => number of segments + 1
902 		 */
903 		slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
904 		need = slots - txvq->vq_free_cnt;
905 
906 		/* A positive value indicates that free vring descriptors are needed */
907 		if (unlikely(need > 0)) {
908 			nb_used = VIRTQUEUE_NUSED(txvq);
909 			virtio_rmb();
910 			need = RTE_MIN(need, (int)nb_used);
911 
912 			virtio_xmit_cleanup(txvq, need);
913 			need = slots - txvq->vq_free_cnt;
914 			if (unlikely(need > 0)) {
915 				PMD_TX_LOG(ERR,
916 					   "No free tx descriptors to transmit");
917 				break;
918 			}
919 		}
920 
921 		/* Enqueue Packet buffers */
922 		virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect, can_push);
923 
924 		txvq->bytes += txm->pkt_len;
925 		virtio_update_packet_stats(txvq, txm);
926 	}
927 
928 	txvq->packets += nb_tx;
929 
930 	if (likely(nb_tx)) {
931 		vq_update_avail_idx(txvq);
932 
933 		if (unlikely(virtqueue_kick_prepare(txvq))) {
934 			virtqueue_notify(txvq);
935 			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
936 		}
937 	}
938 
939 	return nb_tx;
940 }
941