/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#include <rte_cycles.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_byteorder.h>

#include "virtio_logs.h"
#include "virtio_ethdev.h"
#include "virtio_pci.h"
#include "virtqueue.h"
#include "virtio_rxtx.h"

#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
#define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
#else
#define VIRTIO_DUMP_PACKET(m, len) do { } while (0)
#endif


#define VIRTIO_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \
	ETH_TXQ_FLAGS_NOOFFLOADS)

static int use_simple_rxtx;

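/*
 * Return the descriptor chain starting at desc_idx to the virtqueue's
 * free list and credit the freed descriptors back to vq_free_cnt.
 */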
static void
vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
{
	struct vring_desc *dp, *dp_tail;
	struct vq_desc_extra *dxp;
	uint16_t desc_idx_last = desc_idx;

	dp  = &vq->vq_ring.desc[desc_idx];
	dxp = &vq->vq_descx[desc_idx];
	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
	if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
		while (dp->flags & VRING_DESC_F_NEXT) {
			desc_idx_last = dp->next;
			dp = &vq->vq_ring.desc[dp->next];
		}
	}
	dxp->ndescs = 0;

	/*
	 * Append the newly freed chain to the end of the existing free
	 * list. If the virtqueue was completely used, the free list is
	 * empty and vq_desc_tail_idx is VQ_RING_DESC_CHAIN_END, so the
	 * freed chain simply becomes the new free list.
	 */
	if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
		vq->vq_desc_head_idx = desc_idx;
	} else {
		dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
		dp_tail->next = desc_idx;
	}

	vq->vq_desc_tail_idx = desc_idx_last;
	dp->next = VQ_RING_DESC_CHAIN_END;
}

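/*
 * Dequeue up to 'num' received buffers from the used ring. The mbufs are
 * returned in rx_pkts, their lengths in len, and the freed descriptor
 * chains are put back on the free list. Returns the number dequeued.
 */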
static uint16_t
virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
			   uint32_t *len, uint16_t num)
{
	struct vring_used_elem *uep;
	struct rte_mbuf *cookie;
	uint16_t used_idx, desc_idx;
	uint16_t i;

	/* The caller guarantees that num does not exceed the used count. */
	for (i = 0; i < num; i++) {
		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
		uep = &vq->vq_ring.used->ring[used_idx];
		desc_idx = (uint16_t) uep->id;
		len[i] = uep->len;
		cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;

		if (unlikely(cookie == NULL)) {
			PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
				vq->vq_used_cons_idx);
			break;
		}

		rte_prefetch0(cookie);
		rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
		rx_pkts[i]  = cookie;
		vq->vq_used_cons_idx++;
		vq_ring_free_chain(vq, desc_idx);
		vq->vq_descx[desc_idx].cookie = NULL;
	}

	return i;
}

#ifndef DEFAULT_TX_FREE_THRESH
#define DEFAULT_TX_FREE_THRESH 32
#endif

/* Cleanup from completed transmits. */
static void
virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
{
	uint16_t i, used_idx, desc_idx;

	for (i = 0; i < num; i++) {
		struct vring_used_elem *uep;
		struct vq_desc_extra *dxp;

		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
		uep = &vq->vq_ring.used->ring[used_idx];

		desc_idx = (uint16_t) uep->id;
		dxp = &vq->vq_descx[desc_idx];
		vq->vq_used_cons_idx++;
		vq_ring_free_chain(vq, desc_idx);

		if (dxp->cookie != NULL) {
			rte_pktmbuf_free(dxp->cookie);
			dxp->cookie = NULL;
		}
	}
}

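/*
 * Post a single mbuf as a receive buffer. The descriptor covers the mbuf
 * data area extended in front by room for the virtio-net header, is marked
 * device-writable, and is then exposed on the avail ring.
 */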
static inline int
virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
{
	struct vq_desc_extra *dxp;
	struct virtio_hw *hw = vq->hw;
	struct vring_desc *start_dp;
	uint16_t needed = 1;
	uint16_t head_idx, idx;

	if (unlikely(vq->vq_free_cnt == 0))
		return -ENOSPC;
	if (unlikely(vq->vq_free_cnt < needed))
		return -EMSGSIZE;

	head_idx = vq->vq_desc_head_idx;
	if (unlikely(head_idx >= vq->vq_nentries))
		return -EFAULT;

	idx = head_idx;
	dxp = &vq->vq_descx[idx];
	dxp->cookie = (void *)cookie;
	dxp->ndescs = needed;

	start_dp = vq->vq_ring.desc;
	start_dp[idx].addr =
		(uint64_t)(cookie->buf_physaddr + RTE_PKTMBUF_HEADROOM
		- hw->vtnet_hdr_size);
	start_dp[idx].len =
		cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
	start_dp[idx].flags = VRING_DESC_F_WRITE;
	idx = start_dp[idx].next;
	vq->vq_desc_head_idx = idx;
	if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
		vq->vq_desc_tail_idx = idx;
	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
	vq_update_avail_ring(vq, head_idx);

	return 0;
}

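/*
 * Enqueue a packet for transmission. The descriptor chain starts with the
 * virtio-net header (taken from the per-queue header memory at
 * virtio_net_hdr_mem), followed by one descriptor per mbuf segment.
 */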
static int
virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie)
{
	struct vq_desc_extra *dxp;
	struct vring_desc *start_dp;
	uint16_t seg_num = cookie->nb_segs;
	uint16_t needed = 1 + seg_num;
	uint16_t head_idx, idx;
	size_t head_size = txvq->hw->vtnet_hdr_size;

	if (unlikely(txvq->vq_free_cnt == 0))
		return -ENOSPC;
	if (unlikely(txvq->vq_free_cnt < needed))
		return -EMSGSIZE;
	head_idx = txvq->vq_desc_head_idx;
	if (unlikely(head_idx >= txvq->vq_nentries))
		return -EFAULT;

	idx = head_idx;
	dxp = &txvq->vq_descx[idx];
	dxp->cookie = (void *)cookie;
	dxp->ndescs = needed;

	start_dp = txvq->vq_ring.desc;
	start_dp[idx].addr =
		txvq->virtio_net_hdr_mem + idx * head_size;
	start_dp[idx].len = head_size;
	start_dp[idx].flags = VRING_DESC_F_NEXT;

	for (; ((seg_num > 0) && (cookie != NULL)); seg_num--) {
		idx = start_dp[idx].next;
		start_dp[idx].addr  = RTE_MBUF_DATA_DMA_ADDR(cookie);
		start_dp[idx].len   = cookie->data_len;
		start_dp[idx].flags = VRING_DESC_F_NEXT;
		cookie = cookie->next;
	}

	start_dp[idx].flags &= ~VRING_DESC_F_NEXT;
	idx = start_dp[idx].next;
	txvq->vq_desc_head_idx = idx;
	if (txvq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
		txvq->vq_desc_tail_idx = idx;
	txvq->vq_free_cnt = (uint16_t)(txvq->vq_free_cnt - needed);
	vq_update_avail_ring(txvq, head_idx);

	return 0;
}

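/* Allocate a raw mbuf from the rx mempool; only the raw sanity check is run. */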
static inline struct rte_mbuf *
rte_rxmbuf_alloc(struct rte_mempool *mp)
{
	struct rte_mbuf *m;

	m = __rte_mbuf_raw_alloc(mp);
	__rte_mbuf_sanity_check_raw(m, 0);

	return m;
}

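/*
 * (Re)initialize the vring of a queue being started: reset the ring memory
 * and the driver bookkeeping, chain the free descriptors and, for receive
 * queues, fill the ring with freshly allocated mbufs.
 */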
static void
virtio_dev_vring_start(struct virtqueue *vq, int queue_type)
{
	struct rte_mbuf *m;
	int i, nbufs, error, size = vq->vq_nentries;
	struct vring *vr = &vq->vq_ring;
	uint8_t *ring_mem = vq->vq_ring_virt_mem;

	PMD_INIT_FUNC_TRACE();

	/*
	 * Reinitialise since the virtio port might have been stopped and restarted
	 */
	memset(vq->vq_ring_virt_mem, 0, vq->vq_ring_size);
	vring_init(vr, size, ring_mem, VIRTIO_PCI_VRING_ALIGN);
	vq->vq_used_cons_idx = 0;
	vq->vq_desc_head_idx = 0;
	vq->vq_avail_idx = 0;
	vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1);
	vq->vq_free_cnt = vq->vq_nentries;
	memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);

	/* Chain all the descriptors in the ring with an END */
	for (i = 0; i < size - 1; i++)
		vr->desc[i].next = (uint16_t)(i + 1);
	vr->desc[i].next = VQ_RING_DESC_CHAIN_END;

	/*
	 * Disable interrupts from the device (host) to the guest.
	 */
	virtqueue_disable_intr(vq);

	/* Only the rx virtqueue needs mbufs to be allocated at initialization */
	if (queue_type == VTNET_RQ) {
		if (vq->mpool == NULL)
			rte_exit(EXIT_FAILURE,
			"Cannot allocate initial mbufs for rx virtqueue");

		/* Allocate blank mbufs for each rx descriptor */
		nbufs = 0;
		error = ENOSPC;

		if (use_simple_rxtx)
			for (i = 0; i < vq->vq_nentries; i++) {
				vq->vq_ring.avail->ring[i] = i;
				vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE;
			}

		memset(&vq->fake_mbuf, 0, sizeof(vq->fake_mbuf));
		for (i = 0; i < RTE_PMD_VIRTIO_RX_MAX_BURST; i++)
			vq->sw_ring[vq->vq_nentries + i] = &vq->fake_mbuf;

		while (!virtqueue_full(vq)) {
			m = rte_rxmbuf_alloc(vq->mpool);
			if (m == NULL)
				break;

			/* Enqueue the allocated buffer */
			if (use_simple_rxtx)
				error = virtqueue_enqueue_recv_refill_simple(vq, m);
			else
				error = virtqueue_enqueue_recv_refill(vq, m);
			if (error) {
				rte_pktmbuf_free(m);
				break;
			}
			nbufs++;
		}

		vq_update_avail_idx(vq);

		PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
	} else if (queue_type == VTNET_TQ) {
		if (use_simple_rxtx) {
			int mid_idx = vq->vq_nentries >> 1;

			for (i = 0; i < mid_idx; i++) {
				vq->vq_ring.avail->ring[i] = i + mid_idx;
				vq->vq_ring.desc[i + mid_idx].next = i;
				vq->vq_ring.desc[i + mid_idx].addr =
					vq->virtio_net_hdr_mem +
						mid_idx * vq->hw->vtnet_hdr_size;
				vq->vq_ring.desc[i + mid_idx].len =
					vq->hw->vtnet_hdr_size;
				vq->vq_ring.desc[i + mid_idx].flags =
					VRING_DESC_F_NEXT;
				vq->vq_ring.desc[i].flags = 0;
			}
			for (i = mid_idx; i < vq->vq_nentries; i++)
				vq->vq_ring.avail->ring[i] = i;
		}
	}
}

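/* Start the control virtqueue, if the device has one. */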
void
virtio_dev_cq_start(struct rte_eth_dev *dev)
{
	struct virtio_hw *hw = dev->data->dev_private;

	if (hw->cvq) {
		virtio_dev_vring_start(hw->cvq, VTNET_CQ);
		VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq);
	}
}

void
virtio_dev_rxtx_start(struct rte_eth_dev *dev)
{
	/*
	 * Start the receive and transmit vrings:
	 * - set up the vring structure for all queues
	 * - initialize the descriptors of the rx vrings
	 * - allocate blank mbufs for each rx descriptor
	 */
	int i;

	PMD_INIT_FUNC_TRACE();

	/* Start rx vring. */
	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		virtio_dev_vring_start(dev->data->rx_queues[i], VTNET_RQ);
		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]);
	}

	/* Start tx vring. */
	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		virtio_dev_vring_start(dev->data->tx_queues[i], VTNET_TQ);
		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]);
	}
}

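/*
 * Set up a receive queue: allocate the virtqueue for the given ethdev queue
 * index, remember the mempool used for rx mbufs and prepare the bookkeeping
 * used by the vector (simple) rx path.
 */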
int
virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
			uint16_t queue_idx,
			uint16_t nb_desc,
			unsigned int socket_id,
			__rte_unused const struct rte_eth_rxconf *rx_conf,
			struct rte_mempool *mp)
{
	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
	struct virtqueue *vq;
	int ret;

	PMD_INIT_FUNC_TRACE();
	ret = virtio_dev_queue_setup(dev, VTNET_RQ, queue_idx, vtpci_queue_idx,
			nb_desc, socket_id, &vq);
	if (ret < 0) {
		PMD_INIT_LOG(ERR, "rvq initialization failed");
		return ret;
	}

	/* Remember the mempool used for rx mbuf allocation */
	vq->mpool = mp;

	dev->data->rx_queues[queue_idx] = vq;

	virtio_rxq_vec_setup(vq);

	return 0;
}

void
virtio_dev_rx_queue_release(void *rxq)
{
	virtio_dev_queue_release(rxq);
}

/*
 * struct rte_eth_dev *dev: device whose tx queue list is updated
 * uint16_t queue_idx: index into the device's tx queue list
 * uint16_t nb_desc: number of descriptors; defaults to the value read from
 *	config space
 * unsigned int socket_id: NUMA socket used to allocate the queue memzone
 * const struct rte_eth_txconf *tx_conf: tx configuration (offload flags and
 *	free threshold)
 */
int
virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
			uint16_t queue_idx,
			uint16_t nb_desc,
			unsigned int socket_id,
			const struct rte_eth_txconf *tx_conf)
{
	uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtqueue *vq;
	uint16_t tx_free_thresh;
	int ret;

	PMD_INIT_FUNC_TRACE();

	if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMS)
	    != ETH_TXQ_FLAGS_NOXSUMS) {
		PMD_INIT_LOG(ERR, "TX checksum offload not supported");
		return -EINVAL;
	}

	/* Use the simple rx/tx functions if single segment and no offloads */
	if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS &&
	     !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
		PMD_INIT_LOG(INFO, "Using simple rx/tx path");
		dev->tx_pkt_burst = virtio_xmit_pkts_simple;
		dev->rx_pkt_burst = virtio_recv_pkts_vec;
		use_simple_rxtx = 1;
	}

	ret = virtio_dev_queue_setup(dev, VTNET_TQ, queue_idx, vtpci_queue_idx,
			nb_desc, socket_id, &vq);
	if (ret < 0) {
		PMD_INIT_LOG(ERR, "tvq initialization failed");
		return ret;
	}

	tx_free_thresh = tx_conf->tx_free_thresh;
	if (tx_free_thresh == 0)
		tx_free_thresh =
			RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);

	if (tx_free_thresh >= (vq->vq_nentries - 3)) {
		RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
			"number of TX entries minus 3 (%u)."
			" (tx_free_thresh=%u port=%u queue=%u)\n",
			vq->vq_nentries - 3,
			tx_free_thresh, dev->data->port_id, queue_idx);
		return -EINVAL;
	}

	vq->vq_free_thresh = tx_free_thresh;

	dev->data->tx_queues[queue_idx] = vq;
	return 0;
}

void
virtio_dev_tx_queue_release(void *txq)
{
	virtio_dev_queue_release(txq);
}

static void
virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
{
	int error;

	/*
	 * Requeue the discarded mbuf. This should always be
	 * successful since it was just dequeued.
	 */
	error = virtqueue_enqueue_recv_refill(vq, m);
	if (unlikely(error)) {
		RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf\n");
		rte_pktmbuf_free(m);
	}
}

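/*
 * Update the per-queue packet size histogram and the multicast/broadcast
 * counters for a packet that has just been received or transmitted.
 */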
static void
virtio_update_packet_stats(struct virtqueue *vq, struct rte_mbuf *mbuf)
{
	uint32_t s = mbuf->pkt_len;
	struct ether_addr *ea;

	if (s == 64) {
		vq->size_bins[1]++;
	} else if (s > 64 && s < 1024) {
		uint32_t bin;

		/* count leading zeros to index the power-of-two size bin */
		bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
		vq->size_bins[bin]++;
	} else {
		if (s < 64)
			vq->size_bins[0]++;
		else if (s < 1519)
			vq->size_bins[6]++;
		else
			vq->size_bins[7]++;
	}

	ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *);
	vq->multicast += is_multicast_ether_addr(ea);
	vq->broadcast += is_broadcast_ether_addr(ea);
}

#define VIRTIO_MBUF_BURST_SZ 64
#define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
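/*
 * Receive burst for the non-mergeable case: every packet fits in a single
 * descriptor. Dequeue used buffers, adjust lengths for the virtio-net
 * header, refill the ring with fresh mbufs and kick the host if needed.
 */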
uint16_t
virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	struct virtqueue *rxvq = rx_queue;
	struct virtio_hw *hw;
	struct rte_mbuf *rxm, *new_mbuf;
	uint16_t nb_used, num, nb_rx;
	uint32_t len[VIRTIO_MBUF_BURST_SZ];
	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
	int error;
	uint32_t i, nb_enqueued;
	uint32_t hdr_size;

	nb_used = VIRTQUEUE_NUSED(rxvq);

	virtio_rmb();

	num = (uint16_t)(likely(nb_used <= nb_pkts) ? nb_used : nb_pkts);
	num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ? num : VIRTIO_MBUF_BURST_SZ);
	if (likely(num > DESC_PER_CACHELINE))
		num = num - ((rxvq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);

	if (num == 0)
		return 0;

	num = virtqueue_dequeue_burst_rx(rxvq, rcv_pkts, len, num);
	PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);

	hw = rxvq->hw;
	nb_rx = 0;
	nb_enqueued = 0;
	hdr_size = hw->vtnet_hdr_size;

	for (i = 0; i < num; i++) {
		rxm = rcv_pkts[i];

		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);

		if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
			PMD_RX_LOG(ERR, "Packet drop");
			nb_enqueued++;
			virtio_discard_rxbuf(rxvq, rxm);
			rxvq->errors++;
			continue;
		}

		rxm->port = rxvq->port_id;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->ol_flags = 0;
		rxm->vlan_tci = 0;

		rxm->nb_segs = 1;
		rxm->next = NULL;
		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
		rxm->data_len = (uint16_t)(len[i] - hdr_size);

		if (hw->vlan_strip)
			rte_vlan_strip(rxm);

		VIRTIO_DUMP_PACKET(rxm, rxm->data_len);

		rx_pkts[nb_rx++] = rxm;

		rxvq->bytes += rx_pkts[nb_rx - 1]->pkt_len;
		virtio_update_packet_stats(rxvq, rxm);
	}

	rxvq->packets += nb_rx;

	/* Allocate new mbufs to refill the used descriptors */
	error = ENOSPC;
	while (likely(!virtqueue_full(rxvq))) {
		new_mbuf = rte_rxmbuf_alloc(rxvq->mpool);
		if (unlikely(new_mbuf == NULL)) {
			struct rte_eth_dev *dev
				= &rte_eth_devices[rxvq->port_id];
			dev->data->rx_mbuf_alloc_failed++;
			break;
		}
		error = virtqueue_enqueue_recv_refill(rxvq, new_mbuf);
		if (unlikely(error)) {
			rte_pktmbuf_free(new_mbuf);
			break;
		}
		nb_enqueued++;
	}

	if (likely(nb_enqueued)) {
		vq_update_avail_idx(rxvq);

		if (unlikely(virtqueue_kick_prepare(rxvq))) {
			virtqueue_notify(rxvq);
			PMD_RX_LOG(DEBUG, "Notified");
		}
	}

	return nb_rx;
}

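/*
 * Receive burst with VIRTIO_NET_F_MRG_RXBUF: a packet may span several
 * descriptors. The num_buffers field of the virtio-net header tells how
 * many buffers belong to the packet; the extra segments are chained onto
 * the first mbuf before the packet is handed to the application.
 */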
uint16_t
virtio_recv_mergeable_pkts(void *rx_queue,
			struct rte_mbuf **rx_pkts,
			uint16_t nb_pkts)
{
	struct virtqueue *rxvq = rx_queue;
	struct virtio_hw *hw;
	struct rte_mbuf *rxm, *new_mbuf;
	uint16_t nb_used, num, nb_rx;
	uint32_t len[VIRTIO_MBUF_BURST_SZ];
	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
	struct rte_mbuf *prev;
	int error;
	uint32_t i, nb_enqueued;
	uint32_t seg_num;
	uint16_t extra_idx;
	uint32_t seg_res;
	uint32_t hdr_size;

	nb_used = VIRTQUEUE_NUSED(rxvq);

	virtio_rmb();

	if (nb_used == 0)
		return 0;

	PMD_RX_LOG(DEBUG, "used:%d\n", nb_used);

	hw = rxvq->hw;
	nb_rx = 0;
	i = 0;
	nb_enqueued = 0;
	seg_num = 0;
	extra_idx = 0;
	seg_res = 0;
	hdr_size = hw->vtnet_hdr_size;

	while (i < nb_used) {
		struct virtio_net_hdr_mrg_rxbuf *header;

		if (nb_rx == nb_pkts)
			break;

		num = virtqueue_dequeue_burst_rx(rxvq, rcv_pkts, len, 1);
		if (num != 1)
			continue;

		i++;

		PMD_RX_LOG(DEBUG, "dequeue:%d\n", num);
		PMD_RX_LOG(DEBUG, "packet len:%d\n", len[0]);

		rxm = rcv_pkts[0];

		if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) {
			PMD_RX_LOG(ERR, "Packet drop\n");
			nb_enqueued++;
			virtio_discard_rxbuf(rxvq, rxm);
			rxvq->errors++;
			continue;
		}

		header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr +
			RTE_PKTMBUF_HEADROOM - hdr_size);
		seg_num = header->num_buffers;

		if (seg_num == 0)
			seg_num = 1;

		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->nb_segs = seg_num;
		rxm->next = NULL;
		rxm->ol_flags = 0;
		rxm->vlan_tci = 0;
		rxm->pkt_len = (uint32_t)(len[0] - hdr_size);
		rxm->data_len = (uint16_t)(len[0] - hdr_size);

		rxm->port = rxvq->port_id;
		rx_pkts[nb_rx] = rxm;
		prev = rxm;

		seg_res = seg_num - 1;

		while (seg_res != 0) {
			/*
			 * Fetch the remaining segments of the current,
			 * not yet completed, packet.
			 */
			uint16_t rcv_cnt =
				RTE_MIN(seg_res, RTE_DIM(rcv_pkts));
			if (likely(VIRTQUEUE_NUSED(rxvq) >= rcv_cnt)) {
				uint32_t rx_num =
					virtqueue_dequeue_burst_rx(rxvq,
					rcv_pkts, len, rcv_cnt);
				i += rx_num;
				rcv_cnt = rx_num;
			} else {
				PMD_RX_LOG(ERR,
					"Not enough segments for packet.\n");
				nb_enqueued++;
				virtio_discard_rxbuf(rxvq, rxm);
				rxvq->errors++;
				break;
			}

			extra_idx = 0;

			while (extra_idx < rcv_cnt) {
				rxm = rcv_pkts[extra_idx];

				rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
				rxm->next = NULL;
				rxm->pkt_len = (uint32_t)(len[extra_idx]);
				rxm->data_len = (uint16_t)(len[extra_idx]);

				if (prev)
					prev->next = rxm;

				prev = rxm;
				rx_pkts[nb_rx]->pkt_len += rxm->pkt_len;
				extra_idx++;
			}
			seg_res -= rcv_cnt;
		}

		if (hw->vlan_strip)
			rte_vlan_strip(rx_pkts[nb_rx]);

		VIRTIO_DUMP_PACKET(rx_pkts[nb_rx],
			rx_pkts[nb_rx]->data_len);

		rxvq->bytes += rx_pkts[nb_rx]->pkt_len;
		virtio_update_packet_stats(rxvq, rx_pkts[nb_rx]);
		nb_rx++;
	}

	rxvq->packets += nb_rx;

	/* Allocate new mbufs to refill the used descriptors */
	error = ENOSPC;
	while (likely(!virtqueue_full(rxvq))) {
		new_mbuf = rte_rxmbuf_alloc(rxvq->mpool);
		if (unlikely(new_mbuf == NULL)) {
			struct rte_eth_dev *dev
				= &rte_eth_devices[rxvq->port_id];
			dev->data->rx_mbuf_alloc_failed++;
			break;
		}
		error = virtqueue_enqueue_recv_refill(rxvq, new_mbuf);
		if (unlikely(error)) {
			rte_pktmbuf_free(new_mbuf);
			break;
		}
		nb_enqueued++;
	}

	if (likely(nb_enqueued)) {
		vq_update_avail_idx(rxvq);

		if (unlikely(virtqueue_kick_prepare(rxvq))) {
			virtqueue_notify(rxvq);
			PMD_RX_LOG(DEBUG, "Notified");
		}
	}

	return nb_rx;
}

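/*
 * Transmit burst: reclaim descriptors from completed transmissions when the
 * free count drops below the threshold, optionally insert a VLAN tag in
 * software, enqueue each packet and kick the host if it requests a notify.
 */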
uint16_t
virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct virtqueue *txvq = tx_queue;
	uint16_t nb_used, nb_tx;
	int error;

	if (unlikely(nb_pkts < 1))
		return nb_pkts;

	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
	nb_used = VIRTQUEUE_NUSED(txvq);

	virtio_rmb();
	if (likely(nb_used > txvq->vq_nentries - txvq->vq_free_thresh))
		virtio_xmit_cleanup(txvq, nb_used);

	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
		struct rte_mbuf *txm = tx_pkts[nb_tx];
		/* Need one more descriptor for the virtio header. */
		int need = txm->nb_segs - txvq->vq_free_cnt + 1;

		/* A positive value means free vring descriptors are needed */
		if (unlikely(need > 0)) {
			nb_used = VIRTQUEUE_NUSED(txvq);
			virtio_rmb();
			need = RTE_MIN(need, (int)nb_used);

			virtio_xmit_cleanup(txvq, need);
			need = txm->nb_segs - txvq->vq_free_cnt + 1;
			if (unlikely(need > 0)) {
				PMD_TX_LOG(ERR,
					   "No free tx descriptors to transmit");
				break;
			}
		}

		/* Do VLAN tag insertion */
		if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
			error = rte_vlan_insert(&txm);
			if (unlikely(error)) {
				rte_pktmbuf_free(txm);
				continue;
			}
		}

		/* Enqueue packet buffers */
		error = virtqueue_enqueue_xmit(txvq, txm);
		if (unlikely(error)) {
			if (error == -ENOSPC)
				PMD_TX_LOG(ERR, "virtqueue_enqueue Free count = 0");
			else if (error == -EMSGSIZE)
				PMD_TX_LOG(ERR, "virtqueue_enqueue Free count < 1");
			else
				PMD_TX_LOG(ERR, "virtqueue_enqueue error: %d", error);
			break;
		}

		txvq->bytes += txm->pkt_len;
		virtio_update_packet_stats(txvq, txm);
	}

	txvq->packets += nb_tx;

	if (likely(nb_tx)) {
		vq_update_avail_idx(txvq);

		if (unlikely(virtqueue_kick_prepare(txvq))) {
			virtqueue_notify(txvq);
			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
		}
	}

	return nb_tx;
}
893