/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#include <rte_cycles.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_byteorder.h>

#include "virtio_logs.h"
#include "virtio_ethdev.h"
#include "virtqueue.h"

#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
#define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
#else
#define VIRTIO_DUMP_PACKET(m, len) do { } while (0)
#endif

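/*
 * Return the descriptor chain starting at desc_idx to the free list: credit
 * the descriptors back to vq_free_cnt, walk to the end of the chain (an
 * indirect descriptor counts as a single entry) and link the freed chain
 * onto the tail of the existing free chain.
 */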
static void
vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
{
	struct vring_desc *dp, *dp_tail;
	struct vq_desc_extra *dxp;
	uint16_t desc_idx_last = desc_idx;

	dp  = &vq->vq_ring.desc[desc_idx];
	dxp = &vq->vq_descx[desc_idx];
	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
	if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
		while (dp->flags & VRING_DESC_F_NEXT) {
			desc_idx_last = dp->next;
			dp = &vq->vq_ring.desc[dp->next];
		}
	}
	dxp->ndescs = 0;

	/*
	 * We must append the existing free chain, if any, to the end of
	 * the newly freed chain. If the virtqueue was completely used,
	 * then the free chain is empty and both the head and tail indexes
	 * are VQ_RING_DESC_CHAIN_END.
	 */
	if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
		vq->vq_desc_head_idx = desc_idx;
	} else {
		dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
		dp_tail->next = desc_idx;
	}

	vq->vq_desc_tail_idx = desc_idx_last;
	dp->next = VQ_RING_DESC_CHAIN_END;
}

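/*
 * Dequeue up to num buffers from the used ring: store the mbuf of each
 * completed buffer in rx_pkts, its length in len, and return the descriptors
 * to the free chain. Returns the number of buffers actually dequeued; the
 * caller ensures num does not exceed the used count.
 */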
static uint16_t
virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
			   uint32_t *len, uint16_t num)
{
	struct vring_used_elem *uep;
	struct rte_mbuf *cookie;
	uint16_t used_idx, desc_idx;
	uint16_t i;

	/* Caller does the check */
	for (i = 0; i < num; i++) {
		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
		uep = &vq->vq_ring.used->ring[used_idx];
		desc_idx = (uint16_t) uep->id;
		len[i] = uep->len;
		cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;

		if (unlikely(cookie == NULL)) {
			PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
				vq->vq_used_cons_idx);
			break;
		}

		rte_prefetch0(cookie);
		rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
		rx_pkts[i] = cookie;
		vq->vq_used_cons_idx++;
		vq_ring_free_chain(vq, desc_idx);
		vq->vq_descx[desc_idx].cookie = NULL;
	}

	return i;
}

#ifndef DEFAULT_TX_FREE_THRESH
#define DEFAULT_TX_FREE_THRESH 32
#endif

/* Cleanup from completed transmits. */
static void
virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
{
	uint16_t i, used_idx, desc_idx;
	for (i = 0; i < num; i++) {
		struct vring_used_elem *uep;
		struct vq_desc_extra *dxp;

		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
		uep = &vq->vq_ring.used->ring[used_idx];

		desc_idx = (uint16_t) uep->id;
		dxp = &vq->vq_descx[desc_idx];
		vq->vq_used_cons_idx++;
		vq_ring_free_chain(vq, desc_idx);

		if (dxp->cookie != NULL) {
			rte_pktmbuf_free(dxp->cookie);
			dxp->cookie = NULL;
		}
	}
}

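/*
 * Post a single receive buffer to the virtqueue. The descriptor is set up so
 * that the device can write both the virtio net header and the packet data
 * into the mbuf. Returns 0 on success or a negative errno value on failure
 * (no free descriptor, or a corrupt head index).
 */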
static inline int
virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
{
	struct vq_desc_extra *dxp;
	struct virtio_hw *hw = vq->hw;
	struct vring_desc *start_dp;
	uint16_t needed = 1;
	uint16_t head_idx, idx;

	if (unlikely(vq->vq_free_cnt == 0))
		return -ENOSPC;
	if (unlikely(vq->vq_free_cnt < needed))
		return -EMSGSIZE;

	head_idx = vq->vq_desc_head_idx;
	if (unlikely(head_idx >= vq->vq_nentries))
		return -EFAULT;

	idx = head_idx;
	dxp = &vq->vq_descx[idx];
	dxp->cookie = (void *)cookie;
	dxp->ndescs = needed;

	start_dp = vq->vq_ring.desc;
	start_dp[idx].addr =
		(uint64_t)(cookie->buf_physaddr + RTE_PKTMBUF_HEADROOM
		- hw->vtnet_hdr_size);
	start_dp[idx].len =
		cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
	start_dp[idx].flags = VRING_DESC_F_WRITE;
	idx = start_dp[idx].next;
	vq->vq_desc_head_idx = idx;
	if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
		vq->vq_desc_tail_idx = idx;
	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
	vq_update_avail_ring(vq, head_idx);

	return 0;
}

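/*
 * Enqueue a packet (a chain of mbufs) for transmission. The first descriptor
 * points at the per-descriptor virtio net header kept in virtio_net_hdr_mem,
 * and one further descriptor is used per mbuf segment. Returns 0 on success
 * or a negative errno value if the ring does not have enough free
 * descriptors.
 */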
static int
virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie)
{
	struct vq_desc_extra *dxp;
	struct vring_desc *start_dp;
	uint16_t seg_num = cookie->nb_segs;
	uint16_t needed = 1 + seg_num;
	uint16_t head_idx, idx;
	size_t head_size = txvq->hw->vtnet_hdr_size;

	if (unlikely(txvq->vq_free_cnt == 0))
		return -ENOSPC;
	if (unlikely(txvq->vq_free_cnt < needed))
		return -EMSGSIZE;
	head_idx = txvq->vq_desc_head_idx;
	if (unlikely(head_idx >= txvq->vq_nentries))
		return -EFAULT;

	idx = head_idx;
	dxp = &txvq->vq_descx[idx];
	dxp->cookie = (void *)cookie;
	dxp->ndescs = needed;

	start_dp = txvq->vq_ring.desc;
	start_dp[idx].addr =
		txvq->virtio_net_hdr_mem + idx * head_size;
	start_dp[idx].len = head_size;
	start_dp[idx].flags = VRING_DESC_F_NEXT;

	for (; ((seg_num > 0) && (cookie != NULL)); seg_num--) {
		idx = start_dp[idx].next;
		start_dp[idx].addr  = RTE_MBUF_DATA_DMA_ADDR(cookie);
		start_dp[idx].len   = cookie->data_len;
		start_dp[idx].flags = VRING_DESC_F_NEXT;
		cookie = cookie->next;
	}

	start_dp[idx].flags &= ~VRING_DESC_F_NEXT;
	idx = start_dp[idx].next;
	txvq->vq_desc_head_idx = idx;
	if (txvq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
		txvq->vq_desc_tail_idx = idx;
	txvq->vq_free_cnt = (uint16_t)(txvq->vq_free_cnt - needed);
	vq_update_avail_ring(txvq, head_idx);

	return 0;
}

static inline struct rte_mbuf *
rte_rxmbuf_alloc(struct rte_mempool *mp)
{
	struct rte_mbuf *m;

	m = __rte_mbuf_raw_alloc(mp);
	__rte_mbuf_sanity_check_raw(m, 0);

	return m;
}

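/*
 * (Re)initialize a virtqueue: reset the vring memory, rebuild the free
 * descriptor chain, disable host-to-guest interrupts, pre-fill an rx queue
 * with mbufs and program the queue's physical address (PFN) into the device.
 */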
static void
virtio_dev_vring_start(struct virtqueue *vq, int queue_type)
{
	struct rte_mbuf *m;
	int i, nbufs, error, size = vq->vq_nentries;
	struct vring *vr = &vq->vq_ring;
	uint8_t *ring_mem = vq->vq_ring_virt_mem;

	PMD_INIT_FUNC_TRACE();

	/*
	 * Reinitialise since virtio port might have been stopped and restarted
	 */
	memset(vq->vq_ring_virt_mem, 0, vq->vq_ring_size);
	vring_init(vr, size, ring_mem, VIRTIO_PCI_VRING_ALIGN);
	vq->vq_used_cons_idx = 0;
	vq->vq_desc_head_idx = 0;
	vq->vq_avail_idx = 0;
	vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1);
	vq->vq_free_cnt = vq->vq_nentries;
	memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);

	/* Chain all the descriptors in the ring with an END */
	for (i = 0; i < size - 1; i++)
		vr->desc[i].next = (uint16_t)(i + 1);
	vr->desc[i].next = VQ_RING_DESC_CHAIN_END;

	/*
	 * Disable interrupts from the device (host) to the guest
	 */
	virtqueue_disable_intr(vq);

	/* Only rx virtqueue needs mbufs to be allocated at initialization */
	if (queue_type == VTNET_RQ) {
		if (vq->mpool == NULL)
			rte_exit(EXIT_FAILURE,
			"Cannot allocate initial mbufs for rx virtqueue");

		/* Allocate blank mbufs for each rx descriptor */
		nbufs = 0;
		error = ENOSPC;
		while (!virtqueue_full(vq)) {
			m = rte_rxmbuf_alloc(vq->mpool);
			if (m == NULL)
				break;

			/******************************************
			*         Enqueue allocated buffers        *
			*******************************************/
			error = virtqueue_enqueue_recv_refill(vq, m);

			if (error) {
				rte_pktmbuf_free(m);
				break;
			}
			nbufs++;
		}

		vq_update_avail_idx(vq);

		PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);

		VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL,
			vq->vq_queue_index);
		VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
			vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
	} else if (queue_type == VTNET_TQ) {
		VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL,
			vq->vq_queue_index);
		VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
			vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
	} else {
		VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL,
			vq->vq_queue_index);
		VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
			vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
	}
}

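/* Start the control virtqueue, if the device has one. */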
void
virtio_dev_cq_start(struct rte_eth_dev *dev)
{
	struct virtio_hw *hw = dev->data->dev_private;

	if (hw->cvq) {
		virtio_dev_vring_start(hw->cvq, VTNET_CQ);
		VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq);
	}
}

void
virtio_dev_rxtx_start(struct rte_eth_dev *dev)
{
	/*
	 * Start receive and transmit vrings
	 * -	Setup vring structure for all queues
	 * -	Initialize descriptor for the rx vring
	 * -	Allocate blank mbufs for each rx descriptor
	 */
	int i;

	PMD_INIT_FUNC_TRACE();

	/* Start rx vring. */
	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		virtio_dev_vring_start(dev->data->rx_queues[i], VTNET_RQ);
		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]);
	}

	/* Start tx vring. */
	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		virtio_dev_vring_start(dev->data->tx_queues[i], VTNET_TQ);
		VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]);
	}
}

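/*
 * Set up an rx queue: create the underlying virtqueue and remember the
 * mempool that will supply receive mbufs. The mbufs themselves are allocated
 * later, when the queue is started by virtio_dev_vring_start().
 */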
int
virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
			uint16_t queue_idx,
			uint16_t nb_desc,
			unsigned int socket_id,
			__rte_unused const struct rte_eth_rxconf *rx_conf,
			struct rte_mempool *mp)
{
	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
	struct virtqueue *vq;
	int ret;

	PMD_INIT_FUNC_TRACE();
	ret = virtio_dev_queue_setup(dev, VTNET_RQ, queue_idx, vtpci_queue_idx,
			nb_desc, socket_id, &vq);
	if (ret < 0) {
		PMD_INIT_LOG(ERR, "rvq initialization failed");
		return ret;
	}

	/* Use the provided mempool for rx mbuf allocation */
	vq->mpool = mp;

	dev->data->rx_queues[queue_idx] = vq;
	return 0;
}

void
virtio_dev_rx_queue_release(void *rxq)
{
	virtio_dev_queue_release(rxq);
}

/*
 * struct rte_eth_dev *dev: Used to update dev
 * uint16_t queue_idx: Just used as an index in dev txq list
 * uint16_t nb_desc: Defaults to values read from config space
 * unsigned int socket_id: Used to allocate memzone
 * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
 */
int
virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
			uint16_t queue_idx,
			uint16_t nb_desc,
			unsigned int socket_id,
			const struct rte_eth_txconf *tx_conf)
{
	uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
	struct virtqueue *vq;
	uint16_t tx_free_thresh;
	int ret;

	PMD_INIT_FUNC_TRACE();

	if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMS)
	    != ETH_TXQ_FLAGS_NOXSUMS) {
		PMD_INIT_LOG(ERR, "TX checksum offload not supported");
		return -EINVAL;
	}

	ret = virtio_dev_queue_setup(dev, VTNET_TQ, queue_idx, vtpci_queue_idx,
			nb_desc, socket_id, &vq);
	if (ret < 0) {
		PMD_INIT_LOG(ERR, "tvq initialization failed");
		return ret;
	}

	tx_free_thresh = tx_conf->tx_free_thresh;
	if (tx_free_thresh == 0)
		tx_free_thresh =
			RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);

	if (tx_free_thresh >= (vq->vq_nentries - 3)) {
		RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
			"number of TX entries minus 3 (%u)."
			" (tx_free_thresh=%u port=%u queue=%u)\n",
			vq->vq_nentries - 3,
			tx_free_thresh, dev->data->port_id, queue_idx);
		return -EINVAL;
	}

	vq->vq_free_thresh = tx_free_thresh;

	dev->data->tx_queues[queue_idx] = vq;
	return 0;
}

void
virtio_dev_tx_queue_release(void *txq)
{
	virtio_dev_queue_release(txq);
}

static void
virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
{
	int error;

	/*
	 * Requeue the discarded mbuf. This should always be
	 * successful since it was just dequeued.
	 */
	error = virtqueue_enqueue_recv_refill(vq, m);
	if (unlikely(error)) {
		RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf\n");
		rte_pktmbuf_free(m);
	}
}

#define VIRTIO_MBUF_BURST_SZ 64
#define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))

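/*
 * Receive burst for the non-mergeable rx path: dequeue used buffers, strip
 * the virtio net header, refill the ring with fresh mbufs and notify the
 * host if it expects a kick.
 */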
uint16_t
virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	struct virtqueue *rxvq = rx_queue;
	struct virtio_hw *hw;
	struct rte_mbuf *rxm, *new_mbuf;
	uint16_t nb_used, num, nb_rx;
	uint32_t len[VIRTIO_MBUF_BURST_SZ];
	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
	int error;
	uint32_t i, nb_enqueued;
	const uint32_t hdr_size = sizeof(struct virtio_net_hdr);

	nb_used = VIRTQUEUE_NUSED(rxvq);

	virtio_rmb();

	num = (uint16_t)(likely(nb_used <= nb_pkts) ? nb_used : nb_pkts);
	num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ? num : VIRTIO_MBUF_BURST_SZ);
	if (likely(num > DESC_PER_CACHELINE))
		num = num - ((rxvq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);

	if (num == 0)
		return 0;

	num = virtqueue_dequeue_burst_rx(rxvq, rcv_pkts, len, num);
	PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);

	hw = rxvq->hw;
	nb_rx = 0;
	nb_enqueued = 0;

	for (i = 0; i < num; i++) {
		rxm = rcv_pkts[i];

		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);

		if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
			PMD_RX_LOG(ERR, "Packet drop");
			nb_enqueued++;
			virtio_discard_rxbuf(rxvq, rxm);
			rxvq->errors++;
			continue;
		}

		rxm->port = rxvq->port_id;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;

		rxm->nb_segs = 1;
		rxm->next = NULL;
		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
		rxm->data_len = (uint16_t)(len[i] - hdr_size);

		if (hw->vlan_strip)
			rte_vlan_strip(rxm);

		VIRTIO_DUMP_PACKET(rxm, rxm->data_len);

		rx_pkts[nb_rx++] = rxm;
		rxvq->bytes += rx_pkts[nb_rx - 1]->pkt_len;
	}

	rxvq->packets += nb_rx;

	/* Allocate new mbuf for the used descriptor */
	error = ENOSPC;
	while (likely(!virtqueue_full(rxvq))) {
		new_mbuf = rte_rxmbuf_alloc(rxvq->mpool);
		if (unlikely(new_mbuf == NULL)) {
			struct rte_eth_dev *dev
				= &rte_eth_devices[rxvq->port_id];
			dev->data->rx_mbuf_alloc_failed++;
			break;
		}
		error = virtqueue_enqueue_recv_refill(rxvq, new_mbuf);
		if (unlikely(error)) {
			rte_pktmbuf_free(new_mbuf);
			break;
		}
		nb_enqueued++;
	}

	if (likely(nb_enqueued)) {
		vq_update_avail_idx(rxvq);

		if (unlikely(virtqueue_kick_prepare(rxvq))) {
			virtqueue_notify(rxvq);
			PMD_RX_LOG(DEBUG, "Notified");
		}
	}

	return nb_rx;
}

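/*
 * Receive burst for the mergeable rx path (VIRTIO_NET_F_MRG_RXBUF): a single
 * packet may span several descriptors, so the header's num_buffers field is
 * used to chain the extra segments onto the first mbuf before the ring is
 * refilled and the host is notified.
 */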
uint16_t
virtio_recv_mergeable_pkts(void *rx_queue,
			struct rte_mbuf **rx_pkts,
			uint16_t nb_pkts)
{
	struct virtqueue *rxvq = rx_queue;
	struct virtio_hw *hw;
	struct rte_mbuf *rxm, *new_mbuf;
	uint16_t nb_used, num, nb_rx;
	uint32_t len[VIRTIO_MBUF_BURST_SZ];
	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
	struct rte_mbuf *prev;
	int error;
	uint32_t i, nb_enqueued;
	uint32_t seg_num;
	uint16_t extra_idx;
	uint32_t seg_res;
	const uint32_t hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);

	nb_used = VIRTQUEUE_NUSED(rxvq);

	virtio_rmb();

	if (nb_used == 0)
		return 0;

	PMD_RX_LOG(DEBUG, "used:%d", nb_used);

	hw = rxvq->hw;
	nb_rx = 0;
	i = 0;
	nb_enqueued = 0;
	seg_num = 0;
	extra_idx = 0;
	seg_res = 0;

	while (i < nb_used) {
		struct virtio_net_hdr_mrg_rxbuf *header;

		if (nb_rx == nb_pkts)
			break;

		num = virtqueue_dequeue_burst_rx(rxvq, rcv_pkts, len, 1);
		if (num != 1)
			continue;

		i++;

		PMD_RX_LOG(DEBUG, "dequeue:%d", num);
		PMD_RX_LOG(DEBUG, "packet len:%d", len[0]);

		rxm = rcv_pkts[0];

		if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) {
			PMD_RX_LOG(ERR, "Packet drop");
			nb_enqueued++;
			virtio_discard_rxbuf(rxvq, rxm);
			rxvq->errors++;
			continue;
		}

		header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr +
			RTE_PKTMBUF_HEADROOM - hdr_size);
		seg_num = header->num_buffers;

		if (seg_num == 0)
			seg_num = 1;

		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->nb_segs = seg_num;
		rxm->next = NULL;
		rxm->pkt_len = (uint32_t)(len[0] - hdr_size);
		rxm->data_len = (uint16_t)(len[0] - hdr_size);

		rxm->port = rxvq->port_id;
		rx_pkts[nb_rx] = rxm;
		prev = rxm;

		seg_res = seg_num - 1;

		while (seg_res != 0) {
			/*
			 * Get extra segments for current uncompleted packet.
			 */
			uint16_t rcv_cnt =
				RTE_MIN(seg_res, RTE_DIM(rcv_pkts));
			if (likely(VIRTQUEUE_NUSED(rxvq) >= rcv_cnt)) {
				uint32_t rx_num =
					virtqueue_dequeue_burst_rx(rxvq,
					rcv_pkts, len, rcv_cnt);
				i += rx_num;
				rcv_cnt = rx_num;
			} else {
				PMD_RX_LOG(ERR,
					"Not enough segments for packet.");
				nb_enqueued++;
				virtio_discard_rxbuf(rxvq, rxm);
				rxvq->errors++;
				break;
			}

			extra_idx = 0;

			while (extra_idx < rcv_cnt) {
				rxm = rcv_pkts[extra_idx];

				rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
				rxm->next = NULL;
				rxm->pkt_len = (uint32_t)(len[extra_idx]);
				rxm->data_len = (uint16_t)(len[extra_idx]);

				if (prev)
					prev->next = rxm;

				prev = rxm;
				rx_pkts[nb_rx]->pkt_len += rxm->pkt_len;
				extra_idx++;
			}
			seg_res -= rcv_cnt;
		}

		if (hw->vlan_strip)
			rte_vlan_strip(rx_pkts[nb_rx]);

		VIRTIO_DUMP_PACKET(rx_pkts[nb_rx],
			rx_pkts[nb_rx]->data_len);

		rxvq->bytes += rx_pkts[nb_rx]->pkt_len;
		nb_rx++;
	}

	rxvq->packets += nb_rx;

	/* Allocate new mbuf for the used descriptor */
	error = ENOSPC;
	while (likely(!virtqueue_full(rxvq))) {
		new_mbuf = rte_rxmbuf_alloc(rxvq->mpool);
		if (unlikely(new_mbuf == NULL)) {
			struct rte_eth_dev *dev
				= &rte_eth_devices[rxvq->port_id];
			dev->data->rx_mbuf_alloc_failed++;
			break;
		}
		error = virtqueue_enqueue_recv_refill(rxvq, new_mbuf);
		if (unlikely(error)) {
			rte_pktmbuf_free(new_mbuf);
			break;
		}
		nb_enqueued++;
	}

	if (likely(nb_enqueued)) {
		vq_update_avail_idx(rxvq);

		if (unlikely(virtqueue_kick_prepare(rxvq))) {
			virtqueue_notify(rxvq);
			PMD_RX_LOG(DEBUG, "Notified");
		}
	}

	return nb_rx;
}

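/*
 * Transmit burst: reclaim descriptors from completed transmissions when the
 * ring runs low, insert the VLAN tag in software when requested, enqueue
 * each packet and finally notify the host if it expects a kick.
 */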
uint16_t
virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct virtqueue *txvq = tx_queue;
	struct rte_mbuf *txm;
	uint16_t nb_used, nb_tx;
	int error;

	if (unlikely(nb_pkts < 1))
		return nb_pkts;

	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
	nb_used = VIRTQUEUE_NUSED(txvq);

	virtio_rmb();
	if (likely(nb_used > txvq->vq_nentries - txvq->vq_free_thresh))
		virtio_xmit_cleanup(txvq, nb_used);

	nb_tx = 0;

	while (nb_tx < nb_pkts) {
		/* Need one more descriptor for virtio header. */
		int need = tx_pkts[nb_tx]->nb_segs - txvq->vq_free_cnt + 1;

		/* A positive value indicates it needs free vring descriptors */
		if (unlikely(need > 0)) {
			nb_used = VIRTQUEUE_NUSED(txvq);
			virtio_rmb();
			need = RTE_MIN(need, (int)nb_used);

			virtio_xmit_cleanup(txvq, need);
			need = (int)tx_pkts[nb_tx]->nb_segs -
				txvq->vq_free_cnt + 1;
		}

		/*
		 * Zero or negative value indicates it has enough free
		 * descriptors to use for transmitting.
		 */
		if (likely(need <= 0)) {
			txm = tx_pkts[nb_tx];

			/* Do VLAN tag insertion */
			if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
				error = rte_vlan_insert(&txm);
				if (unlikely(error)) {
					rte_pktmbuf_free(txm);
					++nb_tx;
					continue;
				}
			}

			/* Enqueue Packet buffers */
			error = virtqueue_enqueue_xmit(txvq, txm);
			if (unlikely(error)) {
				if (error == -ENOSPC)
					PMD_TX_LOG(ERR, "virtqueue_enqueue Free count = 0");
				else if (error == -EMSGSIZE)
					PMD_TX_LOG(ERR, "virtqueue_enqueue Free count < 1");
				else
					PMD_TX_LOG(ERR, "virtqueue_enqueue error: %d", error);
				break;
			}
			nb_tx++;
			txvq->bytes += txm->pkt_len;
		} else {
			PMD_TX_LOG(ERR, "No free tx descriptors to transmit");
			break;
		}
	}

	txvq->packets += nb_tx;

	if (likely(nb_tx)) {
		vq_update_avail_idx(txvq);

		if (unlikely(virtqueue_kick_prepare(txvq))) {
			virtqueue_notify(txvq);
			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
		}
	}

	return nb_tx;
}
828