xref: /dpdk/drivers/net/vmxnet3/vmxnet3_rxtx.c (revision 68a03efeed657e6e05f281479b33b51102797e15)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2015 Intel Corporation
3  */
4 
5 #include <sys/queue.h>
6 
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <errno.h>
11 #include <stdint.h>
12 #include <stdarg.h>
13 #include <unistd.h>
14 #include <inttypes.h>
15 
16 #include <rte_byteorder.h>
17 #include <rte_common.h>
18 #include <rte_cycles.h>
19 #include <rte_log.h>
20 #include <rte_debug.h>
21 #include <rte_interrupts.h>
22 #include <rte_pci.h>
23 #include <rte_memory.h>
24 #include <rte_memzone.h>
25 #include <rte_launch.h>
26 #include <rte_eal.h>
27 #include <rte_per_lcore.h>
28 #include <rte_lcore.h>
29 #include <rte_atomic.h>
30 #include <rte_branch_prediction.h>
31 #include <rte_mempool.h>
32 #include <rte_malloc.h>
33 #include <rte_mbuf.h>
34 #include <rte_ether.h>
35 #include <ethdev_driver.h>
36 #include <rte_prefetch.h>
37 #include <rte_ip.h>
38 #include <rte_udp.h>
39 #include <rte_tcp.h>
40 #include <rte_sctp.h>
41 #include <rte_string_fns.h>
42 #include <rte_errno.h>
43 #include <rte_net.h>
44 
45 #include "base/vmxnet3_defs.h"
46 #include "vmxnet3_ring.h"
47 
48 #include "vmxnet3_logs.h"
49 #include "vmxnet3_ethdev.h"
50 
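/*
 * VMXNET3_TX_OFFLOAD_MASK lists the Tx offload flags this PMD can handle;
 * vmxnet3_prep_pkts() rejects packets carrying any flag from the NOTSUP
 * mask below (SCTP checksum offload is also rejected explicitly).
 */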
51 #define	VMXNET3_TX_OFFLOAD_MASK	( \
52 		PKT_TX_VLAN_PKT | \
53 		PKT_TX_IPV6 |     \
54 		PKT_TX_IPV4 |     \
55 		PKT_TX_L4_MASK |  \
56 		PKT_TX_TCP_SEG)
57 
58 #define	VMXNET3_TX_OFFLOAD_NOTSUP_MASK	\
59 	(PKT_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)
60 
61 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
62 
63 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
64 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
65 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
66 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
67 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
68 #endif
69 
70 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
71 static void
72 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
73 {
74 	uint32_t avail = 0;
75 
76 	if (rxq == NULL)
77 		return;
78 
79 	PMD_RX_LOG(DEBUG,
80 		   "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
81 		   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
82 	PMD_RX_LOG(DEBUG,
83 		   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
84 		   (unsigned long)rxq->cmd_ring[0].basePA,
85 		   (unsigned long)rxq->cmd_ring[1].basePA,
86 		   (unsigned long)rxq->comp_ring.basePA);
87 
88 	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
89 	PMD_RX_LOG(DEBUG,
90 		   "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
91 		   (uint32_t)rxq->cmd_ring[0].size, avail,
92 		   rxq->comp_ring.next2proc,
93 		   rxq->cmd_ring[0].size - avail);
94 
95 	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
96 	PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
97 		   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
98 		   rxq->cmd_ring[1].size - avail);
99 
100 }
101 
102 static void
103 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
104 {
105 	uint32_t avail = 0;
106 
107 	if (txq == NULL)
108 		return;
109 
110 	PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
111 		   txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
112 	PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
113 		   (unsigned long)txq->cmd_ring.basePA,
114 		   (unsigned long)txq->comp_ring.basePA,
115 		   (unsigned long)txq->data_ring.basePA);
116 
117 	avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
118 	PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
119 		   (uint32_t)txq->cmd_ring.size, avail,
120 		   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
121 }
122 #endif
123 
124 static void
125 vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
126 {
127 	while (ring->next2comp != ring->next2fill) {
128 		/* No need to worry about desc ownership, device is quiesced by now. */
129 		vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
130 
131 		if (buf_info->m) {
132 			rte_pktmbuf_free(buf_info->m);
133 			buf_info->m = NULL;
134 			buf_info->bufPA = 0;
135 			buf_info->len = 0;
136 		}
137 		vmxnet3_cmd_ring_adv_next2comp(ring);
138 	}
139 }
140 
141 static void
142 vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
143 {
144 	uint32_t i;
145 
146 	for (i = 0; i < ring->size; i++) {
147 		/* No need to worry about desc ownership, device is quiesced by now. */
148 		vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];
149 
150 		if (buf_info->m) {
151 			rte_pktmbuf_free_seg(buf_info->m);
152 			buf_info->m = NULL;
153 			buf_info->bufPA = 0;
154 			buf_info->len = 0;
155 		}
156 		vmxnet3_cmd_ring_adv_next2comp(ring);
157 	}
158 }
159 
160 static void
161 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
162 {
163 	rte_free(ring->buf_info);
164 	ring->buf_info = NULL;
165 }
166 
167 void
168 vmxnet3_dev_tx_queue_release(void *txq)
169 {
170 	vmxnet3_tx_queue_t *tq = txq;
171 
172 	if (tq != NULL) {
173 		/* Release mbufs */
174 		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
175 		/* Release the cmd_ring */
176 		vmxnet3_cmd_ring_release(&tq->cmd_ring);
177 		/* Release the memzone */
178 		rte_memzone_free(tq->mz);
179 		/* Release the queue */
180 		rte_free(tq);
181 	}
182 }
183 
184 void
185 vmxnet3_dev_rx_queue_release(void *rxq)
186 {
187 	int i;
188 	vmxnet3_rx_queue_t *rq = rxq;
189 
190 	if (rq != NULL) {
191 		/* Release mbufs */
192 		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
193 			vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
194 
195 		/* Release both the cmd_rings */
196 		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
197 			vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
198 
199 		/* Release the memzone */
200 		rte_memzone_free(rq->mz);
201 
202 		/* Release the queue */
203 		rte_free(rq);
204 	}
205 }
206 
207 static void
208 vmxnet3_dev_tx_queue_reset(void *txq)
209 {
210 	vmxnet3_tx_queue_t *tq = txq;
211 	struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
212 	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
213 	struct vmxnet3_data_ring *data_ring = &tq->data_ring;
214 	int size;
215 
216 	if (tq != NULL) {
217 		/* Release the cmd_ring mbufs */
218 		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
219 	}
220 
221 	/* Tx vmxnet rings structure initialization */
222 	ring->next2fill = 0;
223 	ring->next2comp = 0;
224 	ring->gen = VMXNET3_INIT_GEN;
225 	comp_ring->next2proc = 0;
226 	comp_ring->gen = VMXNET3_INIT_GEN;
227 
228 	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
229 	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
230 	size += tq->txdata_desc_size * data_ring->size;
231 
232 	memset(ring->base, 0, size);
233 }
234 
235 static void
236 vmxnet3_dev_rx_queue_reset(void *rxq)
237 {
238 	int i;
239 	vmxnet3_rx_queue_t *rq = rxq;
240 	struct vmxnet3_hw *hw = rq->hw;
241 	struct vmxnet3_cmd_ring *ring0, *ring1;
242 	struct vmxnet3_comp_ring *comp_ring;
243 	struct vmxnet3_rx_data_ring *data_ring = &rq->data_ring;
244 	int size;
245 
246 	/* Release both the cmd_rings mbufs */
247 	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
248 		vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
249 
250 	ring0 = &rq->cmd_ring[0];
251 	ring1 = &rq->cmd_ring[1];
252 	comp_ring = &rq->comp_ring;
253 
254 	/* Rx vmxnet rings structure initialization */
255 	ring0->next2fill = 0;
256 	ring1->next2fill = 0;
257 	ring0->next2comp = 0;
258 	ring1->next2comp = 0;
259 	ring0->gen = VMXNET3_INIT_GEN;
260 	ring1->gen = VMXNET3_INIT_GEN;
261 	comp_ring->next2proc = 0;
262 	comp_ring->gen = VMXNET3_INIT_GEN;
263 
264 	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
265 	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
266 	if (VMXNET3_VERSION_GE_3(hw) && rq->data_desc_size)
267 		size += rq->data_desc_size * data_ring->size;
268 
269 	memset(ring0->base, 0, size);
270 }
271 
272 void
273 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
274 {
275 	unsigned i;
276 
277 	PMD_INIT_FUNC_TRACE();
278 
279 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
280 		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
281 
282 		if (txq != NULL) {
283 			txq->stopped = TRUE;
284 			vmxnet3_dev_tx_queue_reset(txq);
285 		}
286 	}
287 
288 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
289 		struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
290 
291 		if (rxq != NULL) {
292 			rxq->stopped = TRUE;
293 			vmxnet3_dev_rx_queue_reset(rxq);
294 		}
295 	}
296 }
297 
298 static int
299 vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
300 {
301 	int completed = 0;
302 	struct rte_mbuf *mbuf;
303 
304 	/* Release cmd_ring descriptor and free mbuf */
305 	RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);
306 
307 	mbuf = txq->cmd_ring.buf_info[eop_idx].m;
308 	if (mbuf == NULL)
309 		rte_panic("EOP desc does not point to a valid mbuf");
310 	rte_pktmbuf_free(mbuf);
311 
312 	txq->cmd_ring.buf_info[eop_idx].m = NULL;
313 
314 	while (txq->cmd_ring.next2comp != eop_idx) {
315 		/* no out-of-order completion */
316 		RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
317 		vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
318 		completed++;
319 	}
320 
321 	/* Mark the txd for which tcd was generated as completed */
322 	vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
323 
324 	return completed + 1;
325 }
326 
327 static void
328 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
329 {
330 	int completed = 0;
331 	vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
332 	struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
333 		(comp_ring->base + comp_ring->next2proc);
334 
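	/*
	 * A completion descriptor is valid only while its generation bit
	 * matches the ring's current generation; the bit is toggled when the
	 * ring wraps, so stale entries are never processed twice.
	 */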
335 	while (tcd->gen == comp_ring->gen) {
336 		completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);
337 
338 		vmxnet3_comp_ring_adv_next2proc(comp_ring);
339 		tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
340 						    comp_ring->next2proc);
341 	}
342 
343 	PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
344 }
345 
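/*
 * Tx prepare stage: validate the segment count and offload flags of each
 * packet and run rte_net_intel_cksum_prepare() to fix up checksum fields
 * before the burst is handed to vmxnet3_xmit_pkts(). On failure rte_errno
 * is set and the number of packets that passed the checks is returned.
 */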
346 uint16_t
347 vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
348 	uint16_t nb_pkts)
349 {
350 	int32_t ret;
351 	uint32_t i;
352 	uint64_t ol_flags;
353 	struct rte_mbuf *m;
354 
355 	for (i = 0; i != nb_pkts; i++) {
356 		m = tx_pkts[i];
357 		ol_flags = m->ol_flags;
358 
359 		/* Non-TSO packet cannot occupy more than
360 		 * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
361 		 */
362 		if ((ol_flags & PKT_TX_TCP_SEG) == 0 &&
363 				m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {
364 			rte_errno = EINVAL;
365 			return i;
366 		}
367 
368 		/* check that only supported TX offloads are requested. */
369 		if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
370 				(ol_flags & PKT_TX_L4_MASK) ==
371 				PKT_TX_SCTP_CKSUM) {
372 			rte_errno = ENOTSUP;
373 			return i;
374 		}
375 
376 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
377 		ret = rte_validate_tx_offload(m);
378 		if (ret != 0) {
379 			rte_errno = -ret;
380 			return i;
381 		}
382 #endif
383 		ret = rte_net_intel_cksum_prepare(m);
384 		if (ret != 0) {
385 			rte_errno = -ret;
386 			return i;
387 		}
388 	}
389 
390 	return i;
391 }
392 
393 uint16_t
394 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
395 		  uint16_t nb_pkts)
396 {
397 	uint16_t nb_tx;
398 	vmxnet3_tx_queue_t *txq = tx_queue;
399 	struct vmxnet3_hw *hw = txq->hw;
400 	Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
401 	uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);
402 
403 	if (unlikely(txq->stopped)) {
404 		PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
405 		return 0;
406 	}
407 
408 	/* Free up the comp_descriptors aggressively */
409 	vmxnet3_tq_tx_complete(txq);
410 
411 	nb_tx = 0;
412 	while (nb_tx < nb_pkts) {
413 		Vmxnet3_GenericDesc *gdesc;
414 		vmxnet3_buf_info_t *tbi;
415 		uint32_t first2fill, avail, dw2;
416 		struct rte_mbuf *txm = tx_pkts[nb_tx];
417 		struct rte_mbuf *m_seg = txm;
418 		int copy_size = 0;
419 		bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
420 		/* # of descriptors needed for a packet. */
421 		unsigned count = txm->nb_segs;
422 
423 		avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
424 		if (count > avail) {
425 			/* Is command ring full? */
426 			if (unlikely(avail == 0)) {
427 				PMD_TX_LOG(DEBUG, "No free ring descriptors");
428 				txq->stats.tx_ring_full++;
429 				txq->stats.drop_total += (nb_pkts - nb_tx);
430 				break;
431 			}
432 
433 			/* Command ring is not full but cannot handle the
434 			 * multi-segmented packet. Let's try the next packet
435 			 * in this case.
436 			 */
437 			PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
438 				   "(avail %d needed %d)", avail, count);
439 			txq->stats.drop_total++;
440 			if (tso)
441 				txq->stats.drop_tso++;
442 			rte_pktmbuf_free(txm);
443 			nb_tx++;
444 			continue;
445 		}
446 
447 		/* Drop non-TSO packet that is excessively fragmented */
448 		if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
449 			PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
450 				   "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
451 			txq->stats.drop_too_many_segs++;
452 			txq->stats.drop_total++;
453 			rte_pktmbuf_free(txm);
454 			nb_tx++;
455 			continue;
456 		}
457 
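		/*
		 * Single-segment packets that fit in a Tx data ring slot are
		 * copied into the data ring; the descriptor then points at
		 * the data ring slot rather than the mbuf, presumably so the
		 * backend can fetch small frames without touching the mbuf
		 * buffer.
		 */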
458 		if (txm->nb_segs == 1 &&
459 		    rte_pktmbuf_pkt_len(txm) <= txq->txdata_desc_size) {
460 			struct Vmxnet3_TxDataDesc *tdd;
461 
462 			/* Skip empty packets */
463 			if (unlikely(rte_pktmbuf_pkt_len(txm) == 0)) {
464 				txq->stats.drop_total++;
465 				rte_pktmbuf_free(txm);
466 				nb_tx++;
467 				continue;
468 			}
469 
470 			tdd = (struct Vmxnet3_TxDataDesc *)
471 				((uint8 *)txq->data_ring.base +
472 				 txq->cmd_ring.next2fill *
473 				 txq->txdata_desc_size);
474 			copy_size = rte_pktmbuf_pkt_len(txm);
475 			rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
476 		}
477 
478 		/* use the previous gen bit for the SOP desc */
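		/*
		 * Descriptor ownership is handed over via the generation bit:
		 * the SOP descriptor is written with the inverted gen so the
		 * device ignores the packet until every segment is filled in;
		 * the bit is flipped after the compiler barrier below.
		 */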
479 		dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
480 		first2fill = txq->cmd_ring.next2fill;
481 		do {
482 			/* Remember the transmit buffer for cleanup */
483 			tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
484 
485 			/* NB: the following assumes that VMXNET3 maximum
486 			 * transmit buffer size (16K) is greater than
487 			 * the maximum mbuf segment size.
488 			 */
489 			gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
490 
491 			/* Skip empty segments */
492 			if (unlikely(m_seg->data_len == 0))
493 				continue;
494 
495 			if (copy_size) {
496 				uint64 offset =
497 					(uint64)txq->cmd_ring.next2fill *
498 							txq->txdata_desc_size;
499 				gdesc->txd.addr =
500 					rte_cpu_to_le_64(txq->data_ring.basePA +
501 							 offset);
502 			} else {
503 				gdesc->txd.addr = rte_mbuf_data_iova(m_seg);
504 			}
505 
506 			gdesc->dword[2] = dw2 | m_seg->data_len;
507 			gdesc->dword[3] = 0;
508 
509 			/* move to the next2fill descriptor */
510 			vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
511 
512 			/* use the right gen for non-SOP desc */
513 			dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
514 		} while ((m_seg = m_seg->next) != NULL);
515 
516 		/* set the last buf_info for the pkt */
517 		tbi->m = txm;
518 		/* Update the EOP descriptor */
519 		gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
520 
521 		/* Add VLAN tag if present */
522 		gdesc = txq->cmd_ring.base + first2fill;
523 		if (txm->ol_flags & PKT_TX_VLAN_PKT) {
524 			gdesc->txd.ti = 1;
525 			gdesc->txd.tci = txm->vlan_tci;
526 		}
527 
528 		if (tso) {
529 			uint16_t mss = txm->tso_segsz;
530 
531 			RTE_ASSERT(mss > 0);
532 
533 			gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
534 			gdesc->txd.om = VMXNET3_OM_TSO;
535 			gdesc->txd.msscof = mss;
536 
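			/*
			 * The deferred count is incremented by the number of
			 * TSO segments, i.e. ceil((pkt_len - hlen) / mss).
			 * For example, a 9014-byte frame with 54 bytes of
			 * headers and an MSS of 1460 adds 7.
			 */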
537 			deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
538 		} else if (txm->ol_flags & PKT_TX_L4_MASK) {
539 			gdesc->txd.om = VMXNET3_OM_CSUM;
540 			gdesc->txd.hlen = txm->l2_len + txm->l3_len;
541 
542 			switch (txm->ol_flags & PKT_TX_L4_MASK) {
543 			case PKT_TX_TCP_CKSUM:
544 				gdesc->txd.msscof = gdesc->txd.hlen +
545 					offsetof(struct rte_tcp_hdr, cksum);
546 				break;
547 			case PKT_TX_UDP_CKSUM:
548 				gdesc->txd.msscof = gdesc->txd.hlen +
549 					offsetof(struct rte_udp_hdr,
550 						dgram_cksum);
551 				break;
552 			default:
553 				PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
554 					   txm->ol_flags & PKT_TX_L4_MASK);
555 				abort();
556 			}
557 			deferred++;
558 		} else {
559 			gdesc->txd.hlen = 0;
560 			gdesc->txd.om = VMXNET3_OM_NONE;
561 			gdesc->txd.msscof = 0;
562 			deferred++;
563 		}
564 
565 		/* flip the GEN bit on the SOP */
566 		rte_compiler_barrier();
567 		gdesc->dword[2] ^= VMXNET3_TXD_GEN;
568 
569 		txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
570 		nb_tx++;
571 	}
572 
573 	PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));
574 
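	/*
	 * Doorbell writes are batched: txNumDeferred accumulates work queued
	 * since the last notification, and the producer register is written
	 * only once txNumDeferred reaches the txThreshold advertised in the
	 * shared queue control, limiting the number of BAR0 accesses.
	 */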
575 	if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
576 		txq_ctrl->txNumDeferred = 0;
577 		/* Notify vSwitch that packets are available. */
578 		VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
579 				       txq->cmd_ring.next2fill);
580 	}
581 
582 	return nb_tx;
583 }
584 
585 static inline void
586 vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
587 		   struct rte_mbuf *mbuf)
588 {
589 	uint32_t val;
590 	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
591 	struct Vmxnet3_RxDesc *rxd =
592 		(struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
593 	vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
594 
595 	if (ring_id == 0) {
596 		/* Usually: One HEAD type buf per packet
597 		 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
598 		 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
599 		 */
600 
601 		/* We use single packet buffer so all heads here */
602 		val = VMXNET3_RXD_BTYPE_HEAD;
603 	} else {
604 		/* All BODY type buffers for 2nd ring */
605 		val = VMXNET3_RXD_BTYPE_BODY;
606 	}
607 
608 	/*
609 	 * Load the mbuf pointer into buf_info[next2fill];
610 	 * the buf_info entry is the equivalent of a virtio-virtqueue cookie
611 	 */
612 	buf_info->m = mbuf;
613 	buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
614 	buf_info->bufPA = rte_mbuf_data_iova_default(mbuf);
615 
616 	/* Load Rx Descriptor with the buffer's GPA */
617 	rxd->addr = buf_info->bufPA;
618 
619 	/* After this point rxd->addr MUST not be NULL */
620 	rxd->btype = val;
621 	rxd->len = buf_info->len;
622 	/* Flip gen bit at the end to change ownership */
623 	rxd->gen = ring->gen;
624 
625 	vmxnet3_cmd_ring_adv_next2fill(ring);
626 }
627 /*
628  *  Allocates mbufs and posts Rx descriptors with the buffer details
629  *  so that the device can receive packets into those buffers.
630  *  Ring layout:
631  *      Of the two rings, the 1st ring contains buffers of type 0 and type 1.
632  *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
633  *      by a frame fit in the 1st ring (1st buf of type 0 and the rest of type 1).
634  *      The 2nd ring contains buffers of type 1 alone and is mostly used
635  *      only for LRO.
636  */
637 static int
638 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
639 {
640 	int err = 0;
641 	uint32_t i = 0;
642 	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
643 
644 	while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
645 		struct rte_mbuf *mbuf;
646 
647 		/* Allocate blank mbuf for the current Rx Descriptor */
648 		mbuf = rte_mbuf_raw_alloc(rxq->mp);
649 		if (unlikely(mbuf == NULL)) {
650 			PMD_RX_LOG(ERR, "Error allocating mbuf");
651 			rxq->stats.rx_buf_alloc_failure++;
652 			err = ENOMEM;
653 			break;
654 		}
655 
656 		vmxnet3_renew_desc(rxq, ring_id, mbuf);
657 		i++;
658 	}
659 
660 	/* Return error only if no buffers are posted at present */
661 	if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
662 		return -err;
663 	else
664 		return i;
665 }
666 
667 /* MSS not provided by vmxnet3, guess one with available information */
668 /* MSS is not provided by vmxnet3; guess one from the available information */
669 vmxnet3_guess_mss(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
670 		struct rte_mbuf *rxm)
671 {
672 	uint32_t hlen, slen;
673 	struct rte_ipv4_hdr *ipv4_hdr;
674 	struct rte_ipv6_hdr *ipv6_hdr;
675 	struct rte_tcp_hdr *tcp_hdr;
676 	char *ptr;
677 	uint8_t segs;
678 
679 	RTE_ASSERT(rcd->tcp);
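	/*
	 * Walk the Ethernet/IP/TCP headers of the first segment to find the
	 * payload offset. If the LRO completion carried a segment count, the
	 * MSS is ceil(payload / segs); otherwise fall back to an MTU-based
	 * estimate.
	 */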
680 
681 	ptr = rte_pktmbuf_mtod(rxm, char *);
682 	slen = rte_pktmbuf_data_len(rxm);
683 	hlen = sizeof(struct rte_ether_hdr);
684 
685 	if (rcd->v4) {
686 		if (unlikely(slen < hlen + sizeof(struct rte_ipv4_hdr)))
687 			return hw->mtu - sizeof(struct rte_ipv4_hdr)
688 					- sizeof(struct rte_tcp_hdr);
689 
690 		ipv4_hdr = (struct rte_ipv4_hdr *)(ptr + hlen);
691 		hlen += rte_ipv4_hdr_len(ipv4_hdr);
692 	} else if (rcd->v6) {
693 		if (unlikely(slen < hlen + sizeof(struct rte_ipv6_hdr)))
694 			return hw->mtu - sizeof(struct rte_ipv6_hdr) -
695 					sizeof(struct rte_tcp_hdr);
696 
697 		ipv6_hdr = (struct rte_ipv6_hdr *)(ptr + hlen);
698 		hlen += sizeof(struct rte_ipv6_hdr);
699 		if (unlikely(ipv6_hdr->proto != IPPROTO_TCP)) {
700 			int frag;
701 
702 			rte_net_skip_ip6_ext(ipv6_hdr->proto, rxm,
703 					&hlen, &frag);
704 		}
705 	}
706 
707 	if (unlikely(slen < hlen + sizeof(struct rte_tcp_hdr)))
708 		return hw->mtu - hlen - sizeof(struct rte_tcp_hdr) +
709 				sizeof(struct rte_ether_hdr);
710 
711 	tcp_hdr = (struct rte_tcp_hdr *)(ptr + hlen);
712 	hlen += (tcp_hdr->data_off & 0xf0) >> 2;
713 
714 	segs = *vmxnet3_segs_dynfield(rxm);
715 	if (segs > 1)
716 		return (rte_pktmbuf_pkt_len(rxm) - hlen + segs - 1) / segs;
717 	else
718 		return hw->mtu - hlen + sizeof(struct rte_ether_hdr);
719 }
720 
721 /* Receive side checksum and other offloads */
722 static inline void
723 vmxnet3_rx_offload(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
724 		struct rte_mbuf *rxm, const uint8_t sop)
725 {
726 	uint64_t ol_flags = rxm->ol_flags;
727 	uint32_t packet_type = rxm->packet_type;
728 
729 	/* Offloads set in sop */
730 	if (sop) {
731 		/* Set packet type */
732 		packet_type |= RTE_PTYPE_L2_ETHER;
733 
734 		/* Check large packet receive */
735 		if (VMXNET3_VERSION_GE_2(hw) &&
736 		    rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) {
737 			const Vmxnet3_RxCompDescExt *rcde =
738 					(const Vmxnet3_RxCompDescExt *)rcd;
739 
740 			rxm->tso_segsz = rcde->mss;
741 			*vmxnet3_segs_dynfield(rxm) = rcde->segCnt;
742 			ol_flags |= PKT_RX_LRO;
743 		}
744 	} else { /* Offloads set in eop */
745 		/* Check for RSS */
746 		if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
747 			ol_flags |= PKT_RX_RSS_HASH;
748 			rxm->hash.rss = rcd->rssHash;
749 		}
750 
751 		/* Check for hardware stripped VLAN tag */
752 		if (rcd->ts) {
753 			ol_flags |= (PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED);
754 			rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
755 		}
756 
757 		/* Check packet type, checksum errors, etc. */
758 		if (rcd->cnc) {
759 			ol_flags |= PKT_RX_L4_CKSUM_UNKNOWN;
760 		} else {
761 			if (rcd->v4) {
762 				packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
763 
764 				if (rcd->ipc)
765 					ol_flags |= PKT_RX_IP_CKSUM_GOOD;
766 				else
767 					ol_flags |= PKT_RX_IP_CKSUM_BAD;
768 
769 				if (rcd->tuc) {
770 					ol_flags |= PKT_RX_L4_CKSUM_GOOD;
771 					if (rcd->tcp)
772 						packet_type |= RTE_PTYPE_L4_TCP;
773 					else
774 						packet_type |= RTE_PTYPE_L4_UDP;
775 				} else {
776 					if (rcd->tcp) {
777 						packet_type |= RTE_PTYPE_L4_TCP;
778 						ol_flags |= PKT_RX_L4_CKSUM_BAD;
779 					} else if (rcd->udp) {
780 						packet_type |= RTE_PTYPE_L4_UDP;
781 						ol_flags |= PKT_RX_L4_CKSUM_BAD;
782 					}
783 				}
784 			} else if (rcd->v6) {
785 				packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
786 
787 				if (rcd->tuc) {
788 					ol_flags |= PKT_RX_L4_CKSUM_GOOD;
789 					if (rcd->tcp)
790 						packet_type |= RTE_PTYPE_L4_TCP;
791 					else
792 						packet_type |= RTE_PTYPE_L4_UDP;
793 				} else {
794 					if (rcd->tcp) {
795 						packet_type |= RTE_PTYPE_L4_TCP;
796 						ol_flags |= PKT_RX_L4_CKSUM_BAD;
797 					} else if (rcd->udp) {
798 						packet_type |= RTE_PTYPE_L4_UDP;
799 						ol_flags |= PKT_RX_L4_CKSUM_BAD;
800 					}
801 				}
802 			} else {
803 				packet_type |= RTE_PTYPE_UNKNOWN;
804 			}
805 
806 			/* Old variants of vmxnet3 do not provide MSS */
807 			if ((ol_flags & PKT_RX_LRO) && rxm->tso_segsz == 0)
808 				rxm->tso_segsz = vmxnet3_guess_mss(hw,
809 						rcd, rxm);
810 		}
811 	}
812 
813 	rxm->ol_flags = ol_flags;
814 	rxm->packet_type = packet_type;
815 }
816 
817 /*
818  * Process the Rx Completion Ring of the given vmxnet3_rx_queue
819  * for an nb_pkts burst and return the number of packets received
820  */
821 uint16_t
822 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
823 {
824 	uint16_t nb_rx;
825 	uint32_t nb_rxd, idx;
826 	uint8_t ring_idx;
827 	vmxnet3_rx_queue_t *rxq;
828 	Vmxnet3_RxCompDesc *rcd;
829 	vmxnet3_buf_info_t *rbi;
830 	Vmxnet3_RxDesc *rxd;
831 	struct rte_mbuf *rxm = NULL;
832 	struct vmxnet3_hw *hw;
833 
834 	nb_rx = 0;
835 	ring_idx = 0;
836 	nb_rxd = 0;
837 	idx = 0;
838 
839 	rxq = rx_queue;
840 	hw = rxq->hw;
841 
842 	rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
843 
844 	if (unlikely(rxq->stopped)) {
845 		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
846 		return 0;
847 	}
848 
849 	while (rcd->gen == rxq->comp_ring.gen) {
850 		struct rte_mbuf *newm;
851 
852 		if (nb_rx >= nb_pkts)
853 			break;
854 
855 		newm = rte_mbuf_raw_alloc(rxq->mp);
856 		if (unlikely(newm == NULL)) {
857 			PMD_RX_LOG(ERR, "Error allocating mbuf");
858 			rxq->stats.rx_buf_alloc_failure++;
859 			break;
860 		}
861 
862 		idx = rcd->rxdIdx;
863 		ring_idx = vmxnet3_get_ring_idx(hw, rcd->rqID);
864 		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
865 		RTE_SET_USED(rxd); /* used only for assert when enabled */
866 		rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
867 
868 		PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
869 
870 		RTE_ASSERT(rcd->len <= rxd->len);
871 		RTE_ASSERT(rbi->m);
872 
873 		/* Get the packet buffer pointer from buf_info */
874 		rxm = rbi->m;
875 
876 		/* Clear descriptor associated buf_info to be reused */
877 		rbi->m = NULL;
878 		rbi->bufPA = 0;
879 
880 		/* Record the index at which the packet was received */
881 		rxq->cmd_ring[ring_idx].next2comp = idx;
882 
883 		/* For RCD with EOP set, check if there is frame error */
884 		if (unlikely(rcd->eop && rcd->err)) {
885 			rxq->stats.drop_total++;
886 			rxq->stats.drop_err++;
887 
888 			if (!rcd->fcs) {
889 				rxq->stats.drop_fcs++;
890 				PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
891 			}
892 			PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
893 				   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
894 					 rxq->comp_ring.base), rcd->rxdIdx);
895 			rte_pktmbuf_free_seg(rxm);
896 			if (rxq->start_seg) {
897 				struct rte_mbuf *start = rxq->start_seg;
898 
899 				rxq->start_seg = NULL;
900 				rte_pktmbuf_free(start);
901 			}
902 			goto rcd_done;
903 		}
904 
905 		/* Initialize newly received packet buffer */
906 		rxm->port = rxq->port_id;
907 		rxm->nb_segs = 1;
908 		rxm->next = NULL;
909 		rxm->pkt_len = (uint16_t)rcd->len;
910 		rxm->data_len = (uint16_t)rcd->len;
911 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
912 		rxm->ol_flags = 0;
913 		rxm->vlan_tci = 0;
914 		rxm->packet_type = 0;
915 
916 		/*
917 		 * If this is the first buffer of the received packet,
918 		 * set the pointer to the first mbuf of the packet
919 		 * Otherwise, update the total length and the number of segments
920 		 * of the current scattered packet, and update the pointer to
921 		 * the last mbuf of the current packet.
922 		 */
923 		if (rcd->sop) {
924 			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
925 
926 			if (unlikely(rcd->len == 0)) {
927 				RTE_ASSERT(rcd->eop);
928 
929 				PMD_RX_LOG(DEBUG,
930 					   "Rx buf was skipped. rxring[%d][%d]",
931 					   ring_idx, idx);
932 				rte_pktmbuf_free_seg(rxm);
933 				goto rcd_done;
934 			}
935 
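			/*
			 * vmxnet3 v3+ may deliver a frame's data through the
			 * per-queue Rx data ring; in that case copy it from
			 * the data ring slot into the mbuf associated with
			 * the descriptor.
			 */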
936 			if (vmxnet3_rx_data_ring(hw, rcd->rqID)) {
937 				uint8_t *rdd = rxq->data_ring.base +
938 					idx * rxq->data_desc_size;
939 
940 				RTE_ASSERT(VMXNET3_VERSION_GE_3(hw));
941 				rte_memcpy(rte_pktmbuf_mtod(rxm, char *),
942 					   rdd, rcd->len);
943 			}
944 
945 			rxq->start_seg = rxm;
946 			rxq->last_seg = rxm;
947 			vmxnet3_rx_offload(hw, rcd, rxm, 1);
948 		} else {
949 			struct rte_mbuf *start = rxq->start_seg;
950 
951 			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);
952 
953 			if (likely(start && rxm->data_len > 0)) {
954 				start->pkt_len += rxm->data_len;
955 				start->nb_segs++;
956 
957 				rxq->last_seg->next = rxm;
958 				rxq->last_seg = rxm;
959 			} else {
960 				PMD_RX_LOG(ERR, "Error received empty or out of order frame.");
961 				rxq->stats.drop_total++;
962 				rxq->stats.drop_err++;
963 
964 				rte_pktmbuf_free_seg(rxm);
965 			}
966 		}
967 
968 		if (rcd->eop) {
969 			struct rte_mbuf *start = rxq->start_seg;
970 
971 			vmxnet3_rx_offload(hw, rcd, start, 0);
972 			rx_pkts[nb_rx++] = start;
973 			rxq->start_seg = NULL;
974 		}
975 
976 rcd_done:
977 		rxq->cmd_ring[ring_idx].next2comp = idx;
978 		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
979 					  rxq->cmd_ring[ring_idx].size);
980 
981 		/* It's time to renew descriptors */
982 		vmxnet3_renew_desc(rxq, ring_idx, newm);
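		/*
		 * Write the new fill index to the Rx producer register only
		 * when the device has requested producer updates via
		 * updateRxProd.
		 */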
983 		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
984 			VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
985 					       rxq->cmd_ring[ring_idx].next2fill);
986 		}
987 
988 		/* Advance to the next descriptor in comp_ring */
989 		vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
990 
991 		rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
992 		nb_rxd++;
993 		if (nb_rxd > rxq->cmd_ring[0].size) {
994 			PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
995 				   " relinquish control.");
996 			break;
997 		}
998 	}
999 
1000 	if (unlikely(nb_rxd == 0)) {
1001 		uint32_t avail;
1002 		for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
1003 			avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[ring_idx]);
1004 			if (unlikely(avail > 0)) {
1005 				/* try to alloc new buf and renew descriptors */
1006 				vmxnet3_post_rx_bufs(rxq, ring_idx);
1007 			}
1008 		}
1009 		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1010 			for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
1011 				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1012 						       rxq->cmd_ring[ring_idx].next2fill);
1013 			}
1014 		}
1015 	}
1016 
1017 	return nb_rx;
1018 }
1019 
1020 int
1021 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
1022 			   uint16_t queue_idx,
1023 			   uint16_t nb_desc,
1024 			   unsigned int socket_id,
1025 			   const struct rte_eth_txconf *tx_conf __rte_unused)
1026 {
1027 	struct vmxnet3_hw *hw = dev->data->dev_private;
1028 	const struct rte_memzone *mz;
1029 	struct vmxnet3_tx_queue *txq;
1030 	struct vmxnet3_cmd_ring *ring;
1031 	struct vmxnet3_comp_ring *comp_ring;
1032 	struct vmxnet3_data_ring *data_ring;
1033 	int size;
1034 
1035 	PMD_INIT_FUNC_TRACE();
1036 
1037 	txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
1038 			  RTE_CACHE_LINE_SIZE);
1039 	if (txq == NULL) {
1040 		PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
1041 		return -ENOMEM;
1042 	}
1043 
1044 	txq->queue_id = queue_idx;
1045 	txq->port_id = dev->data->port_id;
1046 	txq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1047 	txq->hw = hw;
1048 	txq->qid = queue_idx;
1049 	txq->stopped = TRUE;
1050 	txq->txdata_desc_size = hw->txdata_desc_size;
1051 
1052 	ring = &txq->cmd_ring;
1053 	comp_ring = &txq->comp_ring;
1054 	data_ring = &txq->data_ring;
1055 
1056 	/* Tx vmxnet ring length should be between 512 and 4096 */
1057 	if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
1058 		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
1059 			     VMXNET3_DEF_TX_RING_SIZE);
1060 		return -EINVAL;
1061 	} else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
1062 		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
1063 			     VMXNET3_TX_RING_MAX_SIZE);
1064 		return -EINVAL;
1065 	} else {
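		/*
		 * Round the requested descriptor count down to the ring-size
		 * alignment required by the device.
		 */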
1066 		ring->size = nb_desc;
1067 		ring->size &= ~VMXNET3_RING_SIZE_MASK;
1068 	}
1069 	comp_ring->size = data_ring->size = ring->size;
1070 
1071 	/* Tx vmxnet rings structure initialization */
1072 	ring->next2fill = 0;
1073 	ring->next2comp = 0;
1074 	ring->gen = VMXNET3_INIT_GEN;
1075 	comp_ring->next2proc = 0;
1076 	comp_ring->gen = VMXNET3_INIT_GEN;
1077 
1078 	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
1079 	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
1080 	size += txq->txdata_desc_size * data_ring->size;
1081 
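	/*
	 * One contiguous DMA zone holds the Tx descriptor ring, the
	 * completion ring and the data ring, laid out back to back; the
	 * base/basePA assignments below carve it up in that order.
	 */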
1082 	mz = rte_eth_dma_zone_reserve(dev, "txdesc", queue_idx, size,
1083 				      VMXNET3_RING_BA_ALIGN, socket_id);
1084 	if (mz == NULL) {
1085 		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1086 		return -ENOMEM;
1087 	}
1088 	txq->mz = mz;
1089 	memset(mz->addr, 0, mz->len);
1090 
1091 	/* cmd_ring initialization */
1092 	ring->base = mz->addr;
1093 	ring->basePA = mz->iova;
1094 
1095 	/* comp_ring initialization */
1096 	comp_ring->base = ring->base + ring->size;
1097 	comp_ring->basePA = ring->basePA +
1098 		(sizeof(struct Vmxnet3_TxDesc) * ring->size);
1099 
1100 	/* data_ring initialization */
1101 	data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
1102 	data_ring->basePA = comp_ring->basePA +
1103 			(sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
1104 
1105 	/* cmd_ring0 buf_info allocation */
1106 	ring->buf_info = rte_zmalloc("tx_ring_buf_info",
1107 				     ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
1108 	if (ring->buf_info == NULL) {
1109 		PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
1110 		return -ENOMEM;
1111 	}
1112 
1113 	/* Update the data portion with txq */
1114 	dev->data->tx_queues[queue_idx] = txq;
1115 
1116 	return 0;
1117 }
1118 
1119 int
1120 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
1121 			   uint16_t queue_idx,
1122 			   uint16_t nb_desc,
1123 			   unsigned int socket_id,
1124 			   __rte_unused const struct rte_eth_rxconf *rx_conf,
1125 			   struct rte_mempool *mp)
1126 {
1127 	const struct rte_memzone *mz;
1128 	struct vmxnet3_rx_queue *rxq;
1129 	struct vmxnet3_hw *hw = dev->data->dev_private;
1130 	struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
1131 	struct vmxnet3_comp_ring *comp_ring;
1132 	struct vmxnet3_rx_data_ring *data_ring;
1133 	int size;
1134 	uint8_t i;
1135 	char mem_name[32];
1136 
1137 	PMD_INIT_FUNC_TRACE();
1138 
1139 	rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
1140 			  RTE_CACHE_LINE_SIZE);
1141 	if (rxq == NULL) {
1142 		PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
1143 		return -ENOMEM;
1144 	}
1145 
1146 	rxq->mp = mp;
1147 	rxq->queue_id = queue_idx;
1148 	rxq->port_id = dev->data->port_id;
1149 	rxq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1150 	rxq->hw = hw;
1151 	rxq->qid1 = queue_idx;
1152 	rxq->qid2 = queue_idx + hw->num_rx_queues;
1153 	rxq->data_ring_qid = queue_idx + 2 * hw->num_rx_queues;
1154 	rxq->data_desc_size = hw->rxdata_desc_size;
1155 	rxq->stopped = TRUE;
1156 
1157 	ring0 = &rxq->cmd_ring[0];
1158 	ring1 = &rxq->cmd_ring[1];
1159 	comp_ring = &rxq->comp_ring;
1160 	data_ring = &rxq->data_ring;
1161 
1162 	/* Rx vmxnet ring length should be between 256 and 4096 */
1163 	if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
1164 		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
1165 		return -EINVAL;
1166 	} else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
1167 		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
1168 		return -EINVAL;
1169 	} else {
1170 		ring0->size = nb_desc;
1171 		ring0->size &= ~VMXNET3_RING_SIZE_MASK;
1172 		ring1->size = ring0->size;
1173 	}
1174 
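	/*
	 * The completion ring must cover both command rings; the Rx data
	 * ring size mirrors command ring 0 (the data ring itself is only
	 * laid out for v3+ devices with a data descriptor size configured).
	 */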
1175 	comp_ring->size = ring0->size + ring1->size;
1176 	data_ring->size = ring0->size;
1177 
1178 	/* Rx vmxnet rings structure initialization */
1179 	ring0->next2fill = 0;
1180 	ring1->next2fill = 0;
1181 	ring0->next2comp = 0;
1182 	ring1->next2comp = 0;
1183 	ring0->gen = VMXNET3_INIT_GEN;
1184 	ring1->gen = VMXNET3_INIT_GEN;
1185 	comp_ring->next2proc = 0;
1186 	comp_ring->gen = VMXNET3_INIT_GEN;
1187 
1188 	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
1189 	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1190 	if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size)
1191 		size += rxq->data_desc_size * data_ring->size;
1192 
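	/*
	 * As on the Tx side, a single DMA zone holds both Rx command rings,
	 * the completion ring and, for v3+ devices with a data descriptor
	 * size configured, the Rx data ring.
	 */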
1193 	mz = rte_eth_dma_zone_reserve(dev, "rxdesc", queue_idx, size,
1194 				      VMXNET3_RING_BA_ALIGN, socket_id);
1195 	if (mz == NULL) {
1196 		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1197 		return -ENOMEM;
1198 	}
1199 	rxq->mz = mz;
1200 	memset(mz->addr, 0, mz->len);
1201 
1202 	/* cmd_ring0 initialization */
1203 	ring0->base = mz->addr;
1204 	ring0->basePA = mz->iova;
1205 
1206 	/* cmd_ring1 initialization */
1207 	ring1->base = ring0->base + ring0->size;
1208 	ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
1209 
1210 	/* comp_ring initialization */
1211 	comp_ring->base = ring1->base + ring1->size;
1212 	comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
1213 		ring1->size;
1214 
1215 	/* data_ring initialization */
1216 	if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size) {
1217 		data_ring->base =
1218 			(uint8_t *)(comp_ring->base + comp_ring->size);
1219 		data_ring->basePA = comp_ring->basePA +
1220 			sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1221 	}
1222 
1223 	/* cmd_ring0-cmd_ring1 buf_info allocation */
1224 	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
1225 
1226 		ring = &rxq->cmd_ring[i];
1227 		ring->rid = i;
1228 		snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
1229 
1230 		ring->buf_info = rte_zmalloc(mem_name,
1231 					     ring->size * sizeof(vmxnet3_buf_info_t),
1232 					     RTE_CACHE_LINE_SIZE);
1233 		if (ring->buf_info == NULL) {
1234 			PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
1235 			return -ENOMEM;
1236 		}
1237 	}
1238 
1239 	/* Update the data portion with rxq */
1240 	dev->data->rx_queues[queue_idx] = rxq;
1241 
1242 	return 0;
1243 }
1244 
1245 /*
1246  * Initializes the Receive Unit
1247  * Loads mbufs into the Rx queues in advance
1248  */
1249 int
1250 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
1251 {
1252 	struct vmxnet3_hw *hw = dev->data->dev_private;
1253 
1254 	int i, ret;
1255 	uint8_t j;
1256 
1257 	PMD_INIT_FUNC_TRACE();
1258 
1259 	for (i = 0; i < hw->num_rx_queues; i++) {
1260 		vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
1261 
1262 		for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
1263 			/* Post buffers until the command ring is full */
1264 			ret = vmxnet3_post_rx_bufs(rxq, j);
1265 			if (ret <= 0) {
1266 				PMD_INIT_LOG(ERR,
1267 					     "ERROR: Posting Rxq: %d buffers ring: %d",
1268 					     i, j);
1269 				return -ret;
1270 			}
1271 			/*
1272 			 * Update the device with the next2fill index so it has
1273 			 * mbufs ready for incoming packets.
1274 			 */
1275 			if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1276 				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1277 						       rxq->cmd_ring[j].next2fill);
1278 			}
1279 		}
1280 		rxq->stopped = FALSE;
1281 		rxq->start_seg = NULL;
1282 	}
1283 
1284 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
1285 		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
1286 
1287 		txq->stopped = FALSE;
1288 	}
1289 
1290 	return 0;
1291 }
1292 
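/*
 * Default 40-byte Toeplitz RSS hash key, used by vmxnet3_rss_configure()
 * when the application does not supply its own key.
 */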
1293 static uint8_t rss_intel_key[40] = {
1294 	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1295 	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1296 	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1297 	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1298 	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1299 };
1300 
1301 /*
1302  * Additional RSS configurations based on vmxnet v4+ APIs
1303  */
1304 int
1305 vmxnet3_v4_rss_configure(struct rte_eth_dev *dev)
1306 {
1307 	struct vmxnet3_hw *hw = dev->data->dev_private;
1308 	Vmxnet3_DriverShared *shared = hw->shared;
1309 	Vmxnet3_CmdInfo *cmdInfo = &shared->cu.cmdInfo;
1310 	struct rte_eth_rss_conf *port_rss_conf;
1311 	uint64_t rss_hf;
1312 	uint32_t ret;
1313 
1314 	PMD_INIT_FUNC_TRACE();
1315 
1316 	cmdInfo->setRSSFields = 0;
1317 	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1318 
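	/*
	 * The device requires the IPv4/IPv6 TCP hash types for v4 RSS
	 * (VMXNET3_MANDATORY_V4_RSS), so they are forced on here if the
	 * application left them out.
	 */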
1319 	if ((port_rss_conf->rss_hf & VMXNET3_MANDATORY_V4_RSS) !=
1320 	    VMXNET3_MANDATORY_V4_RSS) {
1321 		PMD_INIT_LOG(WARNING, "RSS: IPv4/6 TCP is required for vmxnet3 v4 RSS, "
1322 			     "automatically setting it");
1323 		port_rss_conf->rss_hf |= VMXNET3_MANDATORY_V4_RSS;
1324 	}
1325 
1326 	rss_hf = port_rss_conf->rss_hf &
1327 		(VMXNET3_V4_RSS_MASK | VMXNET3_RSS_OFFLOAD_ALL);
1328 
1329 	if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1330 		cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_TCPIP4;
1331 	if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1332 		cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_TCPIP6;
1333 	if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
1334 		cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_UDPIP4;
1335 	if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
1336 		cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_UDPIP6;
1337 
1338 	VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD,
1339 			       VMXNET3_CMD_SET_RSS_FIELDS);
1340 	ret = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD);
1341 
1342 	if (ret != VMXNET3_SUCCESS) {
1343 		PMD_DRV_LOG(ERR, "Set RSS fields (v4) failed: %d", ret);
1344 	}
1345 
1346 	return ret;
1347 }
1348 
1349 /*
1350  * Configure RSS feature
1351  */
1352 int
1353 vmxnet3_rss_configure(struct rte_eth_dev *dev)
1354 {
1355 	struct vmxnet3_hw *hw = dev->data->dev_private;
1356 	struct VMXNET3_RSSConf *dev_rss_conf;
1357 	struct rte_eth_rss_conf *port_rss_conf;
1358 	uint64_t rss_hf;
1359 	uint8_t i, j;
1360 
1361 	PMD_INIT_FUNC_TRACE();
1362 
1363 	dev_rss_conf = hw->rss_conf;
1364 	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1365 
1366 	/* loading hashFunc */
1367 	dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
1368 	/* loading hashKeySize */
1369 	dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
1370 	/* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
1371 	dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
1372 
1373 	if (port_rss_conf->rss_key == NULL) {
1374 		/* Default hash key */
1375 		port_rss_conf->rss_key = rss_intel_key;
1376 	}
1377 
1378 	/* loading hashKey */
1379 	memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
1380 	       dev_rss_conf->hashKeySize);
1381 
1382 	/* loading indTable */
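	/*
	 * Fill the indirection table round-robin over the configured Rx
	 * queues, e.g. with 4 queues and a 16-entry table the result is
	 * 0,1,2,3,0,1,2,3,...
	 */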
1383 	for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1384 		if (j == dev->data->nb_rx_queues)
1385 			j = 0;
1386 		dev_rss_conf->indTable[i] = j;
1387 	}
1388 
1389 	/* loading hashType */
1390 	dev_rss_conf->hashType = 0;
1391 	rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1392 	if (rss_hf & ETH_RSS_IPV4)
1393 		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1394 	if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1395 		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1396 	if (rss_hf & ETH_RSS_IPV6)
1397 		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1398 	if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1399 		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1400 
1401 	return VMXNET3_SUCCESS;
1402 }
1403