xref: /dpdk/drivers/net/vmxnet3/vmxnet3_rxtx.c (revision 0857b942113874c69dc3db5df11a828ee3cc9b6b)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include <sys/queue.h>
35 
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <errno.h>
40 #include <stdint.h>
41 #include <stdarg.h>
42 #include <unistd.h>
43 #include <inttypes.h>
44 
45 #include <rte_byteorder.h>
46 #include <rte_common.h>
47 #include <rte_cycles.h>
48 #include <rte_log.h>
49 #include <rte_debug.h>
50 #include <rte_interrupts.h>
51 #include <rte_pci.h>
52 #include <rte_memory.h>
53 #include <rte_memzone.h>
54 #include <rte_launch.h>
55 #include <rte_eal.h>
56 #include <rte_per_lcore.h>
57 #include <rte_lcore.h>
58 #include <rte_atomic.h>
59 #include <rte_branch_prediction.h>
60 #include <rte_mempool.h>
61 #include <rte_malloc.h>
62 #include <rte_mbuf.h>
63 #include <rte_ether.h>
64 #include <rte_ethdev.h>
65 #include <rte_prefetch.h>
66 #include <rte_ip.h>
67 #include <rte_udp.h>
68 #include <rte_tcp.h>
69 #include <rte_sctp.h>
70 #include <rte_string_fns.h>
71 #include <rte_errno.h>
72 #include <rte_net.h>
73 
74 #include "base/vmxnet3_defs.h"
75 #include "vmxnet3_ring.h"
76 
77 #include "vmxnet3_logs.h"
78 #include "vmxnet3_ethdev.h"
79 
80 #define	VMXNET3_TX_OFFLOAD_MASK	( \
81 		PKT_TX_VLAN_PKT | \
82 		PKT_TX_L4_MASK |  \
83 		PKT_TX_TCP_SEG)
84 
85 #define	VMXNET3_TX_OFFLOAD_NOTSUP_MASK	\
86 	(PKT_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)
87 
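/*
 * Illustrative sketch (not part of the driver): the split above is what
 * vmxnet3_prep_pkts() enforces.  Assuming an application mbuf "m", a request
 * such as
 *
 *	m->ol_flags |= PKT_TX_VLAN_PKT | PKT_TX_TCP_CKSUM;
 *
 * stays within VMXNET3_TX_OFFLOAD_MASK, while flags outside the mask, and
 * SCTP checksum requests in particular, make the prepare step fail with
 * rte_errno set.
 */
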
88 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
89 
90 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
91 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
92 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
93 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
94 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
95 #endif
96 
97 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
98 static void
99 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
100 {
101 	uint32_t avail = 0;
102 
103 	if (rxq == NULL)
104 		return;
105 
106 	PMD_RX_LOG(DEBUG,
107 		   "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
108 		   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
109 	PMD_RX_LOG(DEBUG,
110 		   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
111 		   (unsigned long)rxq->cmd_ring[0].basePA,
112 		   (unsigned long)rxq->cmd_ring[1].basePA,
113 		   (unsigned long)rxq->comp_ring.basePA);
114 
115 	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
116 	PMD_RX_LOG(DEBUG,
117 		   "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
118 		   (uint32_t)rxq->cmd_ring[0].size, avail,
119 		   rxq->comp_ring.next2proc,
120 		   rxq->cmd_ring[0].size - avail);
121 
122 	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
123 	PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
124 		   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
125 		   rxq->cmd_ring[1].size - avail);
126 
127 }
128 
129 static void
130 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
131 {
132 	uint32_t avail = 0;
133 
134 	if (txq == NULL)
135 		return;
136 
137 	PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
138 		   txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
139 	PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
140 		   (unsigned long)txq->cmd_ring.basePA,
141 		   (unsigned long)txq->comp_ring.basePA,
142 		   (unsigned long)txq->data_ring.basePA);
143 
144 	avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
145 	PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
146 		   (uint32_t)txq->cmd_ring.size, avail,
147 		   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
148 }
149 #endif
150 
151 static void
152 vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
153 {
154 	while (ring->next2comp != ring->next2fill) {
155 		/* No need to worry about desc ownership, device is quiesced by now. */
156 		vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
157 
158 		if (buf_info->m) {
159 			rte_pktmbuf_free(buf_info->m);
160 			buf_info->m = NULL;
161 			buf_info->bufPA = 0;
162 			buf_info->len = 0;
163 		}
164 		vmxnet3_cmd_ring_adv_next2comp(ring);
165 	}
166 }
167 
168 static void
169 vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
170 {
171 	uint32_t i;
172 
173 	for (i = 0; i < ring->size; i++) {
174 		/* No need to worry about desc ownership, device is quiesced by now. */
175 		vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];
176 
177 		if (buf_info->m) {
178 			rte_pktmbuf_free_seg(buf_info->m);
179 			buf_info->m = NULL;
180 			buf_info->bufPA = 0;
181 			buf_info->len = 0;
182 		}
183 		vmxnet3_cmd_ring_adv_next2comp(ring);
184 	}
185 }
186 
187 static void
188 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
189 {
190 	rte_free(ring->buf_info);
191 	ring->buf_info = NULL;
192 }
193 
194 void
195 vmxnet3_dev_tx_queue_release(void *txq)
196 {
197 	vmxnet3_tx_queue_t *tq = txq;
198 
199 	if (tq != NULL) {
200 		/* Release mbufs */
201 		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
202 		/* Release the cmd_ring */
203 		vmxnet3_cmd_ring_release(&tq->cmd_ring);
204 		/* Release the memzone */
205 		rte_memzone_free(tq->mz);
206 	}
207 }
208 
209 void
210 vmxnet3_dev_rx_queue_release(void *rxq)
211 {
212 	int i;
213 	vmxnet3_rx_queue_t *rq = rxq;
214 
215 	if (rq != NULL) {
216 		/* Release mbufs */
217 		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
218 			vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
219 
220 		/* Release both the cmd_rings */
221 		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
222 			vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
223 
224 		/* Release the memzone */
225 		rte_memzone_free(rq->mz);
226 	}
227 }
228 
229 static void
230 vmxnet3_dev_tx_queue_reset(void *txq)
231 {
232 	vmxnet3_tx_queue_t *tq = txq;
233 	struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
234 	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
235 	struct vmxnet3_data_ring *data_ring = &tq->data_ring;
236 	int size;
237 
238 	if (tq != NULL) {
239 		/* Release the cmd_ring mbufs */
240 		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
241 	}
242 
243 	/* Tx vmxnet rings structure initialization */
244 	ring->next2fill = 0;
245 	ring->next2comp = 0;
246 	ring->gen = VMXNET3_INIT_GEN;
247 	comp_ring->next2proc = 0;
248 	comp_ring->gen = VMXNET3_INIT_GEN;
249 
250 	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
251 	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
252 	size += tq->txdata_desc_size * data_ring->size;
253 
254 	memset(ring->base, 0, size);
255 }
256 
257 static void
258 vmxnet3_dev_rx_queue_reset(void *rxq)
259 {
260 	int i;
261 	vmxnet3_rx_queue_t *rq = rxq;
262 	struct vmxnet3_hw *hw = rq->hw;
263 	struct vmxnet3_cmd_ring *ring0, *ring1;
264 	struct vmxnet3_comp_ring *comp_ring;
265 	struct vmxnet3_rx_data_ring *data_ring = &rq->data_ring;
266 	int size;
267 
268 	if (rq != NULL) {
269 		/* Release both the cmd_rings mbufs */
270 		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
271 			vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
272 	}
273 
274 	ring0 = &rq->cmd_ring[0];
275 	ring1 = &rq->cmd_ring[1];
276 	comp_ring = &rq->comp_ring;
277 
278 	/* Rx vmxnet rings structure initialization */
279 	ring0->next2fill = 0;
280 	ring1->next2fill = 0;
281 	ring0->next2comp = 0;
282 	ring1->next2comp = 0;
283 	ring0->gen = VMXNET3_INIT_GEN;
284 	ring1->gen = VMXNET3_INIT_GEN;
285 	comp_ring->next2proc = 0;
286 	comp_ring->gen = VMXNET3_INIT_GEN;
287 
288 	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
289 	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
290 	if (VMXNET3_VERSION_GE_3(hw) && rq->data_desc_size)
291 		size += rq->data_desc_size * data_ring->size;
292 
293 	memset(ring0->base, 0, size);
294 }
295 
296 void
297 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
298 {
299 	unsigned i;
300 
301 	PMD_INIT_FUNC_TRACE();
302 
303 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
304 		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
305 
306 		if (txq != NULL) {
307 			txq->stopped = TRUE;
308 			vmxnet3_dev_tx_queue_reset(txq);
309 		}
310 	}
311 
312 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
313 		struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
314 
315 		if (rxq != NULL) {
316 			rxq->stopped = TRUE;
317 			vmxnet3_dev_rx_queue_reset(rxq);
318 		}
319 	}
320 }
321 
322 static int
323 vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
324 {
325 	int completed = 0;
326 	struct rte_mbuf *mbuf;
327 
328 	/* Release cmd_ring descriptor and free mbuf */
329 	RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);
330 
331 	mbuf = txq->cmd_ring.buf_info[eop_idx].m;
332 	if (mbuf == NULL)
333 		rte_panic("EOP desc does not point to a valid mbuf");
334 	rte_pktmbuf_free(mbuf);
335 
336 	txq->cmd_ring.buf_info[eop_idx].m = NULL;
337 
338 	while (txq->cmd_ring.next2comp != eop_idx) {
339 		/* no out-of-order completion */
340 		RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
341 		vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
342 		completed++;
343 	}
344 
345 	/* Mark the txd for which tcd was generated as completed */
346 	vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
347 
348 	return completed + 1;
349 }
350 
351 static void
352 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
353 {
354 	int completed = 0;
355 	vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
356 	struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
357 		(comp_ring->base + comp_ring->next2proc);
358 
359 	while (tcd->gen == comp_ring->gen) {
360 		completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);
361 
362 		vmxnet3_comp_ring_adv_next2proc(comp_ring);
363 		tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
364 						    comp_ring->next2proc);
365 	}
366 
367 	PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
368 }
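
/*
 * Illustrative sketch (not part of the driver): the generation-bit handshake
 * consumed by vmxnet3_tq_tx_complete() above.  The device stamps every
 * completion descriptor with its current generation value; the driver keeps
 * processing while tcd->gen matches comp_ring->gen and toggles its own gen
 * each time next2proc wraps, e.g.:
 *
 *	pass 1: comp_ring->gen == 1, device writes gen 1  -> consumed
 *	wrap  : comp_ring->gen toggles to 0
 *	        stale entries still carry gen 1           -> loop stops
 *	pass 2: device writes new completions with gen 0  -> consumed again
 */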
369 
370 uint16_t
371 vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
372 	uint16_t nb_pkts)
373 {
374 	int32_t ret;
375 	uint32_t i;
376 	uint64_t ol_flags;
377 	struct rte_mbuf *m;
378 
379 	for (i = 0; i != nb_pkts; i++) {
380 		m = tx_pkts[i];
381 		ol_flags = m->ol_flags;
382 
383 		/* Non-TSO packet cannot occupy more than
384 		 * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
385 		 */
386 		if ((ol_flags & PKT_TX_TCP_SEG) == 0 &&
387 				m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {
388 			rte_errno = EINVAL;
389 			return i;
390 		}
391 
392 		/* check that only supported TX offloads are requested. */
393 		if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
394 				(ol_flags & PKT_TX_L4_MASK) ==
395 				PKT_TX_SCTP_CKSUM) {
396 			rte_errno = ENOTSUP;
397 			return i;
398 		}
399 
400 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
401 		ret = rte_validate_tx_offload(m);
402 		if (ret != 0) {
403 			rte_errno = -ret;
404 			return i;
405 		}
406 #endif
407 		ret = rte_net_intel_cksum_prepare(m);
408 		if (ret != 0) {
409 			rte_errno = -ret;
410 			return i;
411 		}
412 	}
413 
414 	return i;
415 }
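
/*
 * Illustrative sketch (not part of the driver): vmxnet3_prep_pkts() is the
 * callback behind rte_eth_tx_prepare().  An application would typically pair
 * it with the burst send; "port_id", "queue_id", "pkts" and "n" are assumed
 * names for this example.
 *
 *	uint16_t nb_prep = rte_eth_tx_prepare(port_id, queue_id, pkts, n);
 *	if (nb_prep < n)
 *		;	// pkts[nb_prep] was rejected, rte_errno says why
 *	uint16_t nb_sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
 */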
416 
417 uint16_t
418 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
419 		  uint16_t nb_pkts)
420 {
421 	uint16_t nb_tx;
422 	vmxnet3_tx_queue_t *txq = tx_queue;
423 	struct vmxnet3_hw *hw = txq->hw;
424 	Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
425 	uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);
426 
427 	if (unlikely(txq->stopped)) {
428 		PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
429 		return 0;
430 	}
431 
432 	/* Free up the comp_descriptors aggressively */
433 	vmxnet3_tq_tx_complete(txq);
434 
435 	nb_tx = 0;
436 	while (nb_tx < nb_pkts) {
437 		Vmxnet3_GenericDesc *gdesc;
438 		vmxnet3_buf_info_t *tbi;
439 		uint32_t first2fill, avail, dw2;
440 		struct rte_mbuf *txm = tx_pkts[nb_tx];
441 		struct rte_mbuf *m_seg = txm;
442 		int copy_size = 0;
443 		bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
444 		/* # of descriptors needed for a packet. */
445 		unsigned count = txm->nb_segs;
446 
447 		avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
448 		if (count > avail) {
449 			/* Is command ring full? */
450 			if (unlikely(avail == 0)) {
451 				PMD_TX_LOG(DEBUG, "No free ring descriptors");
452 				txq->stats.tx_ring_full++;
453 				txq->stats.drop_total += (nb_pkts - nb_tx);
454 				break;
455 			}
456 
457 			/* Command ring is not full but cannot handle the
458 			 * multi-segmented packet. Let's try the next packet
459 			 * in this case.
460 			 */
461 			PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
462 				   "(avail %d needed %d)", avail, count);
463 			txq->stats.drop_total++;
464 			if (tso)
465 				txq->stats.drop_tso++;
466 			rte_pktmbuf_free(txm);
467 			nb_tx++;
468 			continue;
469 		}
470 
471 		/* Drop non-TSO packet that is excessively fragmented */
472 		if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
473 			PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
474 				   "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
475 			txq->stats.drop_too_many_segs++;
476 			txq->stats.drop_total++;
477 			rte_pktmbuf_free(txm);
478 			nb_tx++;
479 			continue;
480 		}
481 
482 		if (txm->nb_segs == 1 &&
483 		    rte_pktmbuf_pkt_len(txm) <= txq->txdata_desc_size) {
484 			struct Vmxnet3_TxDataDesc *tdd;
485 
486 			tdd = (struct Vmxnet3_TxDataDesc *)
487 				((uint8 *)txq->data_ring.base +
488 				 txq->cmd_ring.next2fill *
489 				 txq->txdata_desc_size);
490 			copy_size = rte_pktmbuf_pkt_len(txm);
491 			rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
492 		}
493 
494 		/* use the previous gen bit for the SOP desc */
495 		dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
496 		first2fill = txq->cmd_ring.next2fill;
497 		do {
498 			/* Remember the transmit buffer for cleanup */
499 			tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
500 
501 			/* NB: the following assumes that VMXNET3 maximum
502 			 * transmit buffer size (16K) is greater than
503 			 * the maximum mbuf segment size.
504 			 */
505 			gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
506 			if (copy_size) {
507 				uint64 offset = txq->cmd_ring.next2fill *
508 						txq->txdata_desc_size;
509 				gdesc->txd.addr =
510 					rte_cpu_to_le_64(txq->data_ring.basePA +
511 							 offset);
512 			} else {
513 				gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg);
514 			}
515 
516 			gdesc->dword[2] = dw2 | m_seg->data_len;
517 			gdesc->dword[3] = 0;
518 
519 			/* move to the next2fill descriptor */
520 			vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
521 
522 			/* use the right gen for non-SOP desc */
523 			dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
524 		} while ((m_seg = m_seg->next) != NULL);
525 
526 		/* set the last buf_info for the pkt */
527 		tbi->m = txm;
528 		/* Update the EOP descriptor */
529 		gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
530 
531 		/* Add VLAN tag if present */
532 		gdesc = txq->cmd_ring.base + first2fill;
533 		if (txm->ol_flags & PKT_TX_VLAN_PKT) {
534 			gdesc->txd.ti = 1;
535 			gdesc->txd.tci = txm->vlan_tci;
536 		}
537 
538 		if (tso) {
539 			uint16_t mss = txm->tso_segsz;
540 
541 			RTE_ASSERT(mss > 0);
542 
543 			gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
544 			gdesc->txd.om = VMXNET3_OM_TSO;
545 			gdesc->txd.msscof = mss;
546 
547 			deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
548 		} else if (txm->ol_flags & PKT_TX_L4_MASK) {
549 			gdesc->txd.om = VMXNET3_OM_CSUM;
550 			gdesc->txd.hlen = txm->l2_len + txm->l3_len;
551 
552 			switch (txm->ol_flags & PKT_TX_L4_MASK) {
553 			case PKT_TX_TCP_CKSUM:
554 				gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum);
555 				break;
556 			case PKT_TX_UDP_CKSUM:
557 				gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum);
558 				break;
559 			default:
560 				PMD_TX_LOG(WARNING, "requested cksum offload not supported %#" PRIx64,
561 					   txm->ol_flags & PKT_TX_L4_MASK);
562 				abort();
563 			}
564 			deferred++;
565 		} else {
566 			gdesc->txd.hlen = 0;
567 			gdesc->txd.om = VMXNET3_OM_NONE;
568 			gdesc->txd.msscof = 0;
569 			deferred++;
570 		}
571 
572 		/* flip the GEN bit on the SOP */
573 		rte_compiler_barrier();
574 		gdesc->dword[2] ^= VMXNET3_TXD_GEN;
575 
576 		txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
577 		nb_tx++;
578 	}
579 
580 	PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));
581 
582 	if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
583 		txq_ctrl->txNumDeferred = 0;
584 		/* Notify vSwitch that packets are available. */
585 		VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
586 				       txq->cmd_ring.next2fill);
587 	}
588 
589 	return nb_tx;
590 }
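
/*
 * Illustrative sketch (not part of the driver): the "deferred" accounting in
 * vmxnet3_xmit_pkts() above.  For a TSO packet one count is deferred per
 * resulting segment; with an assumed 9014-byte packet, 54 bytes of headers
 * and mss = 1448:
 *
 *	deferred += (9014 - 54 + 1448 - 1) / 1448;	// adds 7 segments
 *
 * The TXPROD doorbell is written only once "deferred" reaches the
 * txThreshold exported by the device, which batches notifications to the
 * hypervisor.
 */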
591 
592 static inline void
593 vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
594 		   struct rte_mbuf *mbuf)
595 {
596 	uint32_t val = 0;
597 	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
598 	struct Vmxnet3_RxDesc *rxd =
599 		(struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
600 	vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
601 
602 	if (ring_id == 0)
603 		val = VMXNET3_RXD_BTYPE_HEAD;
604 	else
605 		val = VMXNET3_RXD_BTYPE_BODY;
606 
607 	buf_info->m = mbuf;
608 	buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
609 	buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf);
610 
611 	rxd->addr = buf_info->bufPA;
612 	rxd->btype = val;
613 	rxd->len = buf_info->len;
614 	rxd->gen = ring->gen;
615 
616 	vmxnet3_cmd_ring_adv_next2fill(ring);
617 }
618 /*
619  *  Allocates mbufs and clusters. Posts Rx descriptors with buffer details
620  *  so that the device can receive packets into those buffers.
621  *  Ring layout:
622  *      Among the two rings, the 1st ring contains buffers of type 0 and type 1.
623  *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
624  *      by a frame fit in the 1st ring (1st buf of type 0 and the rest of type 1).
625  *      The 2nd ring contains buffers of type 1 only and is used mostly
626  *      for LRO.
627  */
628 static int
629 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
630 {
631 	int err = 0;
632 	uint32_t i = 0, val = 0;
633 	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
634 
635 	if (ring_id == 0) {
636 		/* Usually: One HEAD type buf per packet
637 		 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
638 		 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
639 		 */
640 
641 		/* We use a single buffer per packet, so all descriptors here are HEAD type */
642 		val = VMXNET3_RXD_BTYPE_HEAD;
643 	} else {
644 		/* All BODY type buffers for 2nd ring */
645 		val = VMXNET3_RXD_BTYPE_BODY;
646 	}
647 
648 	while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
649 		struct Vmxnet3_RxDesc *rxd;
650 		struct rte_mbuf *mbuf;
651 		vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
652 
653 		rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
654 
655 		/* Allocate blank mbuf for the current Rx Descriptor */
656 		mbuf = rte_mbuf_raw_alloc(rxq->mp);
657 		if (unlikely(mbuf == NULL)) {
658 			PMD_RX_LOG(ERR, "Error allocating mbuf");
659 			rxq->stats.rx_buf_alloc_failure++;
660 			err = ENOMEM;
661 			break;
662 		}
663 
664 		/*
665 		 * Load the mbuf pointer into buf_info[next2fill];
666 		 * the buf_info entry plays the same role as a virtio virtqueue cookie.
667 		 */
668 		buf_info->m = mbuf;
669 		buf_info->len = (uint16_t)(mbuf->buf_len -
670 					   RTE_PKTMBUF_HEADROOM);
671 		buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf);
672 
673 		/* Load Rx Descriptor with the buffer's GPA */
674 		rxd->addr = buf_info->bufPA;
675 
676 		/* After this point rxd->addr MUST not be NULL */
677 		rxd->btype = val;
678 		rxd->len = buf_info->len;
679 		/* Flip gen bit at the end to change ownership */
680 		rxd->gen = ring->gen;
681 
682 		vmxnet3_cmd_ring_adv_next2fill(ring);
683 		i++;
684 	}
685 
686 	/* Return error only if no buffers are posted at present */
687 	if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
688 		return -err;
689 	else
690 		return i;
691 }
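
/*
 * Illustrative sketch (not part of the driver): with the layout described
 * above, every descriptor posted on cmd_ring[0] is of type HEAD and every
 * descriptor on cmd_ring[1] is of type BODY, so a hypothetical LRO'd frame
 * spanning several mbufs would be delivered as
 *
 *	ring0: [HEAD mbuf]  +  ring1: [BODY mbuf][BODY mbuf]...
 *
 * while an ordinary MTU-sized frame consumes a single HEAD buffer from
 * ring 0.
 */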
692 
693 
694 /* Receive side checksum and other offloads */
695 static void
696 vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
697 {
698 	/* Check for RSS */
699 	if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
700 		rxm->ol_flags |= PKT_RX_RSS_HASH;
701 		rxm->hash.rss = rcd->rssHash;
702 	}
703 
704 	/* Check packet type, checksum errors, etc. Only IPv4 is supported for now. */
705 	if (rcd->v4) {
706 		struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
707 		struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);
708 
709 		if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
710 			rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
711 		else
712 			rxm->packet_type = RTE_PTYPE_L3_IPV4;
713 
714 		if (!rcd->cnc) {
715 			if (!rcd->ipc)
716 				rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;
717 
718 			if ((rcd->tcp || rcd->udp) && !rcd->tuc)
719 				rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
720 		}
721 	}
722 }
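
/*
 * Illustrative sketch (not part of the driver): how an application might
 * consume the flags set by vmxnet3_rx_offload().  "rxm" is an mbuf returned
 * by the receive burst and "nb_workers" is an assumed name.
 *
 *	if (rxm->ol_flags & (PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD))
 *		rte_pktmbuf_free(rxm);			// drop bad checksum
 *	else if (rxm->ol_flags & PKT_RX_RSS_HASH)
 *		worker = rxm->hash.rss % nb_workers;	// spread by RSS hash
 */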
723 
724 /*
725  * Process the Rx Completion Ring of given vmxnet3_rx_queue
726  * for nb_pkts burst and return the number of packets received
727  */
728 uint16_t
729 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
730 {
731 	uint16_t nb_rx;
732 	uint32_t nb_rxd, idx;
733 	uint8_t ring_idx;
734 	vmxnet3_rx_queue_t *rxq;
735 	Vmxnet3_RxCompDesc *rcd;
736 	vmxnet3_buf_info_t *rbi;
737 	Vmxnet3_RxDesc *rxd;
738 	struct rte_mbuf *rxm = NULL;
739 	struct vmxnet3_hw *hw;
740 
741 	nb_rx = 0;
742 	ring_idx = 0;
743 	nb_rxd = 0;
744 	idx = 0;
745 
746 	rxq = rx_queue;
747 	hw = rxq->hw;
748 
749 	rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
750 
751 	if (unlikely(rxq->stopped)) {
752 		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
753 		return 0;
754 	}
755 
756 	while (rcd->gen == rxq->comp_ring.gen) {
757 		struct rte_mbuf *newm;
758 
759 		if (nb_rx >= nb_pkts)
760 			break;
761 
762 		newm = rte_mbuf_raw_alloc(rxq->mp);
763 		if (unlikely(newm == NULL)) {
764 			PMD_RX_LOG(ERR, "Error allocating mbuf");
765 			rxq->stats.rx_buf_alloc_failure++;
766 			break;
767 		}
768 
769 		idx = rcd->rxdIdx;
770 		ring_idx = vmxnet3_get_ring_idx(hw, rcd->rqID);
771 		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
772 		RTE_SET_USED(rxd); /* used only for assert when enabled */
773 		rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
774 
775 		PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
776 
777 		RTE_ASSERT(rcd->len <= rxd->len);
778 		RTE_ASSERT(rbi->m);
779 
780 		/* Get the packet buffer pointer from buf_info */
781 		rxm = rbi->m;
782 
783 		/* Clear descriptor associated buf_info to be reused */
784 		rbi->m = NULL;
785 		rbi->bufPA = 0;
786 
787 		/* Record the ring index at which this packet was received */
788 		rxq->cmd_ring[ring_idx].next2comp = idx;
789 
790 		/* For RCD with EOP set, check if there is frame error */
791 		if (unlikely(rcd->eop && rcd->err)) {
792 			rxq->stats.drop_total++;
793 			rxq->stats.drop_err++;
794 
795 			if (!rcd->fcs) {
796 				rxq->stats.drop_fcs++;
797 				PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
798 			}
799 			PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
800 				   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
801 					 rxq->comp_ring.base), rcd->rxdIdx);
802 			rte_pktmbuf_free_seg(rxm);
803 			goto rcd_done;
804 		}
805 
806 		/* Initialize newly received packet buffer */
807 		rxm->port = rxq->port_id;
808 		rxm->nb_segs = 1;
809 		rxm->next = NULL;
810 		rxm->pkt_len = (uint16_t)rcd->len;
811 		rxm->data_len = (uint16_t)rcd->len;
812 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
813 		rxm->ol_flags = 0;
814 		rxm->vlan_tci = 0;
815 
816 		/*
817 		 * If this is the first buffer of the received packet,
818 		 * set the pointer to the first mbuf of the packet
819 		 * Otherwise, update the total length and the number of segments
820 		 * of the current scattered packet, and update the pointer to
821 		 * the last mbuf of the current packet.
822 		 */
823 		if (rcd->sop) {
824 			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
825 
826 			if (unlikely(rcd->len == 0)) {
827 				RTE_ASSERT(rcd->eop);
828 
829 				PMD_RX_LOG(DEBUG,
830 					   "Rx buf was skipped. rxring[%d][%d])",
831 					   ring_idx, idx);
832 				rte_pktmbuf_free_seg(rxm);
833 				goto rcd_done;
834 			}
835 
836 			if (vmxnet3_rx_data_ring(hw, rcd->rqID)) {
837 				uint8_t *rdd = rxq->data_ring.base +
838 					idx * rxq->data_desc_size;
839 
840 				RTE_ASSERT(VMXNET3_VERSION_GE_3(hw));
841 				rte_memcpy(rte_pktmbuf_mtod(rxm, char *),
842 					   rdd, rcd->len);
843 			}
844 
845 			rxq->start_seg = rxm;
846 			vmxnet3_rx_offload(rcd, rxm);
847 		} else {
848 			struct rte_mbuf *start = rxq->start_seg;
849 
850 			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);
851 
852 			start->pkt_len += rxm->data_len;
853 			start->nb_segs++;
854 
855 			rxq->last_seg->next = rxm;
856 		}
857 		rxq->last_seg = rxm;
858 
859 		if (rcd->eop) {
860 			struct rte_mbuf *start = rxq->start_seg;
861 
862 			/* Check for hardware stripped VLAN tag */
863 			if (rcd->ts) {
864 				start->ol_flags |= (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED);
865 				start->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
866 			}
867 
868 			rx_pkts[nb_rx++] = start;
869 			rxq->start_seg = NULL;
870 		}
871 
872 rcd_done:
873 		rxq->cmd_ring[ring_idx].next2comp = idx;
874 		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
875 					  rxq->cmd_ring[ring_idx].size);
876 
877 		/* It's time to renew descriptors */
878 		vmxnet3_renew_desc(rxq, ring_idx, newm);
879 		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
880 			VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
881 					       rxq->cmd_ring[ring_idx].next2fill);
882 		}
883 
884 		/* Advance to the next descriptor in comp_ring */
885 		vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
886 
887 		rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
888 		nb_rxd++;
889 		if (nb_rxd > rxq->cmd_ring[0].size) {
890 			PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
891 				   " relinquish control.");
892 			break;
893 		}
894 	}
895 
896 	return nb_rx;
897 }
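
/*
 * Illustrative sketch (not part of the driver): vmxnet3_recv_pkts() is the
 * callback behind rte_eth_rx_burst().  A minimal polling loop, with
 * "port_id", "queue_id" and BURST_SZ as assumed names:
 *
 *	struct rte_mbuf *bufs[BURST_SZ];
 *	uint16_t nb = rte_eth_rx_burst(port_id, queue_id, bufs, BURST_SZ);
 *	for (uint16_t k = 0; k < nb; k++) {
 *		// ... process bufs[k] ...
 *		rte_pktmbuf_free(bufs[k]);
 *	}
 */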
898 
899 int
900 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
901 			   uint16_t queue_idx,
902 			   uint16_t nb_desc,
903 			   unsigned int socket_id,
904 			   const struct rte_eth_txconf *tx_conf)
905 {
906 	struct vmxnet3_hw *hw = dev->data->dev_private;
907 	const struct rte_memzone *mz;
908 	struct vmxnet3_tx_queue *txq;
909 	struct vmxnet3_cmd_ring *ring;
910 	struct vmxnet3_comp_ring *comp_ring;
911 	struct vmxnet3_data_ring *data_ring;
912 	int size;
913 
914 	PMD_INIT_FUNC_TRACE();
915 
916 	if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP) !=
917 	    ETH_TXQ_FLAGS_NOXSUMSCTP) {
918 		PMD_INIT_LOG(ERR, "SCTP checksum offload not supported");
919 		return -EINVAL;
920 	}
921 
922 	txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
923 			  RTE_CACHE_LINE_SIZE);
924 	if (txq == NULL) {
925 		PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
926 		return -ENOMEM;
927 	}
928 
929 	txq->queue_id = queue_idx;
930 	txq->port_id = dev->data->port_id;
931 	txq->shared = &hw->tqd_start[queue_idx];
932 	txq->hw = hw;
933 	txq->qid = queue_idx;
934 	txq->stopped = TRUE;
935 	txq->txdata_desc_size = hw->txdata_desc_size;
936 
937 	ring = &txq->cmd_ring;
938 	comp_ring = &txq->comp_ring;
939 	data_ring = &txq->data_ring;
940 
941 	/* Tx vmxnet ring length should be between 512-4096 */
942 	if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
943 		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
944 			     VMXNET3_DEF_TX_RING_SIZE);
945 		return -EINVAL;
946 	} else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
947 		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
948 			     VMXNET3_TX_RING_MAX_SIZE);
949 		return -EINVAL;
950 	} else {
951 		ring->size = nb_desc;
952 		ring->size &= ~VMXNET3_RING_SIZE_MASK;
953 	}
954 	comp_ring->size = data_ring->size = ring->size;
955 
956 	/* Tx vmxnet rings structure initialization */
957 	ring->next2fill = 0;
958 	ring->next2comp = 0;
959 	ring->gen = VMXNET3_INIT_GEN;
960 	comp_ring->next2proc = 0;
961 	comp_ring->gen = VMXNET3_INIT_GEN;
962 
963 	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
964 	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
965 	size += txq->txdata_desc_size * data_ring->size;
966 
967 	mz = rte_eth_dma_zone_reserve(dev, "txdesc", queue_idx, size,
968 				      VMXNET3_RING_BA_ALIGN, socket_id);
969 	if (mz == NULL) {
970 		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
971 		return -ENOMEM;
972 	}
973 	txq->mz = mz;
974 	memset(mz->addr, 0, mz->len);
975 
976 	/* cmd_ring initialization */
977 	ring->base = mz->addr;
978 	ring->basePA = mz->phys_addr;
979 
980 	/* comp_ring initialization */
981 	comp_ring->base = ring->base + ring->size;
982 	comp_ring->basePA = ring->basePA +
983 		(sizeof(struct Vmxnet3_TxDesc) * ring->size);
984 
985 	/* data_ring initialization */
986 	data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
987 	data_ring->basePA = comp_ring->basePA +
988 			(sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
989 
990 	/* cmd_ring0 buf_info allocation */
991 	ring->buf_info = rte_zmalloc("tx_ring_buf_info",
992 				     ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
993 	if (ring->buf_info == NULL) {
994 		PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
995 		return -ENOMEM;
996 	}
997 
998 	/* Update the data portion with txq */
999 	dev->data->tx_queues[queue_idx] = txq;
1000 
1001 	return 0;
1002 }
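
/*
 * Illustrative sketch (not part of the driver): how an application reaches
 * the setup routine above through the generic API, and how the single
 * memzone is laid out.  "port_id", "socket" and "txconf" are assumed names;
 * txconf.txq_flags must keep ETH_TXQ_FLAGS_NOXSUMSCTP set, as required by
 * the check above.
 *
 *	rte_eth_tx_queue_setup(port_id, 0, 512, socket, &txconf);
 *
 * The reserved zone then holds, back to back:
 *
 *	[512 x Vmxnet3_TxDesc][512 x Vmxnet3_TxCompDesc][512 x txdata desc]
 */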
1003 
1004 int
1005 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
1006 			   uint16_t queue_idx,
1007 			   uint16_t nb_desc,
1008 			   unsigned int socket_id,
1009 			   __rte_unused const struct rte_eth_rxconf *rx_conf,
1010 			   struct rte_mempool *mp)
1011 {
1012 	const struct rte_memzone *mz;
1013 	struct vmxnet3_rx_queue *rxq;
1014 	struct vmxnet3_hw *hw = dev->data->dev_private;
1015 	struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
1016 	struct vmxnet3_comp_ring *comp_ring;
1017 	struct vmxnet3_rx_data_ring *data_ring;
1018 	int size;
1019 	uint8_t i;
1020 	char mem_name[32];
1021 
1022 	PMD_INIT_FUNC_TRACE();
1023 
1024 	rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
1025 			  RTE_CACHE_LINE_SIZE);
1026 	if (rxq == NULL) {
1027 		PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
1028 		return -ENOMEM;
1029 	}
1030 
1031 	rxq->mp = mp;
1032 	rxq->queue_id = queue_idx;
1033 	rxq->port_id = dev->data->port_id;
1034 	rxq->shared = &hw->rqd_start[queue_idx];
1035 	rxq->hw = hw;
1036 	rxq->qid1 = queue_idx;
1037 	rxq->qid2 = queue_idx + hw->num_rx_queues;
1038 	rxq->data_ring_qid = queue_idx + 2 * hw->num_rx_queues;
1039 	rxq->data_desc_size = hw->rxdata_desc_size;
1040 	rxq->stopped = TRUE;
1041 
1042 	ring0 = &rxq->cmd_ring[0];
1043 	ring1 = &rxq->cmd_ring[1];
1044 	comp_ring = &rxq->comp_ring;
1045 	data_ring = &rxq->data_ring;
1046 
1047 	/* Rx vmxnet rings length should be between 256-4096 */
1048 	if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
1049 		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
1050 		return -EINVAL;
1051 	} else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
1052 		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
1053 		return -EINVAL;
1054 	} else {
1055 		ring0->size = nb_desc;
1056 		ring0->size &= ~VMXNET3_RING_SIZE_MASK;
1057 		ring1->size = ring0->size;
1058 	}
1059 
1060 	comp_ring->size = ring0->size + ring1->size;
1061 	data_ring->size = ring0->size;
1062 
1063 	/* Rx vmxnet rings structure initialization */
1064 	ring0->next2fill = 0;
1065 	ring1->next2fill = 0;
1066 	ring0->next2comp = 0;
1067 	ring1->next2comp = 0;
1068 	ring0->gen = VMXNET3_INIT_GEN;
1069 	ring1->gen = VMXNET3_INIT_GEN;
1070 	comp_ring->next2proc = 0;
1071 	comp_ring->gen = VMXNET3_INIT_GEN;
1072 
1073 	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
1074 	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1075 	if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size)
1076 		size += rxq->data_desc_size * data_ring->size;
1077 
1078 	mz = rte_eth_dma_zone_reserve(dev, "rxdesc", queue_idx, size,
1079 				      VMXNET3_RING_BA_ALIGN, socket_id);
1080 	if (mz == NULL) {
1081 		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1082 		return -ENOMEM;
1083 	}
1084 	rxq->mz = mz;
1085 	memset(mz->addr, 0, mz->len);
1086 
1087 	/* cmd_ring0 initialization */
1088 	ring0->base = mz->addr;
1089 	ring0->basePA = mz->phys_addr;
1090 
1091 	/* cmd_ring1 initialization */
1092 	ring1->base = ring0->base + ring0->size;
1093 	ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
1094 
1095 	/* comp_ring initialization */
1096 	comp_ring->base = ring1->base + ring1->size;
1097 	comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
1098 		ring1->size;
1099 
1100 	/* data_ring initialization */
1101 	if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size) {
1102 		data_ring->base =
1103 			(uint8_t *)(comp_ring->base + comp_ring->size);
1104 		data_ring->basePA = comp_ring->basePA +
1105 			sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1106 	}
1107 
1108 	/* cmd_ring0-cmd_ring1 buf_info allocation */
1109 	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
1110 
1111 		ring = &rxq->cmd_ring[i];
1112 		ring->rid = i;
1113 		snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
1114 
1115 		ring->buf_info = rte_zmalloc(mem_name,
1116 					     ring->size * sizeof(vmxnet3_buf_info_t),
1117 					     RTE_CACHE_LINE_SIZE);
1118 		if (ring->buf_info == NULL) {
1119 			PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
1120 			return -ENOMEM;
1121 		}
1122 	}
1123 
1124 	/* Update the data portion with rxq */
1125 	dev->data->rx_queues[queue_idx] = rxq;
1126 
1127 	return 0;
1128 }
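
/*
 * Illustrative sketch (not part of the driver): the matching application
 * call for the Rx path.  The mempool must leave RTE_PKTMBUF_HEADROOM in each
 * buffer, since the length posted to the device is buf_len minus the
 * headroom (see vmxnet3_post_rx_bufs()).  "port_id", "socket" and "mp" are
 * assumed names.
 *
 *	struct rte_mempool *mp = rte_pktmbuf_pool_create("rx_pool", 8192,
 *			256, 0, RTE_MBUF_DEFAULT_BUF_SIZE, socket);
 *	rte_eth_rx_queue_setup(port_id, 0, 1024, socket, NULL, mp);
 */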
1129 
1130 /*
1131  * Initializes the receive unit and
1132  * loads mbufs into the Rx queues in advance.
1133  */
1134 int
1135 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
1136 {
1137 	struct vmxnet3_hw *hw = dev->data->dev_private;
1138 
1139 	int i, ret;
1140 	uint8_t j;
1141 
1142 	PMD_INIT_FUNC_TRACE();
1143 
1144 	for (i = 0; i < hw->num_rx_queues; i++) {
1145 		vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
1146 
1147 		for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
1148 			/* vmxnet3_post_rx_bufs() fills the whole ring with fresh mbufs */
1149 			ret = vmxnet3_post_rx_bufs(rxq, j);
1150 			if (ret <= 0) {
1151 				PMD_INIT_LOG(ERR,
1152 					     "ERROR: Posting Rxq: %d buffers ring: %d",
1153 					     i, j);
1154 				return -ret;
1155 			}
1156 			/*
1157 			 * Update the device with the next2fill index so that it
1158 			 * can use the newly posted mbufs for incoming packets.
1159 			 */
1160 			if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1161 				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1162 						       rxq->cmd_ring[j].next2fill);
1163 			}
1164 		}
1165 		rxq->stopped = FALSE;
1166 		rxq->start_seg = NULL;
1167 	}
1168 
1169 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
1170 		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
1171 
1172 		txq->stopped = FALSE;
1173 	}
1174 
1175 	return 0;
1176 }
1177 
1178 static uint8_t rss_intel_key[40] = {
1179 	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1180 	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1181 	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1182 	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1183 	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1184 };
1185 
1186 /*
1187  * Configure RSS feature
1188  */
1189 int
1190 vmxnet3_rss_configure(struct rte_eth_dev *dev)
1191 {
1192 	struct vmxnet3_hw *hw = dev->data->dev_private;
1193 	struct VMXNET3_RSSConf *dev_rss_conf;
1194 	struct rte_eth_rss_conf *port_rss_conf;
1195 	uint64_t rss_hf;
1196 	uint8_t i, j;
1197 
1198 	PMD_INIT_FUNC_TRACE();
1199 
1200 	dev_rss_conf = hw->rss_conf;
1201 	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1202 
1203 	/* loading hashFunc */
1204 	dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
1205 	/* loading hashKeySize */
1206 	dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
1207 	/* loading indTableSize: must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
1208 	dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
1209 
1210 	if (port_rss_conf->rss_key == NULL) {
1211 		/* Default hash key */
1212 		port_rss_conf->rss_key = rss_intel_key;
1213 	}
1214 
1215 	/* loading hashKey */
1216 	memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
1217 	       dev_rss_conf->hashKeySize);
1218 
1219 	/* loading indTable */
1220 	for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1221 		if (j == dev->data->nb_rx_queues)
1222 			j = 0;
1223 		dev_rss_conf->indTable[i] = j;
1224 	}
1225 
1226 	/* loading hashType */
1227 	dev_rss_conf->hashType = 0;
1228 	rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1229 	if (rss_hf & ETH_RSS_IPV4)
1230 		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1231 	if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1232 		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1233 	if (rss_hf & ETH_RSS_IPV6)
1234 		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1235 	if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1236 		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1237 
1238 	return VMXNET3_SUCCESS;
1239 }
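
/*
 * Illustrative sketch (not part of the driver): the round-robin indirection
 * table built above.  Assuming 4 Rx queues, indTableSize becomes 4 * 4 = 16
 * and the table is filled as
 *
 *	indTable[] = { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
 *
 * so RSS hash results are spread evenly across the configured queues.
 */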
1240