1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2015 Intel Corporation
3  */
4 
5 #include <sys/queue.h>
6 
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <errno.h>
11 #include <stdint.h>
12 #include <stdarg.h>
13 #include <unistd.h>
14 #include <inttypes.h>
15 
16 #include <rte_byteorder.h>
17 #include <rte_common.h>
18 #include <rte_cycles.h>
19 #include <rte_log.h>
20 #include <rte_debug.h>
21 #include <rte_interrupts.h>
22 #include <rte_pci.h>
23 #include <rte_memory.h>
24 #include <rte_memzone.h>
25 #include <rte_launch.h>
26 #include <rte_eal.h>
27 #include <rte_per_lcore.h>
28 #include <rte_lcore.h>
29 #include <rte_atomic.h>
30 #include <rte_branch_prediction.h>
31 #include <rte_mempool.h>
32 #include <rte_malloc.h>
33 #include <rte_mbuf.h>
34 #include <rte_ether.h>
35 #include <rte_ethdev_driver.h>
36 #include <rte_prefetch.h>
37 #include <rte_ip.h>
38 #include <rte_udp.h>
39 #include <rte_tcp.h>
40 #include <rte_sctp.h>
41 #include <rte_string_fns.h>
42 #include <rte_errno.h>
43 #include <rte_net.h>
44 
45 #include "base/vmxnet3_defs.h"
46 #include "vmxnet3_ring.h"
47 
48 #include "vmxnet3_logs.h"
49 #include "vmxnet3_ethdev.h"
50 
51 #define	VMXNET3_TX_OFFLOAD_MASK	( \
52 		PKT_TX_VLAN_PKT | \
53 		PKT_TX_IPV6 |     \
54 		PKT_TX_IPV4 |     \
55 		PKT_TX_L4_MASK |  \
56 		PKT_TX_TCP_SEG)
57 
58 #define	VMXNET3_TX_OFFLOAD_NOTSUP_MASK	\
59 	(PKT_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)
60 
61 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
62 
63 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
64 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
65 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
66 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
67 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
68 #endif
69 
70 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
71 static void
72 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
73 {
74 	uint32_t avail = 0;
75 
76 	if (rxq == NULL)
77 		return;
78 
79 	PMD_RX_LOG(DEBUG,
80 		   "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
81 		   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
82 	PMD_RX_LOG(DEBUG,
83 		   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
84 		   (unsigned long)rxq->cmd_ring[0].basePA,
85 		   (unsigned long)rxq->cmd_ring[1].basePA,
86 		   (unsigned long)rxq->comp_ring.basePA);
87 
88 	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
89 	PMD_RX_LOG(DEBUG,
90 		   "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
91 		   (uint32_t)rxq->cmd_ring[0].size, avail,
92 		   rxq->comp_ring.next2proc,
93 		   rxq->cmd_ring[0].size - avail);
94 
95 	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
96 	PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
97 		   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
98 		   rxq->cmd_ring[1].size - avail);
99 
100 }
101 
102 static void
103 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
104 {
105 	uint32_t avail = 0;
106 
107 	if (txq == NULL)
108 		return;
109 
110 	PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
111 		   txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
112 	PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
113 		   (unsigned long)txq->cmd_ring.basePA,
114 		   (unsigned long)txq->comp_ring.basePA,
115 		   (unsigned long)txq->data_ring.basePA);
116 
117 	avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
118 	PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
119 		   (uint32_t)txq->cmd_ring.size, avail,
120 		   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
121 }
122 #endif
123 
124 static void
125 vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
126 {
127 	while (ring->next2comp != ring->next2fill) {
128 		/* No need to worry about desc ownership, device is quiesced by now. */
129 		vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
130 
131 		if (buf_info->m) {
132 			rte_pktmbuf_free(buf_info->m);
133 			buf_info->m = NULL;
134 			buf_info->bufPA = 0;
135 			buf_info->len = 0;
136 		}
137 		vmxnet3_cmd_ring_adv_next2comp(ring);
138 	}
139 }
140 
141 static void
142 vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
143 {
144 	uint32_t i;
145 
146 	for (i = 0; i < ring->size; i++) {
147 		/* No need to worry about desc ownership, device is quiesced by now. */
148 		vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];
149 
150 		if (buf_info->m) {
151 			rte_pktmbuf_free_seg(buf_info->m);
152 			buf_info->m = NULL;
153 			buf_info->bufPA = 0;
154 			buf_info->len = 0;
155 		}
156 		vmxnet3_cmd_ring_adv_next2comp(ring);
157 	}
158 }
159 
160 static void
161 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
162 {
163 	rte_free(ring->buf_info);
164 	ring->buf_info = NULL;
165 }
166 
167 void
168 vmxnet3_dev_tx_queue_release(void *txq)
169 {
170 	vmxnet3_tx_queue_t *tq = txq;
171 
172 	if (tq != NULL) {
173 		/* Release mbufs */
174 		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
175 		/* Release the cmd_ring */
176 		vmxnet3_cmd_ring_release(&tq->cmd_ring);
177 		/* Release the memzone */
178 		rte_memzone_free(tq->mz);
179 		/* Release the queue */
180 		rte_free(tq);
181 	}
182 }
183 
184 void
185 vmxnet3_dev_rx_queue_release(void *rxq)
186 {
187 	int i;
188 	vmxnet3_rx_queue_t *rq = rxq;
189 
190 	if (rq != NULL) {
191 		/* Release mbufs */
192 		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
193 			vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
194 
195 		/* Release both the cmd_rings */
196 		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
197 			vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
198 
199 		/* Release the memzone */
200 		rte_memzone_free(rq->mz);
201 
202 		/* Release the queue */
203 		rte_free(rq);
204 	}
205 }
206 
207 static void
208 vmxnet3_dev_tx_queue_reset(void *txq)
209 {
210 	vmxnet3_tx_queue_t *tq = txq;
211 	struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
212 	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
213 	struct vmxnet3_data_ring *data_ring = &tq->data_ring;
214 	int size;
215 
216 	if (tq != NULL) {
217 		/* Release the cmd_ring mbufs */
218 		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
219 	}
220 
221 	/* Tx vmxnet rings structure initialization */
222 	ring->next2fill = 0;
223 	ring->next2comp = 0;
224 	ring->gen = VMXNET3_INIT_GEN;
225 	comp_ring->next2proc = 0;
226 	comp_ring->gen = VMXNET3_INIT_GEN;
227 
228 	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
229 	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
230 	size += tq->txdata_desc_size * data_ring->size;
231 
232 	memset(ring->base, 0, size);
233 }
234 
235 static void
236 vmxnet3_dev_rx_queue_reset(void *rxq)
237 {
238 	int i;
239 	vmxnet3_rx_queue_t *rq = rxq;
240 	struct vmxnet3_hw *hw = rq->hw;
241 	struct vmxnet3_cmd_ring *ring0, *ring1;
242 	struct vmxnet3_comp_ring *comp_ring;
243 	struct vmxnet3_rx_data_ring *data_ring = &rq->data_ring;
244 	int size;
245 
246 	/* Release both the cmd_rings mbufs */
247 	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
248 		vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
249 
250 	ring0 = &rq->cmd_ring[0];
251 	ring1 = &rq->cmd_ring[1];
252 	comp_ring = &rq->comp_ring;
253 
254 	/* Rx vmxnet rings structure initialization */
255 	ring0->next2fill = 0;
256 	ring1->next2fill = 0;
257 	ring0->next2comp = 0;
258 	ring1->next2comp = 0;
259 	ring0->gen = VMXNET3_INIT_GEN;
260 	ring1->gen = VMXNET3_INIT_GEN;
261 	comp_ring->next2proc = 0;
262 	comp_ring->gen = VMXNET3_INIT_GEN;
263 
264 	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
265 	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
266 	if (VMXNET3_VERSION_GE_3(hw) && rq->data_desc_size)
267 		size += rq->data_desc_size * data_ring->size;
268 
269 	memset(ring0->base, 0, size);
270 }
271 
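/*
 * Stop every configured Tx and Rx queue and reset its rings to their
 * initial (empty) state.
 */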
272 void
273 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
274 {
275 	unsigned i;
276 
277 	PMD_INIT_FUNC_TRACE();
278 
279 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
280 		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
281 
282 		if (txq != NULL) {
283 			txq->stopped = TRUE;
284 			vmxnet3_dev_tx_queue_reset(txq);
285 		}
286 	}
287 
288 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
289 		struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
290 
291 		if (rxq != NULL) {
292 			rxq->stopped = TRUE;
293 			vmxnet3_dev_rx_queue_reset(rxq);
294 		}
295 	}
296 }
297 
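/*
 * Free the mbuf attached to the EOP descriptor of a completed packet and
 * advance cmd_ring.next2comp past every descriptor that packet used.
 * Returns the number of command ring descriptors released.
 */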
298 static int
299 vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
300 {
301 	int completed = 0;
302 	struct rte_mbuf *mbuf;
303 
304 	/* Release cmd_ring descriptor and free mbuf */
305 	RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);
306 
307 	mbuf = txq->cmd_ring.buf_info[eop_idx].m;
308 	if (mbuf == NULL)
309 		rte_panic("EOP desc does not point to a valid mbuf");
310 	rte_pktmbuf_free(mbuf);
311 
312 	txq->cmd_ring.buf_info[eop_idx].m = NULL;
313 
314 	while (txq->cmd_ring.next2comp != eop_idx) {
315 		/* no out-of-order completion */
316 		RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
317 		vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
318 		completed++;
319 	}
320 
321 	/* Mark the txd for which tcd was generated as completed */
322 	vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
323 
324 	return completed + 1;
325 }
326 
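/* Reclaim descriptors and free mbufs for all packets the device has completed. */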
327 static void
328 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
329 {
330 	int completed = 0;
331 	vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
332 	struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
333 		(comp_ring->base + comp_ring->next2proc);
334 
335 	while (tcd->gen == comp_ring->gen) {
336 		completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);
337 
338 		vmxnet3_comp_ring_adv_next2proc(comp_ring);
339 		tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
340 						    comp_ring->next2proc);
341 	}
342 
343 	PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
344 }
345 
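/*
 * Tx prepare callback: reject packets with unsupported offloads or too many
 * segments, and prepare the L3/L4 checksum fields before transmission.
 */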
346 uint16_t
347 vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
348 	uint16_t nb_pkts)
349 {
350 	int32_t ret;
351 	uint32_t i;
352 	uint64_t ol_flags;
353 	struct rte_mbuf *m;
354 
355 	for (i = 0; i != nb_pkts; i++) {
356 		m = tx_pkts[i];
357 		ol_flags = m->ol_flags;
358 
359 		/* Non-TSO packet cannot occupy more than
360 		 * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
361 		 */
362 		if ((ol_flags & PKT_TX_TCP_SEG) == 0 &&
363 				m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {
364 			rte_errno = EINVAL;
365 			return i;
366 		}
367 
368 		/* check that only supported TX offloads are requested. */
369 		if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
370 				(ol_flags & PKT_TX_L4_MASK) ==
371 				PKT_TX_SCTP_CKSUM) {
372 			rte_errno = ENOTSUP;
373 			return i;
374 		}
375 
376 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
377 		ret = rte_validate_tx_offload(m);
378 		if (ret != 0) {
379 			rte_errno = -ret;
380 			return i;
381 		}
382 #endif
383 		ret = rte_net_intel_cksum_prepare(m);
384 		if (ret != 0) {
385 			rte_errno = -ret;
386 			return i;
387 		}
388 	}
389 
390 	return i;
391 }
392 
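/*
 * Tx burst function: reclaim completed descriptors, build the SOP..EOP
 * descriptor chain for each packet, and ring the Tx doorbell (TXPROD
 * register) once the number of deferred packets reaches the threshold
 * advertised by the device.
 */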
393 uint16_t
394 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
395 		  uint16_t nb_pkts)
396 {
397 	uint16_t nb_tx;
398 	vmxnet3_tx_queue_t *txq = tx_queue;
399 	struct vmxnet3_hw *hw = txq->hw;
400 	Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
401 	uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);
402 
403 	if (unlikely(txq->stopped)) {
404 		PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
405 		return 0;
406 	}
407 
408 	/* Free up the comp_descriptors aggressively */
409 	vmxnet3_tq_tx_complete(txq);
410 
411 	nb_tx = 0;
412 	while (nb_tx < nb_pkts) {
413 		Vmxnet3_GenericDesc *gdesc;
414 		vmxnet3_buf_info_t *tbi;
415 		uint32_t first2fill, avail, dw2;
416 		struct rte_mbuf *txm = tx_pkts[nb_tx];
417 		struct rte_mbuf *m_seg = txm;
418 		int copy_size = 0;
419 		bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
420 		/* # of descriptors needed for a packet. */
421 		unsigned count = txm->nb_segs;
422 
423 		avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
424 		if (count > avail) {
425 			/* Is command ring full? */
426 			if (unlikely(avail == 0)) {
427 				PMD_TX_LOG(DEBUG, "No free ring descriptors");
428 				txq->stats.tx_ring_full++;
429 				txq->stats.drop_total += (nb_pkts - nb_tx);
430 				break;
431 			}
432 
433 			/* Command ring is not full but cannot handle the
434 			 * multi-segmented packet. Let's try the next packet
435 			 * in this case.
436 			 */
437 			PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
438 				   "(avail %d needed %d)", avail, count);
439 			txq->stats.drop_total++;
440 			if (tso)
441 				txq->stats.drop_tso++;
442 			rte_pktmbuf_free(txm);
443 			nb_tx++;
444 			continue;
445 		}
446 
447 		/* Drop non-TSO packet that is excessively fragmented */
448 		if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
449 			PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
450 				   "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
451 			txq->stats.drop_too_many_segs++;
452 			txq->stats.drop_total++;
453 			rte_pktmbuf_free(txm);
454 			nb_tx++;
455 			continue;
456 		}
457 
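		/* Small single-segment packets are copied into the Tx data
		 * ring instead of being passed to the device by reference.
		 */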
458 		if (txm->nb_segs == 1 &&
459 		    rte_pktmbuf_pkt_len(txm) <= txq->txdata_desc_size) {
460 			struct Vmxnet3_TxDataDesc *tdd;
461 
462 			/* Skip empty packets */
463 			if (unlikely(rte_pktmbuf_pkt_len(txm) == 0)) {
464 				txq->stats.drop_total++;
465 				rte_pktmbuf_free(txm);
466 				nb_tx++;
467 				continue;
468 			}
469 
470 			tdd = (struct Vmxnet3_TxDataDesc *)
471 				((uint8 *)txq->data_ring.base +
472 				 txq->cmd_ring.next2fill *
473 				 txq->txdata_desc_size);
474 			copy_size = rte_pktmbuf_pkt_len(txm);
475 			rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
476 		}
477 
478 		/* use the previous gen bit for the SOP desc */
479 		dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
480 		first2fill = txq->cmd_ring.next2fill;
481 		do {
482 			/* Remember the transmit buffer for cleanup */
483 			tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
484 
485 			/* NB: the following assumes that VMXNET3 maximum
486 			 * transmit buffer size (16K) is greater than
487 			 * the maximum size of an mbuf segment.
488 			 */
489 			gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
490 
491 			/* Skip empty segments */
492 			if (unlikely(m_seg->data_len == 0))
493 				continue;
494 
495 			if (copy_size) {
496 				uint64 offset =
497 					(uint64)txq->cmd_ring.next2fill *
498 							txq->txdata_desc_size;
499 				gdesc->txd.addr =
500 					rte_cpu_to_le_64(txq->data_ring.basePA +
501 							 offset);
502 			} else {
503 				gdesc->txd.addr = rte_mbuf_data_iova(m_seg);
504 			}
505 
506 			gdesc->dword[2] = dw2 | m_seg->data_len;
507 			gdesc->dword[3] = 0;
508 
509 			/* move to the next2fill descriptor */
510 			vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
511 
512 			/* use the right gen for non-SOP desc */
513 			dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
514 		} while ((m_seg = m_seg->next) != NULL);
515 
516 		/* set the last buf_info for the pkt */
517 		tbi->m = txm;
518 		/* Update the EOP descriptor */
519 		gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
520 
521 		/* Add VLAN tag if present */
522 		gdesc = txq->cmd_ring.base + first2fill;
523 		if (txm->ol_flags & PKT_TX_VLAN_PKT) {
524 			gdesc->txd.ti = 1;
525 			gdesc->txd.tci = txm->vlan_tci;
526 		}
527 
528 		if (tso) {
529 			uint16_t mss = txm->tso_segsz;
530 
531 			RTE_ASSERT(mss > 0);
532 
533 			gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
534 			gdesc->txd.om = VMXNET3_OM_TSO;
535 			gdesc->txd.msscof = mss;
536 
537 			deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
538 		} else if (txm->ol_flags & PKT_TX_L4_MASK) {
539 			gdesc->txd.om = VMXNET3_OM_CSUM;
540 			gdesc->txd.hlen = txm->l2_len + txm->l3_len;
541 
542 			switch (txm->ol_flags & PKT_TX_L4_MASK) {
543 			case PKT_TX_TCP_CKSUM:
544 				gdesc->txd.msscof = gdesc->txd.hlen +
545 					offsetof(struct rte_tcp_hdr, cksum);
546 				break;
547 			case PKT_TX_UDP_CKSUM:
548 				gdesc->txd.msscof = gdesc->txd.hlen +
549 					offsetof(struct rte_udp_hdr,
550 						dgram_cksum);
551 				break;
552 			default:
553 				PMD_TX_LOG(WARNING, "requested cksum offload not supported %#" PRIx64,
554 					   txm->ol_flags & PKT_TX_L4_MASK);
555 				abort();
556 			}
557 			deferred++;
558 		} else {
559 			gdesc->txd.hlen = 0;
560 			gdesc->txd.om = VMXNET3_OM_NONE;
561 			gdesc->txd.msscof = 0;
562 			deferred++;
563 		}
564 
565 		/* flip the GEN bit on the SOP */
566 		rte_compiler_barrier();
567 		gdesc->dword[2] ^= VMXNET3_TXD_GEN;
568 
569 		txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
570 		nb_tx++;
571 	}
572 
573 	PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));
574 
575 	if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
576 		txq_ctrl->txNumDeferred = 0;
577 		/* Notify vSwitch that packets are available. */
578 		VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
579 				       txq->cmd_ring.next2fill);
580 	}
581 
582 	return nb_tx;
583 }
584 
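/*
 * Post one mbuf to the given Rx command ring: record it in buf_info,
 * program the Rx descriptor with the buffer address and length, and
 * hand ownership to the device by setting the descriptor's gen bit.
 */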
585 static inline void
586 vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
587 		   struct rte_mbuf *mbuf)
588 {
589 	uint32_t val;
590 	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
591 	struct Vmxnet3_RxDesc *rxd =
592 		(struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
593 	vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
594 
595 	if (ring_id == 0) {
596 		/* Usually: One HEAD type buf per packet
597 		 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
598 		 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
599 		 */
600 
601 		/* We use a single buffer per packet, so all descriptors here are HEAD type */
602 		val = VMXNET3_RXD_BTYPE_HEAD;
603 	} else {
604 		/* All BODY type buffers for 2nd ring */
605 		val = VMXNET3_RXD_BTYPE_BODY;
606 	}
607 
608 	/*
609 	 * Load the mbuf pointer into buf_info[next2fill];
610 	 * the buf_info structure is the equivalent of the cookie in a virtio virtqueue
611 	 */
612 	buf_info->m = mbuf;
613 	buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
614 	buf_info->bufPA = rte_mbuf_data_iova_default(mbuf);
615 
616 	/* Load Rx Descriptor with the buffer's GPA */
617 	rxd->addr = buf_info->bufPA;
618 
619 	/* After this point rxd->addr MUST not be NULL */
620 	rxd->btype = val;
621 	rxd->len = buf_info->len;
622 	/* Flip gen bit at the end to change ownership */
623 	rxd->gen = ring->gen;
624 
625 	vmxnet3_cmd_ring_adv_next2fill(ring);
626 }
627 /*
628  *  Allocates mbufs and clusters. Posts Rx descriptors with buffer details
629  *  so that the device can receive packets in those buffers.
630  *  Ring layout:
631  *      Among the two rings, 1st ring contains buffers of type 0 and type 1.
632  *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
633  *      by a frame will fit in 1st ring (1st buf of type0 and rest of type1).
634  *      The 2nd ring contains buffers of type 1 only; it is mostly used
635  *      for LRO.
636  */
637 static int
638 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
639 {
640 	int err = 0;
641 	uint32_t i = 0;
642 	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
643 
644 	while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
645 		struct rte_mbuf *mbuf;
646 
647 		/* Allocate blank mbuf for the current Rx Descriptor */
648 		mbuf = rte_mbuf_raw_alloc(rxq->mp);
649 		if (unlikely(mbuf == NULL)) {
650 			PMD_RX_LOG(ERR, "Error allocating mbuf");
651 			rxq->stats.rx_buf_alloc_failure++;
652 			err = ENOMEM;
653 			break;
654 		}
655 
656 		vmxnet3_renew_desc(rxq, ring_id, mbuf);
657 		i++;
658 	}
659 
660 	/* Return error only if no buffers are posted at present */
661 	if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
662 		return -err;
663 	else
664 		return i;
665 }
666 
667 /* MSS not provided by vmxnet3, guess one with available information */
668 static uint16_t
669 vmxnet3_guess_mss(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
670 		struct rte_mbuf *rxm)
671 {
672 	uint32_t hlen, slen;
673 	struct rte_ipv4_hdr *ipv4_hdr;
674 	struct rte_ipv6_hdr *ipv6_hdr;
675 	struct rte_tcp_hdr *tcp_hdr;
676 	char *ptr;
677 
678 	RTE_ASSERT(rcd->tcp);
679 
680 	ptr = rte_pktmbuf_mtod(rxm, char *);
681 	slen = rte_pktmbuf_data_len(rxm);
682 	hlen = sizeof(struct rte_ether_hdr);
683 
684 	if (rcd->v4) {
685 		if (unlikely(slen < hlen + sizeof(struct rte_ipv4_hdr)))
686 			return hw->mtu - sizeof(struct rte_ipv4_hdr)
687 					- sizeof(struct rte_tcp_hdr);
688 
689 		ipv4_hdr = (struct rte_ipv4_hdr *)(ptr + hlen);
690 		hlen += (ipv4_hdr->version_ihl & RTE_IPV4_HDR_IHL_MASK) *
691 				RTE_IPV4_IHL_MULTIPLIER;
692 	} else if (rcd->v6) {
693 		if (unlikely(slen < hlen + sizeof(struct rte_ipv6_hdr)))
694 			return hw->mtu - sizeof(struct rte_ipv6_hdr) -
695 					sizeof(struct rte_tcp_hdr);
696 
697 		ipv6_hdr = (struct rte_ipv6_hdr *)(ptr + hlen);
698 		hlen += sizeof(struct rte_ipv6_hdr);
699 		if (unlikely(ipv6_hdr->proto != IPPROTO_TCP)) {
700 			int frag;
701 
702 			rte_net_skip_ip6_ext(ipv6_hdr->proto, rxm,
703 					&hlen, &frag);
704 		}
705 	}
706 
707 	if (unlikely(slen < hlen + sizeof(struct rte_tcp_hdr)))
708 		return hw->mtu - hlen - sizeof(struct rte_tcp_hdr) +
709 				sizeof(struct rte_ether_hdr);
710 
711 	tcp_hdr = (struct rte_tcp_hdr *)(ptr + hlen);
712 	hlen += (tcp_hdr->data_off & 0xf0) >> 2;
713 
714 	if (rxm->udata64 > 1)
715 		return (rte_pktmbuf_pkt_len(rxm) - hlen +
716 				rxm->udata64 - 1) / rxm->udata64;
717 	else
718 		return hw->mtu - hlen + sizeof(struct rte_ether_hdr);
719 }
720 
721 /* Receive side checksum and other offloads */
722 static inline void
723 vmxnet3_rx_offload(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
724 		struct rte_mbuf *rxm, const uint8_t sop)
725 {
726 	uint64_t ol_flags = rxm->ol_flags;
727 	uint32_t packet_type = rxm->packet_type;
728 
729 	/* Offloads set in sop */
730 	if (sop) {
731 		/* Set packet type */
732 		packet_type |= RTE_PTYPE_L2_ETHER;
733 
734 		/* Check large packet receive */
735 		if (VMXNET3_VERSION_GE_2(hw) &&
736 		    rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) {
737 			const Vmxnet3_RxCompDescExt *rcde =
738 					(const Vmxnet3_RxCompDescExt *)rcd;
739 
740 			rxm->tso_segsz = rcde->mss;
741 			rxm->udata64 = rcde->segCnt;
742 			ol_flags |= PKT_RX_LRO;
743 		}
744 	} else { /* Offloads set in eop */
745 		/* Check for RSS */
746 		if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
747 			ol_flags |= PKT_RX_RSS_HASH;
748 			rxm->hash.rss = rcd->rssHash;
749 		}
750 
751 		/* Check for hardware stripped VLAN tag */
752 		if (rcd->ts) {
753 			ol_flags |= (PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED);
754 			rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
755 		}
756 
757 		/* Check packet type, checksum errors, etc. */
758 		if (rcd->cnc) {
759 			ol_flags |= PKT_RX_L4_CKSUM_UNKNOWN;
760 		} else {
761 			if (rcd->v4) {
762 				packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
763 
764 				if (rcd->ipc)
765 					ol_flags |= PKT_RX_IP_CKSUM_GOOD;
766 				else
767 					ol_flags |= PKT_RX_IP_CKSUM_BAD;
768 
769 				if (rcd->tuc) {
770 					ol_flags |= PKT_RX_L4_CKSUM_GOOD;
771 					if (rcd->tcp)
772 						packet_type |= RTE_PTYPE_L4_TCP;
773 					else
774 						packet_type |= RTE_PTYPE_L4_UDP;
775 				} else {
776 					if (rcd->tcp) {
777 						packet_type |= RTE_PTYPE_L4_TCP;
778 						ol_flags |= PKT_RX_L4_CKSUM_BAD;
779 					} else if (rcd->udp) {
780 						packet_type |= RTE_PTYPE_L4_UDP;
781 						ol_flags |= PKT_RX_L4_CKSUM_BAD;
782 					}
783 				}
784 			} else if (rcd->v6) {
785 				packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
786 
787 				if (rcd->tuc) {
788 					ol_flags |= PKT_RX_L4_CKSUM_GOOD;
789 					if (rcd->tcp)
790 						packet_type |= RTE_PTYPE_L4_TCP;
791 					else
792 						packet_type |= RTE_PTYPE_L4_UDP;
793 				} else {
794 					if (rcd->tcp) {
795 						packet_type |= RTE_PTYPE_L4_TCP;
796 						ol_flags |= PKT_RX_L4_CKSUM_BAD;
797 					} else if (rcd->udp) {
798 						packet_type |= RTE_PTYPE_L4_UDP;
799 						ol_flags |= PKT_RX_L4_CKSUM_BAD;
800 					}
801 				}
802 			} else {
803 				packet_type |= RTE_PTYPE_UNKNOWN;
804 			}
805 
806 			/* Old variants of vmxnet3 do not provide MSS */
807 			if ((ol_flags & PKT_RX_LRO) && rxm->tso_segsz == 0)
808 				rxm->tso_segsz = vmxnet3_guess_mss(hw,
809 						rcd, rxm);
810 		}
811 	}
812 
813 	rxm->ol_flags = ol_flags;
814 	rxm->packet_type = packet_type;
815 }
816 
817 /*
818  * Process the Rx Completion Ring of given vmxnet3_rx_queue
819  * for nb_pkts burst and return the number of packets received
820  */
821 uint16_t
822 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
823 {
824 	uint16_t nb_rx;
825 	uint32_t nb_rxd, idx;
826 	uint8_t ring_idx;
827 	vmxnet3_rx_queue_t *rxq;
828 	Vmxnet3_RxCompDesc *rcd;
829 	vmxnet3_buf_info_t *rbi;
830 	Vmxnet3_RxDesc *rxd;
831 	struct rte_mbuf *rxm = NULL;
832 	struct vmxnet3_hw *hw;
833 
834 	nb_rx = 0;
835 	ring_idx = 0;
836 	nb_rxd = 0;
837 	idx = 0;
838 
839 	rxq = rx_queue;
840 	hw = rxq->hw;
841 
842 	rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
843 
844 	if (unlikely(rxq->stopped)) {
845 		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
846 		return 0;
847 	}
848 
849 	while (rcd->gen == rxq->comp_ring.gen) {
850 		struct rte_mbuf *newm;
851 
852 		if (nb_rx >= nb_pkts)
853 			break;
854 
855 		newm = rte_mbuf_raw_alloc(rxq->mp);
856 		if (unlikely(newm == NULL)) {
857 			PMD_RX_LOG(ERR, "Error allocating mbuf");
858 			rxq->stats.rx_buf_alloc_failure++;
859 			break;
860 		}
861 
862 		idx = rcd->rxdIdx;
863 		ring_idx = vmxnet3_get_ring_idx(hw, rcd->rqID);
864 		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
865 		RTE_SET_USED(rxd); /* used only for assert when enabled */
866 		rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
867 
868 		PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
869 
870 		RTE_ASSERT(rcd->len <= rxd->len);
871 		RTE_ASSERT(rbi->m);
872 
873 		/* Get the packet buffer pointer from buf_info */
874 		rxm = rbi->m;
875 
876 		/* Clear descriptor associated buf_info to be reused */
877 		rbi->m = NULL;
878 		rbi->bufPA = 0;
879 
880 		/* Record the index at which this packet was received */
881 		rxq->cmd_ring[ring_idx].next2comp = idx;
882 
883 		/* For RCD with EOP set, check if there is a frame error */
884 		if (unlikely(rcd->eop && rcd->err)) {
885 			rxq->stats.drop_total++;
886 			rxq->stats.drop_err++;
887 
888 			if (!rcd->fcs) {
889 				rxq->stats.drop_fcs++;
890 				PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
891 			}
892 			PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
893 				   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
894 					 rxq->comp_ring.base), rcd->rxdIdx);
895 			rte_pktmbuf_free_seg(rxm);
896 			if (rxq->start_seg) {
897 				struct rte_mbuf *start = rxq->start_seg;
898 
899 				rxq->start_seg = NULL;
900 				rte_pktmbuf_free(start);
901 			}
902 			goto rcd_done;
903 		}
904 
905 		/* Initialize newly received packet buffer */
906 		rxm->port = rxq->port_id;
907 		rxm->nb_segs = 1;
908 		rxm->next = NULL;
909 		rxm->pkt_len = (uint16_t)rcd->len;
910 		rxm->data_len = (uint16_t)rcd->len;
911 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
912 		rxm->ol_flags = 0;
913 		rxm->vlan_tci = 0;
914 		rxm->packet_type = 0;
915 
916 		/*
917 		 * If this is the first buffer of the received packet,
918 		 * set the pointer to the first mbuf of the packet
919 		 * Otherwise, update the total length and the number of segments
920 		 * of the current scattered packet, and update the pointer to
921 		 * the last mbuf of the current packet.
922 		 */
923 		if (rcd->sop) {
924 			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
925 
926 			if (unlikely(rcd->len == 0)) {
927 				RTE_ASSERT(rcd->eop);
928 
929 				PMD_RX_LOG(DEBUG,
930 					   "Rx buf was skipped. rxring[%d][%d]",
931 					   ring_idx, idx);
932 				rte_pktmbuf_free_seg(rxm);
933 				goto rcd_done;
934 			}
935 
936 			if (vmxnet3_rx_data_ring(hw, rcd->rqID)) {
937 				uint8_t *rdd = rxq->data_ring.base +
938 					idx * rxq->data_desc_size;
939 
940 				RTE_ASSERT(VMXNET3_VERSION_GE_3(hw));
941 				rte_memcpy(rte_pktmbuf_mtod(rxm, char *),
942 					   rdd, rcd->len);
943 			}
944 
945 			rxq->start_seg = rxm;
946 			rxq->last_seg = rxm;
947 			vmxnet3_rx_offload(hw, rcd, rxm, 1);
948 		} else {
949 			struct rte_mbuf *start = rxq->start_seg;
950 
951 			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);
952 
953 			if (rxm->data_len) {
954 				start->pkt_len += rxm->data_len;
955 				start->nb_segs++;
956 
957 				rxq->last_seg->next = rxm;
958 				rxq->last_seg = rxm;
959 			} else {
960 				rte_pktmbuf_free_seg(rxm);
961 			}
962 		}
963 
964 		if (rcd->eop) {
965 			struct rte_mbuf *start = rxq->start_seg;
966 
967 			vmxnet3_rx_offload(hw, rcd, start, 0);
968 			rx_pkts[nb_rx++] = start;
969 			rxq->start_seg = NULL;
970 		}
971 
972 rcd_done:
973 		rxq->cmd_ring[ring_idx].next2comp = idx;
974 		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
975 					  rxq->cmd_ring[ring_idx].size);
976 
977 		/* It's time to renew descriptors */
978 		vmxnet3_renew_desc(rxq, ring_idx, newm);
979 		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
980 			VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
981 					       rxq->cmd_ring[ring_idx].next2fill);
982 		}
983 
984 		/* Advance to the next descriptor in comp_ring */
985 		vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
986 
987 		rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
988 		nb_rxd++;
989 		if (nb_rxd > rxq->cmd_ring[0].size) {
990 			PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
991 				   " relinquish control.");
992 			break;
993 		}
994 	}
995 
996 	if (unlikely(nb_rxd == 0)) {
997 		uint32_t avail;
998 		for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
999 			avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[ring_idx]);
1000 			if (unlikely(avail > 0)) {
1001 				/* try to alloc new buf and renew descriptors */
1002 				vmxnet3_post_rx_bufs(rxq, ring_idx);
1003 			}
1004 		}
1005 		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1006 			for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
1007 				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1008 						       rxq->cmd_ring[ring_idx].next2fill);
1009 			}
1010 		}
1011 	}
1012 
1013 	return nb_rx;
1014 }
1015 
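/*
 * Set up a Tx queue: a single memzone holds the command ring, the
 * completion ring and the data ring, laid out contiguously in that order.
 */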
1016 int
1017 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
1018 			   uint16_t queue_idx,
1019 			   uint16_t nb_desc,
1020 			   unsigned int socket_id,
1021 			   const struct rte_eth_txconf *tx_conf __rte_unused)
1022 {
1023 	struct vmxnet3_hw *hw = dev->data->dev_private;
1024 	const struct rte_memzone *mz;
1025 	struct vmxnet3_tx_queue *txq;
1026 	struct vmxnet3_cmd_ring *ring;
1027 	struct vmxnet3_comp_ring *comp_ring;
1028 	struct vmxnet3_data_ring *data_ring;
1029 	int size;
1030 
1031 	PMD_INIT_FUNC_TRACE();
1032 
1033 	txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
1034 			  RTE_CACHE_LINE_SIZE);
1035 	if (txq == NULL) {
1036 		PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
1037 		return -ENOMEM;
1038 	}
1039 
1040 	txq->queue_id = queue_idx;
1041 	txq->port_id = dev->data->port_id;
1042 	txq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1043 	txq->hw = hw;
1044 	txq->qid = queue_idx;
1045 	txq->stopped = TRUE;
1046 	txq->txdata_desc_size = hw->txdata_desc_size;
1047 
1048 	ring = &txq->cmd_ring;
1049 	comp_ring = &txq->comp_ring;
1050 	data_ring = &txq->data_ring;
1051 
1052 	/* Tx vmxnet ring length should be between 512-4096 */
1053 	if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
1054 		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
1055 			     VMXNET3_DEF_TX_RING_SIZE);
1056 		return -EINVAL;
1057 	} else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
1058 		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
1059 			     VMXNET3_TX_RING_MAX_SIZE);
1060 		return -EINVAL;
1061 	} else {
1062 		ring->size = nb_desc;
1063 		ring->size &= ~VMXNET3_RING_SIZE_MASK;
1064 	}
1065 	comp_ring->size = data_ring->size = ring->size;
1066 
1067 	/* Tx vmxnet rings structure initialization */
1068 	ring->next2fill = 0;
1069 	ring->next2comp = 0;
1070 	ring->gen = VMXNET3_INIT_GEN;
1071 	comp_ring->next2proc = 0;
1072 	comp_ring->gen = VMXNET3_INIT_GEN;
1073 
1074 	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
1075 	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
1076 	size += txq->txdata_desc_size * data_ring->size;
1077 
1078 	mz = rte_eth_dma_zone_reserve(dev, "txdesc", queue_idx, size,
1079 				      VMXNET3_RING_BA_ALIGN, socket_id);
1080 	if (mz == NULL) {
1081 		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1082 		return -ENOMEM;
1083 	}
1084 	txq->mz = mz;
1085 	memset(mz->addr, 0, mz->len);
1086 
1087 	/* cmd_ring initialization */
1088 	ring->base = mz->addr;
1089 	ring->basePA = mz->iova;
1090 
1091 	/* comp_ring initialization */
1092 	comp_ring->base = ring->base + ring->size;
1093 	comp_ring->basePA = ring->basePA +
1094 		(sizeof(struct Vmxnet3_TxDesc) * ring->size);
1095 
1096 	/* data_ring initialization */
1097 	data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
1098 	data_ring->basePA = comp_ring->basePA +
1099 			(sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
1100 
1101 	/* cmd_ring0 buf_info allocation */
1102 	ring->buf_info = rte_zmalloc("tx_ring_buf_info",
1103 				     ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
1104 	if (ring->buf_info == NULL) {
1105 		PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
1106 		return -ENOMEM;
1107 	}
1108 
1109 	/* Update the data portion with txq */
1110 	dev->data->tx_queues[queue_idx] = txq;
1111 
1112 	return 0;
1113 }
1114 
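/*
 * Set up an Rx queue: a single memzone holds both command rings, the
 * completion ring and (on vmxnet3 v3+ with a data descriptor size set)
 * the Rx data ring, laid out contiguously in that order.
 */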
1115 int
1116 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
1117 			   uint16_t queue_idx,
1118 			   uint16_t nb_desc,
1119 			   unsigned int socket_id,
1120 			   __rte_unused const struct rte_eth_rxconf *rx_conf,
1121 			   struct rte_mempool *mp)
1122 {
1123 	const struct rte_memzone *mz;
1124 	struct vmxnet3_rx_queue *rxq;
1125 	struct vmxnet3_hw *hw = dev->data->dev_private;
1126 	struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
1127 	struct vmxnet3_comp_ring *comp_ring;
1128 	struct vmxnet3_rx_data_ring *data_ring;
1129 	int size;
1130 	uint8_t i;
1131 	char mem_name[32];
1132 
1133 	PMD_INIT_FUNC_TRACE();
1134 
1135 	rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
1136 			  RTE_CACHE_LINE_SIZE);
1137 	if (rxq == NULL) {
1138 		PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
1139 		return -ENOMEM;
1140 	}
1141 
1142 	rxq->mp = mp;
1143 	rxq->queue_id = queue_idx;
1144 	rxq->port_id = dev->data->port_id;
1145 	rxq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1146 	rxq->hw = hw;
1147 	rxq->qid1 = queue_idx;
1148 	rxq->qid2 = queue_idx + hw->num_rx_queues;
1149 	rxq->data_ring_qid = queue_idx + 2 * hw->num_rx_queues;
1150 	rxq->data_desc_size = hw->rxdata_desc_size;
1151 	rxq->stopped = TRUE;
1152 
1153 	ring0 = &rxq->cmd_ring[0];
1154 	ring1 = &rxq->cmd_ring[1];
1155 	comp_ring = &rxq->comp_ring;
1156 	data_ring = &rxq->data_ring;
1157 
1158 	/* Rx vmxnet ring length must lie within the supported min/max sizes */
1159 	if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
1160 		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: %u", VMXNET3_DEF_RX_RING_SIZE);
1161 		return -EINVAL;
1162 	} else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
1163 		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: %u", VMXNET3_RX_RING_MAX_SIZE);
1164 		return -EINVAL;
1165 	} else {
1166 		ring0->size = nb_desc;
1167 		ring0->size &= ~VMXNET3_RING_SIZE_MASK;
1168 		ring1->size = ring0->size;
1169 	}
1170 
1171 	comp_ring->size = ring0->size + ring1->size;
1172 	data_ring->size = ring0->size;
1173 
1174 	/* Rx vmxnet rings structure initialization */
1175 	ring0->next2fill = 0;
1176 	ring1->next2fill = 0;
1177 	ring0->next2comp = 0;
1178 	ring1->next2comp = 0;
1179 	ring0->gen = VMXNET3_INIT_GEN;
1180 	ring1->gen = VMXNET3_INIT_GEN;
1181 	comp_ring->next2proc = 0;
1182 	comp_ring->gen = VMXNET3_INIT_GEN;
1183 
1184 	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
1185 	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1186 	if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size)
1187 		size += rxq->data_desc_size * data_ring->size;
1188 
1189 	mz = rte_eth_dma_zone_reserve(dev, "rxdesc", queue_idx, size,
1190 				      VMXNET3_RING_BA_ALIGN, socket_id);
1191 	if (mz == NULL) {
1192 		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1193 		return -ENOMEM;
1194 	}
1195 	rxq->mz = mz;
1196 	memset(mz->addr, 0, mz->len);
1197 
1198 	/* cmd_ring0 initialization */
1199 	ring0->base = mz->addr;
1200 	ring0->basePA = mz->iova;
1201 
1202 	/* cmd_ring1 initialization */
1203 	ring1->base = ring0->base + ring0->size;
1204 	ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
1205 
1206 	/* comp_ring initialization */
1207 	comp_ring->base = ring1->base + ring1->size;
1208 	comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
1209 		ring1->size;
1210 
1211 	/* data_ring initialization */
1212 	if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size) {
1213 		data_ring->base =
1214 			(uint8_t *)(comp_ring->base + comp_ring->size);
1215 		data_ring->basePA = comp_ring->basePA +
1216 			sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1217 	}
1218 
1219 	/* cmd_ring0-cmd_ring1 buf_info allocation */
1220 	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
1221 
1222 		ring = &rxq->cmd_ring[i];
1223 		ring->rid = i;
1224 		snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
1225 
1226 		ring->buf_info = rte_zmalloc(mem_name,
1227 					     ring->size * sizeof(vmxnet3_buf_info_t),
1228 					     RTE_CACHE_LINE_SIZE);
1229 		if (ring->buf_info == NULL) {
1230 			PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
1231 			return -ENOMEM;
1232 		}
1233 	}
1234 
1235 	/* Update the data portion with rxq */
1236 	dev->data->rx_queues[queue_idx] = rxq;
1237 
1238 	return 0;
1239 }
1240 
1241 /*
1242  * Initializes Receive Unit
1243  * Load mbufs in rx queue in advance
1244  */
1245 int
1246 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
1247 {
1248 	struct vmxnet3_hw *hw = dev->data->dev_private;
1249 
1250 	int i, ret;
1251 	uint8_t j;
1252 
1253 	PMD_INIT_FUNC_TRACE();
1254 
1255 	for (i = 0; i < hw->num_rx_queues; i++) {
1256 		vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
1257 
1258 		for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
1259 			/* Post as many Rx buffers as the command ring can hold */
1260 			ret = vmxnet3_post_rx_bufs(rxq, j);
1261 			if (ret <= 0) {
1262 				PMD_INIT_LOG(ERR,
1263 					     "ERROR: Posting Rxq: %d buffers ring: %d",
1264 					     i, j);
1265 				return -ret;
1266 			}
1267 			/*
1268 			 * Update the device with the next2fill index so it can
1269 			 * use the newly posted mbufs for incoming packets.
1270 			 */
1271 			if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1272 				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1273 						       rxq->cmd_ring[j].next2fill);
1274 			}
1275 		}
1276 		rxq->stopped = FALSE;
1277 		rxq->start_seg = NULL;
1278 	}
1279 
1280 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
1281 		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
1282 
1283 		txq->stopped = FALSE;
1284 	}
1285 
1286 	return 0;
1287 }
1288 
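/* Default 40-byte Toeplitz RSS hash key (the well-known default key also used by the Intel PMDs) */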
1289 static uint8_t rss_intel_key[40] = {
1290 	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1291 	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1292 	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1293 	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1294 	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1295 };
1296 
1297 /*
1298  * Additional RSS configuration based on the vmxnet3 v4+ API
1299  */
1300 int
1301 vmxnet3_v4_rss_configure(struct rte_eth_dev *dev)
1302 {
1303 	struct vmxnet3_hw *hw = dev->data->dev_private;
1304 	Vmxnet3_DriverShared *shared = hw->shared;
1305 	Vmxnet3_CmdInfo *cmdInfo = &shared->cu.cmdInfo;
1306 	struct rte_eth_rss_conf *port_rss_conf;
1307 	uint64_t rss_hf;
1308 	uint32_t ret;
1309 
1310 	PMD_INIT_FUNC_TRACE();
1311 
1312 	cmdInfo->setRSSFields = 0;
1313 	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1314 	rss_hf = port_rss_conf->rss_hf &
1315 		(VMXNET3_V4_RSS_MASK | VMXNET3_RSS_OFFLOAD_ALL);
1316 
1317 	if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1318 		cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_TCPIP4;
1319 	if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1320 		cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_TCPIP6;
1321 	if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
1322 		cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_UDPIP4;
1323 	if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
1324 		cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_UDPIP6;
1325 
1326 	VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD,
1327 			       VMXNET3_CMD_SET_RSS_FIELDS);
1328 	ret = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD);
1329 
1330 	if (ret != VMXNET3_SUCCESS) {
1331 		PMD_DRV_LOG(ERR, "Set RSS fields (v4) failed: %d", ret);
1332 	}
1333 
1334 	return ret;
1335 }
1336 
1337 /*
1338  * Configure RSS feature
1339  */
1340 int
1341 vmxnet3_rss_configure(struct rte_eth_dev *dev)
1342 {
1343 	struct vmxnet3_hw *hw = dev->data->dev_private;
1344 	struct VMXNET3_RSSConf *dev_rss_conf;
1345 	struct rte_eth_rss_conf *port_rss_conf;
1346 	uint64_t rss_hf;
1347 	uint8_t i, j;
1348 
1349 	PMD_INIT_FUNC_TRACE();
1350 
1351 	dev_rss_conf = hw->rss_conf;
1352 	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1353 
1354 	/* loading hashFunc */
1355 	dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
1356 	/* loading hashKeySize */
1357 	dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
1358 	/* loading indTableSize: must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
1359 	dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
1360 
1361 	if (port_rss_conf->rss_key == NULL) {
1362 		/* Default hash key */
1363 		port_rss_conf->rss_key = rss_intel_key;
1364 	}
1365 
1366 	/* loading hashKey */
1367 	memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
1368 	       dev_rss_conf->hashKeySize);
1369 
1370 	/* loading indTable */
1371 	for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1372 		if (j == dev->data->nb_rx_queues)
1373 			j = 0;
1374 		dev_rss_conf->indTable[i] = j;
1375 	}
1376 
1377 	/* loading hashType */
1378 	dev_rss_conf->hashType = 0;
1379 	rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1380 	if (rss_hf & ETH_RSS_IPV4)
1381 		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1382 	if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1383 		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1384 	if (rss_hf & ETH_RSS_IPV6)
1385 		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1386 	if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1387 		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1388 
1389 	return VMXNET3_SUCCESS;
1390 }
1391