xref: /dpdk/drivers/net/vmxnet3/vmxnet3_rxtx.c (revision 2490bb897182f57de80fd924dd3ae48dda819b8c)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2015 Intel Corporation
3  */
4 
5 #include <sys/queue.h>
6 
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <errno.h>
11 #include <stdint.h>
12 #include <stdarg.h>
13 #include <unistd.h>
14 #include <inttypes.h>
15 
16 #include <rte_byteorder.h>
17 #include <rte_common.h>
18 #include <rte_cycles.h>
19 #include <rte_log.h>
20 #include <rte_debug.h>
21 #include <rte_interrupts.h>
22 #include <rte_pci.h>
23 #include <rte_memory.h>
24 #include <rte_memzone.h>
25 #include <rte_launch.h>
26 #include <rte_eal.h>
27 #include <rte_per_lcore.h>
28 #include <rte_lcore.h>
29 #include <rte_atomic.h>
30 #include <rte_branch_prediction.h>
31 #include <rte_mempool.h>
32 #include <rte_malloc.h>
33 #include <rte_mbuf.h>
34 #include <rte_ether.h>
35 #include <ethdev_driver.h>
36 #include <rte_prefetch.h>
37 #include <rte_ip.h>
38 #include <rte_udp.h>
39 #include <rte_tcp.h>
40 #include <rte_sctp.h>
41 #include <rte_string_fns.h>
42 #include <rte_errno.h>
43 #include <rte_net.h>
44 
45 #include "base/vmxnet3_defs.h"
46 #include "vmxnet3_ring.h"
47 
48 #include "vmxnet3_logs.h"
49 #include "vmxnet3_ethdev.h"
50 
51 #define	VMXNET3_TX_OFFLOAD_MASK	(RTE_MBUF_F_TX_VLAN | \
52 		RTE_MBUF_F_TX_IPV6 |     \
53 		RTE_MBUF_F_TX_IPV4 |     \
54 		RTE_MBUF_F_TX_L4_MASK |  \
55 		RTE_MBUF_F_TX_TCP_SEG)
56 
57 #define	VMXNET3_TX_OFFLOAD_NOTSUP_MASK	\
58 	(RTE_MBUF_F_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)
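/*
 * The NOTSUP mask is simply the complement of the supported flags within
 * RTE_MBUF_F_TX_OFFLOAD_MASK; vmxnet3_prep_pkts() checks it to reject mbufs
 * that request a Tx offload this device cannot perform.
 */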
59 
60 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
61 
62 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
63 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
64 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
65 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
66 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
67 #endif
68 
69 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
70 static void
71 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
72 {
73 	uint32_t avail = 0;
74 
75 	if (rxq == NULL)
76 		return;
77 
78 	PMD_RX_LOG(DEBUG,
79 		   "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
80 		   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
81 	PMD_RX_LOG(DEBUG,
82 		   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
83 		   (unsigned long)rxq->cmd_ring[0].basePA,
84 		   (unsigned long)rxq->cmd_ring[1].basePA,
85 		   (unsigned long)rxq->comp_ring.basePA);
86 
87 	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
88 	PMD_RX_LOG(DEBUG,
89 		   "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
90 		   (uint32_t)rxq->cmd_ring[0].size, avail,
91 		   rxq->comp_ring.next2proc,
92 		   rxq->cmd_ring[0].size - avail);
93 
94 	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
95 	PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
96 		   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
97 		   rxq->cmd_ring[1].size - avail);
98 
99 }
100 
101 static void
102 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
103 {
104 	uint32_t avail = 0;
105 
106 	if (txq == NULL)
107 		return;
108 
109 	PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
110 		   txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
111 	PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
112 		   (unsigned long)txq->cmd_ring.basePA,
113 		   (unsigned long)txq->comp_ring.basePA,
114 		   (unsigned long)txq->data_ring.basePA);
115 
116 	avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
117 	PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
118 		   (uint32_t)txq->cmd_ring.size, avail,
119 		   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
120 }
121 #endif
122 
123 static void
124 vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
125 {
126 	while (ring->next2comp != ring->next2fill) {
127 		/* No need to worry about desc ownership, device is quiesced by now. */
128 		vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
129 
130 		if (buf_info->m) {
131 			rte_pktmbuf_free(buf_info->m);
132 			buf_info->m = NULL;
133 			buf_info->bufPA = 0;
134 			buf_info->len = 0;
135 		}
136 		vmxnet3_cmd_ring_adv_next2comp(ring);
137 	}
138 }
139 
140 static void
141 vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
142 {
143 	uint32_t i;
144 
145 	for (i = 0; i < ring->size; i++) {
146 		/* No need to worry about desc ownership, device is quiesced by now. */
147 		vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];
148 
149 		if (buf_info->m) {
150 			rte_pktmbuf_free_seg(buf_info->m);
151 			buf_info->m = NULL;
152 			buf_info->bufPA = 0;
153 			buf_info->len = 0;
154 		}
155 		vmxnet3_cmd_ring_adv_next2comp(ring);
156 	}
157 }
158 
159 static void
160 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
161 {
162 	rte_free(ring->buf_info);
163 	ring->buf_info = NULL;
164 }
165 
166 void
167 vmxnet3_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
168 {
169 	vmxnet3_tx_queue_t *tq = dev->data->tx_queues[qid];
170 
171 	if (tq != NULL) {
172 		/* Release mbufs */
173 		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
174 		/* Release the cmd_ring */
175 		vmxnet3_cmd_ring_release(&tq->cmd_ring);
176 		/* Release the memzone */
177 		rte_memzone_free(tq->mz);
178 		/* Release the queue */
179 		rte_free(tq);
180 	}
181 }
182 
183 void
184 vmxnet3_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
185 {
186 	int i;
187 	vmxnet3_rx_queue_t *rq = dev->data->rx_queues[qid];
188 
189 	if (rq != NULL) {
190 		/* Release mbufs */
191 		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
192 			vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
193 
194 		/* Release both the cmd_rings */
195 		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
196 			vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
197 
198 		/* Release the memzone */
199 		rte_memzone_free(rq->mz);
200 
201 		/* Release the queue */
202 		rte_free(rq);
203 	}
204 }
205 
206 static void
207 vmxnet3_dev_tx_queue_reset(void *txq)
208 {
209 	vmxnet3_tx_queue_t *tq = txq;
210 	struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
211 	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
212 	struct vmxnet3_data_ring *data_ring = &tq->data_ring;
213 	int size;
214 
215 	if (tq != NULL) {
216 		/* Release the cmd_ring mbufs */
217 		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
218 	}
219 
220 	/* Tx vmxnet rings structure initialization */
221 	ring->next2fill = 0;
222 	ring->next2comp = 0;
223 	ring->gen = VMXNET3_INIT_GEN;
224 	comp_ring->next2proc = 0;
225 	comp_ring->gen = VMXNET3_INIT_GEN;
226 
227 	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
228 	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
229 	size += tq->txdata_desc_size * data_ring->size;
230 
231 	memset(ring->base, 0, size);
232 }
233 
234 static void
235 vmxnet3_dev_rx_queue_reset(void *rxq)
236 {
237 	int i;
238 	vmxnet3_rx_queue_t *rq = rxq;
239 	struct vmxnet3_hw *hw = rq->hw;
240 	struct vmxnet3_cmd_ring *ring0, *ring1;
241 	struct vmxnet3_comp_ring *comp_ring;
242 	struct vmxnet3_rx_data_ring *data_ring = &rq->data_ring;
243 	int size;
244 
245 	/* Release both the cmd_rings mbufs */
246 	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
247 		vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
248 
249 	ring0 = &rq->cmd_ring[0];
250 	ring1 = &rq->cmd_ring[1];
251 	comp_ring = &rq->comp_ring;
252 
253 	/* Rx vmxnet rings structure initialization */
254 	ring0->next2fill = 0;
255 	ring1->next2fill = 0;
256 	ring0->next2comp = 0;
257 	ring1->next2comp = 0;
258 	ring0->gen = VMXNET3_INIT_GEN;
259 	ring1->gen = VMXNET3_INIT_GEN;
260 	comp_ring->next2proc = 0;
261 	comp_ring->gen = VMXNET3_INIT_GEN;
262 
263 	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
264 	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
265 	if (VMXNET3_VERSION_GE_3(hw) && rq->data_desc_size)
266 		size += rq->data_desc_size * data_ring->size;
267 
268 	memset(ring0->base, 0, size);
269 }
270 
271 void
272 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
273 {
274 	unsigned i;
275 
276 	PMD_INIT_FUNC_TRACE();
277 
278 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
279 		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
280 
281 		if (txq != NULL) {
282 			txq->stopped = TRUE;
283 			vmxnet3_dev_tx_queue_reset(txq);
284 		}
285 	}
286 
287 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
288 		struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
289 
290 		if (rxq != NULL) {
291 			rxq->stopped = TRUE;
292 			vmxnet3_dev_rx_queue_reset(rxq);
293 		}
294 	}
295 }
296 
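/*
 * Tx completion model: the device writes one Tx completion descriptor (TCD)
 * per packet whose EOP command descriptor had the CQ bit set.  The TCD
 * carries the ring index of that EOP descriptor; every command descriptor up
 * to and including it is thereby completed, so vmxnet3_unmap_pkt() advances
 * next2comp up to eop_idx and frees the packet's mbuf chain.
 */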
297 static int
298 vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
299 {
300 	int completed = 0;
301 	struct rte_mbuf *mbuf;
302 
303 	/* Release cmd_ring descriptor and free mbuf */
304 	RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);
305 
306 	mbuf = txq->cmd_ring.buf_info[eop_idx].m;
307 	if (mbuf == NULL)
308 		rte_panic("EOP desc does not point to a valid mbuf");
309 	rte_pktmbuf_free(mbuf);
310 
311 	txq->cmd_ring.buf_info[eop_idx].m = NULL;
312 
313 	while (txq->cmd_ring.next2comp != eop_idx) {
314 		/* no out-of-order completion */
315 		RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
316 		vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
317 		completed++;
318 	}
319 
320 	/* Mark the txd for which tcd was generated as completed */
321 	vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
322 
323 	return completed + 1;
324 }
325 
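/*
 * A completion ring entry belongs to the driver only while its gen bit
 * matches comp_ring->gen; the driver's gen value toggles on every ring wrap,
 * so a mismatch means the device has not yet written that entry.
 */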
326 static void
327 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
328 {
329 	int completed = 0;
330 	vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
331 	struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
332 		(comp_ring->base + comp_ring->next2proc);
333 
334 	while (tcd->gen == comp_ring->gen) {
335 		completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);
336 
337 		vmxnet3_comp_ring_adv_next2proc(comp_ring);
338 		tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
339 						    comp_ring->next2proc);
340 	}
341 
342 	PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
343 
344 	/* To avoid compiler warnings when not in DEBUG mode. */
345 	RTE_SET_USED(completed);
346 }
347 
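/*
 * vmxnet3_prep_pkts() is the PMD's tx_pkt_prepare handler, reached through
 * rte_eth_tx_prepare().  A minimal usage sketch from the application side
 * (port_id, queue_id, pkts and n are illustrative placeholders):
 *
 *	uint16_t nb_prep = rte_eth_tx_prepare(port_id, queue_id, pkts, n);
 *
 *	if (nb_prep < n)
 *		handle_bad_pkt(pkts[nb_prep]);	(rte_errno says why it failed)
 *	rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
 *
 * where handle_bad_pkt() is a hypothetical application callback.
 */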
348 uint16_t
349 vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
350 	uint16_t nb_pkts)
351 {
352 	int32_t ret;
353 	uint32_t i;
354 	uint64_t ol_flags;
355 	struct rte_mbuf *m;
356 
357 	for (i = 0; i != nb_pkts; i++) {
358 		m = tx_pkts[i];
359 		ol_flags = m->ol_flags;
360 
361 		/* Non-TSO packet cannot occupy more than
362 		 * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
363 		 */
364 		if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) == 0 &&
365 				m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {
366 			rte_errno = EINVAL;
367 			return i;
368 		}
369 
370 		/* check that only supported TX offloads are requested. */
371 		if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
372 				(ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
373 				RTE_MBUF_F_TX_SCTP_CKSUM) {
374 			rte_errno = ENOTSUP;
375 			return i;
376 		}
377 
378 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
379 		ret = rte_validate_tx_offload(m);
380 		if (ret != 0) {
381 			rte_errno = -ret;
382 			return i;
383 		}
384 #endif
385 		ret = rte_net_intel_cksum_prepare(m);
386 		if (ret != 0) {
387 			rte_errno = -ret;
388 			return i;
389 		}
390 	}
391 
392 	return i;
393 }
394 
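/*
 * Descriptor hand-off in vmxnet3_xmit_pkts(): all descriptors of a packet
 * are written with the current gen bit except the SOP, which is first
 * written with the inverted (previous) gen.  After a compiler barrier the
 * SOP gen bit is flipped last, so the device never sees a partially built
 * chain.  Single-segment packets that fit in txdata_desc_size bytes are
 * copied into the Tx data ring and the descriptor points at that copy.  The
 * TXPROD doorbell is rung only once txNumDeferred reaches the txThreshold
 * supplied by the device, batching notifications.
 */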
395 uint16_t
396 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
397 		  uint16_t nb_pkts)
398 {
399 	uint16_t nb_tx;
400 	vmxnet3_tx_queue_t *txq = tx_queue;
401 	struct vmxnet3_hw *hw = txq->hw;
402 	Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
403 	uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);
404 
405 	if (unlikely(txq->stopped)) {
406 		PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
407 		return 0;
408 	}
409 
410 	/* Free up the comp_descriptors aggressively */
411 	vmxnet3_tq_tx_complete(txq);
412 
413 	nb_tx = 0;
414 	while (nb_tx < nb_pkts) {
415 		Vmxnet3_GenericDesc *gdesc;
416 		vmxnet3_buf_info_t *tbi;
417 		uint32_t first2fill, avail, dw2;
418 		struct rte_mbuf *txm = tx_pkts[nb_tx];
419 		struct rte_mbuf *m_seg = txm;
420 		int copy_size = 0;
421 		bool tso = (txm->ol_flags & RTE_MBUF_F_TX_TCP_SEG) != 0;
422 		/* # of descriptors needed for a packet. */
423 		unsigned count = txm->nb_segs;
424 
425 		avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
426 		if (count > avail) {
427 			/* Is command ring full? */
428 			if (unlikely(avail == 0)) {
429 				PMD_TX_LOG(DEBUG, "No free ring descriptors");
430 				txq->stats.tx_ring_full++;
431 				txq->stats.drop_total += (nb_pkts - nb_tx);
432 				break;
433 			}
434 
435 			/* Command ring is not full but cannot handle the
436 			 * multi-segmented packet. Let's try the next packet
437 			 * in this case.
438 			 */
439 			PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
440 				   "(avail %d needed %d)", avail, count);
441 			txq->stats.drop_total++;
442 			if (tso)
443 				txq->stats.drop_tso++;
444 			rte_pktmbuf_free(txm);
445 			nb_tx++;
446 			continue;
447 		}
448 
449 		/* Drop non-TSO packet that is excessively fragmented */
450 		if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
451 			PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
452 				   "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
453 			txq->stats.drop_too_many_segs++;
454 			txq->stats.drop_total++;
455 			rte_pktmbuf_free(txm);
456 			nb_tx++;
457 			continue;
458 		}
459 
460 		if (txm->nb_segs == 1 &&
461 		    rte_pktmbuf_pkt_len(txm) <= txq->txdata_desc_size) {
462 			struct Vmxnet3_TxDataDesc *tdd;
463 
464 			/* Skip empty packets */
465 			if (unlikely(rte_pktmbuf_pkt_len(txm) == 0)) {
466 				txq->stats.drop_total++;
467 				rte_pktmbuf_free(txm);
468 				nb_tx++;
469 				continue;
470 			}
471 
472 			tdd = (struct Vmxnet3_TxDataDesc *)
473 				((uint8 *)txq->data_ring.base +
474 				 txq->cmd_ring.next2fill *
475 				 txq->txdata_desc_size);
476 			copy_size = rte_pktmbuf_pkt_len(txm);
477 			rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
478 		}
479 
480 		/* use the previous gen bit for the SOP desc */
481 		dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
482 		first2fill = txq->cmd_ring.next2fill;
483 		do {
484 			/* Remember the transmit buffer for cleanup */
485 			tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
486 
487 			/* NB: the following assumes that VMXNET3 maximum
488 			 * transmit buffer size (16K) is greater than
489 			 * maximum size of mbuf segment size.
490 			 */
491 			gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
492 
493 			/* Skip empty segments */
494 			if (unlikely(m_seg->data_len == 0))
495 				continue;
496 
497 			if (copy_size) {
498 				uint64 offset =
499 					(uint64)txq->cmd_ring.next2fill *
500 							txq->txdata_desc_size;
501 				gdesc->txd.addr =
502 					rte_cpu_to_le_64(txq->data_ring.basePA +
503 							 offset);
504 			} else {
505 				gdesc->txd.addr = rte_mbuf_data_iova(m_seg);
506 			}
507 
508 			gdesc->dword[2] = dw2 | m_seg->data_len;
509 			gdesc->dword[3] = 0;
510 
511 			/* move to the next2fill descriptor */
512 			vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
513 
514 			/* use the right gen for non-SOP desc */
515 			dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
516 		} while ((m_seg = m_seg->next) != NULL);
517 
518 		/* set the last buf_info for the pkt */
519 		tbi->m = txm;
520 		/* Update the EOP descriptor */
521 		gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
522 
523 		/* Add VLAN tag if present */
524 		gdesc = txq->cmd_ring.base + first2fill;
525 		if (txm->ol_flags & RTE_MBUF_F_TX_VLAN) {
526 			gdesc->txd.ti = 1;
527 			gdesc->txd.tci = txm->vlan_tci;
528 		}
529 
530 		if (tso) {
531 			uint16_t mss = txm->tso_segsz;
532 
533 			RTE_ASSERT(mss > 0);
534 
535 			gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
536 			gdesc->txd.om = VMXNET3_OM_TSO;
537 			gdesc->txd.msscof = mss;
538 
539 			deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
540 		} else if (txm->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
541 			gdesc->txd.om = VMXNET3_OM_CSUM;
542 			gdesc->txd.hlen = txm->l2_len + txm->l3_len;
543 
544 			switch (txm->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
545 			case RTE_MBUF_F_TX_TCP_CKSUM:
546 				gdesc->txd.msscof = gdesc->txd.hlen +
547 					offsetof(struct rte_tcp_hdr, cksum);
548 				break;
549 			case RTE_MBUF_F_TX_UDP_CKSUM:
550 				gdesc->txd.msscof = gdesc->txd.hlen +
551 					offsetof(struct rte_udp_hdr,
552 						dgram_cksum);
553 				break;
554 			default:
555 				PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
556 					   txm->ol_flags & RTE_MBUF_F_TX_L4_MASK);
557 				abort();
558 			}
559 			deferred++;
560 		} else {
561 			gdesc->txd.hlen = 0;
562 			gdesc->txd.om = VMXNET3_OM_NONE;
563 			gdesc->txd.msscof = 0;
564 			deferred++;
565 		}
566 
567 		/* flip the GEN bit on the SOP */
568 		rte_compiler_barrier();
569 		gdesc->dword[2] ^= VMXNET3_TXD_GEN;
570 
571 		txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
572 		nb_tx++;
573 	}
574 
575 	PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));
576 
577 	if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
578 		txq_ctrl->txNumDeferred = 0;
579 		/* Notify vSwitch that packets are available. */
580 		VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
581 				       txq->cmd_ring.next2fill);
582 	}
583 
584 	return nb_tx;
585 }
586 
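/*
 * vmxnet3_renew_desc() re-posts a single receive buffer: the mbuf pointer is
 * stashed in buf_info[] (the host-side cookie), the descriptor is loaded
 * with the buffer's IOVA, length and buffer type (HEAD on ring 0, BODY on
 * ring 1), and the gen bit is written last to hand ownership to the device.
 */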
587 static inline void
588 vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
589 		   struct rte_mbuf *mbuf)
590 {
591 	uint32_t val;
592 	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
593 	struct Vmxnet3_RxDesc *rxd =
594 		(struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
595 	vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
596 
597 	if (ring_id == 0) {
598 		/* Usually: One HEAD type buf per packet
599 		 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
600 		 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
601 		 */
602 
603 		/* We use single packet buffer so all heads here */
604 		val = VMXNET3_RXD_BTYPE_HEAD;
605 	} else {
606 		/* All BODY type buffers for 2nd ring */
607 		val = VMXNET3_RXD_BTYPE_BODY;
608 	}
609 
610 	/*
611 	 * Load mbuf pointer into buf_info[ring_size]
612 	 * buf_info structure is equivalent to cookie for virtio-virtqueue
613 	 */
614 	buf_info->m = mbuf;
615 	buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
616 	buf_info->bufPA = rte_mbuf_data_iova_default(mbuf);
617 
618 	/* Load Rx Descriptor with the buffer's GPA */
619 	rxd->addr = buf_info->bufPA;
620 
621 	/* After this point rxd->addr MUST not be NULL */
622 	rxd->btype = val;
623 	rxd->len = buf_info->len;
624 	/* Flip gen bit at the end to change ownership */
625 	rxd->gen = ring->gen;
626 
627 	vmxnet3_cmd_ring_adv_next2fill(ring);
628 }
629 /*
630  *  Allocates mbufs and posts Rx descriptors with the buffer details
631  *  so that the device can receive packets into those buffers.
632  *  Ring layout:
633  *      Of the two rings, the 1st ring contains buffers of type 0 and type 1.
634  *      bufs_per_pkt is set so that, in non-LRO cases, all the buffers
635  *      required by a frame fit in the 1st ring (1st buf of type 0, rest of type 1).
636  *      The 2nd ring contains type 1 buffers only and is used mostly
637  *      for LRO.
638  */
639 static int
640 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
641 {
642 	int err = 0;
643 	uint32_t i = 0;
644 	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
645 
646 	while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
647 		struct rte_mbuf *mbuf;
648 
649 		/* Allocate blank mbuf for the current Rx Descriptor */
650 		mbuf = rte_mbuf_raw_alloc(rxq->mp);
651 		if (unlikely(mbuf == NULL)) {
652 			PMD_RX_LOG(ERR, "Error allocating mbuf");
653 			rxq->stats.rx_buf_alloc_failure++;
654 			err = ENOMEM;
655 			break;
656 		}
657 
658 		vmxnet3_renew_desc(rxq, ring_id, mbuf);
659 		i++;
660 	}
661 
662 	/* Return error only if no buffers are posted at present */
663 	if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
664 		return -err;
665 	else
666 		return i;
667 }
668 
669 /* MSS not provided by vmxnet3, guess one with available information */
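/*
 * The heuristic parses the Ethernet/IP/TCP headers in the first segment to
 * obtain the total header length, then derives the MSS either from the
 * reported segment count (packet length split evenly across segments) or,
 * failing that, from the configured MTU and that header length.
 */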
670 static uint16_t
671 vmxnet3_guess_mss(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
672 		struct rte_mbuf *rxm)
673 {
674 	uint32_t hlen, slen;
675 	struct rte_ipv4_hdr *ipv4_hdr;
676 	struct rte_ipv6_hdr *ipv6_hdr;
677 	struct rte_tcp_hdr *tcp_hdr;
678 	char *ptr;
679 	uint8_t segs;
680 
681 	RTE_ASSERT(rcd->tcp);
682 
683 	ptr = rte_pktmbuf_mtod(rxm, char *);
684 	slen = rte_pktmbuf_data_len(rxm);
685 	hlen = sizeof(struct rte_ether_hdr);
686 
687 	if (rcd->v4) {
688 		if (unlikely(slen < hlen + sizeof(struct rte_ipv4_hdr)))
689 			return hw->mtu - sizeof(struct rte_ipv4_hdr)
690 					- sizeof(struct rte_tcp_hdr);
691 
692 		ipv4_hdr = (struct rte_ipv4_hdr *)(ptr + hlen);
693 		hlen += rte_ipv4_hdr_len(ipv4_hdr);
694 	} else if (rcd->v6) {
695 		if (unlikely(slen < hlen + sizeof(struct rte_ipv6_hdr)))
696 			return hw->mtu - sizeof(struct rte_ipv6_hdr) -
697 					sizeof(struct rte_tcp_hdr);
698 
699 		ipv6_hdr = (struct rte_ipv6_hdr *)(ptr + hlen);
700 		hlen += sizeof(struct rte_ipv6_hdr);
701 		if (unlikely(ipv6_hdr->proto != IPPROTO_TCP)) {
702 			int frag;
703 
704 			rte_net_skip_ip6_ext(ipv6_hdr->proto, rxm,
705 					&hlen, &frag);
706 		}
707 	}
708 
709 	if (unlikely(slen < hlen + sizeof(struct rte_tcp_hdr)))
710 		return hw->mtu - hlen - sizeof(struct rte_tcp_hdr) +
711 				sizeof(struct rte_ether_hdr);
712 
713 	tcp_hdr = (struct rte_tcp_hdr *)(ptr + hlen);
714 	hlen += (tcp_hdr->data_off & 0xf0) >> 2;
715 
716 	segs = *vmxnet3_segs_dynfield(rxm);
717 	if (segs > 1)
718 		return (rte_pktmbuf_pkt_len(rxm) - hlen + segs - 1) / segs;
719 	else
720 		return hw->mtu - hlen + sizeof(struct rte_ether_hdr);
721 }
722 
723 /* Receive side checksum and other offloads */
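/*
 * Completion descriptor fields are split between a packet's SOP and EOP
 * entries: LRO/MSS information is taken from the SOP, while the RSS hash,
 * stripped VLAN tag and checksum status are processed on the EOP.  This is
 * why the helper is invoked twice per received packet.
 */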
724 static inline void
725 vmxnet3_rx_offload(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
726 		struct rte_mbuf *rxm, const uint8_t sop)
727 {
728 	uint64_t ol_flags = rxm->ol_flags;
729 	uint32_t packet_type = rxm->packet_type;
730 
731 	/* Offloads set in sop */
732 	if (sop) {
733 		/* Set packet type */
734 		packet_type |= RTE_PTYPE_L2_ETHER;
735 
736 		/* Check large packet receive */
737 		if (VMXNET3_VERSION_GE_2(hw) &&
738 		    rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) {
739 			const Vmxnet3_RxCompDescExt *rcde =
740 					(const Vmxnet3_RxCompDescExt *)rcd;
741 
742 			rxm->tso_segsz = rcde->mss;
743 			*vmxnet3_segs_dynfield(rxm) = rcde->segCnt;
744 			ol_flags |= RTE_MBUF_F_RX_LRO;
745 		}
746 	} else { /* Offloads set in eop */
747 		/* Check for RSS */
748 		if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
749 			ol_flags |= RTE_MBUF_F_RX_RSS_HASH;
750 			rxm->hash.rss = rcd->rssHash;
751 		}
752 
753 		/* Check for hardware stripped VLAN tag */
754 		if (rcd->ts) {
755 			ol_flags |= (RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED);
756 			rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
757 		}
758 
759 		/* Check packet type, checksum errors, etc. */
760 		if (rcd->cnc) {
761 			ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN;
762 		} else {
763 			if (rcd->v4) {
764 				packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
765 
766 				if (rcd->ipc)
767 					ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD;
768 				else
769 					ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD;
770 
771 				if (rcd->tuc) {
772 					ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
773 					if (rcd->tcp)
774 						packet_type |= RTE_PTYPE_L4_TCP;
775 					else
776 						packet_type |= RTE_PTYPE_L4_UDP;
777 				} else {
778 					if (rcd->tcp) {
779 						packet_type |= RTE_PTYPE_L4_TCP;
780 						ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
781 					} else if (rcd->udp) {
782 						packet_type |= RTE_PTYPE_L4_UDP;
783 						ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
784 					}
785 				}
786 			} else if (rcd->v6) {
787 				packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
788 
789 				if (rcd->tuc) {
790 					ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
791 					if (rcd->tcp)
792 						packet_type |= RTE_PTYPE_L4_TCP;
793 					else
794 						packet_type |= RTE_PTYPE_L4_UDP;
795 				} else {
796 					if (rcd->tcp) {
797 						packet_type |= RTE_PTYPE_L4_TCP;
798 						ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
799 					} else if (rcd->udp) {
800 						packet_type |= RTE_PTYPE_L4_UDP;
801 						ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
802 					}
803 				}
804 			} else {
805 				packet_type |= RTE_PTYPE_UNKNOWN;
806 			}
807 
808 			/* Old variants of vmxnet3 do not provide MSS */
809 			if ((ol_flags & RTE_MBUF_F_RX_LRO) && rxm->tso_segsz == 0)
810 				rxm->tso_segsz = vmxnet3_guess_mss(hw,
811 						rcd, rxm);
812 		}
813 	}
814 
815 	rxm->ol_flags = ol_flags;
816 	rxm->packet_type = packet_type;
817 }
818 
819 /*
820  * Process the Rx completion ring of the given vmxnet3_rx_queue,
821  * receive up to nb_pkts packets, and return the number actually received
822  */
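/*
 * Segments of a scattered packet are chained through rxq->start_seg (head)
 * and rxq->last_seg (tail) until the EOP completion arrives.  Each consumed
 * buffer is immediately replaced via vmxnet3_renew_desc(), and the RXPROD
 * doorbell is rung with the new fill index whenever the device requests it
 * through updateRxProd.
 */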
823 uint16_t
824 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
825 {
826 	uint16_t nb_rx;
827 	uint32_t nb_rxd, idx;
828 	uint8_t ring_idx;
829 	vmxnet3_rx_queue_t *rxq;
830 	Vmxnet3_RxCompDesc *rcd;
831 	vmxnet3_buf_info_t *rbi;
832 	Vmxnet3_RxDesc *rxd;
833 	struct rte_mbuf *rxm = NULL;
834 	struct vmxnet3_hw *hw;
835 
836 	nb_rx = 0;
837 	ring_idx = 0;
838 	nb_rxd = 0;
839 	idx = 0;
840 
841 	rxq = rx_queue;
842 	hw = rxq->hw;
843 
844 	rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
845 
846 	if (unlikely(rxq->stopped)) {
847 		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
848 		return 0;
849 	}
850 
851 	while (rcd->gen == rxq->comp_ring.gen) {
852 		struct rte_mbuf *newm;
853 
854 		if (nb_rx >= nb_pkts)
855 			break;
856 
857 		newm = rte_mbuf_raw_alloc(rxq->mp);
858 		if (unlikely(newm == NULL)) {
859 			PMD_RX_LOG(ERR, "Error allocating mbuf");
860 			rxq->stats.rx_buf_alloc_failure++;
861 			break;
862 		}
863 
864 		idx = rcd->rxdIdx;
865 		ring_idx = vmxnet3_get_ring_idx(hw, rcd->rqID);
866 		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
867 		RTE_SET_USED(rxd); /* used only for assert when enabled */
868 		rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
869 
870 		PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
871 
872 		RTE_ASSERT(rcd->len <= rxd->len);
873 		RTE_ASSERT(rbi->m);
874 
875 		/* Get the packet buffer pointer from buf_info */
876 		rxm = rbi->m;
877 
878 		/* Clear descriptor associated buf_info to be reused */
879 		rbi->m = NULL;
880 		rbi->bufPA = 0;
881 
882 		/* Update the index that we received a packet */
883 		rxq->cmd_ring[ring_idx].next2comp = idx;
884 
885 		/* For RCD with EOP set, check if there is frame error */
886 		if (unlikely(rcd->eop && rcd->err)) {
887 			rxq->stats.drop_total++;
888 			rxq->stats.drop_err++;
889 
890 			if (!rcd->fcs) {
891 				rxq->stats.drop_fcs++;
892 				PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
893 			}
894 			PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
895 				   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
896 					 rxq->comp_ring.base), rcd->rxdIdx);
897 			rte_pktmbuf_free_seg(rxm);
898 			if (rxq->start_seg) {
899 				struct rte_mbuf *start = rxq->start_seg;
900 
901 				rxq->start_seg = NULL;
902 				rte_pktmbuf_free(start);
903 			}
904 			goto rcd_done;
905 		}
906 
907 		/* Initialize newly received packet buffer */
908 		rxm->port = rxq->port_id;
909 		rxm->nb_segs = 1;
910 		rxm->next = NULL;
911 		rxm->pkt_len = (uint16_t)rcd->len;
912 		rxm->data_len = (uint16_t)rcd->len;
913 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
914 		rxm->ol_flags = 0;
915 		rxm->vlan_tci = 0;
916 		rxm->packet_type = 0;
917 
918 		/*
919 		 * If this is the first buffer of the received packet,
920 		 * set the pointer to the first mbuf of the packet
921 		 * Otherwise, update the total length and the number of segments
922 		 * of the current scattered packet, and update the pointer to
923 		 * the last mbuf of the current packet.
924 		 */
925 		if (rcd->sop) {
926 			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
927 
928 			if (unlikely(rcd->len == 0)) {
929 				RTE_ASSERT(rcd->eop);
930 
931 				PMD_RX_LOG(DEBUG,
932 					   "Rx buf was skipped. rxring[%d][%d])",
933 					   ring_idx, idx);
934 				rte_pktmbuf_free_seg(rxm);
935 				goto rcd_done;
936 			}
937 
938 			if (vmxnet3_rx_data_ring(hw, rcd->rqID)) {
939 				uint8_t *rdd = rxq->data_ring.base +
940 					idx * rxq->data_desc_size;
941 
942 				RTE_ASSERT(VMXNET3_VERSION_GE_3(hw));
943 				rte_memcpy(rte_pktmbuf_mtod(rxm, char *),
944 					   rdd, rcd->len);
945 			}
946 
947 			rxq->start_seg = rxm;
948 			rxq->last_seg = rxm;
949 			vmxnet3_rx_offload(hw, rcd, rxm, 1);
950 		} else {
951 			struct rte_mbuf *start = rxq->start_seg;
952 
953 			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);
954 
955 			if (likely(start && rxm->data_len > 0)) {
956 				start->pkt_len += rxm->data_len;
957 				start->nb_segs++;
958 
959 				rxq->last_seg->next = rxm;
960 				rxq->last_seg = rxm;
961 			} else {
962 				PMD_RX_LOG(ERR, "Error: received empty or out-of-order frame.");
963 				rxq->stats.drop_total++;
964 				rxq->stats.drop_err++;
965 
966 				rte_pktmbuf_free_seg(rxm);
967 			}
968 		}
969 
970 		if (rcd->eop) {
971 			struct rte_mbuf *start = rxq->start_seg;
972 
973 			vmxnet3_rx_offload(hw, rcd, start, 0);
974 			rx_pkts[nb_rx++] = start;
975 			rxq->start_seg = NULL;
976 		}
977 
978 rcd_done:
979 		rxq->cmd_ring[ring_idx].next2comp = idx;
980 		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
981 					  rxq->cmd_ring[ring_idx].size);
982 
983 		/* It's time to renew descriptors */
984 		vmxnet3_renew_desc(rxq, ring_idx, newm);
985 		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
986 			VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
987 					       rxq->cmd_ring[ring_idx].next2fill);
988 		}
989 
990 		/* Advance to the next descriptor in comp_ring */
991 		vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
992 
993 		rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
994 		nb_rxd++;
995 		if (nb_rxd > rxq->cmd_ring[0].size) {
996 			PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
997 				   " relinquish control.");
998 			break;
999 		}
1000 	}
1001 
1002 	if (unlikely(nb_rxd == 0)) {
1003 		uint32_t avail;
1004 		for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
1005 			avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[ring_idx]);
1006 			if (unlikely(avail > 0)) {
1007 				/* try to alloc new buf and renew descriptors */
1008 				vmxnet3_post_rx_bufs(rxq, ring_idx);
1009 			}
1010 		}
1011 		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1012 			for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
1013 				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1014 						       rxq->cmd_ring[ring_idx].next2fill);
1015 			}
1016 		}
1017 	}
1018 
1019 	return nb_rx;
1020 }
1021 
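/*
 * The Tx queue uses one contiguous DMA memzone laid out as
 *   [TxDesc * size | TxCompDesc * size | Tx data descs * size],
 * so the base/basePA of the completion and data rings below are simple
 * offsets from the command ring base.
 */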
1022 int
1023 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
1024 			   uint16_t queue_idx,
1025 			   uint16_t nb_desc,
1026 			   unsigned int socket_id,
1027 			   const struct rte_eth_txconf *tx_conf __rte_unused)
1028 {
1029 	struct vmxnet3_hw *hw = dev->data->dev_private;
1030 	const struct rte_memzone *mz;
1031 	struct vmxnet3_tx_queue *txq;
1032 	struct vmxnet3_cmd_ring *ring;
1033 	struct vmxnet3_comp_ring *comp_ring;
1034 	struct vmxnet3_data_ring *data_ring;
1035 	int size;
1036 
1037 	PMD_INIT_FUNC_TRACE();
1038 
1039 	txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
1040 			  RTE_CACHE_LINE_SIZE);
1041 	if (txq == NULL) {
1042 		PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
1043 		return -ENOMEM;
1044 	}
1045 
1046 	txq->queue_id = queue_idx;
1047 	txq->port_id = dev->data->port_id;
1048 	txq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1049 	txq->hw = hw;
1050 	txq->qid = queue_idx;
1051 	txq->stopped = TRUE;
1052 	txq->txdata_desc_size = hw->txdata_desc_size;
1053 
1054 	ring = &txq->cmd_ring;
1055 	comp_ring = &txq->comp_ring;
1056 	data_ring = &txq->data_ring;
1057 
1058 	/* Tx vmxnet ring length should be between 512 and 4096 */
1059 	if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
1060 		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
1061 			     VMXNET3_DEF_TX_RING_SIZE);
1062 		return -EINVAL;
1063 	} else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
1064 		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
1065 			     VMXNET3_TX_RING_MAX_SIZE);
1066 		return -EINVAL;
1067 	} else {
1068 		ring->size = nb_desc;
1069 		ring->size &= ~VMXNET3_RING_SIZE_MASK;
1070 	}
1071 	comp_ring->size = data_ring->size = ring->size;
1072 
1073 	/* Tx vmxnet rings structure initialization */
1074 	ring->next2fill = 0;
1075 	ring->next2comp = 0;
1076 	ring->gen = VMXNET3_INIT_GEN;
1077 	comp_ring->next2proc = 0;
1078 	comp_ring->gen = VMXNET3_INIT_GEN;
1079 
1080 	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
1081 	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
1082 	size += txq->txdata_desc_size * data_ring->size;
1083 
1084 	mz = rte_eth_dma_zone_reserve(dev, "txdesc", queue_idx, size,
1085 				      VMXNET3_RING_BA_ALIGN, socket_id);
1086 	if (mz == NULL) {
1087 		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1088 		return -ENOMEM;
1089 	}
1090 	txq->mz = mz;
1091 	memset(mz->addr, 0, mz->len);
1092 
1093 	/* cmd_ring initialization */
1094 	ring->base = mz->addr;
1095 	ring->basePA = mz->iova;
1096 
1097 	/* comp_ring initialization */
1098 	comp_ring->base = ring->base + ring->size;
1099 	comp_ring->basePA = ring->basePA +
1100 		(sizeof(struct Vmxnet3_TxDesc) * ring->size);
1101 
1102 	/* data_ring initialization */
1103 	data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
1104 	data_ring->basePA = comp_ring->basePA +
1105 			(sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
1106 
1107 	/* cmd_ring0 buf_info allocation */
1108 	ring->buf_info = rte_zmalloc("tx_ring_buf_info",
1109 				     ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
1110 	if (ring->buf_info == NULL) {
1111 		PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
1112 		return -ENOMEM;
1113 	}
1114 
1115 	/* Update the data portion with txq */
1116 	dev->data->tx_queues[queue_idx] = txq;
1117 
1118 	return 0;
1119 }
1120 
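/*
 * The Rx queue memzone is laid out as
 *   [RxDesc ring0 | RxDesc ring1 | RxCompDesc ring | data ring (v3+ only)],
 * with the completion ring sized to cover both command rings
 * (ring0->size + ring1->size).
 */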
1121 int
1122 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
1123 			   uint16_t queue_idx,
1124 			   uint16_t nb_desc,
1125 			   unsigned int socket_id,
1126 			   __rte_unused const struct rte_eth_rxconf *rx_conf,
1127 			   struct rte_mempool *mp)
1128 {
1129 	const struct rte_memzone *mz;
1130 	struct vmxnet3_rx_queue *rxq;
1131 	struct vmxnet3_hw *hw = dev->data->dev_private;
1132 	struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
1133 	struct vmxnet3_comp_ring *comp_ring;
1134 	struct vmxnet3_rx_data_ring *data_ring;
1135 	int size;
1136 	uint8_t i;
1137 	char mem_name[32];
1138 
1139 	PMD_INIT_FUNC_TRACE();
1140 
1141 	rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
1142 			  RTE_CACHE_LINE_SIZE);
1143 	if (rxq == NULL) {
1144 		PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
1145 		return -ENOMEM;
1146 	}
1147 
1148 	rxq->mp = mp;
1149 	rxq->queue_id = queue_idx;
1150 	rxq->port_id = dev->data->port_id;
1151 	rxq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1152 	rxq->hw = hw;
1153 	rxq->qid1 = queue_idx;
1154 	rxq->qid2 = queue_idx + hw->num_rx_queues;
1155 	rxq->data_ring_qid = queue_idx + 2 * hw->num_rx_queues;
1156 	rxq->data_desc_size = hw->rxdata_desc_size;
1157 	rxq->stopped = TRUE;
1158 
1159 	ring0 = &rxq->cmd_ring[0];
1160 	ring1 = &rxq->cmd_ring[1];
1161 	comp_ring = &rxq->comp_ring;
1162 	data_ring = &rxq->data_ring;
1163 
1164 	/* Rx vmxnet ring length should be between 256 and 4096 */
1165 	if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
1166 		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
1167 		return -EINVAL;
1168 	} else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
1169 		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
1170 		return -EINVAL;
1171 	} else {
1172 		ring0->size = nb_desc;
1173 		ring0->size &= ~VMXNET3_RING_SIZE_MASK;
1174 		ring1->size = ring0->size;
1175 	}
1176 
1177 	comp_ring->size = ring0->size + ring1->size;
1178 	data_ring->size = ring0->size;
1179 
1180 	/* Rx vmxnet rings structure initialization */
1181 	ring0->next2fill = 0;
1182 	ring1->next2fill = 0;
1183 	ring0->next2comp = 0;
1184 	ring1->next2comp = 0;
1185 	ring0->gen = VMXNET3_INIT_GEN;
1186 	ring1->gen = VMXNET3_INIT_GEN;
1187 	comp_ring->next2proc = 0;
1188 	comp_ring->gen = VMXNET3_INIT_GEN;
1189 
1190 	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
1191 	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1192 	if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size)
1193 		size += rxq->data_desc_size * data_ring->size;
1194 
1195 	mz = rte_eth_dma_zone_reserve(dev, "rxdesc", queue_idx, size,
1196 				      VMXNET3_RING_BA_ALIGN, socket_id);
1197 	if (mz == NULL) {
1198 		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1199 		return -ENOMEM;
1200 	}
1201 	rxq->mz = mz;
1202 	memset(mz->addr, 0, mz->len);
1203 
1204 	/* cmd_ring0 initialization */
1205 	ring0->base = mz->addr;
1206 	ring0->basePA = mz->iova;
1207 
1208 	/* cmd_ring1 initialization */
1209 	ring1->base = ring0->base + ring0->size;
1210 	ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
1211 
1212 	/* comp_ring initialization */
1213 	comp_ring->base = ring1->base + ring1->size;
1214 	comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
1215 		ring1->size;
1216 
1217 	/* data_ring initialization */
1218 	if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size) {
1219 		data_ring->base =
1220 			(uint8_t *)(comp_ring->base + comp_ring->size);
1221 		data_ring->basePA = comp_ring->basePA +
1222 			sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1223 	}
1224 
1225 	/* cmd_ring0-cmd_ring1 buf_info allocation */
1226 	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
1227 
1228 		ring = &rxq->cmd_ring[i];
1229 		ring->rid = i;
1230 		snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
1231 
1232 		ring->buf_info = rte_zmalloc(mem_name,
1233 					     ring->size * sizeof(vmxnet3_buf_info_t),
1234 					     RTE_CACHE_LINE_SIZE);
1235 		if (ring->buf_info == NULL) {
1236 			PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
1237 			return -ENOMEM;
1238 		}
1239 	}
1240 
1241 	/* Update the data portion with rxq */
1242 	dev->data->rx_queues[queue_idx] = rxq;
1243 
1244 	return 0;
1245 }
1246 
1247 /*
1248  * Initializes Receive Unit
1249  * Load mbufs in rx queue in advance
1250  */
1251 int
1252 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
1253 {
1254 	struct vmxnet3_hw *hw = dev->data->dev_private;
1255 
1256 	int i, ret;
1257 	uint8_t j;
1258 
1259 	PMD_INIT_FUNC_TRACE();
1260 
1261 	for (i = 0; i < hw->num_rx_queues; i++) {
1262 		vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
1263 
1264 		for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
1265 			/* vmxnet3_post_rx_bufs() fills the whole ring with mbufs */
1266 			ret = vmxnet3_post_rx_bufs(rxq, j);
1267 			if (ret <= 0) {
1268 				PMD_INIT_LOG(ERR,
1269 					     "ERROR: Posting Rxq: %d buffers ring: %d",
1270 					     i, j);
1271 				return -ret;
1272 			}
1273 			/*
1274 			 * Update the device with the next2fill index so it knows
1275 			 * which descriptors hold fresh mbufs for incoming packets.
1276 			 */
1277 			if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1278 				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1279 						       rxq->cmd_ring[j].next2fill);
1280 			}
1281 		}
1282 		rxq->stopped = FALSE;
1283 		rxq->start_seg = NULL;
1284 	}
1285 
1286 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
1287 		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
1288 
1289 		txq->stopped = FALSE;
1290 	}
1291 
1292 	return 0;
1293 }
1294 
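/*
 * Default 40-byte Toeplitz hash key, used when the application does not
 * supply its own; the name suggests it is the sample RSS key commonly used
 * by Intel NIC drivers.
 */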
1295 static uint8_t rss_intel_key[40] = {
1296 	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1297 	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1298 	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1299 	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1300 	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1301 };
1302 
1303 /*
1304  * Additional RSS configurations based on vmxnet v4+ APIs
1305  */
1306 int
1307 vmxnet3_v4_rss_configure(struct rte_eth_dev *dev)
1308 {
1309 	struct vmxnet3_hw *hw = dev->data->dev_private;
1310 	Vmxnet3_DriverShared *shared = hw->shared;
1311 	Vmxnet3_CmdInfo *cmdInfo = &shared->cu.cmdInfo;
1312 	struct rte_eth_rss_conf *port_rss_conf;
1313 	uint64_t rss_hf;
1314 	uint32_t ret;
1315 
1316 	PMD_INIT_FUNC_TRACE();
1317 
1318 	cmdInfo->setRSSFields = 0;
1319 	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1320 
1321 	if ((port_rss_conf->rss_hf & VMXNET3_MANDATORY_V4_RSS) !=
1322 	    VMXNET3_MANDATORY_V4_RSS) {
1323 		PMD_INIT_LOG(WARNING, "RSS: IPv4/6 TCP is required for vmxnet3 v4 RSS, "
1324 			     "automatically setting it");
1325 		port_rss_conf->rss_hf |= VMXNET3_MANDATORY_V4_RSS;
1326 	}
1327 
1328 	rss_hf = port_rss_conf->rss_hf &
1329 		(VMXNET3_V4_RSS_MASK | VMXNET3_RSS_OFFLOAD_ALL);
1330 
1331 	if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_TCP)
1332 		cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_TCPIP4;
1333 	if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_TCP)
1334 		cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_TCPIP6;
1335 	if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_UDP)
1336 		cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_UDPIP4;
1337 	if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_UDP)
1338 		cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_UDPIP6;
1339 
1340 	VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD,
1341 			       VMXNET3_CMD_SET_RSS_FIELDS);
1342 	ret = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD);
1343 
1344 	if (ret != VMXNET3_SUCCESS) {
1345 		PMD_DRV_LOG(ERR, "Set RSS fields (v4) failed: %d", ret);
1346 	}
1347 
1348 	return ret;
1349 }
1350 
1351 /*
1352  * Configure RSS feature
1353  */
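/*
 * The RSS indirection table is sized at four entries per Rx queue and is
 * filled round-robin over the configured queues, spreading RSS buckets
 * evenly across dev->data->nb_rx_queues.
 */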
1354 int
1355 vmxnet3_rss_configure(struct rte_eth_dev *dev)
1356 {
1357 	struct vmxnet3_hw *hw = dev->data->dev_private;
1358 	struct VMXNET3_RSSConf *dev_rss_conf;
1359 	struct rte_eth_rss_conf *port_rss_conf;
1360 	uint64_t rss_hf;
1361 	uint8_t i, j;
1362 
1363 	PMD_INIT_FUNC_TRACE();
1364 
1365 	dev_rss_conf = hw->rss_conf;
1366 	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1367 
1368 	/* loading hashFunc */
1369 	dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
1370 	/* loading hashKeySize */
1371 	dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
1372 	/* loading indTableSize: must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
1373 	dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
1374 
1375 	if (port_rss_conf->rss_key == NULL) {
1376 		/* Default hash key */
1377 		port_rss_conf->rss_key = rss_intel_key;
1378 	}
1379 
1380 	/* loading hashKey */
1381 	memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
1382 	       dev_rss_conf->hashKeySize);
1383 
1384 	/* loading indTable */
1385 	for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1386 		if (j == dev->data->nb_rx_queues)
1387 			j = 0;
1388 		dev_rss_conf->indTable[i] = j;
1389 	}
1390 
1391 	/* loading hashType */
1392 	dev_rss_conf->hashType = 0;
1393 	rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1394 	if (rss_hf & RTE_ETH_RSS_IPV4)
1395 		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1396 	if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_TCP)
1397 		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1398 	if (rss_hf & RTE_ETH_RSS_IPV6)
1399 		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1400 	if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_TCP)
1401 		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1402 
1403 	return VMXNET3_SUCCESS;
1404 }
1405