xref: /dpdk/drivers/net/vmxnet3/vmxnet3_rxtx.c (revision f665790a5dbad7b645ff46f31d65e977324e7bfc)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2015 Intel Corporation
3  */
4 
5 #include <sys/queue.h>
6 
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <errno.h>
11 #include <stdint.h>
12 #include <stdarg.h>
13 #include <unistd.h>
14 #include <inttypes.h>
15 
16 #include <rte_byteorder.h>
17 #include <rte_common.h>
18 #include <rte_cycles.h>
19 #include <rte_log.h>
20 #include <rte_debug.h>
21 #include <rte_interrupts.h>
22 #include <rte_pci.h>
23 #include <rte_memory.h>
24 #include <rte_memzone.h>
25 #include <rte_launch.h>
26 #include <rte_eal.h>
27 #include <rte_per_lcore.h>
28 #include <rte_lcore.h>
29 #include <rte_atomic.h>
30 #include <rte_branch_prediction.h>
31 #include <rte_mempool.h>
32 #include <rte_malloc.h>
33 #include <rte_mbuf.h>
34 #include <rte_ether.h>
35 #include <ethdev_driver.h>
36 #include <rte_prefetch.h>
37 #include <rte_ip.h>
38 #include <rte_udp.h>
39 #include <rte_tcp.h>
40 #include <rte_sctp.h>
41 #include <rte_string_fns.h>
42 #include <rte_errno.h>
43 #include <rte_net.h>
44 
45 #include "base/vmxnet3_defs.h"
46 #include "vmxnet3_ring.h"
47 
48 #include "vmxnet3_logs.h"
49 #include "vmxnet3_ethdev.h"
50 
51 #define	VMXNET3_TX_OFFLOAD_MASK	(RTE_MBUF_F_TX_VLAN | \
52 		RTE_MBUF_F_TX_IPV6 |     \
53 		RTE_MBUF_F_TX_IPV4 |     \
54 		RTE_MBUF_F_TX_L4_MASK |  \
55 		RTE_MBUF_F_TX_TCP_SEG)
56 
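/* Offload requests outside VMXNET3_TX_OFFLOAD_MASK are not supported by this
 * driver; vmxnet3_prep_pkts() below uses the NOTSUP mask to reject such
 * packets before they reach the transmit path.
 */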
57 #define	VMXNET3_TX_OFFLOAD_NOTSUP_MASK	\
58 	(RTE_MBUF_F_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)
59 
60 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
61 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
62 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
63 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
64 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
65 #endif
66 
67 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
68 static void
69 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
70 {
71 	uint32_t avail = 0;
72 
73 	if (rxq == NULL)
74 		return;
75 
76 	PMD_RX_LOG(DEBUG,
77 		   "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
78 		   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
79 	PMD_RX_LOG(DEBUG,
80 		   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
81 		   (unsigned long)rxq->cmd_ring[0].basePA,
82 		   (unsigned long)rxq->cmd_ring[1].basePA,
83 		   (unsigned long)rxq->comp_ring.basePA);
84 
85 	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
86 	PMD_RX_LOG(DEBUG,
87 		   "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
88 		   (uint32_t)rxq->cmd_ring[0].size, avail,
89 		   rxq->comp_ring.next2proc,
90 		   rxq->cmd_ring[0].size - avail);
91 
92 	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
93 	PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
94 		   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
95 		   rxq->cmd_ring[1].size - avail);
96 
97 }
98 
99 static void
100 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
101 {
102 	uint32_t avail = 0;
103 
104 	if (txq == NULL)
105 		return;
106 
107 	PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
108 		   txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
109 	PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
110 		   (unsigned long)txq->cmd_ring.basePA,
111 		   (unsigned long)txq->comp_ring.basePA,
112 		   (unsigned long)txq->data_ring.basePA);
113 
114 	avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
115 	PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
116 		   (uint32_t)txq->cmd_ring.size, avail,
117 		   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
118 }
119 #endif
120 
121 static void
122 vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
123 {
124 	while (ring->next2comp != ring->next2fill) {
125 		/* No need to worry about desc ownership, device is quiesced by now. */
126 		vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
127 
128 		if (buf_info->m) {
129 			rte_pktmbuf_free(buf_info->m);
130 			buf_info->m = NULL;
131 			buf_info->bufPA = 0;
132 			buf_info->len = 0;
133 		}
134 		vmxnet3_cmd_ring_adv_next2comp(ring);
135 	}
136 }
137 
138 static void
139 vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
140 {
141 	uint32_t i;
142 
143 	for (i = 0; i < ring->size; i++) {
144 		/* No need to worry about desc ownership, device is quiesced by now. */
145 		vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];
146 
147 		if (buf_info->m) {
148 			rte_pktmbuf_free_seg(buf_info->m);
149 			buf_info->m = NULL;
150 			buf_info->bufPA = 0;
151 			buf_info->len = 0;
152 		}
153 		vmxnet3_cmd_ring_adv_next2comp(ring);
154 	}
155 }
156 
157 static void
158 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
159 {
160 	rte_free(ring->buf_info);
161 	ring->buf_info = NULL;
162 }
163 
164 void
165 vmxnet3_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
166 {
167 	vmxnet3_tx_queue_t *tq = dev->data->tx_queues[qid];
168 
169 	if (tq != NULL) {
170 		/* Release mbufs */
171 		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
172 		/* Release the cmd_ring */
173 		vmxnet3_cmd_ring_release(&tq->cmd_ring);
174 		/* Release the memzone */
175 		rte_memzone_free(tq->mz);
176 		/* Release the queue */
177 		rte_free(tq);
178 	}
179 }
180 
181 void
182 vmxnet3_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
183 {
184 	int i;
185 	vmxnet3_rx_queue_t *rq = dev->data->rx_queues[qid];
186 
187 	if (rq != NULL) {
188 		/* Release mbufs */
189 		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
190 			vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
191 
192 		/* Release both the cmd_rings */
193 		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
194 			vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
195 
196 		/* Release the memzone */
197 		rte_memzone_free(rq->mz);
198 
199 		/* Release the queue */
200 		rte_free(rq);
201 	}
202 }
203 
204 static void
205 vmxnet3_dev_tx_queue_reset(void *txq)
206 {
207 	vmxnet3_tx_queue_t *tq = txq;
208 	struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
209 	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
210 	struct vmxnet3_data_ring *data_ring = &tq->data_ring;
211 	int size;
212 
213 	if (tq != NULL) {
214 		/* Release the cmd_ring mbufs */
215 		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
216 	}
217 
218 	/* Tx vmxnet rings structure initialization */
219 	ring->next2fill = 0;
220 	ring->next2comp = 0;
221 	ring->gen = VMXNET3_INIT_GEN;
222 	comp_ring->next2proc = 0;
223 	comp_ring->gen = VMXNET3_INIT_GEN;
224 
225 	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
226 	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
227 	size += tq->txdata_desc_size * data_ring->size;
228 
229 	memset(ring->base, 0, size);
230 }
231 
232 static void
233 vmxnet3_dev_rx_queue_reset(void *rxq)
234 {
235 	int i;
236 	vmxnet3_rx_queue_t *rq = rxq;
237 	struct vmxnet3_hw *hw = rq->hw;
238 	struct vmxnet3_cmd_ring *ring0, *ring1;
239 	struct vmxnet3_comp_ring *comp_ring;
240 	struct vmxnet3_rx_data_ring *data_ring = &rq->data_ring;
241 	int size;
242 
243 	/* Release both the cmd_rings mbufs */
244 	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
245 		vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
246 
247 	ring0 = &rq->cmd_ring[0];
248 	ring1 = &rq->cmd_ring[1];
249 	comp_ring = &rq->comp_ring;
250 
251 	/* Rx vmxnet rings structure initialization */
252 	ring0->next2fill = 0;
253 	ring1->next2fill = 0;
254 	ring0->next2comp = 0;
255 	ring1->next2comp = 0;
256 	ring0->gen = VMXNET3_INIT_GEN;
257 	ring1->gen = VMXNET3_INIT_GEN;
258 	comp_ring->next2proc = 0;
259 	comp_ring->gen = VMXNET3_INIT_GEN;
260 
261 	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
262 	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
263 	if (VMXNET3_VERSION_GE_3(hw) && rq->data_desc_size)
264 		size += rq->data_desc_size * data_ring->size;
265 
266 	memset(ring0->base, 0, size);
267 }
268 
269 void
270 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
271 {
272 	unsigned i;
273 
274 	PMD_INIT_FUNC_TRACE();
275 
276 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
277 		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
278 
279 		if (txq != NULL) {
280 			txq->stopped = TRUE;
281 			vmxnet3_dev_tx_queue_reset(txq);
282 		}
283 	}
284 
285 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
286 		struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
287 
288 		if (rxq != NULL) {
289 			rxq->stopped = TRUE;
290 			vmxnet3_dev_rx_queue_reset(rxq);
291 		}
292 	}
293 }
294 
295 static int
296 vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
297 {
298 	int completed = 0;
299 	struct rte_mbuf *mbuf;
300 
301 	/* Release cmd_ring descriptor and free mbuf */
302 	RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);
303 
304 	mbuf = txq->cmd_ring.buf_info[eop_idx].m;
305 	if (mbuf == NULL)
306 		rte_panic("EOP desc does not point to a valid mbuf");
307 	rte_pktmbuf_free(mbuf);
308 
309 	txq->cmd_ring.buf_info[eop_idx].m = NULL;
310 
311 	while (txq->cmd_ring.next2comp != eop_idx) {
312 		/* no out-of-order completion */
313 		RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
314 		vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
315 		completed++;
316 	}
317 
318 	/* Mark the txd for which tcd was generated as completed */
319 	vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
320 
321 	return completed + 1;
322 }
323 
324 static void
325 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
326 {
327 	int completed = 0;
328 	vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
329 	struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
330 		(comp_ring->base + comp_ring->next2proc);
331 
332 	while (tcd->gen == comp_ring->gen) {
333 		completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);
334 
335 		vmxnet3_comp_ring_adv_next2proc(comp_ring);
336 		tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
337 						    comp_ring->next2proc);
338 	}
339 
340 	PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
341 
342 	/* To avoid compiler warnings when not in DEBUG mode. */
343 	RTE_SET_USED(completed);
344 }
345 
346 uint16_t
347 vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
348 	uint16_t nb_pkts)
349 {
350 	int32_t ret;
351 	uint32_t i;
352 	uint64_t ol_flags;
353 	struct rte_mbuf *m;
354 
355 	for (i = 0; i != nb_pkts; i++) {
356 		m = tx_pkts[i];
357 		ol_flags = m->ol_flags;
358 
359 		/* Non-TSO packet cannot occupy more than
360 		 * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
361 		 */
362 		if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) == 0 &&
363 				m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {
364 			rte_errno = EINVAL;
365 			return i;
366 		}
367 		/* TSO packet cannot occupy more than
368 		 * VMXNET3_MAX_TSO_TXD_PER_PKT TX descriptors.
369 		 */
370 		if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) != 0 &&
371 				m->nb_segs > VMXNET3_MAX_TSO_TXD_PER_PKT) {
372 			rte_errno = EINVAL;
373 			return i;
374 		}
375 
376 		/* check that only supported TX offloads are requested. */
377 		if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
378 				(ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
379 				RTE_MBUF_F_TX_SCTP_CKSUM) {
380 			rte_errno = ENOTSUP;
381 			return i;
382 		}
383 
384 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
385 		ret = rte_validate_tx_offload(m);
386 		if (ret != 0) {
387 			rte_errno = -ret;
388 			return i;
389 		}
390 #endif
391 		ret = rte_net_intel_cksum_prepare(m);
392 		if (ret != 0) {
393 			rte_errno = -ret;
394 			return i;
395 		}
396 	}
397 
398 	return i;
399 }
400 
401 uint16_t
402 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
403 		  uint16_t nb_pkts)
404 {
405 	uint16_t nb_tx;
406 	vmxnet3_tx_queue_t *txq = tx_queue;
407 	struct vmxnet3_hw *hw = txq->hw;
408 	Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
409 	uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);
410 
411 	if (unlikely(txq->stopped)) {
412 		PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
413 		return 0;
414 	}
415 
416 	/* Free up the comp_descriptors aggressively */
417 	vmxnet3_tq_tx_complete(txq);
418 
419 	nb_tx = 0;
420 	while (nb_tx < nb_pkts) {
421 		Vmxnet3_GenericDesc *gdesc = NULL;
422 		vmxnet3_buf_info_t *tbi = NULL;
423 		uint32_t first2fill, avail, dw2;
424 		struct rte_mbuf *txm = tx_pkts[nb_tx];
425 		struct rte_mbuf *m_seg = txm;
426 		int copy_size = 0;
427 		bool tso = (txm->ol_flags & RTE_MBUF_F_TX_TCP_SEG) != 0;
428 		/* # of descriptors needed for a packet. */
429 		unsigned count = txm->nb_segs;
430 
431 		avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
432 		if (count > avail) {
433 			/* Is command ring full? */
434 			if (unlikely(avail == 0)) {
435 				PMD_TX_LOG(DEBUG, "No free ring descriptors");
436 				txq->stats.tx_ring_full++;
437 				txq->stats.drop_total += (nb_pkts - nb_tx);
438 				break;
439 			}
440 
441 			/* Command ring is not full but cannot handle the
442 			 * multi-segmented packet. Let's try the next packet
443 			 * in this case.
444 			 */
445 			PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
446 				   "(avail %d needed %d)", avail, count);
447 			txq->stats.drop_total++;
448 			if (tso)
449 				txq->stats.drop_tso++;
450 			rte_pktmbuf_free(txm);
451 			nb_tx++;
452 			continue;
453 		}
454 
455 		/* Drop non-TSO or TSO packet that is excessively fragmented */
456 		if (unlikely((!tso && count > VMXNET3_MAX_TXD_PER_PKT) ||
457 			     (tso && count > VMXNET3_MAX_TSO_TXD_PER_PKT))) {
458 			PMD_TX_LOG(ERR, "Non-TSO or TSO packet cannot occupy more than "
459 				   "%d or %d tx descriptors respectively. Packet dropped.",
460 				   VMXNET3_MAX_TXD_PER_PKT, VMXNET3_MAX_TSO_TXD_PER_PKT);
461 			txq->stats.drop_too_many_segs++;
462 			txq->stats.drop_total++;
463 			rte_pktmbuf_free(txm);
464 			nb_tx++;
465 			continue;
466 		}
467 
468 		/* Skip empty packets */
469 		if (unlikely(rte_pktmbuf_pkt_len(txm) == 0)) {
470 			txq->stats.drop_total++;
471 			rte_pktmbuf_free(txm);
472 			nb_tx++;
473 			continue;
474 		}
475 
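		/* Single-segment packets that fit into a Tx data ring entry are
		 * copied there; the SOP descriptor below then points at the data
		 * ring slot instead of the mbuf (presumably cheaper for the
		 * backend to fetch small frames from one contiguous area).
		 */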
476 		if (txm->nb_segs == 1 &&
477 		    rte_pktmbuf_pkt_len(txm) <= txq->txdata_desc_size) {
478 			struct Vmxnet3_TxDataDesc *tdd;
479 
480 			tdd = (struct Vmxnet3_TxDataDesc *)
481 				((uint8 *)txq->data_ring.base +
482 				 txq->cmd_ring.next2fill *
483 				 txq->txdata_desc_size);
484 			copy_size = rte_pktmbuf_pkt_len(txm);
485 			rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
486 		}
487 
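		/* Descriptors are written with the ring's current gen bit, except
		 * the SOP descriptor, which keeps the previous gen value; flipping
		 * it after the whole packet has been set up publishes the complete
		 * chain to the device at once.
		 */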
488 		/* use the previous gen bit for the SOP desc */
489 		dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
490 		first2fill = txq->cmd_ring.next2fill;
491 		do {
492 			/* Skip empty segments */
493 			if (unlikely(m_seg->data_len == 0))
494 				continue;
495 
496 			/* Remember the transmit buffer for cleanup */
497 			tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
498 
499 			/* NB: the following assumes that VMXNET3 maximum
500 			 * transmit buffer size (16K) is greater than the
501 			 * maximum mbuf segment size.
502 			 */
503 			gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
504 
505 			if (copy_size) {
506 				uint64 offset =
507 					(uint64)txq->cmd_ring.next2fill *
508 							txq->txdata_desc_size;
509 				gdesc->txd.addr =
510 					rte_cpu_to_le_64(txq->data_ring.basePA +
511 							 offset);
512 			} else {
513 				gdesc->txd.addr = rte_mbuf_data_iova(m_seg);
514 			}
515 
516 			gdesc->dword[2] = dw2 | m_seg->data_len;
517 			gdesc->dword[3] = 0;
518 
519 			/* move to the next2fill descriptor */
520 			vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
521 
522 			/* use the right gen for non-SOP desc */
523 			dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
524 		} while ((m_seg = m_seg->next) != NULL);
525 		/* We must have executed the complete preceding loop at least
526 		 * once without skipping an empty segment, as we can't have
527 		 * a packet with only empty segments.
528 		 * Thus, tbi and gdesc have been initialized.
529 		 */
530 
531 		/* set the last buf_info for the pkt */
532 		tbi->m = txm;
533 		/* Update the EOP descriptor */
534 		gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
535 
536 		/* Add VLAN tag if present */
537 		gdesc = txq->cmd_ring.base + first2fill;
538 		if (txm->ol_flags & RTE_MBUF_F_TX_VLAN) {
539 			gdesc->txd.ti = 1;
540 			gdesc->txd.tci = txm->vlan_tci;
541 		}
542 
543 		if (tso) {
544 			uint16_t mss = txm->tso_segsz;
545 
546 			RTE_ASSERT(mss > 0);
547 
548 			gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
549 			gdesc->txd.om = VMXNET3_OM_TSO;
550 			gdesc->txd.msscof = mss;
551 
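			/* A TSO packet counts as one deferred packet per resulting
			 * MSS-sized segment (payload length divided by mss,
			 * rounded up).
			 */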
552 			deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
553 		} else if (txm->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
554 			gdesc->txd.om = VMXNET3_OM_CSUM;
555 			gdesc->txd.hlen = txm->l2_len + txm->l3_len;
556 
557 			switch (txm->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
558 			case RTE_MBUF_F_TX_TCP_CKSUM:
559 				gdesc->txd.msscof = gdesc->txd.hlen +
560 					offsetof(struct rte_tcp_hdr, cksum);
561 				break;
562 			case RTE_MBUF_F_TX_UDP_CKSUM:
563 				gdesc->txd.msscof = gdesc->txd.hlen +
564 					offsetof(struct rte_udp_hdr,
565 						dgram_cksum);
566 				break;
567 			default:
568 				PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
569 					   txm->ol_flags & RTE_MBUF_F_TX_L4_MASK);
570 				abort();
571 			}
572 			deferred++;
573 		} else {
574 			gdesc->txd.hlen = 0;
575 			gdesc->txd.om = VMXNET3_OM_NONE;
576 			gdesc->txd.msscof = 0;
577 			deferred++;
578 		}
579 
580 		/* flip the GEN bit on the SOP */
581 		rte_compiler_barrier();
582 		gdesc->dword[2] ^= VMXNET3_TXD_GEN;
583 
584 		txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
585 		nb_tx++;
586 	}
587 
588 	PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));
589 
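	/* Ring the Tx producer register only once enough packets are pending;
	 * the threshold is read from the shared queue control area.
	 */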
590 	if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
591 		txq_ctrl->txNumDeferred = 0;
592 		/* Notify vSwitch that packets are available. */
593 		VMXNET3_WRITE_BAR0_REG(hw, (hw->tx_prod_offset + txq->queue_id * VMXNET3_REG_ALIGN),
594 				       txq->cmd_ring.next2fill);
595 	}
596 
597 	return nb_tx;
598 }
599 
600 static inline void
601 vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
602 		   struct rte_mbuf *mbuf)
603 {
604 	uint32_t val;
605 	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
606 	struct Vmxnet3_RxDesc *rxd =
607 		(struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
608 	vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
609 
610 	if (ring_id == 0) {
611 		/* Usually: One HEAD type buf per packet
612 		 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
613 		 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
614 		 */
615 
616 		/* We use a single buffer per packet, so ring 0 holds only HEAD type buffers */
617 		val = VMXNET3_RXD_BTYPE_HEAD;
618 	} else {
619 		/* All BODY type buffers for 2nd ring */
620 		val = VMXNET3_RXD_BTYPE_BODY;
621 	}
622 
623 	/*
624 	 * Load the mbuf pointer into buf_info[next2fill];
625 	 * the buf_info entry serves the same purpose as the cookie in a virtio virtqueue
626 	 */
627 	buf_info->m = mbuf;
628 	buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
629 	buf_info->bufPA = rte_mbuf_data_iova_default(mbuf);
630 
631 	/* Load Rx Descriptor with the buffer's GPA */
632 	rxd->addr = buf_info->bufPA;
633 
634 	/* After this point rxd->addr MUST not be NULL */
635 	rxd->btype = val;
636 	rxd->len = buf_info->len;
637 	/* Flip gen bit at the end to change ownership */
638 	rxd->gen = ring->gen;
639 
640 	vmxnet3_cmd_ring_adv_next2fill(ring);
641 }
642 /*
643  *  Allocates mbufs and clusters, and posts Rx descriptors with buffer details
644  *  so that the device can receive packets into those buffers.
645  *  Ring layout:
646  *      Among the two rings, 1st ring contains buffers of type 0 and type 1.
647  *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
648  *      by a frame will fit in 1st ring (1st buf of type0 and rest of type1).
649  *      The 2nd ring contains buffers of type 1 alone and is mostly used
650  *      only for LRO.
651  */
652 static int
653 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
654 {
655 	int err = 0;
656 	uint32_t i = 0;
657 	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
658 
659 	while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
660 		struct rte_mbuf *mbuf;
661 
662 		/* Allocate blank mbuf for the current Rx Descriptor */
663 		mbuf = rte_mbuf_raw_alloc(rxq->mp);
664 		if (unlikely(mbuf == NULL)) {
665 			PMD_RX_LOG(ERR, "Error allocating mbuf");
666 			rxq->stats.rx_buf_alloc_failure++;
667 			err = ENOMEM;
668 			break;
669 		}
670 
671 		vmxnet3_renew_desc(rxq, ring_id, mbuf);
672 		i++;
673 	}
674 
675 	/* Return error only if no buffers are posted at present */
676 	if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
677 		return -err;
678 	else
679 		return i;
680 }
681 
682 /* MSS not provided by vmxnet3, guess one with available information */
683 static uint16_t
684 vmxnet3_guess_mss(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
685 		struct rte_mbuf *rxm)
686 {
687 	uint32_t hlen, slen;
688 	struct rte_ipv4_hdr *ipv4_hdr;
689 	struct rte_ipv6_hdr *ipv6_hdr;
690 	struct rte_tcp_hdr *tcp_hdr;
691 	char *ptr;
692 	uint8_t segs;
693 
694 	RTE_ASSERT(rcd->tcp);
695 
696 	ptr = rte_pktmbuf_mtod(rxm, char *);
697 	slen = rte_pktmbuf_data_len(rxm);
698 	hlen = sizeof(struct rte_ether_hdr);
699 
700 	if (rcd->v4) {
701 		if (unlikely(slen < hlen + sizeof(struct rte_ipv4_hdr)))
702 			return hw->mtu - sizeof(struct rte_ipv4_hdr)
703 					- sizeof(struct rte_tcp_hdr);
704 
705 		ipv4_hdr = (struct rte_ipv4_hdr *)(ptr + hlen);
706 		hlen += rte_ipv4_hdr_len(ipv4_hdr);
707 	} else if (rcd->v6) {
708 		if (unlikely(slen < hlen + sizeof(struct rte_ipv6_hdr)))
709 			return hw->mtu - sizeof(struct rte_ipv6_hdr) -
710 					sizeof(struct rte_tcp_hdr);
711 
712 		ipv6_hdr = (struct rte_ipv6_hdr *)(ptr + hlen);
713 		hlen += sizeof(struct rte_ipv6_hdr);
714 		if (unlikely(ipv6_hdr->proto != IPPROTO_TCP)) {
715 			int frag;
716 
717 			rte_net_skip_ip6_ext(ipv6_hdr->proto, rxm,
718 					&hlen, &frag);
719 		}
720 	}
721 
722 	if (unlikely(slen < hlen + sizeof(struct rte_tcp_hdr)))
723 		return hw->mtu - hlen - sizeof(struct rte_tcp_hdr) +
724 				sizeof(struct rte_ether_hdr);
725 
726 	tcp_hdr = (struct rte_tcp_hdr *)(ptr + hlen);
727 	hlen += (tcp_hdr->data_off & 0xf0) >> 2;
728 
729 	segs = *vmxnet3_segs_dynfield(rxm);
730 	if (segs > 1)
731 		return (rte_pktmbuf_pkt_len(rxm) - hlen + segs - 1) / segs;
732 	else
733 		return hw->mtu - hlen + sizeof(struct rte_ether_hdr);
734 }
735 
736 /* Receive side checksum and other offloads */
737 static inline void
738 vmxnet3_rx_offload(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
739 		struct rte_mbuf *rxm, const uint8_t sop)
740 {
741 	uint64_t ol_flags = rxm->ol_flags;
742 	uint32_t packet_type = rxm->packet_type;
743 
744 	/* Offloads set in sop */
745 	if (sop) {
746 		/* Set packet type */
747 		packet_type |= RTE_PTYPE_L2_ETHER;
748 
749 		/* Check large packet receive */
750 		if (VMXNET3_VERSION_GE_2(hw) &&
751 		    rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) {
752 			const Vmxnet3_RxCompDescExt *rcde =
753 					(const Vmxnet3_RxCompDescExt *)rcd;
754 
755 			rxm->tso_segsz = rcde->mss;
756 			*vmxnet3_segs_dynfield(rxm) = rcde->segCnt;
757 			ol_flags |= RTE_MBUF_F_RX_LRO;
758 		}
759 	} else { /* Offloads set in eop */
760 		/* Check for RSS */
761 		if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
762 			ol_flags |= RTE_MBUF_F_RX_RSS_HASH;
763 			rxm->hash.rss = rcd->rssHash;
764 		}
765 
766 		/* Check for hardware stripped VLAN tag */
767 		if (rcd->ts) {
768 			ol_flags |= (RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED);
769 			rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
770 		}
771 
772 		/* Check packet type, checksum errors, etc. */
773 		if (rcd->cnc) {
774 			ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN;
775 
776 			if (rcd->v4) {
777 				packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
778 				if (rcd->tcp)
779 					packet_type |= RTE_PTYPE_L4_TCP;
780 				else if (rcd->udp)
781 					packet_type |= RTE_PTYPE_L4_UDP;
782 			} else if (rcd->v6) {
783 				packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
784 				if (rcd->tcp)
785 					packet_type |= RTE_PTYPE_L4_TCP;
786 				else if (rcd->udp)
787 					packet_type |= RTE_PTYPE_L4_UDP;
788 			} else {
789 				packet_type |= RTE_PTYPE_UNKNOWN;
790 			}
791 
792 		} else {
793 			if (rcd->v4) {
794 				packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
795 
796 				if (rcd->ipc)
797 					ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD;
798 				else
799 					ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD;
800 
801 				if (rcd->tuc) {
802 					ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
803 					if (rcd->tcp)
804 						packet_type |= RTE_PTYPE_L4_TCP;
805 					else
806 						packet_type |= RTE_PTYPE_L4_UDP;
807 				} else {
808 					if (rcd->tcp) {
809 						packet_type |= RTE_PTYPE_L4_TCP;
810 						ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
811 					} else if (rcd->udp) {
812 						packet_type |= RTE_PTYPE_L4_UDP;
813 						ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
814 					}
815 				}
816 			} else if (rcd->v6) {
817 				packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
818 
819 				if (rcd->tuc) {
820 					ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
821 					if (rcd->tcp)
822 						packet_type |= RTE_PTYPE_L4_TCP;
823 					else
824 						packet_type |= RTE_PTYPE_L4_UDP;
825 				} else {
826 					if (rcd->tcp) {
827 						packet_type |= RTE_PTYPE_L4_TCP;
828 						ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
829 					} else if (rcd->udp) {
830 						packet_type |= RTE_PTYPE_L4_UDP;
831 						ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
832 					}
833 				}
834 			} else {
835 				packet_type |= RTE_PTYPE_UNKNOWN;
836 			}
837 
838 			/* Old variants of vmxnet3 do not provide MSS */
839 			if ((ol_flags & RTE_MBUF_F_RX_LRO) && rxm->tso_segsz == 0)
840 				rxm->tso_segsz = vmxnet3_guess_mss(hw,
841 						rcd, rxm);
842 		}
843 	}
844 
845 	rxm->ol_flags = ol_flags;
846 	rxm->packet_type = packet_type;
847 }
848 
849 /*
850  * Process the Rx completion ring of the given vmxnet3_rx_queue,
851  * receiving up to nb_pkts packets, and return the number of packets received
852  */
853 uint16_t
854 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
855 {
856 	uint16_t nb_rx;
857 	uint32_t nb_rxd, idx;
858 	uint8_t ring_idx;
859 	vmxnet3_rx_queue_t *rxq;
860 	Vmxnet3_RxCompDesc *rcd;
861 	vmxnet3_buf_info_t *rbi;
862 	Vmxnet3_RxDesc *rxd;
863 	struct rte_mbuf *rxm = NULL;
864 	struct vmxnet3_hw *hw;
865 
866 	nb_rx = 0;
867 	ring_idx = 0;
868 	nb_rxd = 0;
869 	idx = 0;
870 
871 	rxq = rx_queue;
872 	hw = rxq->hw;
873 
874 	rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
875 
876 	if (unlikely(rxq->stopped)) {
877 		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
878 		return 0;
879 	}
880 
881 	while (rcd->gen == rxq->comp_ring.gen) {
882 		struct rte_mbuf *newm;
883 
884 		if (nb_rx >= nb_pkts)
885 			break;
886 
887 		newm = rte_mbuf_raw_alloc(rxq->mp);
888 		if (unlikely(newm == NULL)) {
889 			PMD_RX_LOG(ERR, "Error allocating mbuf");
890 			rxq->stats.rx_buf_alloc_failure++;
891 			break;
892 		}
893 
894 		idx = rcd->rxdIdx;
895 		ring_idx = vmxnet3_get_ring_idx(hw, rcd->rqID);
896 		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
897 		RTE_SET_USED(rxd); /* used only for assert when enabled */
898 		rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
899 
900 		PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
901 
902 		RTE_ASSERT(rcd->len <= rxd->len);
903 		RTE_ASSERT(rbi->m);
904 
905 		/* Get the packet buffer pointer from buf_info */
906 		rxm = rbi->m;
907 
908 		/* Clear descriptor associated buf_info to be reused */
909 		rbi->m = NULL;
910 		rbi->bufPA = 0;
911 
912 		/* Update next2comp to the index of the descriptor just received */
913 		rxq->cmd_ring[ring_idx].next2comp = idx;
914 
915 		/* For RCD with EOP set, check if there is frame error */
916 		if (unlikely(rcd->eop && rcd->err)) {
917 			rxq->stats.drop_total++;
918 			rxq->stats.drop_err++;
919 
920 			if (!rcd->fcs) {
921 				rxq->stats.drop_fcs++;
922 				PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
923 			}
924 			PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
925 				   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
926 					 rxq->comp_ring.base), rcd->rxdIdx);
927 			rte_pktmbuf_free_seg(rxm);
928 			if (rxq->start_seg) {
929 				struct rte_mbuf *start = rxq->start_seg;
930 
931 				rxq->start_seg = NULL;
932 				rte_pktmbuf_free(start);
933 			}
934 			goto rcd_done;
935 		}
936 
937 		/* Initialize newly received packet buffer */
938 		rxm->port = rxq->port_id;
939 		rxm->nb_segs = 1;
940 		rxm->next = NULL;
941 		rxm->pkt_len = (uint16_t)rcd->len;
942 		rxm->data_len = (uint16_t)rcd->len;
943 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
944 		rxm->ol_flags = 0;
945 		rxm->vlan_tci = 0;
946 		rxm->packet_type = 0;
947 
948 		/*
949 		 * If this is the first buffer of the received packet,
950 		 * set the pointer to the first mbuf of the packet.
951 		 * Otherwise, update the total length and the number of segments
952 		 * of the current scattered packet, and update the pointer to
953 		 * the last mbuf of the current packet.
954 		 */
955 		if (rcd->sop) {
956 			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
957 
958 			if (unlikely(rcd->len == 0)) {
959 				RTE_ASSERT(rcd->eop);
960 
961 				PMD_RX_LOG(DEBUG,
962 					   "Rx buf was skipped. rxring[%d][%d])",
963 					   ring_idx, idx);
964 				rte_pktmbuf_free_seg(rxm);
965 				goto rcd_done;
966 			}
967 
968 			if (vmxnet3_rx_data_ring(hw, rcd->rqID)) {
969 				uint8_t *rdd = rxq->data_ring.base +
970 					idx * rxq->data_desc_size;
971 
972 				RTE_ASSERT(VMXNET3_VERSION_GE_3(hw));
973 				rte_memcpy(rte_pktmbuf_mtod(rxm, char *),
974 					   rdd, rcd->len);
975 			}
976 
977 			rxq->start_seg = rxm;
978 			rxq->last_seg = rxm;
979 			vmxnet3_rx_offload(hw, rcd, rxm, 1);
980 		} else {
981 			struct rte_mbuf *start = rxq->start_seg;
982 
983 			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);
984 
985 			if (likely(start && rxm->data_len > 0)) {
986 				start->pkt_len += rxm->data_len;
987 				start->nb_segs++;
988 
989 				rxq->last_seg->next = rxm;
990 				rxq->last_seg = rxm;
991 			} else {
992 				PMD_RX_LOG(ERR, "Error received empty or out of order frame.");
993 				rxq->stats.drop_total++;
994 				rxq->stats.drop_err++;
995 
996 				rte_pktmbuf_free_seg(rxm);
997 			}
998 		}
999 
1000 		if (rcd->eop) {
1001 			struct rte_mbuf *start = rxq->start_seg;
1002 
1003 			vmxnet3_rx_offload(hw, rcd, start, 0);
1004 			rx_pkts[nb_rx++] = start;
1005 			rxq->start_seg = NULL;
1006 		}
1007 
1008 rcd_done:
1009 		rxq->cmd_ring[ring_idx].next2comp = idx;
1010 		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
1011 					  rxq->cmd_ring[ring_idx].size);
1012 
1013 		/* It's time to renew descriptors */
1014 		vmxnet3_renew_desc(rxq, ring_idx, newm);
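		/* Write the Rx producer register only when updateRxProd is set in
		 * the shared queue control area, and only every 16 newly filled
		 * descriptors, to limit doorbell writes.
		 */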
1015 		if (unlikely(rxq->shared->ctrl.updateRxProd &&
1016 			 (rxq->cmd_ring[ring_idx].next2fill & 0xf) == 0)) {
1017 			VMXNET3_WRITE_BAR0_REG(hw, hw->rx_prod_offset[ring_idx] +
1018 					       (rxq->queue_id * VMXNET3_REG_ALIGN),
1019 					       rxq->cmd_ring[ring_idx].next2fill);
1020 		}
1021 
1022 		/* Advance to the next descriptor in comp_ring */
1023 		vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
1024 
1025 		rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
1026 		nb_rxd++;
1027 		if (nb_rxd > rxq->cmd_ring[0].size) {
1028 			PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
1029 				   " relinquish control.");
1030 			break;
1031 		}
1032 	}
1033 
1034 	if (unlikely(nb_rxd == 0)) {
1035 		uint32_t avail;
1036 		uint32_t posted = 0;
1037 		for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
1038 			avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[ring_idx]);
1039 			if (unlikely(avail > 0)) {
1040 				/* try to alloc new buf and renew descriptors */
1041 				if (vmxnet3_post_rx_bufs(rxq, ring_idx) > 0)
1042 					posted |= (1 << ring_idx);
1043 			}
1044 		}
1045 		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1046 			for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
1047 				if (posted & (1 << ring_idx))
1048 					VMXNET3_WRITE_BAR0_REG(hw, hw->rx_prod_offset[ring_idx] +
1049 							       (rxq->queue_id * VMXNET3_REG_ALIGN),
1050 							       rxq->cmd_ring[ring_idx].next2fill);
1051 			}
1052 		}
1053 	}
1054 
1055 	return nb_rx;
1056 }
1057 
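/* Count received packets pending in the completion ring by walking it
 * (counting EOP descriptors only) without advancing any queue state.
 */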
1058 uint32_t
1059 vmxnet3_dev_rx_queue_count(void *rx_queue)
1060 {
1061 	const vmxnet3_rx_queue_t *rxq;
1062 	const Vmxnet3_RxCompDesc *rcd;
1063 	uint32_t idx, nb_rxd = 0;
1064 	uint8_t gen;
1065 
1066 	rxq = rx_queue;
1067 	if (unlikely(rxq->stopped)) {
1068 		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
1069 		return 0;
1070 	}
1071 
1072 	gen = rxq->comp_ring.gen;
1073 	idx = rxq->comp_ring.next2proc;
1074 	rcd = &rxq->comp_ring.base[idx].rcd;
1075 	while (rcd->gen == gen) {
1076 		if (rcd->eop)
1077 			++nb_rxd;
1078 		if (++idx == rxq->comp_ring.size) {
1079 			idx = 0;
1080 			gen ^= 1;
1081 		}
1082 		rcd = &rxq->comp_ring.base[idx].rcd;
1083 	}
1084 
1085 	return nb_rxd;
1086 }
1087 
1088 int
1089 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
1090 			   uint16_t queue_idx,
1091 			   uint16_t nb_desc,
1092 			   unsigned int socket_id,
1093 			   const struct rte_eth_txconf *tx_conf __rte_unused)
1094 {
1095 	struct vmxnet3_hw *hw = dev->data->dev_private;
1096 	const struct rte_memzone *mz;
1097 	struct vmxnet3_tx_queue *txq;
1098 	struct vmxnet3_cmd_ring *ring;
1099 	struct vmxnet3_comp_ring *comp_ring;
1100 	struct vmxnet3_data_ring *data_ring;
1101 	int size;
1102 
1103 	PMD_INIT_FUNC_TRACE();
1104 
1105 	txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
1106 			  RTE_CACHE_LINE_SIZE);
1107 	if (txq == NULL) {
1108 		PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
1109 		return -ENOMEM;
1110 	}
1111 
1112 	txq->queue_id = queue_idx;
1113 	txq->port_id = dev->data->port_id;
1114 	txq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1115 	txq->hw = hw;
1116 	txq->qid = queue_idx;
1117 	txq->stopped = TRUE;
1118 	txq->txdata_desc_size = hw->txdata_desc_size;
1119 
1120 	ring = &txq->cmd_ring;
1121 	comp_ring = &txq->comp_ring;
1122 	data_ring = &txq->data_ring;
1123 
1124 	/* Tx vmxnet ring length should be between 512-4096 */
1125 	if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
1126 		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
1127 			     VMXNET3_DEF_TX_RING_SIZE);
1128 		return -EINVAL;
1129 	} else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
1130 		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
1131 			     VMXNET3_TX_RING_MAX_SIZE);
1132 		return -EINVAL;
1133 	} else {
1134 		ring->size = nb_desc;
1135 		if (VMXNET3_VERSION_GE_7(hw))
1136 			ring->size = rte_align32prevpow2(nb_desc);
1137 		ring->size &= ~VMXNET3_RING_SIZE_MASK;
1138 	}
1139 	comp_ring->size = data_ring->size = ring->size;
1140 
1141 	/* Tx vmxnet rings structure initialization */
1142 	ring->next2fill = 0;
1143 	ring->next2comp = 0;
1144 	ring->gen = VMXNET3_INIT_GEN;
1145 	comp_ring->next2proc = 0;
1146 	comp_ring->gen = VMXNET3_INIT_GEN;
1147 
1148 	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
1149 	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
1150 	size += txq->txdata_desc_size * data_ring->size;
1151 
1152 	mz = rte_eth_dma_zone_reserve(dev, "txdesc", queue_idx, size,
1153 				      VMXNET3_RING_BA_ALIGN, socket_id);
1154 	if (mz == NULL) {
1155 		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1156 		return -ENOMEM;
1157 	}
1158 	txq->mz = mz;
1159 	memset(mz->addr, 0, mz->len);
1160 
1161 	/* cmd_ring initialization */
1162 	ring->base = mz->addr;
1163 	ring->basePA = mz->iova;
1164 
1165 	/* comp_ring initialization */
1166 	comp_ring->base = ring->base + ring->size;
1167 	comp_ring->basePA = ring->basePA +
1168 		(sizeof(struct Vmxnet3_TxDesc) * ring->size);
1169 
1170 	/* data_ring initialization */
1171 	data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
1172 	data_ring->basePA = comp_ring->basePA +
1173 			(sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
1174 
1175 	/* cmd_ring0 buf_info allocation */
1176 	ring->buf_info = rte_zmalloc("tx_ring_buf_info",
1177 				     ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
1178 	if (ring->buf_info == NULL) {
1179 		PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
1180 		return -ENOMEM;
1181 	}
1182 
1183 	/* Update the data portion with txq */
1184 	dev->data->tx_queues[queue_idx] = txq;
1185 
1186 	return 0;
1187 }
1188 
1189 int
1190 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
1191 			   uint16_t queue_idx,
1192 			   uint16_t nb_desc,
1193 			   unsigned int socket_id,
1194 			   __rte_unused const struct rte_eth_rxconf *rx_conf,
1195 			   struct rte_mempool *mp)
1196 {
1197 	const struct rte_memzone *mz;
1198 	struct vmxnet3_rx_queue *rxq;
1199 	struct vmxnet3_hw *hw = dev->data->dev_private;
1200 	struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
1201 	struct vmxnet3_comp_ring *comp_ring;
1202 	struct vmxnet3_rx_data_ring *data_ring;
1203 	int size;
1204 	uint8_t i;
1205 	char mem_name[32];
1206 
1207 	PMD_INIT_FUNC_TRACE();
1208 
1209 	rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
1210 			  RTE_CACHE_LINE_SIZE);
1211 	if (rxq == NULL) {
1212 		PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
1213 		return -ENOMEM;
1214 	}
1215 
1216 	rxq->mp = mp;
1217 	/* Remember buffer size for initialization in dev start. */
1218 	hw->rxdata_buf_size =
1219 		rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM;
1220 	rxq->queue_id = queue_idx;
1221 	rxq->port_id = dev->data->port_id;
1222 	rxq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1223 	rxq->hw = hw;
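	/* Completion descriptors identify their source ring by rqID; the first
	 * command ring, the second command ring and the Rx data ring of each
	 * queue get ids spaced num_rx_queues apart (see the vmxnet3_get_ring_idx()
	 * usage in the Rx path).
	 */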
1224 	rxq->qid1 = queue_idx;
1225 	rxq->qid2 = queue_idx + hw->num_rx_queues;
1226 	rxq->data_ring_qid = queue_idx + 2 * hw->num_rx_queues;
1227 	rxq->data_desc_size = hw->rxdata_desc_size;
1228 	rxq->stopped = TRUE;
1229 
1230 	ring0 = &rxq->cmd_ring[0];
1231 	ring1 = &rxq->cmd_ring[1];
1232 	comp_ring = &rxq->comp_ring;
1233 	data_ring = &rxq->data_ring;
1234 
1235 	/* Rx vmxnet rings length should be between 256-4096 */
1236 	if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
1237 		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: %u", VMXNET3_DEF_RX_RING_SIZE);
1238 		return -EINVAL;
1239 	} else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
1240 		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: %u", VMXNET3_RX_RING_MAX_SIZE);
1241 		return -EINVAL;
1242 	} else {
1243 		ring0->size = nb_desc;
1244 		if (VMXNET3_VERSION_GE_7(hw))
1245 			ring0->size = rte_align32prevpow2(nb_desc);
1246 		ring0->size &= ~VMXNET3_RING_SIZE_MASK;
1247 		ring1->size = ring0->size;
1248 	}
1249 
1250 	comp_ring->size = ring0->size + ring1->size;
1251 	data_ring->size = ring0->size;
1252 
1253 	/* Rx vmxnet rings structure initialization */
1254 	ring0->next2fill = 0;
1255 	ring1->next2fill = 0;
1256 	ring0->next2comp = 0;
1257 	ring1->next2comp = 0;
1258 	ring0->gen = VMXNET3_INIT_GEN;
1259 	ring1->gen = VMXNET3_INIT_GEN;
1260 	comp_ring->next2proc = 0;
1261 	comp_ring->gen = VMXNET3_INIT_GEN;
1262 
1263 	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
1264 	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1265 	if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size)
1266 		size += rxq->data_desc_size * data_ring->size;
1267 
1268 	mz = rte_eth_dma_zone_reserve(dev, "rxdesc", queue_idx, size,
1269 				      VMXNET3_RING_BA_ALIGN, socket_id);
1270 	if (mz == NULL) {
1271 		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1272 		return -ENOMEM;
1273 	}
1274 	rxq->mz = mz;
1275 	memset(mz->addr, 0, mz->len);
1276 
1277 	/* cmd_ring0 initialization */
1278 	ring0->base = mz->addr;
1279 	ring0->basePA = mz->iova;
1280 
1281 	/* cmd_ring1 initialization */
1282 	ring1->base = ring0->base + ring0->size;
1283 	ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
1284 
1285 	/* comp_ring initialization */
1286 	comp_ring->base = ring1->base + ring1->size;
1287 	comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
1288 		ring1->size;
1289 
1290 	/* data_ring initialization */
1291 	if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size) {
1292 		data_ring->base =
1293 			(uint8_t *)(comp_ring->base + comp_ring->size);
1294 		data_ring->basePA = comp_ring->basePA +
1295 			sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1296 	}
1297 
1298 	/* cmd_ring0-cmd_ring1 buf_info allocation */
1299 	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
1300 
1301 		ring = &rxq->cmd_ring[i];
1302 		ring->rid = i;
1303 		snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
1304 
1305 		ring->buf_info = rte_zmalloc(mem_name,
1306 					     ring->size * sizeof(vmxnet3_buf_info_t),
1307 					     RTE_CACHE_LINE_SIZE);
1308 		if (ring->buf_info == NULL) {
1309 			PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
1310 			return -ENOMEM;
1311 		}
1312 	}
1313 
1314 	/* Update the data portion with rxq */
1315 	dev->data->rx_queues[queue_idx] = rxq;
1316 
1317 	return 0;
1318 }
1319 
1320 /*
1321  * Initializes Receive Unit
1322  * Load mbufs in rx queue in advance
1323  */
1324 int
1325 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
1326 {
1327 	struct vmxnet3_hw *hw = dev->data->dev_private;
1328 
1329 	int i, ret;
1330 	uint8_t j;
1331 
1332 	PMD_INIT_FUNC_TRACE();
1333 
1334 	for (i = 0; i < hw->num_rx_queues; i++) {
1335 		vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
1336 
1337 		for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
1338 			/* Fill the ring completely with freshly allocated mbufs */
1339 			ret = vmxnet3_post_rx_bufs(rxq, j);
1340 
1341 			/* A return of 0 means the Rx queue was configured with zero usable descriptors */
1342 			if (ret == 0) {
1343 				PMD_INIT_LOG(ERR,
1344 					"Invalid configuration in Rx queue: %d, buffers ring: %d",
1345 					i, j);
1346 				return -EINVAL;
1347 			}
1348 			/* Return the error number */
1349 			if (ret < 0) {
1350 				PMD_INIT_LOG(ERR, "Posting Rxq: %d buffers ring: %d", i, j);
1351 				return ret;
1352 			}
1353 			/*
1354 			 * Update the device with next2fill so it knows which
1355 			 * descriptors hold fresh mbufs for incoming packets.
1356 			 */
1357 			if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1358 				VMXNET3_WRITE_BAR0_REG(hw, hw->rx_prod_offset[j] +
1359 						       (rxq->queue_id * VMXNET3_REG_ALIGN),
1360 						       rxq->cmd_ring[j].next2fill);
1361 			}
1362 		}
1363 		rxq->stopped = FALSE;
1364 		rxq->start_seg = NULL;
1365 	}
1366 
1367 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
1368 		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
1369 
1370 		txq->stopped = FALSE;
1371 	}
1372 
1373 	return 0;
1374 }
1375 
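/* Default 40-byte Toeplitz RSS key, used when the application does not supply
 * its own key (the same default is commonly used by Intel-based PMDs).
 */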
1376 static uint8_t rss_intel_key[40] = {
1377 	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1378 	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1379 	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1380 	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1381 	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1382 };
1383 
1384 /*
1385  * Additional RSS configuration based on the vmxnet3 v4+ API
1386  */
1387 int
1388 vmxnet3_v4_rss_configure(struct rte_eth_dev *dev)
1389 {
1390 	struct vmxnet3_hw *hw = dev->data->dev_private;
1391 	Vmxnet3_DriverShared *shared = hw->shared;
1392 	Vmxnet3_CmdInfo *cmdInfo = &shared->cu.cmdInfo;
1393 	struct rte_eth_rss_conf *port_rss_conf;
1394 	uint64_t rss_hf;
1395 	uint32_t ret;
1396 
1397 	PMD_INIT_FUNC_TRACE();
1398 
1399 	cmdInfo->setRSSFields = 0;
1400 	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1401 
1402 	if ((port_rss_conf->rss_hf & VMXNET3_MANDATORY_V4_RSS) !=
1403 	    VMXNET3_MANDATORY_V4_RSS) {
1404 		PMD_INIT_LOG(WARNING, "RSS: IPv4/6 TCP is required for vmxnet3 v4 RSS, "
1405 			     "automatically setting it");
1406 		port_rss_conf->rss_hf |= VMXNET3_MANDATORY_V4_RSS;
1407 	}
1408 
1409 	rss_hf = port_rss_conf->rss_hf &
1410 		(VMXNET3_V4_RSS_MASK | VMXNET3_RSS_OFFLOAD_ALL);
1411 
1412 	if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_TCP)
1413 		cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_TCPIP4;
1414 	if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_TCP)
1415 		cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_TCPIP6;
1416 	if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_UDP)
1417 		cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_UDPIP4;
1418 	if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_UDP)
1419 		cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_UDPIP6;
1420 
1421 	VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD,
1422 			       VMXNET3_CMD_SET_RSS_FIELDS);
1423 	ret = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD);
1424 
1425 	if (ret != VMXNET3_SUCCESS) {
1426 		PMD_DRV_LOG(ERR, "Set RSS fields (v4) failed: %d", ret);
1427 	}
1428 
1429 	return ret;
1430 }
1431 
1432 /*
1433  * Configure RSS feature
1434  */
1435 int
1436 vmxnet3_rss_configure(struct rte_eth_dev *dev)
1437 {
1438 	struct vmxnet3_hw *hw = dev->data->dev_private;
1439 	struct VMXNET3_RSSConf *dev_rss_conf;
1440 	struct rte_eth_rss_conf *port_rss_conf;
1441 	uint64_t rss_hf;
1442 	uint8_t i, j;
1443 
1444 	PMD_INIT_FUNC_TRACE();
1445 
1446 	dev_rss_conf = hw->rss_conf;
1447 	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1448 
1449 	/* loading hashFunc */
1450 	dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
1451 	/* loading hashKeySize */
1452 	dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
1453 	/* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
1454 	dev_rss_conf->indTableSize = (uint16_t)((MAX_RX_QUEUES(hw)) * 4);
1455 
1456 	if (port_rss_conf->rss_key == NULL) {
1457 		/* Default hash key */
1458 		port_rss_conf->rss_key = rss_intel_key;
1459 	}
1460 
1461 	/* loading hashKey */
1462 	memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
1463 	       dev_rss_conf->hashKeySize);
1464 
1465 	/* loading indTable */
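	/* Spread the indirection table entries across the configured Rx queues
	 * in a round-robin fashion.
	 */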
1466 	for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1467 		if (j == dev->data->nb_rx_queues)
1468 			j = 0;
1469 		dev_rss_conf->indTable[i] = j;
1470 	}
1471 
1472 	/* loading hashType */
1473 	dev_rss_conf->hashType = 0;
1474 	rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1475 	if (rss_hf & RTE_ETH_RSS_IPV4)
1476 		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1477 	if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_TCP)
1478 		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1479 	if (rss_hf & RTE_ETH_RSS_IPV6)
1480 		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1481 	if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_TCP)
1482 		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1483 
1484 	return VMXNET3_SUCCESS;
1485 }
1486