xref: /dpdk/drivers/net/vmxnet3/vmxnet3_rxtx.c (revision 42a8fc7daa46256d150278fc9a7a846e27945a0c)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2015 Intel Corporation
3  */
4 
5 #include <sys/queue.h>
6 
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <errno.h>
11 #include <stdint.h>
12 #include <stdarg.h>
13 #include <unistd.h>
14 #include <inttypes.h>
15 
16 #include <rte_byteorder.h>
17 #include <rte_common.h>
18 #include <rte_cycles.h>
19 #include <rte_log.h>
20 #include <rte_debug.h>
21 #include <rte_interrupts.h>
22 #include <rte_pci.h>
23 #include <rte_memory.h>
24 #include <rte_memzone.h>
25 #include <rte_launch.h>
26 #include <rte_eal.h>
27 #include <rte_per_lcore.h>
28 #include <rte_lcore.h>
29 #include <rte_atomic.h>
30 #include <rte_branch_prediction.h>
31 #include <rte_mempool.h>
32 #include <rte_malloc.h>
33 #include <rte_mbuf.h>
34 #include <rte_ether.h>
35 #include <ethdev_driver.h>
36 #include <rte_prefetch.h>
37 #include <rte_ip.h>
38 #include <rte_udp.h>
39 #include <rte_tcp.h>
40 #include <rte_sctp.h>
41 #include <rte_string_fns.h>
42 #include <rte_errno.h>
43 #include <rte_net.h>
44 
45 #include "base/vmxnet3_defs.h"
46 #include "vmxnet3_ring.h"
47 
48 #include "vmxnet3_logs.h"
49 #include "vmxnet3_ethdev.h"
50 
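/*
 * Tx offload flags handled by this PMD. Any RTE_MBUF_F_TX_* bit outside
 * this set falls into VMXNET3_TX_OFFLOAD_NOTSUP_MASK and causes the packet
 * to be rejected in vmxnet3_prep_pkts().
 */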
51 #define	VMXNET3_TX_OFFLOAD_MASK	(RTE_MBUF_F_TX_VLAN | \
52 		RTE_MBUF_F_TX_IPV6 |     \
53 		RTE_MBUF_F_TX_IPV4 |     \
54 		RTE_MBUF_F_TX_L4_MASK |  \
55 		RTE_MBUF_F_TX_TCP_SEG)
56 
57 #define	VMXNET3_TX_OFFLOAD_NOTSUP_MASK	\
58 	(RTE_MBUF_F_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)
59 
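/* BAR0 producer registers for the two Rx command rings, indexed by ring id */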
60 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
61 
62 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
63 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
64 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
65 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
66 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
67 #endif
68 
69 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
70 static void
71 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
72 {
73 	uint32_t avail = 0;
74 
75 	if (rxq == NULL)
76 		return;
77 
78 	PMD_RX_LOG(DEBUG,
79 		   "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
80 		   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
81 	PMD_RX_LOG(DEBUG,
82 		   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
83 		   (unsigned long)rxq->cmd_ring[0].basePA,
84 		   (unsigned long)rxq->cmd_ring[1].basePA,
85 		   (unsigned long)rxq->comp_ring.basePA);
86 
87 	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
88 	PMD_RX_LOG(DEBUG,
89 		   "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
90 		   (uint32_t)rxq->cmd_ring[0].size, avail,
91 		   rxq->comp_ring.next2proc,
92 		   rxq->cmd_ring[0].size - avail);
93 
94 	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
95 	PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
96 		   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
97 		   rxq->cmd_ring[1].size - avail);
98 
99 }
100 
101 static void
102 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
103 {
104 	uint32_t avail = 0;
105 
106 	if (txq == NULL)
107 		return;
108 
109 	PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
110 		   txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
111 	PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
112 		   (unsigned long)txq->cmd_ring.basePA,
113 		   (unsigned long)txq->comp_ring.basePA,
114 		   (unsigned long)txq->data_ring.basePA);
115 
116 	avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
117 	PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
118 		   (uint32_t)txq->cmd_ring.size, avail,
119 		   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
120 }
121 #endif
122 
123 static void
124 vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
125 {
126 	while (ring->next2comp != ring->next2fill) {
127 		/* No need to worry about desc ownership, device is quiesced by now. */
128 		vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
129 
130 		if (buf_info->m) {
131 			rte_pktmbuf_free(buf_info->m);
132 			buf_info->m = NULL;
133 			buf_info->bufPA = 0;
134 			buf_info->len = 0;
135 		}
136 		vmxnet3_cmd_ring_adv_next2comp(ring);
137 	}
138 }
139 
140 static void
141 vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
142 {
143 	uint32_t i;
144 
145 	for (i = 0; i < ring->size; i++) {
146 		/* No need to worry about desc ownership, device is quiesced by now. */
147 		vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];
148 
149 		if (buf_info->m) {
150 			rte_pktmbuf_free_seg(buf_info->m);
151 			buf_info->m = NULL;
152 			buf_info->bufPA = 0;
153 			buf_info->len = 0;
154 		}
155 		vmxnet3_cmd_ring_adv_next2comp(ring);
156 	}
157 }
158 
159 static void
160 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
161 {
162 	rte_free(ring->buf_info);
163 	ring->buf_info = NULL;
164 }
165 
166 void
167 vmxnet3_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
168 {
169 	vmxnet3_tx_queue_t *tq = dev->data->tx_queues[qid];
170 
171 	if (tq != NULL) {
172 		/* Release mbufs */
173 		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
174 		/* Release the cmd_ring */
175 		vmxnet3_cmd_ring_release(&tq->cmd_ring);
176 		/* Release the memzone */
177 		rte_memzone_free(tq->mz);
178 		/* Release the queue */
179 		rte_free(tq);
180 	}
181 }
182 
183 void
184 vmxnet3_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
185 {
186 	int i;
187 	vmxnet3_rx_queue_t *rq = dev->data->rx_queues[qid];
188 
189 	if (rq != NULL) {
190 		/* Release mbufs */
191 		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
192 			vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
193 
194 		/* Release both the cmd_rings */
195 		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
196 			vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
197 
198 		/* Release the memzone */
199 		rte_memzone_free(rq->mz);
200 
201 		/* Release the queue */
202 		rte_free(rq);
203 	}
204 }
205 
206 static void
207 vmxnet3_dev_tx_queue_reset(void *txq)
208 {
209 	vmxnet3_tx_queue_t *tq = txq;
210 	struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
211 	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
212 	struct vmxnet3_data_ring *data_ring = &tq->data_ring;
213 	int size;
214 
215 	if (tq != NULL) {
216 		/* Release the cmd_ring mbufs */
217 		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
218 	}
219 
220 	/* Tx vmxnet rings structure initialization */
221 	ring->next2fill = 0;
222 	ring->next2comp = 0;
223 	ring->gen = VMXNET3_INIT_GEN;
224 	comp_ring->next2proc = 0;
225 	comp_ring->gen = VMXNET3_INIT_GEN;
226 
227 	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
228 	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
229 	size += tq->txdata_desc_size * data_ring->size;
230 
231 	memset(ring->base, 0, size);
232 }
233 
234 static void
235 vmxnet3_dev_rx_queue_reset(void *rxq)
236 {
237 	int i;
238 	vmxnet3_rx_queue_t *rq = rxq;
239 	struct vmxnet3_hw *hw = rq->hw;
240 	struct vmxnet3_cmd_ring *ring0, *ring1;
241 	struct vmxnet3_comp_ring *comp_ring;
242 	struct vmxnet3_rx_data_ring *data_ring = &rq->data_ring;
243 	int size;
244 
245 	/* Release both the cmd_rings mbufs */
246 	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
247 		vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
248 
249 	ring0 = &rq->cmd_ring[0];
250 	ring1 = &rq->cmd_ring[1];
251 	comp_ring = &rq->comp_ring;
252 
253 	/* Rx vmxnet rings structure initialization */
254 	ring0->next2fill = 0;
255 	ring1->next2fill = 0;
256 	ring0->next2comp = 0;
257 	ring1->next2comp = 0;
258 	ring0->gen = VMXNET3_INIT_GEN;
259 	ring1->gen = VMXNET3_INIT_GEN;
260 	comp_ring->next2proc = 0;
261 	comp_ring->gen = VMXNET3_INIT_GEN;
262 
263 	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
264 	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
265 	if (VMXNET3_VERSION_GE_3(hw) && rq->data_desc_size)
266 		size += rq->data_desc_size * data_ring->size;
267 
268 	memset(ring0->base, 0, size);
269 }
270 
271 void
272 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
273 {
274 	unsigned i;
275 
276 	PMD_INIT_FUNC_TRACE();
277 
278 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
279 		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
280 
281 		if (txq != NULL) {
282 			txq->stopped = TRUE;
283 			vmxnet3_dev_tx_queue_reset(txq);
284 		}
285 	}
286 
287 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
288 		struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
289 
290 		if (rxq != NULL) {
291 			rxq->stopped = TRUE;
292 			vmxnet3_dev_rx_queue_reset(rxq);
293 		}
294 	}
295 }
296 
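/*
 * Release the descriptors and the mbuf of one completed packet, identified
 * by the index of its EOP descriptor. Advances next2comp past every
 * descriptor of that packet and returns the number of descriptors freed.
 */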
297 static int
298 vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
299 {
300 	int completed = 0;
301 	struct rte_mbuf *mbuf;
302 
303 	/* Release cmd_ring descriptor and free mbuf */
304 	RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);
305 
306 	mbuf = txq->cmd_ring.buf_info[eop_idx].m;
307 	if (mbuf == NULL)
308 		rte_panic("EOP desc does not point to a valid mbuf");
309 	rte_pktmbuf_free(mbuf);
310 
311 	txq->cmd_ring.buf_info[eop_idx].m = NULL;
312 
313 	while (txq->cmd_ring.next2comp != eop_idx) {
314 		/* no out-of-order completion */
315 		RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
316 		vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
317 		completed++;
318 	}
319 
320 	/* Mark the txd for which tcd was generated as completed */
321 	vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
322 
323 	return completed + 1;
324 }
325 
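/*
 * Walk the Tx completion ring while the generation bit matches and release
 * every packet the device has finished transmitting.
 */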
326 static void
327 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
328 {
329 	int completed = 0;
330 	vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
331 	struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
332 		(comp_ring->base + comp_ring->next2proc);
333 
334 	while (tcd->gen == comp_ring->gen) {
335 		completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);
336 
337 		vmxnet3_comp_ring_adv_next2proc(comp_ring);
338 		tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
339 						    comp_ring->next2proc);
340 	}
341 
342 	PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
343 
344 	/* To avoid compiler warnings when not in DEBUG mode. */
345 	RTE_SET_USED(completed);
346 }
347 
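/*
 * Validate a burst before transmission: reject non-TSO packets with too many
 * segments and packets with unsupported offload flags, and let
 * rte_net_intel_cksum_prepare() fix up the checksum pseudo-headers.
 * On failure, rte_errno is set and the index of the first bad packet is
 * returned.
 */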
348 uint16_t
349 vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
350 	uint16_t nb_pkts)
351 {
352 	int32_t ret;
353 	uint32_t i;
354 	uint64_t ol_flags;
355 	struct rte_mbuf *m;
356 
357 	for (i = 0; i != nb_pkts; i++) {
358 		m = tx_pkts[i];
359 		ol_flags = m->ol_flags;
360 
361 		/* Non-TSO packet cannot occupy more than
362 		 * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
363 		 */
364 		if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) == 0 &&
365 				m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {
366 			rte_errno = EINVAL;
367 			return i;
368 		}
369 
370 		/* check that only supported TX offloads are requested. */
371 		if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
372 				(ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
373 				RTE_MBUF_F_TX_SCTP_CKSUM) {
374 			rte_errno = ENOTSUP;
375 			return i;
376 		}
377 
378 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
379 		ret = rte_validate_tx_offload(m);
380 		if (ret != 0) {
381 			rte_errno = -ret;
382 			return i;
383 		}
384 #endif
385 		ret = rte_net_intel_cksum_prepare(m);
386 		if (ret != 0) {
387 			rte_errno = -ret;
388 			return i;
389 		}
390 	}
391 
392 	return i;
393 }
394 
395 uint16_t
396 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
397 		  uint16_t nb_pkts)
398 {
399 	uint16_t nb_tx;
400 	vmxnet3_tx_queue_t *txq = tx_queue;
401 	struct vmxnet3_hw *hw = txq->hw;
402 	Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
403 	uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);
404 
405 	if (unlikely(txq->stopped)) {
406 		PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
407 		return 0;
408 	}
409 
410 	/* Free up the comp_descriptors aggressively */
411 	vmxnet3_tq_tx_complete(txq);
412 
413 	nb_tx = 0;
414 	while (nb_tx < nb_pkts) {
415 		Vmxnet3_GenericDesc *gdesc;
416 		vmxnet3_buf_info_t *tbi;
417 		uint32_t first2fill, avail, dw2;
418 		struct rte_mbuf *txm = tx_pkts[nb_tx];
419 		struct rte_mbuf *m_seg = txm;
420 		int copy_size = 0;
421 		bool tso = (txm->ol_flags & RTE_MBUF_F_TX_TCP_SEG) != 0;
422 		/* # of descriptors needed for a packet. */
423 		unsigned count = txm->nb_segs;
424 
425 		avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
426 		if (count > avail) {
427 			/* Is command ring full? */
428 			if (unlikely(avail == 0)) {
429 				PMD_TX_LOG(DEBUG, "No free ring descriptors");
430 				txq->stats.tx_ring_full++;
431 				txq->stats.drop_total += (nb_pkts - nb_tx);
432 				break;
433 			}
434 
435 			/* Command ring is not full but cannot handle the
436 			 * multi-segmented packet. Let's try the next packet
437 			 * in this case.
438 			 */
439 			PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
440 				   "(avail %d needed %d)", avail, count);
441 			txq->stats.drop_total++;
442 			if (tso)
443 				txq->stats.drop_tso++;
444 			rte_pktmbuf_free(txm);
445 			nb_tx++;
446 			continue;
447 		}
448 
449 		/* Drop non-TSO packet that is excessively fragmented */
450 		if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
451 			PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
452 				   "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
453 			txq->stats.drop_too_many_segs++;
454 			txq->stats.drop_total++;
455 			rte_pktmbuf_free(txm);
456 			nb_tx++;
457 			continue;
458 		}
459 
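		/*
		 * Small single-segment packets that fit in a Tx data ring
		 * descriptor are copied into the data ring; the SOP descriptor
		 * then points at the data ring slot instead of the mbuf
		 * (see the copy_size handling below).
		 */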
460 		if (txm->nb_segs == 1 &&
461 		    rte_pktmbuf_pkt_len(txm) <= txq->txdata_desc_size) {
462 			struct Vmxnet3_TxDataDesc *tdd;
463 
464 			/* Skip empty packets */
465 			if (unlikely(rte_pktmbuf_pkt_len(txm) == 0)) {
466 				txq->stats.drop_total++;
467 				rte_pktmbuf_free(txm);
468 				nb_tx++;
469 				continue;
470 			}
471 
472 			tdd = (struct Vmxnet3_TxDataDesc *)
473 				((uint8 *)txq->data_ring.base +
474 				 txq->cmd_ring.next2fill *
475 				 txq->txdata_desc_size);
476 			copy_size = rte_pktmbuf_pkt_len(txm);
477 			rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
478 		}
479 
480 		/* use the previous gen bit for the SOP desc */
481 		dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
482 		first2fill = txq->cmd_ring.next2fill;
483 		do {
484 			/* Remember the transmit buffer for cleanup */
485 			tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
486 
487 			/* NB: the following assumes that the VMXNET3 maximum
488 			 * transmit buffer size (16K) is greater than the
489 			 * maximum mbuf segment size.
490 			 */
491 			gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
492 
493 			/* Skip empty segments */
494 			if (unlikely(m_seg->data_len == 0))
495 				continue;
496 
497 			if (copy_size) {
498 				uint64 offset =
499 					(uint64)txq->cmd_ring.next2fill *
500 							txq->txdata_desc_size;
501 				gdesc->txd.addr =
502 					rte_cpu_to_le_64(txq->data_ring.basePA +
503 							 offset);
504 			} else {
505 				gdesc->txd.addr = rte_mbuf_data_iova(m_seg);
506 			}
507 
508 			gdesc->dword[2] = dw2 | m_seg->data_len;
509 			gdesc->dword[3] = 0;
510 
511 			/* move to the next2fill descriptor */
512 			vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
513 
514 			/* use the right gen for non-SOP desc */
515 			dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
516 		} while ((m_seg = m_seg->next) != NULL);
517 
518 		/* set the last buf_info for the pkt */
519 		tbi->m = txm;
520 		/* Update the EOP descriptor */
521 		gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
522 
523 		/* Add VLAN tag if present */
524 		gdesc = txq->cmd_ring.base + first2fill;
525 		if (txm->ol_flags & RTE_MBUF_F_TX_VLAN) {
526 			gdesc->txd.ti = 1;
527 			gdesc->txd.tci = txm->vlan_tci;
528 		}
529 
530 		if (tso) {
531 			uint16_t mss = txm->tso_segsz;
532 
533 			RTE_ASSERT(mss > 0);
534 
535 			gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
536 			gdesc->txd.om = VMXNET3_OM_TSO;
537 			gdesc->txd.msscof = mss;
538 
539 			deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
540 		} else if (txm->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
541 			gdesc->txd.om = VMXNET3_OM_CSUM;
542 			gdesc->txd.hlen = txm->l2_len + txm->l3_len;
543 
544 			switch (txm->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
545 			case RTE_MBUF_F_TX_TCP_CKSUM:
546 				gdesc->txd.msscof = gdesc->txd.hlen +
547 					offsetof(struct rte_tcp_hdr, cksum);
548 				break;
549 			case RTE_MBUF_F_TX_UDP_CKSUM:
550 				gdesc->txd.msscof = gdesc->txd.hlen +
551 					offsetof(struct rte_udp_hdr,
552 						dgram_cksum);
553 				break;
554 			default:
555 				PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
556 					   txm->ol_flags & RTE_MBUF_F_TX_L4_MASK);
557 				abort();
558 			}
559 			deferred++;
560 		} else {
561 			gdesc->txd.hlen = 0;
562 			gdesc->txd.om = VMXNET3_OM_NONE;
563 			gdesc->txd.msscof = 0;
564 			deferred++;
565 		}
566 
567 		/* flip the GEN bit on the SOP */
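		/*
		 * Compiler barrier: make sure every descriptor field written
		 * above is in place before the gen-bit flip below hands the
		 * SOP descriptor to the device.
		 */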
568 		rte_compiler_barrier();
569 		gdesc->dword[2] ^= VMXNET3_TXD_GEN;
570 
571 		txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
572 		nb_tx++;
573 	}
574 
575 	PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));
576 
577 	if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
578 		txq_ctrl->txNumDeferred = 0;
579 		/* Notify vSwitch that packets are available. */
580 		VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
581 				       txq->cmd_ring.next2fill);
582 	}
583 
584 	return nb_tx;
585 }
586 
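/*
 * Refill one Rx descriptor at next2fill with a fresh mbuf: record the mbuf
 * in buf_info, program the buffer address and length, and hand the
 * descriptor to the device by writing the current generation bit.
 */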
587 static inline void
588 vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
589 		   struct rte_mbuf *mbuf)
590 {
591 	uint32_t val;
592 	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
593 	struct Vmxnet3_RxDesc *rxd =
594 		(struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
595 	vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
596 
597 	if (ring_id == 0) {
598 		/* Usually: One HEAD type buf per packet
599 		 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
600 		 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
601 		 */
602 
603 		/* We use single packet buffer so all heads here */
604 		val = VMXNET3_RXD_BTYPE_HEAD;
605 	} else {
606 		/* All BODY type buffers for 2nd ring */
607 		val = VMXNET3_RXD_BTYPE_BODY;
608 	}
609 
610 	/*
611 	 * Load the mbuf pointer into buf_info[next2fill];
612 	 * the buf_info entry serves the same role as a cookie in a virtio virtqueue
613 	 */
614 	buf_info->m = mbuf;
615 	buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
616 	buf_info->bufPA = rte_mbuf_data_iova_default(mbuf);
617 
618 	/* Load Rx Descriptor with the buffer's GPA */
619 	rxd->addr = buf_info->bufPA;
620 
621 	/* After this point rxd->addr MUST not be NULL */
622 	rxd->btype = val;
623 	rxd->len = buf_info->len;
624 	/* Flip gen bit at the end to change ownership */
625 	rxd->gen = ring->gen;
626 
627 	vmxnet3_cmd_ring_adv_next2fill(ring);
628 }
629 /*
630  *  Allocates mbufs and clusters. Posts Rx descriptors with buffer details
631  *  so that the device can receive packets into those buffers.
632  *  Ring layout:
633  *      Of the two rings, the 1st ring contains buffers of type 0 and type 1.
634  *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
635  *      by a frame fit in the 1st ring (1st buf of type 0, the rest of type 1).
636  *      The 2nd ring contains type 1 buffers only and is used mostly
637  *      for LRO.
638  */
639 static int
640 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
641 {
642 	int err = 0;
643 	uint32_t i = 0;
644 	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
645 
646 	while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
647 		struct rte_mbuf *mbuf;
648 
649 		/* Allocate blank mbuf for the current Rx Descriptor */
650 		mbuf = rte_mbuf_raw_alloc(rxq->mp);
651 		if (unlikely(mbuf == NULL)) {
652 			PMD_RX_LOG(ERR, "Error allocating mbuf");
653 			rxq->stats.rx_buf_alloc_failure++;
654 			err = ENOMEM;
655 			break;
656 		}
657 
658 		vmxnet3_renew_desc(rxq, ring_id, mbuf);
659 		i++;
660 	}
661 
662 	/* Return error only if no buffers are posted at present */
663 	if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
664 		return -err;
665 	else
666 		return i;
667 }
668 
669 /* MSS not provided by vmxnet3, guess one with available information */
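/*
 * When the device reported a segment count, derive the MSS from the TCP
 * payload length divided by that count; otherwise fall back to an estimate
 * based on the MTU minus the parsed header lengths.
 */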
670 static uint16_t
671 vmxnet3_guess_mss(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
672 		struct rte_mbuf *rxm)
673 {
674 	uint32_t hlen, slen;
675 	struct rte_ipv4_hdr *ipv4_hdr;
676 	struct rte_ipv6_hdr *ipv6_hdr;
677 	struct rte_tcp_hdr *tcp_hdr;
678 	char *ptr;
679 	uint8_t segs;
680 
681 	RTE_ASSERT(rcd->tcp);
682 
683 	ptr = rte_pktmbuf_mtod(rxm, char *);
684 	slen = rte_pktmbuf_data_len(rxm);
685 	hlen = sizeof(struct rte_ether_hdr);
686 
687 	if (rcd->v4) {
688 		if (unlikely(slen < hlen + sizeof(struct rte_ipv4_hdr)))
689 			return hw->mtu - sizeof(struct rte_ipv4_hdr)
690 					- sizeof(struct rte_tcp_hdr);
691 
692 		ipv4_hdr = (struct rte_ipv4_hdr *)(ptr + hlen);
693 		hlen += rte_ipv4_hdr_len(ipv4_hdr);
694 	} else if (rcd->v6) {
695 		if (unlikely(slen < hlen + sizeof(struct rte_ipv6_hdr)))
696 			return hw->mtu - sizeof(struct rte_ipv6_hdr) -
697 					sizeof(struct rte_tcp_hdr);
698 
699 		ipv6_hdr = (struct rte_ipv6_hdr *)(ptr + hlen);
700 		hlen += sizeof(struct rte_ipv6_hdr);
701 		if (unlikely(ipv6_hdr->proto != IPPROTO_TCP)) {
702 			int frag;
703 
704 			rte_net_skip_ip6_ext(ipv6_hdr->proto, rxm,
705 					&hlen, &frag);
706 		}
707 	}
708 
709 	if (unlikely(slen < hlen + sizeof(struct rte_tcp_hdr)))
710 		return hw->mtu - hlen - sizeof(struct rte_tcp_hdr) +
711 				sizeof(struct rte_ether_hdr);
712 
713 	tcp_hdr = (struct rte_tcp_hdr *)(ptr + hlen);
714 	hlen += (tcp_hdr->data_off & 0xf0) >> 2;
715 
716 	segs = *vmxnet3_segs_dynfield(rxm);
717 	if (segs > 1)
718 		return (rte_pktmbuf_pkt_len(rxm) - hlen + segs - 1) / segs;
719 	else
720 		return hw->mtu - hlen + sizeof(struct rte_ether_hdr);
721 }
722 
723 /* Receive side checksum and other offloads */
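/*
 * Metadata is split between the SOP and EOP completion descriptors:
 * LRO/MSS information arrives with the SOP, while the RSS hash, stripped
 * VLAN tag and checksum status arrive with the EOP; the sop argument
 * selects which half to apply.
 */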
724 static inline void
725 vmxnet3_rx_offload(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
726 		struct rte_mbuf *rxm, const uint8_t sop)
727 {
728 	uint64_t ol_flags = rxm->ol_flags;
729 	uint32_t packet_type = rxm->packet_type;
730 
731 	/* Offloads set in sop */
732 	if (sop) {
733 		/* Set packet type */
734 		packet_type |= RTE_PTYPE_L2_ETHER;
735 
736 		/* Check large packet receive */
737 		if (VMXNET3_VERSION_GE_2(hw) &&
738 		    rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) {
739 			const Vmxnet3_RxCompDescExt *rcde =
740 					(const Vmxnet3_RxCompDescExt *)rcd;
741 
742 			rxm->tso_segsz = rcde->mss;
743 			*vmxnet3_segs_dynfield(rxm) = rcde->segCnt;
744 			ol_flags |= RTE_MBUF_F_RX_LRO;
745 		}
746 	} else { /* Offloads set in eop */
747 		/* Check for RSS */
748 		if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
749 			ol_flags |= RTE_MBUF_F_RX_RSS_HASH;
750 			rxm->hash.rss = rcd->rssHash;
751 		}
752 
753 		/* Check for hardware stripped VLAN tag */
754 		if (rcd->ts) {
755 			ol_flags |= (RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED);
756 			rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
757 		}
758 
759 		/* Check packet type, checksum errors, etc. */
760 		if (rcd->cnc) {
761 			ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN;
762 
763 			if (rcd->v4) {
764 				packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
765 				if (rcd->tcp)
766 					packet_type |= RTE_PTYPE_L4_TCP;
767 				else if (rcd->udp)
768 					packet_type |= RTE_PTYPE_L4_UDP;
769 			} else if (rcd->v6) {
770 				packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
771 				if (rcd->tcp)
772 					packet_type |= RTE_PTYPE_L4_TCP;
773 				else if (rcd->udp)
774 					packet_type |= RTE_PTYPE_L4_UDP;
775 			} else {
776 				packet_type |= RTE_PTYPE_UNKNOWN;
777 			}
778 
779 		} else {
780 			if (rcd->v4) {
781 				packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
782 
783 				if (rcd->ipc)
784 					ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD;
785 				else
786 					ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD;
787 
788 				if (rcd->tuc) {
789 					ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
790 					if (rcd->tcp)
791 						packet_type |= RTE_PTYPE_L4_TCP;
792 					else
793 						packet_type |= RTE_PTYPE_L4_UDP;
794 				} else {
795 					if (rcd->tcp) {
796 						packet_type |= RTE_PTYPE_L4_TCP;
797 						ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
798 					} else if (rcd->udp) {
799 						packet_type |= RTE_PTYPE_L4_UDP;
800 						ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
801 					}
802 				}
803 			} else if (rcd->v6) {
804 				packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
805 
806 				if (rcd->tuc) {
807 					ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
808 					if (rcd->tcp)
809 						packet_type |= RTE_PTYPE_L4_TCP;
810 					else
811 						packet_type |= RTE_PTYPE_L4_UDP;
812 				} else {
813 					if (rcd->tcp) {
814 						packet_type |= RTE_PTYPE_L4_TCP;
815 						ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
816 					} else if (rcd->udp) {
817 						packet_type |= RTE_PTYPE_L4_UDP;
818 						ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
819 					}
820 				}
821 			} else {
822 				packet_type |= RTE_PTYPE_UNKNOWN;
823 			}
824 
825 			/* Old variants of vmxnet3 do not provide MSS */
826 			if ((ol_flags & RTE_MBUF_F_RX_LRO) && rxm->tso_segsz == 0)
827 				rxm->tso_segsz = vmxnet3_guess_mss(hw,
828 						rcd, rxm);
829 		}
830 	}
831 
832 	rxm->ol_flags = ol_flags;
833 	rxm->packet_type = packet_type;
834 }
835 
836 /*
837  * Process the Rx Completion Ring of given vmxnet3_rx_queue
838  * for nb_pkts burst and return the number of packets received
839  */
840 uint16_t
841 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
842 {
843 	uint16_t nb_rx;
844 	uint32_t nb_rxd, idx;
845 	uint8_t ring_idx;
846 	vmxnet3_rx_queue_t *rxq;
847 	Vmxnet3_RxCompDesc *rcd;
848 	vmxnet3_buf_info_t *rbi;
849 	Vmxnet3_RxDesc *rxd;
850 	struct rte_mbuf *rxm = NULL;
851 	struct vmxnet3_hw *hw;
852 
853 	nb_rx = 0;
854 	ring_idx = 0;
855 	nb_rxd = 0;
856 	idx = 0;
857 
858 	rxq = rx_queue;
859 	hw = rxq->hw;
860 
861 	rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
862 
863 	if (unlikely(rxq->stopped)) {
864 		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
865 		return 0;
866 	}
867 
868 	while (rcd->gen == rxq->comp_ring.gen) {
869 		struct rte_mbuf *newm;
870 
871 		if (nb_rx >= nb_pkts)
872 			break;
873 
874 		newm = rte_mbuf_raw_alloc(rxq->mp);
875 		if (unlikely(newm == NULL)) {
876 			PMD_RX_LOG(ERR, "Error allocating mbuf");
877 			rxq->stats.rx_buf_alloc_failure++;
878 			break;
879 		}
880 
881 		idx = rcd->rxdIdx;
882 		ring_idx = vmxnet3_get_ring_idx(hw, rcd->rqID);
883 		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
884 		RTE_SET_USED(rxd); /* used only for assert when enabled */
885 		rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
886 
887 		PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
888 
889 		RTE_ASSERT(rcd->len <= rxd->len);
890 		RTE_ASSERT(rbi->m);
891 
892 		/* Get the packet buffer pointer from buf_info */
893 		rxm = rbi->m;
894 
895 		/* Clear descriptor associated buf_info to be reused */
896 		rbi->m = NULL;
897 		rbi->bufPA = 0;
898 
899 		/* Update the index at which we received a packet */
900 		rxq->cmd_ring[ring_idx].next2comp = idx;
901 
902 		/* For RCD with EOP set, check if there is frame error */
903 		if (unlikely(rcd->eop && rcd->err)) {
904 			rxq->stats.drop_total++;
905 			rxq->stats.drop_err++;
906 
907 			if (!rcd->fcs) {
908 				rxq->stats.drop_fcs++;
909 				PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
910 			}
911 			PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
912 				   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
913 					 rxq->comp_ring.base), rcd->rxdIdx);
914 			rte_pktmbuf_free_seg(rxm);
915 			if (rxq->start_seg) {
916 				struct rte_mbuf *start = rxq->start_seg;
917 
918 				rxq->start_seg = NULL;
919 				rte_pktmbuf_free(start);
920 			}
921 			goto rcd_done;
922 		}
923 
924 		/* Initialize newly received packet buffer */
925 		rxm->port = rxq->port_id;
926 		rxm->nb_segs = 1;
927 		rxm->next = NULL;
928 		rxm->pkt_len = (uint16_t)rcd->len;
929 		rxm->data_len = (uint16_t)rcd->len;
930 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
931 		rxm->ol_flags = 0;
932 		rxm->vlan_tci = 0;
933 		rxm->packet_type = 0;
934 
935 		/*
936 		 * If this is the first buffer of the received packet,
937 		 * set the pointer to the first mbuf of the packet
938 		 * Otherwise, update the total length and the number of segments
939 		 * of the current scattered packet, and update the pointer to
940 		 * the last mbuf of the current packet.
941 		 */
942 		if (rcd->sop) {
943 			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
944 
945 			if (unlikely(rcd->len == 0)) {
946 				RTE_ASSERT(rcd->eop);
947 
948 				PMD_RX_LOG(DEBUG,
949 					   "Rx buf was skipped. rxring[%d][%d])",
950 					   ring_idx, idx);
951 				rte_pktmbuf_free_seg(rxm);
952 				goto rcd_done;
953 			}
954 
955 			if (vmxnet3_rx_data_ring(hw, rcd->rqID)) {
956 				uint8_t *rdd = rxq->data_ring.base +
957 					idx * rxq->data_desc_size;
958 
959 				RTE_ASSERT(VMXNET3_VERSION_GE_3(hw));
960 				rte_memcpy(rte_pktmbuf_mtod(rxm, char *),
961 					   rdd, rcd->len);
962 			}
963 
964 			rxq->start_seg = rxm;
965 			rxq->last_seg = rxm;
966 			vmxnet3_rx_offload(hw, rcd, rxm, 1);
967 		} else {
968 			struct rte_mbuf *start = rxq->start_seg;
969 
970 			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);
971 
972 			if (likely(start && rxm->data_len > 0)) {
973 				start->pkt_len += rxm->data_len;
974 				start->nb_segs++;
975 
976 				rxq->last_seg->next = rxm;
977 				rxq->last_seg = rxm;
978 			} else {
979 				PMD_RX_LOG(ERR, "Error received empty or out of order frame.");
980 				rxq->stats.drop_total++;
981 				rxq->stats.drop_err++;
982 
983 				rte_pktmbuf_free_seg(rxm);
984 			}
985 		}
986 
987 		if (rcd->eop) {
988 			struct rte_mbuf *start = rxq->start_seg;
989 
990 			vmxnet3_rx_offload(hw, rcd, start, 0);
991 			rx_pkts[nb_rx++] = start;
992 			rxq->start_seg = NULL;
993 		}
994 
995 rcd_done:
996 		rxq->cmd_ring[ring_idx].next2comp = idx;
997 		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
998 					  rxq->cmd_ring[ring_idx].size);
999 
1000 		/* It's time to renew descriptors */
1001 		vmxnet3_renew_desc(rxq, ring_idx, newm);
1002 		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1003 			VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1004 					       rxq->cmd_ring[ring_idx].next2fill);
1005 		}
1006 
1007 		/* Advance to the next descriptor in comp_ring */
1008 		vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
1009 
1010 		rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
1011 		nb_rxd++;
1012 		if (nb_rxd > rxq->cmd_ring[0].size) {
1013 			PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
1014 				   " relinquish control.");
1015 			break;
1016 		}
1017 	}
1018 
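	/*
	 * If nothing was processed in this poll, earlier mbuf allocation
	 * failures may have left holes in the command rings; try to
	 * replenish them now so reception can make progress again.
	 */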
1019 	if (unlikely(nb_rxd == 0)) {
1020 		uint32_t avail;
1021 		for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
1022 			avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[ring_idx]);
1023 			if (unlikely(avail > 0)) {
1024 				/* try to alloc new buf and renew descriptors */
1025 				vmxnet3_post_rx_bufs(rxq, ring_idx);
1026 			}
1027 		}
1028 		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1029 			for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
1030 				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1031 						       rxq->cmd_ring[ring_idx].next2fill);
1032 			}
1033 		}
1034 	}
1035 
1036 	return nb_rx;
1037 }
1038 
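/*
 * Count the number of complete packets (EOP descriptors) waiting in the
 * Rx completion ring without consuming them.
 */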
1039 uint32_t
1040 vmxnet3_dev_rx_queue_count(void *rx_queue)
1041 {
1042 	const vmxnet3_rx_queue_t *rxq;
1043 	const Vmxnet3_RxCompDesc *rcd;
1044 	uint32_t idx, nb_rxd = 0;
1045 	uint8_t gen;
1046 
1047 	rxq = rx_queue;
1048 	if (unlikely(rxq->stopped)) {
1049 		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
1050 		return 0;
1051 	}
1052 
1053 	gen = rxq->comp_ring.gen;
1054 	idx = rxq->comp_ring.next2proc;
1055 	rcd = &rxq->comp_ring.base[idx].rcd;
1056 	while (rcd->gen == gen) {
1057 		if (rcd->eop)
1058 			++nb_rxd;
1059 		if (++idx == rxq->comp_ring.size) {
1060 			idx = 0;
1061 			gen ^= 1;
1062 		}
1063 		rcd = &rxq->comp_ring.base[idx].rcd;
1064 	}
1065 
1066 	return nb_rxd;
1067 }
1068 
1069 int
1070 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
1071 			   uint16_t queue_idx,
1072 			   uint16_t nb_desc,
1073 			   unsigned int socket_id,
1074 			   const struct rte_eth_txconf *tx_conf __rte_unused)
1075 {
1076 	struct vmxnet3_hw *hw = dev->data->dev_private;
1077 	const struct rte_memzone *mz;
1078 	struct vmxnet3_tx_queue *txq;
1079 	struct vmxnet3_cmd_ring *ring;
1080 	struct vmxnet3_comp_ring *comp_ring;
1081 	struct vmxnet3_data_ring *data_ring;
1082 	int size;
1083 
1084 	PMD_INIT_FUNC_TRACE();
1085 
1086 	txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
1087 			  RTE_CACHE_LINE_SIZE);
1088 	if (txq == NULL) {
1089 		PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
1090 		return -ENOMEM;
1091 	}
1092 
1093 	txq->queue_id = queue_idx;
1094 	txq->port_id = dev->data->port_id;
1095 	txq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1096 	txq->hw = hw;
1097 	txq->qid = queue_idx;
1098 	txq->stopped = TRUE;
1099 	txq->txdata_desc_size = hw->txdata_desc_size;
1100 
1101 	ring = &txq->cmd_ring;
1102 	comp_ring = &txq->comp_ring;
1103 	data_ring = &txq->data_ring;
1104 
1105 	/* Tx vmxnet ring length should be between 512 and 4096 */
1106 	if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
1107 		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
1108 			     VMXNET3_DEF_TX_RING_SIZE);
1109 		return -EINVAL;
1110 	} else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
1111 		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
1112 			     VMXNET3_TX_RING_MAX_SIZE);
1113 		return -EINVAL;
1114 	} else {
1115 		ring->size = nb_desc;
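		/* Mask off the low bits so the size is a multiple the device supports */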
1116 		ring->size &= ~VMXNET3_RING_SIZE_MASK;
1117 	}
1118 	comp_ring->size = data_ring->size = ring->size;
1119 
1120 	/* Tx vmxnet rings structure initialization */
1121 	ring->next2fill = 0;
1122 	ring->next2comp = 0;
1123 	ring->gen = VMXNET3_INIT_GEN;
1124 	comp_ring->next2proc = 0;
1125 	comp_ring->gen = VMXNET3_INIT_GEN;
1126 
1127 	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
1128 	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
1129 	size += txq->txdata_desc_size * data_ring->size;
1130 
1131 	mz = rte_eth_dma_zone_reserve(dev, "txdesc", queue_idx, size,
1132 				      VMXNET3_RING_BA_ALIGN, socket_id);
1133 	if (mz == NULL) {
1134 		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1135 		return -ENOMEM;
1136 	}
1137 	txq->mz = mz;
1138 	memset(mz->addr, 0, mz->len);
1139 
1140 	/* cmd_ring initialization */
1141 	ring->base = mz->addr;
1142 	ring->basePA = mz->iova;
1143 
1144 	/* comp_ring initialization */
1145 	comp_ring->base = ring->base + ring->size;
1146 	comp_ring->basePA = ring->basePA +
1147 		(sizeof(struct Vmxnet3_TxDesc) * ring->size);
1148 
1149 	/* data_ring initialization */
1150 	data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
1151 	data_ring->basePA = comp_ring->basePA +
1152 			(sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
1153 
1154 	/* cmd_ring0 buf_info allocation */
1155 	ring->buf_info = rte_zmalloc("tx_ring_buf_info",
1156 				     ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
1157 	if (ring->buf_info == NULL) {
1158 		PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
1159 		return -ENOMEM;
1160 	}
1161 
1162 	/* Update the data portion with txq */
1163 	dev->data->tx_queues[queue_idx] = txq;
1164 
1165 	return 0;
1166 }
1167 
1168 int
1169 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
1170 			   uint16_t queue_idx,
1171 			   uint16_t nb_desc,
1172 			   unsigned int socket_id,
1173 			   __rte_unused const struct rte_eth_rxconf *rx_conf,
1174 			   struct rte_mempool *mp)
1175 {
1176 	const struct rte_memzone *mz;
1177 	struct vmxnet3_rx_queue *rxq;
1178 	struct vmxnet3_hw *hw = dev->data->dev_private;
1179 	struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
1180 	struct vmxnet3_comp_ring *comp_ring;
1181 	struct vmxnet3_rx_data_ring *data_ring;
1182 	int size;
1183 	uint8_t i;
1184 	char mem_name[32];
1185 
1186 	PMD_INIT_FUNC_TRACE();
1187 
1188 	rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
1189 			  RTE_CACHE_LINE_SIZE);
1190 	if (rxq == NULL) {
1191 		PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
1192 		return -ENOMEM;
1193 	}
1194 
1195 	rxq->mp = mp;
1196 	rxq->queue_id = queue_idx;
1197 	rxq->port_id = dev->data->port_id;
1198 	rxq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1199 	rxq->hw = hw;
1200 	rxq->qid1 = queue_idx;
1201 	rxq->qid2 = queue_idx + hw->num_rx_queues;
1202 	rxq->data_ring_qid = queue_idx + 2 * hw->num_rx_queues;
1203 	rxq->data_desc_size = hw->rxdata_desc_size;
1204 	rxq->stopped = TRUE;
1205 
1206 	ring0 = &rxq->cmd_ring[0];
1207 	ring1 = &rxq->cmd_ring[1];
1208 	comp_ring = &rxq->comp_ring;
1209 	data_ring = &rxq->data_ring;
1210 
1211 	/* Rx vmxnet ring lengths should be between 256 and 4096 */
1212 	if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
1213 		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
1214 		return -EINVAL;
1215 	} else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
1216 		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
1217 		return -EINVAL;
1218 	} else {
1219 		ring0->size = nb_desc;
1220 		ring0->size &= ~VMXNET3_RING_SIZE_MASK;
1221 		ring1->size = ring0->size;
1222 	}
1223 
1224 	comp_ring->size = ring0->size + ring1->size;
1225 	data_ring->size = ring0->size;
1226 
1227 	/* Rx vmxnet rings structure initialization */
1228 	ring0->next2fill = 0;
1229 	ring1->next2fill = 0;
1230 	ring0->next2comp = 0;
1231 	ring1->next2comp = 0;
1232 	ring0->gen = VMXNET3_INIT_GEN;
1233 	ring1->gen = VMXNET3_INIT_GEN;
1234 	comp_ring->next2proc = 0;
1235 	comp_ring->gen = VMXNET3_INIT_GEN;
1236 
1237 	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
1238 	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1239 	if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size)
1240 		size += rxq->data_desc_size * data_ring->size;
1241 
1242 	mz = rte_eth_dma_zone_reserve(dev, "rxdesc", queue_idx, size,
1243 				      VMXNET3_RING_BA_ALIGN, socket_id);
1244 	if (mz == NULL) {
1245 		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1246 		return -ENOMEM;
1247 	}
1248 	rxq->mz = mz;
1249 	memset(mz->addr, 0, mz->len);
1250 
1251 	/* cmd_ring0 initialization */
1252 	ring0->base = mz->addr;
1253 	ring0->basePA = mz->iova;
1254 
1255 	/* cmd_ring1 initialization */
1256 	ring1->base = ring0->base + ring0->size;
1257 	ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
1258 
1259 	/* comp_ring initialization */
1260 	comp_ring->base = ring1->base + ring1->size;
1261 	comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
1262 		ring1->size;
1263 
1264 	/* data_ring initialization */
1265 	if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size) {
1266 		data_ring->base =
1267 			(uint8_t *)(comp_ring->base + comp_ring->size);
1268 		data_ring->basePA = comp_ring->basePA +
1269 			sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1270 	}
1271 
1272 	/* cmd_ring0-cmd_ring1 buf_info allocation */
1273 	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
1274 
1275 		ring = &rxq->cmd_ring[i];
1276 		ring->rid = i;
1277 		snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
1278 
1279 		ring->buf_info = rte_zmalloc(mem_name,
1280 					     ring->size * sizeof(vmxnet3_buf_info_t),
1281 					     RTE_CACHE_LINE_SIZE);
1282 		if (ring->buf_info == NULL) {
1283 			PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
1284 			return -ENOMEM;
1285 		}
1286 	}
1287 
1288 	/* Update the data portion with rxq */
1289 	dev->data->rx_queues[queue_idx] = rxq;
1290 
1291 	return 0;
1292 }
1293 
1294 /*
1295  * Initializes Receive Unit
1296  * Load mbufs in rx queue in advance
1297  */
1298 int
1299 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
1300 {
1301 	struct vmxnet3_hw *hw = dev->data->dev_private;
1302 
1303 	int i, ret;
1304 	uint8_t j;
1305 
1306 	PMD_INIT_FUNC_TRACE();
1307 
1308 	for (i = 0; i < hw->num_rx_queues; i++) {
1309 		vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
1310 
1311 		for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
1312 			/* Post buffers until the command ring is full */
1313 			ret = vmxnet3_post_rx_bufs(rxq, j);
1314 			if (ret <= 0) {
1315 				PMD_INIT_LOG(ERR,
1316 					     "ERROR: Posting Rxq: %d buffers ring: %d",
1317 					     i, j);
1318 				return -ret;
1319 			}
1320 			/*
1321 			 * Update the device with the next2fill index so the
1322 			 * newly posted mbufs can be used for incoming packets.
1323 			 */
1324 			if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1325 				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1326 						       rxq->cmd_ring[j].next2fill);
1327 			}
1328 		}
1329 		rxq->stopped = FALSE;
1330 		rxq->start_seg = NULL;
1331 	}
1332 
1333 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
1334 		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
1335 
1336 		txq->stopped = FALSE;
1337 	}
1338 
1339 	return 0;
1340 }
1341 
1342 static uint8_t rss_intel_key[40] = {
1343 	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1344 	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1345 	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1346 	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1347 	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1348 };
1349 
1350 /*
1351  * Additional RSS configurations based on vmxnet v4+ APIs
1352  */
1353 int
1354 vmxnet3_v4_rss_configure(struct rte_eth_dev *dev)
1355 {
1356 	struct vmxnet3_hw *hw = dev->data->dev_private;
1357 	Vmxnet3_DriverShared *shared = hw->shared;
1358 	Vmxnet3_CmdInfo *cmdInfo = &shared->cu.cmdInfo;
1359 	struct rte_eth_rss_conf *port_rss_conf;
1360 	uint64_t rss_hf;
1361 	uint32_t ret;
1362 
1363 	PMD_INIT_FUNC_TRACE();
1364 
1365 	cmdInfo->setRSSFields = 0;
1366 	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1367 
1368 	if ((port_rss_conf->rss_hf & VMXNET3_MANDATORY_V4_RSS) !=
1369 	    VMXNET3_MANDATORY_V4_RSS) {
1370 		PMD_INIT_LOG(WARNING, "RSS: IPv4/6 TCP is required for vmxnet3 v4 RSS, "
1371 			     "automatically setting it");
1372 		port_rss_conf->rss_hf |= VMXNET3_MANDATORY_V4_RSS;
1373 	}
1374 
1375 	rss_hf = port_rss_conf->rss_hf &
1376 		(VMXNET3_V4_RSS_MASK | VMXNET3_RSS_OFFLOAD_ALL);
1377 
1378 	if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_TCP)
1379 		cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_TCPIP4;
1380 	if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_TCP)
1381 		cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_TCPIP6;
1382 	if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_UDP)
1383 		cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_UDPIP4;
1384 	if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_UDP)
1385 		cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_UDPIP6;
1386 
1387 	VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD,
1388 			       VMXNET3_CMD_SET_RSS_FIELDS);
1389 	ret = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD);
1390 
1391 	if (ret != VMXNET3_SUCCESS) {
1392 		PMD_DRV_LOG(ERR, "Set RSS fields (v4) failed: %d", ret);
1393 	}
1394 
1395 	return ret;
1396 }
1397 
1398 /*
1399  * Configure RSS feature
1400  */
1401 int
1402 vmxnet3_rss_configure(struct rte_eth_dev *dev)
1403 {
1404 	struct vmxnet3_hw *hw = dev->data->dev_private;
1405 	struct VMXNET3_RSSConf *dev_rss_conf;
1406 	struct rte_eth_rss_conf *port_rss_conf;
1407 	uint64_t rss_hf;
1408 	uint8_t i, j;
1409 
1410 	PMD_INIT_FUNC_TRACE();
1411 
1412 	dev_rss_conf = hw->rss_conf;
1413 	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1414 
1415 	/* loading hashFunc */
1416 	dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
1417 	/* loading hashKeySize */
1418 	dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
1419 	/* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
1420 	dev_rss_conf->indTableSize = (uint16_t)((MAX_RX_QUEUES(hw)) * 4);
1421 
1422 	if (port_rss_conf->rss_key == NULL) {
1423 		/* Default hash key */
1424 		port_rss_conf->rss_key = rss_intel_key;
1425 	}
1426 
1427 	/* loading hashKey */
1428 	memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
1429 	       dev_rss_conf->hashKeySize);
1430 
1431 	/* loading indTable */
1432 	for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1433 		if (j == dev->data->nb_rx_queues)
1434 			j = 0;
1435 		dev_rss_conf->indTable[i] = j;
1436 	}
1437 
1438 	/* loading hashType */
1439 	dev_rss_conf->hashType = 0;
1440 	rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1441 	if (rss_hf & RTE_ETH_RSS_IPV4)
1442 		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1443 	if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_TCP)
1444 		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1445 	if (rss_hf & RTE_ETH_RSS_IPV6)
1446 		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1447 	if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_TCP)
1448 		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1449 
1450 	return VMXNET3_SUCCESS;
1451 }
1452