xref: /dpdk/drivers/net/vmxnet3/vmxnet3_rxtx.c (revision bd03d3f1e4f1734c70bf6be32cdeb5e3ae6fa611)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2015 Intel Corporation
3  */
4 
5 #include <sys/queue.h>
6 
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <errno.h>
11 #include <stdint.h>
12 #include <stdarg.h>
13 #include <unistd.h>
14 #include <inttypes.h>
15 
16 #include <rte_byteorder.h>
17 #include <rte_common.h>
18 #include <rte_cycles.h>
19 #include <rte_log.h>
20 #include <rte_debug.h>
21 #include <rte_interrupts.h>
22 #include <rte_pci.h>
23 #include <rte_memory.h>
24 #include <rte_memzone.h>
25 #include <rte_launch.h>
26 #include <rte_eal.h>
27 #include <rte_per_lcore.h>
28 #include <rte_lcore.h>
29 #include <rte_atomic.h>
30 #include <rte_branch_prediction.h>
31 #include <rte_mempool.h>
32 #include <rte_malloc.h>
33 #include <rte_mbuf.h>
34 #include <rte_ether.h>
35 #include <rte_ethdev_driver.h>
36 #include <rte_prefetch.h>
37 #include <rte_ip.h>
38 #include <rte_udp.h>
39 #include <rte_tcp.h>
40 #include <rte_sctp.h>
41 #include <rte_string_fns.h>
42 #include <rte_errno.h>
43 #include <rte_net.h>
44 
45 #include "base/vmxnet3_defs.h"
46 #include "vmxnet3_ring.h"
47 
48 #include "vmxnet3_logs.h"
49 #include "vmxnet3_ethdev.h"
50 
51 #define	VMXNET3_TX_OFFLOAD_MASK	( \
52 		PKT_TX_VLAN_PKT | \
53 		PKT_TX_IPV6 |     \
54 		PKT_TX_IPV4 |     \
55 		PKT_TX_L4_MASK |  \
56 		PKT_TX_TCP_SEG)
57 
58 #define	VMXNET3_TX_OFFLOAD_NOTSUP_MASK	\
59 	(PKT_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)
60 
61 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
62 
63 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
64 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
65 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
66 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
67 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
68 #endif
69 
70 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
71 static void
72 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
73 {
74 	uint32_t avail = 0;
75 
76 	if (rxq == NULL)
77 		return;
78 
79 	PMD_RX_LOG(DEBUG,
80 		   "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
81 		   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
82 	PMD_RX_LOG(DEBUG,
83 		   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
84 		   (unsigned long)rxq->cmd_ring[0].basePA,
85 		   (unsigned long)rxq->cmd_ring[1].basePA,
86 		   (unsigned long)rxq->comp_ring.basePA);
87 
88 	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
89 	PMD_RX_LOG(DEBUG,
90 		   "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
91 		   (uint32_t)rxq->cmd_ring[0].size, avail,
92 		   rxq->comp_ring.next2proc,
93 		   rxq->cmd_ring[0].size - avail);
94 
95 	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
96 	PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
97 		   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
98 		   rxq->cmd_ring[1].size - avail);
99 
100 }
101 
102 static void
103 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
104 {
105 	uint32_t avail = 0;
106 
107 	if (txq == NULL)
108 		return;
109 
110 	PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
111 		   txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
112 	PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
113 		   (unsigned long)txq->cmd_ring.basePA,
114 		   (unsigned long)txq->comp_ring.basePA,
115 		   (unsigned long)txq->data_ring.basePA);
116 
117 	avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
118 	PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
119 		   (uint32_t)txq->cmd_ring.size, avail,
120 		   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
121 }
122 #endif
123 
124 static void
125 vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
126 {
127 	while (ring->next2comp != ring->next2fill) {
128 		/* No need to worry about desc ownership, device is quiesced by now. */
129 		vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
130 
131 		if (buf_info->m) {
132 			rte_pktmbuf_free(buf_info->m);
133 			buf_info->m = NULL;
134 			buf_info->bufPA = 0;
135 			buf_info->len = 0;
136 		}
137 		vmxnet3_cmd_ring_adv_next2comp(ring);
138 	}
139 }
140 
141 static void
142 vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
143 {
144 	uint32_t i;
145 
146 	for (i = 0; i < ring->size; i++) {
147 		/* No need to worry about desc ownership, device is quiesced by now. */
148 		vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];
149 
150 		if (buf_info->m) {
151 			rte_pktmbuf_free_seg(buf_info->m);
152 			buf_info->m = NULL;
153 			buf_info->bufPA = 0;
154 			buf_info->len = 0;
155 		}
156 		vmxnet3_cmd_ring_adv_next2comp(ring);
157 	}
158 }
159 
160 static void
161 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
162 {
163 	rte_free(ring->buf_info);
164 	ring->buf_info = NULL;
165 }
166 
167 void
168 vmxnet3_dev_tx_queue_release(void *txq)
169 {
170 	vmxnet3_tx_queue_t *tq = txq;
171 
172 	if (tq != NULL) {
173 		/* Release mbufs */
174 		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
175 		/* Release the cmd_ring */
176 		vmxnet3_cmd_ring_release(&tq->cmd_ring);
177 		/* Release the memzone */
178 		rte_memzone_free(tq->mz);
179 		/* Release the queue */
180 		rte_free(tq);
181 	}
182 }
183 
184 void
185 vmxnet3_dev_rx_queue_release(void *rxq)
186 {
187 	int i;
188 	vmxnet3_rx_queue_t *rq = rxq;
189 
190 	if (rq != NULL) {
191 		/* Release mbufs */
192 		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
193 			vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
194 
195 		/* Release both the cmd_rings */
196 		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
197 			vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
198 
199 		/* Release the memzone */
200 		rte_memzone_free(rq->mz);
201 
202 		/* Release the queue */
203 		rte_free(rq);
204 	}
205 }
206 
207 static void
208 vmxnet3_dev_tx_queue_reset(void *txq)
209 {
210 	vmxnet3_tx_queue_t *tq = txq;
211 	struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
212 	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
213 	struct vmxnet3_data_ring *data_ring = &tq->data_ring;
214 	int size;
215 
216 	if (tq != NULL) {
217 		/* Release the cmd_ring mbufs */
218 		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
219 	}
220 
221 	/* Tx vmxnet rings structure initialization*/
222 	ring->next2fill = 0;
223 	ring->next2comp = 0;
224 	ring->gen = VMXNET3_INIT_GEN;
225 	comp_ring->next2proc = 0;
226 	comp_ring->gen = VMXNET3_INIT_GEN;
227 
228 	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
229 	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
230 	size += tq->txdata_desc_size * data_ring->size;
231 
232 	memset(ring->base, 0, size);
233 }
234 
235 static void
236 vmxnet3_dev_rx_queue_reset(void *rxq)
237 {
238 	int i;
239 	vmxnet3_rx_queue_t *rq = rxq;
240 	struct vmxnet3_hw *hw = rq->hw;
241 	struct vmxnet3_cmd_ring *ring0, *ring1;
242 	struct vmxnet3_comp_ring *comp_ring;
243 	struct vmxnet3_rx_data_ring *data_ring = &rq->data_ring;
244 	int size;
245 
246 	/* Release both the cmd_rings mbufs */
247 	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
248 		vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
249 
250 	ring0 = &rq->cmd_ring[0];
251 	ring1 = &rq->cmd_ring[1];
252 	comp_ring = &rq->comp_ring;
253 
254 	/* Rx vmxnet rings structure initialization */
255 	ring0->next2fill = 0;
256 	ring1->next2fill = 0;
257 	ring0->next2comp = 0;
258 	ring1->next2comp = 0;
259 	ring0->gen = VMXNET3_INIT_GEN;
260 	ring1->gen = VMXNET3_INIT_GEN;
261 	comp_ring->next2proc = 0;
262 	comp_ring->gen = VMXNET3_INIT_GEN;
263 
264 	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
265 	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
266 	if (VMXNET3_VERSION_GE_3(hw) && rq->data_desc_size)
267 		size += rq->data_desc_size * data_ring->size;
268 
269 	memset(ring0->base, 0, size);
270 }
271 
272 void
273 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
274 {
275 	unsigned i;
276 
277 	PMD_INIT_FUNC_TRACE();
278 
279 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
280 		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
281 
282 		if (txq != NULL) {
283 			txq->stopped = TRUE;
284 			vmxnet3_dev_tx_queue_reset(txq);
285 		}
286 	}
287 
288 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
289 		struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
290 
291 		if (rxq != NULL) {
292 			rxq->stopped = TRUE;
293 			vmxnet3_dev_rx_queue_reset(rxq);
294 		}
295 	}
296 }
297 
298 static int
299 vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
300 {
301 	int completed = 0;
302 	struct rte_mbuf *mbuf;
303 
304 	/* Release cmd_ring descriptor and free mbuf */
305 	RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);
306 
307 	mbuf = txq->cmd_ring.buf_info[eop_idx].m;
308 	if (mbuf == NULL)
309 		rte_panic("EOP desc does not point to a valid mbuf");
310 	rte_pktmbuf_free(mbuf);
311 
312 	txq->cmd_ring.buf_info[eop_idx].m = NULL;
313 
314 	while (txq->cmd_ring.next2comp != eop_idx) {
315 		/* no out-of-order completion */
316 		RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
317 		vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
318 		completed++;
319 	}
320 
321 	/* Mark the txd for which tcd was generated as completed */
322 	vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
323 
324 	return completed + 1;
325 }
326 
327 static void
328 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
329 {
330 	int completed = 0;
331 	vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
332 	struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
333 		(comp_ring->base + comp_ring->next2proc);
334 
335 	while (tcd->gen == comp_ring->gen) {
336 		completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);
337 
338 		vmxnet3_comp_ring_adv_next2proc(comp_ring);
339 		tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
340 						    comp_ring->next2proc);
341 	}
342 
343 	PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
344 }
345 
346 uint16_t
347 vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
348 	uint16_t nb_pkts)
349 {
350 	int32_t ret;
351 	uint32_t i;
352 	uint64_t ol_flags;
353 	struct rte_mbuf *m;
354 
355 	for (i = 0; i != nb_pkts; i++) {
356 		m = tx_pkts[i];
357 		ol_flags = m->ol_flags;
358 
359 		/* Non-TSO packet cannot occupy more than
360 		 * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
361 		 */
362 		if ((ol_flags & PKT_TX_TCP_SEG) == 0 &&
363 				m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {
364 			rte_errno = -EINVAL;
365 			return i;
366 		}
367 
368 		/* check that only supported TX offloads are requested. */
369 		if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
370 				(ol_flags & PKT_TX_L4_MASK) ==
371 				PKT_TX_SCTP_CKSUM) {
372 			rte_errno = -ENOTSUP;
373 			return i;
374 		}
375 
376 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
377 		ret = rte_validate_tx_offload(m);
378 		if (ret != 0) {
379 			rte_errno = ret;
380 			return i;
381 		}
382 #endif
383 		ret = rte_net_intel_cksum_prepare(m);
384 		if (ret != 0) {
385 			rte_errno = ret;
386 			return i;
387 		}
388 	}
389 
390 	return i;
391 }
392 
393 uint16_t
394 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
395 		  uint16_t nb_pkts)
396 {
397 	uint16_t nb_tx;
398 	vmxnet3_tx_queue_t *txq = tx_queue;
399 	struct vmxnet3_hw *hw = txq->hw;
400 	Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
401 	uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);
402 
403 	if (unlikely(txq->stopped)) {
404 		PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
405 		return 0;
406 	}
407 
408 	/* Free up the comp_descriptors aggressively */
409 	vmxnet3_tq_tx_complete(txq);
410 
411 	nb_tx = 0;
412 	while (nb_tx < nb_pkts) {
413 		Vmxnet3_GenericDesc *gdesc;
414 		vmxnet3_buf_info_t *tbi;
415 		uint32_t first2fill, avail, dw2;
416 		struct rte_mbuf *txm = tx_pkts[nb_tx];
417 		struct rte_mbuf *m_seg = txm;
418 		int copy_size = 0;
419 		bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
420 		/* # of descriptors needed for a packet. */
421 		unsigned count = txm->nb_segs;
422 
423 		avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
424 		if (count > avail) {
425 			/* Is command ring full? */
426 			if (unlikely(avail == 0)) {
427 				PMD_TX_LOG(DEBUG, "No free ring descriptors");
428 				txq->stats.tx_ring_full++;
429 				txq->stats.drop_total += (nb_pkts - nb_tx);
430 				break;
431 			}
432 
433 			/* Command ring is not full but cannot handle the
434 			 * multi-segmented packet. Let's try the next packet
435 			 * in this case.
436 			 */
437 			PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
438 				   "(avail %d needed %d)", avail, count);
439 			txq->stats.drop_total++;
440 			if (tso)
441 				txq->stats.drop_tso++;
442 			rte_pktmbuf_free(txm);
443 			nb_tx++;
444 			continue;
445 		}
446 
447 		/* Drop non-TSO packet that is excessively fragmented */
448 		if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
449 			PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
450 				   "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
451 			txq->stats.drop_too_many_segs++;
452 			txq->stats.drop_total++;
453 			rte_pktmbuf_free(txm);
454 			nb_tx++;
455 			continue;
456 		}
457 
458 		if (txm->nb_segs == 1 &&
459 		    rte_pktmbuf_pkt_len(txm) <= txq->txdata_desc_size) {
460 			struct Vmxnet3_TxDataDesc *tdd;
461 
462 			/* Skip empty packets */
463 			if (unlikely(rte_pktmbuf_pkt_len(txm) == 0)) {
464 				txq->stats.drop_total++;
465 				rte_pktmbuf_free(txm);
466 				nb_tx++;
467 				continue;
468 			}
469 
470 			tdd = (struct Vmxnet3_TxDataDesc *)
471 				((uint8 *)txq->data_ring.base +
472 				 txq->cmd_ring.next2fill *
473 				 txq->txdata_desc_size);
474 			copy_size = rte_pktmbuf_pkt_len(txm);
475 			rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
476 		}
477 
478 		/* use the previous gen bit for the SOP desc */
479 		dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
480 		first2fill = txq->cmd_ring.next2fill;
481 		do {
482 			/* Remember the transmit buffer for cleanup */
483 			tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
484 
485 			/* NB: the following assumes that VMXNET3 maximum
486 			 * transmit buffer size (16K) is greater than
487 			 * maximum size of mbuf segment size.
488 			 */
489 			gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
490 
491 			/* Skip empty segments */
492 			if (unlikely(m_seg->data_len == 0))
493 				continue;
494 
495 			if (copy_size) {
496 				uint64 offset =
497 					(uint64)txq->cmd_ring.next2fill *
498 							txq->txdata_desc_size;
499 				gdesc->txd.addr =
500 					rte_cpu_to_le_64(txq->data_ring.basePA +
501 							 offset);
502 			} else {
503 				gdesc->txd.addr = rte_mbuf_data_iova(m_seg);
504 			}
505 
506 			gdesc->dword[2] = dw2 | m_seg->data_len;
507 			gdesc->dword[3] = 0;
508 
509 			/* move to the next2fill descriptor */
510 			vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
511 
512 			/* use the right gen for non-SOP desc */
513 			dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
514 		} while ((m_seg = m_seg->next) != NULL);
515 
516 		/* set the last buf_info for the pkt */
517 		tbi->m = txm;
518 		/* Update the EOP descriptor */
519 		gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
520 
521 		/* Add VLAN tag if present */
522 		gdesc = txq->cmd_ring.base + first2fill;
523 		if (txm->ol_flags & PKT_TX_VLAN_PKT) {
524 			gdesc->txd.ti = 1;
525 			gdesc->txd.tci = txm->vlan_tci;
526 		}
527 
528 		if (tso) {
529 			uint16_t mss = txm->tso_segsz;
530 
531 			RTE_ASSERT(mss > 0);
532 
533 			gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
534 			gdesc->txd.om = VMXNET3_OM_TSO;
535 			gdesc->txd.msscof = mss;
536 
537 			deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
538 		} else if (txm->ol_flags & PKT_TX_L4_MASK) {
539 			gdesc->txd.om = VMXNET3_OM_CSUM;
540 			gdesc->txd.hlen = txm->l2_len + txm->l3_len;
541 
542 			switch (txm->ol_flags & PKT_TX_L4_MASK) {
543 			case PKT_TX_TCP_CKSUM:
544 				gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum);
545 				break;
546 			case PKT_TX_UDP_CKSUM:
547 				gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum);
548 				break;
549 			default:
550 				PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
551 					   txm->ol_flags & PKT_TX_L4_MASK);
552 				abort();
553 			}
554 			deferred++;
555 		} else {
556 			gdesc->txd.hlen = 0;
557 			gdesc->txd.om = VMXNET3_OM_NONE;
558 			gdesc->txd.msscof = 0;
559 			deferred++;
560 		}
561 
562 		/* flip the GEN bit on the SOP */
563 		rte_compiler_barrier();
564 		gdesc->dword[2] ^= VMXNET3_TXD_GEN;
565 
566 		txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
567 		nb_tx++;
568 	}
569 
570 	PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));
571 
572 	if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
573 		txq_ctrl->txNumDeferred = 0;
574 		/* Notify vSwitch that packets are available. */
575 		VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
576 				       txq->cmd_ring.next2fill);
577 	}
578 
579 	return nb_tx;
580 }
581 
582 static inline void
583 vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
584 		   struct rte_mbuf *mbuf)
585 {
586 	uint32_t val;
587 	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
588 	struct Vmxnet3_RxDesc *rxd =
589 		(struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
590 	vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
591 
592 	if (ring_id == 0) {
593 		/* Usually: One HEAD type buf per packet
594 		 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
595 		 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
596 		 */
597 
598 		/* We use single packet buffer so all heads here */
599 		val = VMXNET3_RXD_BTYPE_HEAD;
600 	} else {
601 		/* All BODY type buffers for 2nd ring */
602 		val = VMXNET3_RXD_BTYPE_BODY;
603 	}
604 
605 	/*
606 	 * Load mbuf pointer into buf_info[ring_size]
607 	 * buf_info structure is equivalent to cookie for virtio-virtqueue
608 	 */
609 	buf_info->m = mbuf;
610 	buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
611 	buf_info->bufPA = rte_mbuf_data_iova_default(mbuf);
612 
613 	/* Load Rx Descriptor with the buffer's GPA */
614 	rxd->addr = buf_info->bufPA;
615 
616 	/* After this point rxd->addr MUST not be NULL */
617 	rxd->btype = val;
618 	rxd->len = buf_info->len;
619 	/* Flip gen bit at the end to change ownership */
620 	rxd->gen = ring->gen;
621 
622 	vmxnet3_cmd_ring_adv_next2fill(ring);
623 }
624 /*
625  *  Allocates mbufs and clusters. Post rx descriptors with buffer details
626  *  so that device can receive packets in those buffers.
627  *  Ring layout:
628  *      Among the two rings, 1st ring contains buffers of type 0 and type 1.
629  *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
630  *      by a frame will fit in 1st ring (1st buf of type0 and rest of type1).
631  *      2nd ring contains buffers of type 1 alone. Second ring mostly be used
632  *      only for LRO.
633  */
634 static int
635 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
636 {
637 	int err = 0;
638 	uint32_t i = 0;
639 	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
640 
641 	while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
642 		struct rte_mbuf *mbuf;
643 
644 		/* Allocate blank mbuf for the current Rx Descriptor */
645 		mbuf = rte_mbuf_raw_alloc(rxq->mp);
646 		if (unlikely(mbuf == NULL)) {
647 			PMD_RX_LOG(ERR, "Error allocating mbuf");
648 			rxq->stats.rx_buf_alloc_failure++;
649 			err = ENOMEM;
650 			break;
651 		}
652 
653 		vmxnet3_renew_desc(rxq, ring_id, mbuf);
654 		i++;
655 	}
656 
657 	/* Return error only if no buffers are posted at present */
658 	if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
659 		return -err;
660 	else
661 		return i;
662 }
663 
664 /* MSS not provided by vmxnet3, guess one with available information */
665 static uint16_t
666 vmxnet3_guess_mss(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
667 		struct rte_mbuf *rxm)
668 {
669 	uint32_t hlen, slen;
670 	struct ipv4_hdr *ipv4_hdr;
671 	struct ipv6_hdr *ipv6_hdr;
672 	struct tcp_hdr *tcp_hdr;
673 	char *ptr;
674 
675 	RTE_ASSERT(rcd->tcp);
676 
677 	ptr = rte_pktmbuf_mtod(rxm, char *);
678 	slen = rte_pktmbuf_data_len(rxm);
679 	hlen = sizeof(struct ether_hdr);
680 
681 	if (rcd->v4) {
682 		if (unlikely(slen < hlen + sizeof(struct ipv4_hdr)))
683 			return hw->mtu - sizeof(struct ipv4_hdr)
684 					- sizeof(struct tcp_hdr);
685 
686 		ipv4_hdr = (struct ipv4_hdr *)(ptr + hlen);
687 		hlen += (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
688 				IPV4_IHL_MULTIPLIER;
689 	} else if (rcd->v6) {
690 		if (unlikely(slen < hlen + sizeof(struct ipv6_hdr)))
691 			return hw->mtu - sizeof(struct ipv6_hdr) -
692 					sizeof(struct tcp_hdr);
693 
694 		ipv6_hdr = (struct ipv6_hdr *)(ptr + hlen);
695 		hlen += sizeof(struct ipv6_hdr);
696 		if (unlikely(ipv6_hdr->proto != IPPROTO_TCP)) {
697 			int frag;
698 
699 			rte_net_skip_ip6_ext(ipv6_hdr->proto, rxm,
700 					&hlen, &frag);
701 		}
702 	}
703 
704 	if (unlikely(slen < hlen + sizeof(struct tcp_hdr)))
705 		return hw->mtu - hlen - sizeof(struct tcp_hdr) +
706 				sizeof(struct ether_hdr);
707 
708 	tcp_hdr = (struct tcp_hdr *)(ptr + hlen);
709 	hlen += (tcp_hdr->data_off & 0xf0) >> 2;
710 
711 	if (rxm->udata64 > 1)
712 		return (rte_pktmbuf_pkt_len(rxm) - hlen +
713 				rxm->udata64 - 1) / rxm->udata64;
714 	else
715 		return hw->mtu - hlen + sizeof(struct ether_hdr);
716 }
717 
718 /* Receive side checksum and other offloads */
719 static inline void
720 vmxnet3_rx_offload(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
721 		struct rte_mbuf *rxm, const uint8_t sop)
722 {
723 	uint64_t ol_flags = rxm->ol_flags;
724 	uint32_t packet_type = rxm->packet_type;
725 
726 	/* Offloads set in sop */
727 	if (sop) {
728 		/* Set packet type */
729 		packet_type |= RTE_PTYPE_L2_ETHER;
730 
731 		/* Check large packet receive */
732 		if (VMXNET3_VERSION_GE_2(hw) &&
733 		    rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) {
734 			const Vmxnet3_RxCompDescExt *rcde =
735 					(const Vmxnet3_RxCompDescExt *)rcd;
736 
737 			rxm->tso_segsz = rcde->mss;
738 			rxm->udata64 = rcde->segCnt;
739 			ol_flags |= PKT_RX_LRO;
740 		}
741 	} else { /* Offloads set in eop */
742 		/* Check for RSS */
743 		if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
744 			ol_flags |= PKT_RX_RSS_HASH;
745 			rxm->hash.rss = rcd->rssHash;
746 		}
747 
748 		/* Check for hardware stripped VLAN tag */
749 		if (rcd->ts) {
750 			ol_flags |= (PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED);
751 			rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
752 		}
753 
754 		/* Check packet type, checksum errors, etc. */
755 		if (rcd->cnc) {
756 			ol_flags |= PKT_RX_L4_CKSUM_UNKNOWN;
757 		} else {
758 			if (rcd->v4) {
759 				packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
760 
761 				if (rcd->ipc)
762 					ol_flags |= PKT_RX_IP_CKSUM_GOOD;
763 				else
764 					ol_flags |= PKT_RX_IP_CKSUM_BAD;
765 
766 				if (rcd->tuc) {
767 					ol_flags |= PKT_RX_L4_CKSUM_GOOD;
768 					if (rcd->tcp)
769 						packet_type |= RTE_PTYPE_L4_TCP;
770 					else
771 						packet_type |= RTE_PTYPE_L4_UDP;
772 				} else {
773 					if (rcd->tcp) {
774 						packet_type |= RTE_PTYPE_L4_TCP;
775 						ol_flags |= PKT_RX_L4_CKSUM_BAD;
776 					} else if (rcd->udp) {
777 						packet_type |= RTE_PTYPE_L4_UDP;
778 						ol_flags |= PKT_RX_L4_CKSUM_BAD;
779 					}
780 				}
781 			} else if (rcd->v6) {
782 				packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
783 
784 				if (rcd->tuc) {
785 					ol_flags |= PKT_RX_L4_CKSUM_GOOD;
786 					if (rcd->tcp)
787 						packet_type |= RTE_PTYPE_L4_TCP;
788 					else
789 						packet_type |= RTE_PTYPE_L4_UDP;
790 				} else {
791 					if (rcd->tcp) {
792 						packet_type |= RTE_PTYPE_L4_TCP;
793 						ol_flags |= PKT_RX_L4_CKSUM_BAD;
794 					} else if (rcd->udp) {
795 						packet_type |= RTE_PTYPE_L4_UDP;
796 						ol_flags |= PKT_RX_L4_CKSUM_BAD;
797 					}
798 				}
799 			} else {
800 				packet_type |= RTE_PTYPE_UNKNOWN;
801 			}
802 
803 			/* Old variants of vmxnet3 do not provide MSS */
804 			if ((ol_flags & PKT_RX_LRO) && rxm->tso_segsz == 0)
805 				rxm->tso_segsz = vmxnet3_guess_mss(hw,
806 						rcd, rxm);
807 		}
808 	}
809 
810 	rxm->ol_flags = ol_flags;
811 	rxm->packet_type = packet_type;
812 }
813 
814 /*
815  * Process the Rx Completion Ring of given vmxnet3_rx_queue
816  * for nb_pkts burst and return the number of packets received
817  */
818 uint16_t
819 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
820 {
821 	uint16_t nb_rx;
822 	uint32_t nb_rxd, idx;
823 	uint8_t ring_idx;
824 	vmxnet3_rx_queue_t *rxq;
825 	Vmxnet3_RxCompDesc *rcd;
826 	vmxnet3_buf_info_t *rbi;
827 	Vmxnet3_RxDesc *rxd;
828 	struct rte_mbuf *rxm = NULL;
829 	struct vmxnet3_hw *hw;
830 
831 	nb_rx = 0;
832 	ring_idx = 0;
833 	nb_rxd = 0;
834 	idx = 0;
835 
836 	rxq = rx_queue;
837 	hw = rxq->hw;
838 
839 	rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
840 
841 	if (unlikely(rxq->stopped)) {
842 		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
843 		return 0;
844 	}
845 
846 	while (rcd->gen == rxq->comp_ring.gen) {
847 		struct rte_mbuf *newm;
848 
849 		if (nb_rx >= nb_pkts)
850 			break;
851 
852 		newm = rte_mbuf_raw_alloc(rxq->mp);
853 		if (unlikely(newm == NULL)) {
854 			PMD_RX_LOG(ERR, "Error allocating mbuf");
855 			rxq->stats.rx_buf_alloc_failure++;
856 			break;
857 		}
858 
859 		idx = rcd->rxdIdx;
860 		ring_idx = vmxnet3_get_ring_idx(hw, rcd->rqID);
861 		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
862 		RTE_SET_USED(rxd); /* used only for assert when enabled */
863 		rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
864 
865 		PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
866 
867 		RTE_ASSERT(rcd->len <= rxd->len);
868 		RTE_ASSERT(rbi->m);
869 
870 		/* Get the packet buffer pointer from buf_info */
871 		rxm = rbi->m;
872 
873 		/* Clear descriptor associated buf_info to be reused */
874 		rbi->m = NULL;
875 		rbi->bufPA = 0;
876 
877 		/* Update the index that we received a packet */
878 		rxq->cmd_ring[ring_idx].next2comp = idx;
879 
880 		/* For RCD with EOP set, check if there is frame error */
881 		if (unlikely(rcd->eop && rcd->err)) {
882 			rxq->stats.drop_total++;
883 			rxq->stats.drop_err++;
884 
885 			if (!rcd->fcs) {
886 				rxq->stats.drop_fcs++;
887 				PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
888 			}
889 			PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
890 				   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
891 					 rxq->comp_ring.base), rcd->rxdIdx);
892 			rte_pktmbuf_free_seg(rxm);
893 			if (rxq->start_seg) {
894 				struct rte_mbuf *start = rxq->start_seg;
895 
896 				rxq->start_seg = NULL;
897 				rte_pktmbuf_free(start);
898 			}
899 			goto rcd_done;
900 		}
901 
902 		/* Initialize newly received packet buffer */
903 		rxm->port = rxq->port_id;
904 		rxm->nb_segs = 1;
905 		rxm->next = NULL;
906 		rxm->pkt_len = (uint16_t)rcd->len;
907 		rxm->data_len = (uint16_t)rcd->len;
908 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
909 		rxm->ol_flags = 0;
910 		rxm->vlan_tci = 0;
911 		rxm->packet_type = 0;
912 
913 		/*
914 		 * If this is the first buffer of the received packet,
915 		 * set the pointer to the first mbuf of the packet
916 		 * Otherwise, update the total length and the number of segments
917 		 * of the current scattered packet, and update the pointer to
918 		 * the last mbuf of the current packet.
919 		 */
920 		if (rcd->sop) {
921 			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
922 
923 			if (unlikely(rcd->len == 0)) {
924 				RTE_ASSERT(rcd->eop);
925 
926 				PMD_RX_LOG(DEBUG,
927 					   "Rx buf was skipped. rxring[%d][%d])",
928 					   ring_idx, idx);
929 				rte_pktmbuf_free_seg(rxm);
930 				goto rcd_done;
931 			}
932 
933 			if (vmxnet3_rx_data_ring(hw, rcd->rqID)) {
934 				uint8_t *rdd = rxq->data_ring.base +
935 					idx * rxq->data_desc_size;
936 
937 				RTE_ASSERT(VMXNET3_VERSION_GE_3(hw));
938 				rte_memcpy(rte_pktmbuf_mtod(rxm, char *),
939 					   rdd, rcd->len);
940 			}
941 
942 			rxq->start_seg = rxm;
943 			rxq->last_seg = rxm;
944 			vmxnet3_rx_offload(hw, rcd, rxm, 1);
945 		} else {
946 			struct rte_mbuf *start = rxq->start_seg;
947 
948 			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);
949 
950 			if (rxm->data_len) {
951 				start->pkt_len += rxm->data_len;
952 				start->nb_segs++;
953 
954 				rxq->last_seg->next = rxm;
955 				rxq->last_seg = rxm;
956 			} else {
957 				rte_pktmbuf_free_seg(rxm);
958 			}
959 		}
960 
961 		if (rcd->eop) {
962 			struct rte_mbuf *start = rxq->start_seg;
963 
964 			vmxnet3_rx_offload(hw, rcd, start, 0);
965 			rx_pkts[nb_rx++] = start;
966 			rxq->start_seg = NULL;
967 		}
968 
969 rcd_done:
970 		rxq->cmd_ring[ring_idx].next2comp = idx;
971 		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
972 					  rxq->cmd_ring[ring_idx].size);
973 
974 		/* It's time to renew descriptors */
975 		vmxnet3_renew_desc(rxq, ring_idx, newm);
976 		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
977 			VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
978 					       rxq->cmd_ring[ring_idx].next2fill);
979 		}
980 
981 		/* Advance to the next descriptor in comp_ring */
982 		vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
983 
984 		rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
985 		nb_rxd++;
986 		if (nb_rxd > rxq->cmd_ring[0].size) {
987 			PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
988 				   " relinquish control.");
989 			break;
990 		}
991 	}
992 
993 	if (unlikely(nb_rxd == 0)) {
994 		uint32_t avail;
995 		for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
996 			avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[ring_idx]);
997 			if (unlikely(avail > 0)) {
998 				/* try to alloc new buf and renew descriptors */
999 				vmxnet3_post_rx_bufs(rxq, ring_idx);
1000 			}
1001 		}
1002 		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1003 			for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
1004 				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1005 						       rxq->cmd_ring[ring_idx].next2fill);
1006 			}
1007 		}
1008 	}
1009 
1010 	return nb_rx;
1011 }
1012 
1013 int
1014 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
1015 			   uint16_t queue_idx,
1016 			   uint16_t nb_desc,
1017 			   unsigned int socket_id,
1018 			   const struct rte_eth_txconf *tx_conf __rte_unused)
1019 {
1020 	struct vmxnet3_hw *hw = dev->data->dev_private;
1021 	const struct rte_memzone *mz;
1022 	struct vmxnet3_tx_queue *txq;
1023 	struct vmxnet3_cmd_ring *ring;
1024 	struct vmxnet3_comp_ring *comp_ring;
1025 	struct vmxnet3_data_ring *data_ring;
1026 	int size;
1027 
1028 	PMD_INIT_FUNC_TRACE();
1029 
1030 	txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
1031 			  RTE_CACHE_LINE_SIZE);
1032 	if (txq == NULL) {
1033 		PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
1034 		return -ENOMEM;
1035 	}
1036 
1037 	txq->queue_id = queue_idx;
1038 	txq->port_id = dev->data->port_id;
1039 	txq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1040 	txq->hw = hw;
1041 	txq->qid = queue_idx;
1042 	txq->stopped = TRUE;
1043 	txq->txdata_desc_size = hw->txdata_desc_size;
1044 
1045 	ring = &txq->cmd_ring;
1046 	comp_ring = &txq->comp_ring;
1047 	data_ring = &txq->data_ring;
1048 
1049 	/* Tx vmxnet ring length should be between 512-4096 */
1050 	if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
1051 		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
1052 			     VMXNET3_DEF_TX_RING_SIZE);
1053 		return -EINVAL;
1054 	} else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
1055 		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
1056 			     VMXNET3_TX_RING_MAX_SIZE);
1057 		return -EINVAL;
1058 	} else {
1059 		ring->size = nb_desc;
1060 		ring->size &= ~VMXNET3_RING_SIZE_MASK;
1061 	}
1062 	comp_ring->size = data_ring->size = ring->size;
1063 
1064 	/* Tx vmxnet rings structure initialization*/
1065 	ring->next2fill = 0;
1066 	ring->next2comp = 0;
1067 	ring->gen = VMXNET3_INIT_GEN;
1068 	comp_ring->next2proc = 0;
1069 	comp_ring->gen = VMXNET3_INIT_GEN;
1070 
1071 	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
1072 	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
1073 	size += txq->txdata_desc_size * data_ring->size;
1074 
1075 	mz = rte_eth_dma_zone_reserve(dev, "txdesc", queue_idx, size,
1076 				      VMXNET3_RING_BA_ALIGN, socket_id);
1077 	if (mz == NULL) {
1078 		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1079 		return -ENOMEM;
1080 	}
1081 	txq->mz = mz;
1082 	memset(mz->addr, 0, mz->len);
1083 
1084 	/* cmd_ring initialization */
1085 	ring->base = mz->addr;
1086 	ring->basePA = mz->iova;
1087 
1088 	/* comp_ring initialization */
1089 	comp_ring->base = ring->base + ring->size;
1090 	comp_ring->basePA = ring->basePA +
1091 		(sizeof(struct Vmxnet3_TxDesc) * ring->size);
1092 
1093 	/* data_ring initialization */
1094 	data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
1095 	data_ring->basePA = comp_ring->basePA +
1096 			(sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
1097 
1098 	/* cmd_ring0 buf_info allocation */
1099 	ring->buf_info = rte_zmalloc("tx_ring_buf_info",
1100 				     ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
1101 	if (ring->buf_info == NULL) {
1102 		PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
1103 		return -ENOMEM;
1104 	}
1105 
1106 	/* Update the data portion with txq */
1107 	dev->data->tx_queues[queue_idx] = txq;
1108 
1109 	return 0;
1110 }
1111 
1112 int
1113 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
1114 			   uint16_t queue_idx,
1115 			   uint16_t nb_desc,
1116 			   unsigned int socket_id,
1117 			   __rte_unused const struct rte_eth_rxconf *rx_conf,
1118 			   struct rte_mempool *mp)
1119 {
1120 	const struct rte_memzone *mz;
1121 	struct vmxnet3_rx_queue *rxq;
1122 	struct vmxnet3_hw *hw = dev->data->dev_private;
1123 	struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
1124 	struct vmxnet3_comp_ring *comp_ring;
1125 	struct vmxnet3_rx_data_ring *data_ring;
1126 	int size;
1127 	uint8_t i;
1128 	char mem_name[32];
1129 
1130 	PMD_INIT_FUNC_TRACE();
1131 
1132 	rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
1133 			  RTE_CACHE_LINE_SIZE);
1134 	if (rxq == NULL) {
1135 		PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
1136 		return -ENOMEM;
1137 	}
1138 
1139 	rxq->mp = mp;
1140 	rxq->queue_id = queue_idx;
1141 	rxq->port_id = dev->data->port_id;
1142 	rxq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1143 	rxq->hw = hw;
1144 	rxq->qid1 = queue_idx;
1145 	rxq->qid2 = queue_idx + hw->num_rx_queues;
1146 	rxq->data_ring_qid = queue_idx + 2 * hw->num_rx_queues;
1147 	rxq->data_desc_size = hw->rxdata_desc_size;
1148 	rxq->stopped = TRUE;
1149 
1150 	ring0 = &rxq->cmd_ring[0];
1151 	ring1 = &rxq->cmd_ring[1];
1152 	comp_ring = &rxq->comp_ring;
1153 	data_ring = &rxq->data_ring;
1154 
1155 	/* Rx vmxnet rings length should be between 256-4096 */
1156 	if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
1157 		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
1158 		return -EINVAL;
1159 	} else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
1160 		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
1161 		return -EINVAL;
1162 	} else {
1163 		ring0->size = nb_desc;
1164 		ring0->size &= ~VMXNET3_RING_SIZE_MASK;
1165 		ring1->size = ring0->size;
1166 	}
1167 
1168 	comp_ring->size = ring0->size + ring1->size;
1169 	data_ring->size = ring0->size;
1170 
1171 	/* Rx vmxnet rings structure initialization */
1172 	ring0->next2fill = 0;
1173 	ring1->next2fill = 0;
1174 	ring0->next2comp = 0;
1175 	ring1->next2comp = 0;
1176 	ring0->gen = VMXNET3_INIT_GEN;
1177 	ring1->gen = VMXNET3_INIT_GEN;
1178 	comp_ring->next2proc = 0;
1179 	comp_ring->gen = VMXNET3_INIT_GEN;
1180 
1181 	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
1182 	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1183 	if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size)
1184 		size += rxq->data_desc_size * data_ring->size;
1185 
1186 	mz = rte_eth_dma_zone_reserve(dev, "rxdesc", queue_idx, size,
1187 				      VMXNET3_RING_BA_ALIGN, socket_id);
1188 	if (mz == NULL) {
1189 		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1190 		return -ENOMEM;
1191 	}
1192 	rxq->mz = mz;
1193 	memset(mz->addr, 0, mz->len);
1194 
1195 	/* cmd_ring0 initialization */
1196 	ring0->base = mz->addr;
1197 	ring0->basePA = mz->iova;
1198 
1199 	/* cmd_ring1 initialization */
1200 	ring1->base = ring0->base + ring0->size;
1201 	ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
1202 
1203 	/* comp_ring initialization */
1204 	comp_ring->base = ring1->base + ring1->size;
1205 	comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
1206 		ring1->size;
1207 
1208 	/* data_ring initialization */
1209 	if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size) {
1210 		data_ring->base =
1211 			(uint8_t *)(comp_ring->base + comp_ring->size);
1212 		data_ring->basePA = comp_ring->basePA +
1213 			sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1214 	}
1215 
1216 	/* cmd_ring0-cmd_ring1 buf_info allocation */
1217 	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
1218 
1219 		ring = &rxq->cmd_ring[i];
1220 		ring->rid = i;
1221 		snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
1222 
1223 		ring->buf_info = rte_zmalloc(mem_name,
1224 					     ring->size * sizeof(vmxnet3_buf_info_t),
1225 					     RTE_CACHE_LINE_SIZE);
1226 		if (ring->buf_info == NULL) {
1227 			PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
1228 			return -ENOMEM;
1229 		}
1230 	}
1231 
1232 	/* Update the data portion with rxq */
1233 	dev->data->rx_queues[queue_idx] = rxq;
1234 
1235 	return 0;
1236 }
1237 
1238 /*
1239  * Initializes Receive Unit
1240  * Load mbufs in rx queue in advance
1241  */
1242 int
1243 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
1244 {
1245 	struct vmxnet3_hw *hw = dev->data->dev_private;
1246 
1247 	int i, ret;
1248 	uint8_t j;
1249 
1250 	PMD_INIT_FUNC_TRACE();
1251 
1252 	for (i = 0; i < hw->num_rx_queues; i++) {
1253 		vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
1254 
1255 		for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
1256 			/* Passing 0 as alloc_num will allocate full ring */
1257 			ret = vmxnet3_post_rx_bufs(rxq, j);
1258 			if (ret <= 0) {
1259 				PMD_INIT_LOG(ERR,
1260 					     "ERROR: Posting Rxq: %d buffers ring: %d",
1261 					     i, j);
1262 				return -ret;
1263 			}
1264 			/*
1265 			 * Updating device with the index:next2fill to fill the
1266 			 * mbufs for coming packets.
1267 			 */
1268 			if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1269 				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1270 						       rxq->cmd_ring[j].next2fill);
1271 			}
1272 		}
1273 		rxq->stopped = FALSE;
1274 		rxq->start_seg = NULL;
1275 	}
1276 
1277 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
1278 		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
1279 
1280 		txq->stopped = FALSE;
1281 	}
1282 
1283 	return 0;
1284 }
1285 
1286 static uint8_t rss_intel_key[40] = {
1287 	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1288 	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1289 	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1290 	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1291 	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1292 };
1293 
1294 /*
1295  * Configure RSS feature
1296  */
1297 int
1298 vmxnet3_rss_configure(struct rte_eth_dev *dev)
1299 {
1300 	struct vmxnet3_hw *hw = dev->data->dev_private;
1301 	struct VMXNET3_RSSConf *dev_rss_conf;
1302 	struct rte_eth_rss_conf *port_rss_conf;
1303 	uint64_t rss_hf;
1304 	uint8_t i, j;
1305 
1306 	PMD_INIT_FUNC_TRACE();
1307 
1308 	dev_rss_conf = hw->rss_conf;
1309 	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1310 
1311 	/* loading hashFunc */
1312 	dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
1313 	/* loading hashKeySize */
1314 	dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
1315 	/* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128)*/
1316 	dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
1317 
1318 	if (port_rss_conf->rss_key == NULL) {
1319 		/* Default hash key */
1320 		port_rss_conf->rss_key = rss_intel_key;
1321 	}
1322 
1323 	/* loading hashKey */
1324 	memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
1325 	       dev_rss_conf->hashKeySize);
1326 
1327 	/* loading indTable */
1328 	for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1329 		if (j == dev->data->nb_rx_queues)
1330 			j = 0;
1331 		dev_rss_conf->indTable[i] = j;
1332 	}
1333 
1334 	/* loading hashType */
1335 	dev_rss_conf->hashType = 0;
1336 	rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1337 	if (rss_hf & ETH_RSS_IPV4)
1338 		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1339 	if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1340 		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1341 	if (rss_hf & ETH_RSS_IPV6)
1342 		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1343 	if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1344 		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1345 
1346 	return VMXNET3_SUCCESS;
1347 }
1348