xref: /dpdk/drivers/net/qede/qede_rxtx.c (revision d80e42cce4c7017ed8c99dabb8ae444a492acc1c)
1 /*
2  * Copyright (c) 2016 - 2018 Cavium Inc.
3  * All rights reserved.
4  * www.cavium.com
5  *
6  * See LICENSE.qede_pmd for copyright and licensing details.
7  */
8 
9 #include <rte_net.h>
10 #include "qede_rxtx.h"
11 
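/* Allocate a fresh mbuf from the queue's mempool, program its default data
 * IOVA into the next Rx BD and advance the software producer index.
 * Returns 0 on success or -ENOMEM when the mempool is exhausted.
 */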
12 static inline int qede_alloc_rx_buffer(struct qede_rx_queue *rxq)
13 {
14 	struct rte_mbuf *new_mb = NULL;
15 	struct eth_rx_bd *rx_bd;
16 	dma_addr_t mapping;
17 	uint16_t idx = rxq->sw_rx_prod & NUM_RX_BDS(rxq);
18 
19 	new_mb = rte_mbuf_raw_alloc(rxq->mb_pool);
20 	if (unlikely(!new_mb)) {
21 		PMD_RX_LOG(ERR, rxq,
22 			   "Failed to allocate rx buffer "
23 			   "sw_rx_prod %u sw_rx_cons %u mp avail %u mp in-use %u",
24 			   idx, rxq->sw_rx_cons & NUM_RX_BDS(rxq),
25 			   rte_mempool_avail_count(rxq->mb_pool),
26 			   rte_mempool_in_use_count(rxq->mb_pool));
27 		return -ENOMEM;
28 	}
29 	rxq->sw_rx_ring[idx].mbuf = new_mb;
30 	rxq->sw_rx_ring[idx].page_offset = 0;
31 	mapping = rte_mbuf_data_iova_default(new_mb);
32 	/* Advance PROD and get BD pointer */
33 	rx_bd = (struct eth_rx_bd *)ecore_chain_produce(&rxq->rx_bd_ring);
34 	rx_bd->addr.hi = rte_cpu_to_le_32(U64_HI(mapping));
35 	rx_bd->addr.lo = rte_cpu_to_le_32(U64_LO(mapping));
36 	rxq->sw_rx_prod++;
37 	return 0;
38 }
39 
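/* Rx queue setup: allocates the qede_rx_queue structure, the parallel
 * software ring that tracks posted mbufs, the FW Rx BD ring and the Rx
 * completion ring, and derives the Rx buffer size (forcing scatter-gather
 * mode when the configured max packet length does not fit in one buffer).
 */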
40 int
41 qede_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
42 		    uint16_t nb_desc, unsigned int socket_id,
43 		    __rte_unused const struct rte_eth_rxconf *rx_conf,
44 		    struct rte_mempool *mp)
45 {
46 	struct qede_dev *qdev = QEDE_INIT_QDEV(dev);
47 	struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
48 	struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
49 	struct qede_rx_queue *rxq;
50 	uint16_t max_rx_pkt_len;
51 	uint16_t bufsz;
52 	size_t size;
53 	int rc;
54 
55 	PMD_INIT_FUNC_TRACE(edev);
56 
57 	/* Note: Ring size/align is controlled by struct rte_eth_desc_lim */
58 	if (!rte_is_power_of_2(nb_desc)) {
59 		DP_ERR(edev, "Ring size %u is not power of 2\n",
60 			  nb_desc);
61 		return -EINVAL;
62 	}
63 
64 	/* Free memory prior to re-allocation if needed... */
65 	if (dev->data->rx_queues[queue_idx] != NULL) {
66 		qede_rx_queue_release(dev->data->rx_queues[queue_idx]);
67 		dev->data->rx_queues[queue_idx] = NULL;
68 	}
69 
70 	/* First allocate the rx queue data structure */
71 	rxq = rte_zmalloc_socket("qede_rx_queue", sizeof(struct qede_rx_queue),
72 				 RTE_CACHE_LINE_SIZE, socket_id);
73 
74 	if (!rxq) {
75 		DP_ERR(edev, "Unable to allocate memory for rxq on socket %u",
76 			  socket_id);
77 		return -ENOMEM;
78 	}
79 
80 	rxq->qdev = qdev;
81 	rxq->mb_pool = mp;
82 	rxq->nb_rx_desc = nb_desc;
83 	rxq->queue_id = queue_idx;
84 	rxq->port_id = dev->data->port_id;
85 
86 	max_rx_pkt_len = (uint16_t)rxmode->max_rx_pkt_len;
87 
88 	/* Fix up RX buffer size */
89 	bufsz = (uint16_t)rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM;
90 	if ((rxmode->offloads & DEV_RX_OFFLOAD_SCATTER)	||
91 	    (max_rx_pkt_len + QEDE_ETH_OVERHEAD) > bufsz) {
92 		if (!dev->data->scattered_rx) {
93 			DP_INFO(edev, "Forcing scatter-gather mode\n");
94 			dev->data->scattered_rx = 1;
95 		}
96 	}
97 
98 	if (dev->data->scattered_rx)
99 		rxq->rx_buf_size = bufsz + ETHER_HDR_LEN +
100 				   ETHER_CRC_LEN + QEDE_ETH_OVERHEAD;
101 	else
102 		rxq->rx_buf_size = max_rx_pkt_len + QEDE_ETH_OVERHEAD;
103 	/* Align to cache-line size if needed */
104 	rxq->rx_buf_size = QEDE_CEIL_TO_CACHE_LINE_SIZE(rxq->rx_buf_size);
105 
106 	DP_INFO(edev, "mtu %u mbufsz %u bd_max_bytes %u scatter_mode %d\n",
107 		qdev->mtu, bufsz, rxq->rx_buf_size, dev->data->scattered_rx);
108 
109 	/* Allocate the parallel driver ring for Rx buffers */
110 	size = sizeof(*rxq->sw_rx_ring) * rxq->nb_rx_desc;
111 	rxq->sw_rx_ring = rte_zmalloc_socket("sw_rx_ring", size,
112 					     RTE_CACHE_LINE_SIZE, socket_id);
113 	if (!rxq->sw_rx_ring) {
114 		DP_ERR(edev, "Memory allocation fails for sw_rx_ring on"
115 		       " socket %u\n", socket_id);
116 		rte_free(rxq);
117 		return -ENOMEM;
118 	}
119 
120 	/* Allocate FW Rx ring  */
121 	rc = qdev->ops->common->chain_alloc(edev,
122 					    ECORE_CHAIN_USE_TO_CONSUME_PRODUCE,
123 					    ECORE_CHAIN_MODE_NEXT_PTR,
124 					    ECORE_CHAIN_CNT_TYPE_U16,
125 					    rxq->nb_rx_desc,
126 					    sizeof(struct eth_rx_bd),
127 					    &rxq->rx_bd_ring,
128 					    NULL);
129 
130 	if (rc != ECORE_SUCCESS) {
131 		DP_ERR(edev, "Memory allocation fails for RX BD ring"
132 		       " on socket %u\n", socket_id);
133 		rte_free(rxq->sw_rx_ring);
134 		rte_free(rxq);
135 		return -ENOMEM;
136 	}
137 
138 	/* Allocate FW completion ring */
139 	rc = qdev->ops->common->chain_alloc(edev,
140 					    ECORE_CHAIN_USE_TO_CONSUME,
141 					    ECORE_CHAIN_MODE_PBL,
142 					    ECORE_CHAIN_CNT_TYPE_U16,
143 					    rxq->nb_rx_desc,
144 					    sizeof(union eth_rx_cqe),
145 					    &rxq->rx_comp_ring,
146 					    NULL);
147 
148 	if (rc != ECORE_SUCCESS) {
149 		DP_ERR(edev, "Memory allocation fails for RX CQE ring"
150 		       " on socket %u\n", socket_id);
151 		qdev->ops->common->chain_free(edev, &rxq->rx_bd_ring);
152 		rte_free(rxq->sw_rx_ring);
153 		rte_free(rxq);
154 		return -ENOMEM;
155 	}
156 
157 	dev->data->rx_queues[queue_idx] = rxq;
158 	qdev->fp_array[queue_idx].rxq = rxq;
159 
160 	DP_INFO(edev, "rxq %d num_desc %u rx_buf_size=%u socket %u\n",
161 		  queue_idx, nb_desc, rxq->rx_buf_size, socket_id);
162 
163 	return 0;
164 }
165 
166 static void
167 qede_rx_queue_reset(__rte_unused struct qede_dev *qdev,
168 		    struct qede_rx_queue *rxq)
169 {
170 	DP_INFO(&qdev->edev, "Reset RX queue %u\n", rxq->queue_id);
171 	ecore_chain_reset(&rxq->rx_bd_ring);
172 	ecore_chain_reset(&rxq->rx_comp_ring);
173 	rxq->sw_rx_prod = 0;
174 	rxq->sw_rx_cons = 0;
175 	*rxq->hw_cons_ptr = 0;
176 }
177 
178 static void qede_rx_queue_release_mbufs(struct qede_rx_queue *rxq)
179 {
180 	uint16_t i;
181 
182 	if (rxq->sw_rx_ring) {
183 		for (i = 0; i < rxq->nb_rx_desc; i++) {
184 			if (rxq->sw_rx_ring[i].mbuf) {
185 				rte_pktmbuf_free(rxq->sw_rx_ring[i].mbuf);
186 				rxq->sw_rx_ring[i].mbuf = NULL;
187 			}
188 		}
189 	}
190 }
191 
192 void qede_rx_queue_release(void *rx_queue)
193 {
194 	struct qede_rx_queue *rxq = rx_queue;
195 
196 	if (rxq) {
197 		qede_rx_queue_release_mbufs(rxq);
198 		rte_free(rxq->sw_rx_ring);
199 		rte_free(rxq);
200 	}
201 }
202 
203 /* Stops a given RX queue in the HW */
204 static int qede_rx_queue_stop(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id)
205 {
206 	struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
207 	struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
208 	struct ecore_hwfn *p_hwfn;
209 	struct qede_rx_queue *rxq;
210 	int hwfn_index;
211 	int rc;
212 
213 	if (rx_queue_id < eth_dev->data->nb_rx_queues) {
214 		rxq = eth_dev->data->rx_queues[rx_queue_id];
215 		hwfn_index = rx_queue_id % edev->num_hwfns;
216 		p_hwfn = &edev->hwfns[hwfn_index];
217 		rc = ecore_eth_rx_queue_stop(p_hwfn, rxq->handle,
218 				true, false);
219 		if (rc != ECORE_SUCCESS) {
220 			DP_ERR(edev, "RX queue %u stop fails\n", rx_queue_id);
221 			return -1;
222 		}
223 		qede_rx_queue_release_mbufs(rxq);
224 		qede_rx_queue_reset(qdev, rxq);
225 		eth_dev->data->rx_queue_state[rx_queue_id] =
226 			RTE_ETH_QUEUE_STATE_STOPPED;
227 		DP_INFO(edev, "RX queue %u stopped\n", rx_queue_id);
228 	} else {
229 		DP_ERR(edev, "RX queue %u is not in range\n", rx_queue_id);
230 		rc = -EINVAL;
231 	}
232 
233 	return rc;
234 }
235 
236 int
237 qede_tx_queue_setup(struct rte_eth_dev *dev,
238 		    uint16_t queue_idx,
239 		    uint16_t nb_desc,
240 		    unsigned int socket_id,
241 		    const struct rte_eth_txconf *tx_conf)
242 {
243 	struct qede_dev *qdev = dev->data->dev_private;
244 	struct ecore_dev *edev = &qdev->edev;
245 	struct qede_tx_queue *txq;
246 	int rc;
247 
248 	PMD_INIT_FUNC_TRACE(edev);
249 
250 	if (!rte_is_power_of_2(nb_desc)) {
251 		DP_ERR(edev, "Ring size %u is not power of 2\n",
252 		       nb_desc);
253 		return -EINVAL;
254 	}
255 
256 	/* Free memory prior to re-allocation if needed... */
257 	if (dev->data->tx_queues[queue_idx] != NULL) {
258 		qede_tx_queue_release(dev->data->tx_queues[queue_idx]);
259 		dev->data->tx_queues[queue_idx] = NULL;
260 	}
261 
262 	txq = rte_zmalloc_socket("qede_tx_queue", sizeof(struct qede_tx_queue),
263 				 RTE_CACHE_LINE_SIZE, socket_id);
264 
265 	if (txq == NULL) {
266 		DP_ERR(edev,
267 		       "Unable to allocate memory for txq on socket %u",
268 		       socket_id);
269 		return -ENOMEM;
270 	}
271 
272 	txq->nb_tx_desc = nb_desc;
273 	txq->qdev = qdev;
274 	txq->port_id = dev->data->port_id;
275 
276 	rc = qdev->ops->common->chain_alloc(edev,
277 					    ECORE_CHAIN_USE_TO_CONSUME_PRODUCE,
278 					    ECORE_CHAIN_MODE_PBL,
279 					    ECORE_CHAIN_CNT_TYPE_U16,
280 					    txq->nb_tx_desc,
281 					    sizeof(union eth_tx_bd_types),
282 					    &txq->tx_pbl,
283 					    NULL);
284 	if (rc != ECORE_SUCCESS) {
285 		DP_ERR(edev,
286 		       "Unable to allocate memory for txbd ring on socket %u",
287 		       socket_id);
288 		qede_tx_queue_release(txq);
289 		return -ENOMEM;
290 	}
291 
292 	/* Allocate software ring */
293 	txq->sw_tx_ring = rte_zmalloc_socket("txq->sw_tx_ring",
294 					     (sizeof(struct qede_tx_entry) *
295 					      txq->nb_tx_desc),
296 					     RTE_CACHE_LINE_SIZE, socket_id);
297 
298 	if (!txq->sw_tx_ring) {
299 		DP_ERR(edev,
300 		       "Unable to allocate memory for sw tx ring on socket %u",
301 		       socket_id);
302 		qdev->ops->common->chain_free(edev, &txq->tx_pbl);
303 		qede_tx_queue_release(txq);
304 		return -ENOMEM;
305 	}
306 
307 	txq->queue_id = queue_idx;
308 
309 	txq->nb_tx_avail = txq->nb_tx_desc;
310 
311 	txq->tx_free_thresh =
312 	    tx_conf->tx_free_thresh ? tx_conf->tx_free_thresh :
313 	    (txq->nb_tx_desc - QEDE_DEFAULT_TX_FREE_THRESH);
314 
315 	dev->data->tx_queues[queue_idx] = txq;
316 	qdev->fp_array[queue_idx].txq = txq;
317 
318 	DP_INFO(edev,
319 		  "txq %u num_desc %u tx_free_thresh %u socket %u\n",
320 		  queue_idx, nb_desc, txq->tx_free_thresh, socket_id);
321 
322 	return 0;
323 }
324 
325 static void
326 qede_tx_queue_reset(__rte_unused struct qede_dev *qdev,
327 		    struct qede_tx_queue *txq)
328 {
329 	DP_INFO(&qdev->edev, "Reset TX queue %u\n", txq->queue_id);
330 	ecore_chain_reset(&txq->tx_pbl);
331 	txq->sw_tx_cons = 0;
332 	txq->sw_tx_prod = 0;
333 	*txq->hw_cons_ptr = 0;
334 }
335 
336 static void qede_tx_queue_release_mbufs(struct qede_tx_queue *txq)
337 {
338 	uint16_t i;
339 
340 	if (txq->sw_tx_ring) {
341 		for (i = 0; i < txq->nb_tx_desc; i++) {
342 			if (txq->sw_tx_ring[i].mbuf) {
343 				rte_pktmbuf_free(txq->sw_tx_ring[i].mbuf);
344 				txq->sw_tx_ring[i].mbuf = NULL;
345 			}
346 		}
347 	}
348 }
349 
350 void qede_tx_queue_release(void *tx_queue)
351 {
352 	struct qede_tx_queue *txq = tx_queue;
353 
354 	if (txq) {
355 		qede_tx_queue_release_mbufs(txq);
356 		rte_free(txq->sw_tx_ring);
357 		rte_free(txq);
358 	}
359 }
360 
361 /* This function allocates fast-path status block memory */
362 static int
363 qede_alloc_mem_sb(struct qede_dev *qdev, struct ecore_sb_info *sb_info,
364 		  uint16_t sb_id)
365 {
366 	struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
367 	struct status_block_e4 *sb_virt;
368 	dma_addr_t sb_phys;
369 	int rc;
370 
371 	sb_virt = OSAL_DMA_ALLOC_COHERENT(edev, &sb_phys,
372 					  sizeof(struct status_block_e4));
373 	if (!sb_virt) {
374 		DP_ERR(edev, "Status block allocation failed\n");
375 		return -ENOMEM;
376 	}
377 	rc = qdev->ops->common->sb_init(edev, sb_info, sb_virt,
378 					sb_phys, sb_id);
379 	if (rc) {
380 		DP_ERR(edev, "Status block initialization failed\n");
381 		OSAL_DMA_FREE_COHERENT(edev, sb_virt, sb_phys,
382 				       sizeof(struct status_block_e4));
383 		return rc;
384 	}
385 
386 	return 0;
387 }
388 
389 int qede_alloc_fp_resc(struct qede_dev *qdev)
390 {
391 	struct ecore_dev *edev = &qdev->edev;
392 	struct qede_fastpath *fp;
393 	uint32_t num_sbs;
394 	uint16_t sb_idx;
395 
396 	if (IS_VF(edev))
397 		ecore_vf_get_num_sbs(ECORE_LEADING_HWFN(edev), &num_sbs);
398 	else
399 		num_sbs = ecore_cxt_get_proto_cid_count
400 			  (ECORE_LEADING_HWFN(edev), PROTOCOLID_ETH, NULL);
401 
402 	if (num_sbs == 0) {
403 		DP_ERR(edev, "No status blocks available\n");
404 		return -EINVAL;
405 	}
406 
407 	qdev->fp_array = rte_calloc("fp", QEDE_RXTX_MAX(qdev),
408 				sizeof(*qdev->fp_array), RTE_CACHE_LINE_SIZE);
409 
410 	if (!qdev->fp_array) {
411 		DP_ERR(edev, "fp array allocation failed\n");
412 		return -ENOMEM;
413 	}
414 
415 	memset((void *)qdev->fp_array, 0, QEDE_RXTX_MAX(qdev) *
416 			sizeof(*qdev->fp_array));
417 
418 	for (sb_idx = 0; sb_idx < QEDE_RXTX_MAX(qdev); sb_idx++) {
419 		fp = &qdev->fp_array[sb_idx];
420 		if (!fp)
421 			continue;
422 		fp->sb_info = rte_calloc("sb", 1, sizeof(struct ecore_sb_info),
423 				RTE_CACHE_LINE_SIZE);
424 		if (!fp->sb_info) {
425 			DP_ERR(edev, "FP sb_info allocation fails\n");
426 			return -1;
427 		}
428 		if (qede_alloc_mem_sb(qdev, fp->sb_info, sb_idx)) {
429 			DP_ERR(edev, "FP status block allocation fails\n");
430 			return -1;
431 		}
432 		DP_INFO(edev, "sb_info idx 0x%x initialized\n",
433 				fp->sb_info->igu_sb_id);
434 	}
435 
436 	return 0;
437 }
438 
439 void qede_dealloc_fp_resc(struct rte_eth_dev *eth_dev)
440 {
441 	struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
442 	struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
443 	struct qede_fastpath *fp;
444 	struct qede_rx_queue *rxq;
445 	struct qede_tx_queue *txq;
446 	uint16_t sb_idx;
447 	uint8_t i;
448 
449 	PMD_INIT_FUNC_TRACE(edev);
450 
451 	for (sb_idx = 0; sb_idx < QEDE_RXTX_MAX(qdev); sb_idx++) {
452 		fp = &qdev->fp_array[sb_idx];
453 		if (!fp)
454 			continue;
455 		if (fp->sb_info) {
456 			DP_INFO(edev, "Free sb_info index 0x%x\n",
457 				fp->sb_info->igu_sb_id);
458 			OSAL_DMA_FREE_COHERENT(edev, fp->sb_info->sb_virt,
459 				fp->sb_info->sb_phys,
460 				sizeof(struct status_block_e4));
461 			rte_free(fp->sb_info);
462 			fp->sb_info = NULL;
463 		}
464 	}
465 
466 	/* Free packet buffers and ring memories */
467 	for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
468 		if (eth_dev->data->rx_queues[i]) {
469 			rxq = eth_dev->data->rx_queues[i];
470 			qdev->ops->common->chain_free(edev,
471 						      &rxq->rx_bd_ring);
472 			qdev->ops->common->chain_free(edev,
473 						      &rxq->rx_comp_ring);
474 			qede_rx_queue_release(rxq);
475 			eth_dev->data->rx_queues[i] = NULL;
476 		}
477 	}
478 
479 	for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
480 		if (eth_dev->data->tx_queues[i]) {
481 			txq = eth_dev->data->tx_queues[i];
482 			qdev->ops->common->chain_free(edev,
483 						      &txq->tx_pbl);
484 			qede_tx_queue_release(txq);
485 			eth_dev->data->tx_queues[i] = NULL;
486 		}
487 	}
488 
489 	if (qdev->fp_array)
490 		rte_free(qdev->fp_array);
491 	qdev->fp_array = NULL;
492 }
493 
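/* Publish the Rx BD and CQE producer indexes to the device so the FW can
 * use the newly posted buffers. Both producers are written together as a
 * single eth_rx_prod_data structure into the device internal RAM.
 */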
494 static inline void
495 qede_update_rx_prod(__rte_unused struct qede_dev *edev,
496 		    struct qede_rx_queue *rxq)
497 {
498 	uint16_t bd_prod = ecore_chain_get_prod_idx(&rxq->rx_bd_ring);
499 	uint16_t cqe_prod = ecore_chain_get_prod_idx(&rxq->rx_comp_ring);
500 	struct eth_rx_prod_data rx_prods = { 0 };
501 
502 	/* Update producers */
503 	rx_prods.bd_prod = rte_cpu_to_le_16(bd_prod);
504 	rx_prods.cqe_prod = rte_cpu_to_le_16(cqe_prod);
505 
506 	/* Make sure that the BD and SGE data is updated before updating the
507 	 * producers since FW might read the BD/SGE right after the producer
508 	 * is updated.
509 	 */
510 	rte_wmb();
511 
512 	internal_ram_wr(rxq->hw_rxq_prod_addr, sizeof(rx_prods),
513 			(uint32_t *)&rx_prods);
514 
515 	/* This barrier synchronizes doorbell writes issued from more than one
516 	 * processor. It guarantees that the producer update reaches the device
517 	 * before the next poll of this queue begins (possibly on another
518 	 * lcore). Without it, a later doorbell could bypass this one on
519 	 * weakly ordered platforms.
520 	 */
521 	rte_wmb();
522 
523 	PMD_RX_LOG(DEBUG, rxq, "bd_prod %u  cqe_prod %u", bd_prod, cqe_prod);
524 }
525 
526 /* Starts a given RX queue in HW */
527 static int
528 qede_rx_queue_start(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id)
529 {
530 	struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
531 	struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
532 	struct ecore_queue_start_common_params params;
533 	struct ecore_rxq_start_ret_params ret_params;
534 	struct qede_rx_queue *rxq;
535 	struct qede_fastpath *fp;
536 	struct ecore_hwfn *p_hwfn;
537 	dma_addr_t p_phys_table;
538 	uint16_t page_cnt;
539 	uint16_t j;
540 	int hwfn_index;
541 	int rc;
542 
543 	if (rx_queue_id < eth_dev->data->nb_rx_queues) {
544 		fp = &qdev->fp_array[rx_queue_id];
545 		rxq = eth_dev->data->rx_queues[rx_queue_id];
546 		/* Allocate buffers for the Rx ring */
547 		for (j = 0; j < rxq->nb_rx_desc; j++) {
548 			rc = qede_alloc_rx_buffer(rxq);
549 			if (rc) {
550 				DP_ERR(edev, "RX buffer allocation failed"
551 						" for rxq = %u\n", rx_queue_id);
552 				return -ENOMEM;
553 			}
554 		}
555 		/* disable interrupts */
556 		ecore_sb_ack(fp->sb_info, IGU_INT_DISABLE, 0);
557 		/* Prepare ramrod */
558 		memset(&params, 0, sizeof(params));
559 		params.queue_id = rx_queue_id / edev->num_hwfns;
560 		params.vport_id = 0;
561 		params.stats_id = params.vport_id;
562 		params.p_sb = fp->sb_info;
563 		DP_INFO(edev, "rxq %u igu_sb_id 0x%x\n",
564 				fp->rxq->queue_id, fp->sb_info->igu_sb_id);
565 		params.sb_idx = RX_PI;
566 		hwfn_index = rx_queue_id % edev->num_hwfns;
567 		p_hwfn = &edev->hwfns[hwfn_index];
568 		p_phys_table = ecore_chain_get_pbl_phys(&fp->rxq->rx_comp_ring);
569 		page_cnt = ecore_chain_get_page_cnt(&fp->rxq->rx_comp_ring);
570 		memset(&ret_params, 0, sizeof(ret_params));
571 		rc = ecore_eth_rx_queue_start(p_hwfn,
572 				p_hwfn->hw_info.opaque_fid,
573 				&params, fp->rxq->rx_buf_size,
574 				fp->rxq->rx_bd_ring.p_phys_addr,
575 				p_phys_table, page_cnt,
576 				&ret_params);
577 		if (rc) {
578 			DP_ERR(edev, "RX queue %u could not be started, rc = %d\n",
579 					rx_queue_id, rc);
580 			return -1;
581 		}
582 		/* Update with the returned parameters */
583 		fp->rxq->hw_rxq_prod_addr = ret_params.p_prod;
584 		fp->rxq->handle = ret_params.p_handle;
585 
586 		fp->rxq->hw_cons_ptr = &fp->sb_info->sb_virt->pi_array[RX_PI];
587 		qede_update_rx_prod(qdev, fp->rxq);
588 		eth_dev->data->rx_queue_state[rx_queue_id] =
589 			RTE_ETH_QUEUE_STATE_STARTED;
590 		DP_INFO(edev, "RX queue %u started\n", rx_queue_id);
591 	} else {
592 		DP_ERR(edev, "RX queue %u is not in range\n", rx_queue_id);
593 		rc = -EINVAL;
594 	}
595 
596 	return rc;
597 }
598 
599 static int
600 qede_tx_queue_start(struct rte_eth_dev *eth_dev, uint16_t tx_queue_id)
601 {
602 	struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
603 	struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
604 	struct ecore_queue_start_common_params params;
605 	struct ecore_txq_start_ret_params ret_params;
606 	struct ecore_hwfn *p_hwfn;
607 	dma_addr_t p_phys_table;
608 	struct qede_tx_queue *txq;
609 	struct qede_fastpath *fp;
610 	uint16_t page_cnt;
611 	int hwfn_index;
612 	int rc;
613 
614 	if (tx_queue_id < eth_dev->data->nb_tx_queues) {
615 		txq = eth_dev->data->tx_queues[tx_queue_id];
616 		fp = &qdev->fp_array[tx_queue_id];
617 		memset(&params, 0, sizeof(params));
618 		params.queue_id = tx_queue_id / edev->num_hwfns;
619 		params.vport_id = 0;
620 		params.stats_id = params.vport_id;
621 		params.p_sb = fp->sb_info;
622 		DP_INFO(edev, "txq %u igu_sb_id 0x%x\n",
623 				fp->txq->queue_id, fp->sb_info->igu_sb_id);
624 		params.sb_idx = TX_PI(0); /* tc = 0 */
625 		p_phys_table = ecore_chain_get_pbl_phys(&txq->tx_pbl);
626 		page_cnt = ecore_chain_get_page_cnt(&txq->tx_pbl);
627 		hwfn_index = tx_queue_id % edev->num_hwfns;
628 		p_hwfn = &edev->hwfns[hwfn_index];
629 		if (qdev->dev_info.is_legacy)
630 			fp->txq->is_legacy = true;
631 		rc = ecore_eth_tx_queue_start(p_hwfn,
632 				p_hwfn->hw_info.opaque_fid,
633 				&params, 0 /* tc */,
634 				p_phys_table, page_cnt,
635 				&ret_params);
636 		if (rc != ECORE_SUCCESS) {
637 			DP_ERR(edev, "TX queue %u couldn't be started, rc=%d\n",
638 					tx_queue_id, rc);
639 			return -1;
640 		}
641 		txq->doorbell_addr = ret_params.p_doorbell;
642 		txq->handle = ret_params.p_handle;
643 
644 		txq->hw_cons_ptr = &fp->sb_info->sb_virt->pi_array[TX_PI(0)];
645 		SET_FIELD(txq->tx_db.data.params, ETH_DB_DATA_DEST,
646 				DB_DEST_XCM);
647 		SET_FIELD(txq->tx_db.data.params, ETH_DB_DATA_AGG_CMD,
648 				DB_AGG_CMD_SET);
649 		SET_FIELD(txq->tx_db.data.params,
650 				ETH_DB_DATA_AGG_VAL_SEL,
651 				DQ_XCM_ETH_TX_BD_PROD_CMD);
652 		txq->tx_db.data.agg_flags = DQ_XCM_ETH_DQ_CF_CMD;
653 		eth_dev->data->tx_queue_state[tx_queue_id] =
654 			RTE_ETH_QUEUE_STATE_STARTED;
655 		DP_INFO(edev, "TX queue %u started\n", tx_queue_id);
656 	} else {
657 		DP_ERR(edev, "TX queue %u is not in range\n", tx_queue_id);
658 		rc = -EINVAL;
659 	}
660 
661 	return rc;
662 }
663 
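/* Release one completed Tx packet: consume one BD per segment from the PBL
 * chain, return the credits to nb_tx_avail and free the mbuf chain tracked
 * in the software ring at the current consumer index.
 */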
664 static inline void
665 qede_free_tx_pkt(struct qede_tx_queue *txq)
666 {
667 	struct rte_mbuf *mbuf;
668 	uint16_t nb_segs;
669 	uint16_t idx;
670 
671 	idx = TX_CONS(txq);
672 	mbuf = txq->sw_tx_ring[idx].mbuf;
673 	if (mbuf) {
674 		nb_segs = mbuf->nb_segs;
675 		PMD_TX_LOG(DEBUG, txq, "nb_segs to free %u\n", nb_segs);
676 		while (nb_segs) {
677 			/* It's like consuming rxbuf in recv() */
678 			ecore_chain_consume(&txq->tx_pbl);
679 			txq->nb_tx_avail++;
680 			nb_segs--;
681 		}
682 		rte_pktmbuf_free(mbuf);
683 		txq->sw_tx_ring[idx].mbuf = NULL;
684 		txq->sw_tx_cons++;
685 		PMD_TX_LOG(DEBUG, txq, "Freed tx packet\n");
686 	} else {
687 		ecore_chain_consume(&txq->tx_pbl);
688 		txq->nb_tx_avail++;
689 	}
690 }
691 
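/* Reclaim Tx descriptors for every packet completed by the FW, i.e. up to
 * the consumer index reported through the status block (hw_cons_ptr).
 */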
692 static inline void
693 qede_process_tx_compl(__rte_unused struct ecore_dev *edev,
694 		      struct qede_tx_queue *txq)
695 {
696 	uint16_t hw_bd_cons;
697 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX
698 	uint16_t sw_tx_cons;
699 #endif
700 
701 	rte_compiler_barrier();
702 	hw_bd_cons = rte_le_to_cpu_16(*txq->hw_cons_ptr);
703 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX
704 	sw_tx_cons = ecore_chain_get_cons_idx(&txq->tx_pbl);
705 	PMD_TX_LOG(DEBUG, txq, "Tx Completions = %u\n",
706 		   abs(hw_bd_cons - sw_tx_cons));
707 #endif
708 	while (hw_bd_cons !=  ecore_chain_get_cons_idx(&txq->tx_pbl))
709 		qede_free_tx_pkt(txq);
710 }
711 
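/* Poll Tx completions until the software producer and consumer indexes
 * converge. If the queue does not drain within ~1000 iterations of 1 ms
 * each, optionally request a drain from the management FW (MCP) once and
 * retry before reporting failure.
 */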
712 static int qede_drain_txq(struct qede_dev *qdev,
713 			  struct qede_tx_queue *txq, bool allow_drain)
714 {
715 	struct ecore_dev *edev = &qdev->edev;
716 	int rc, cnt = 1000;
717 
718 	while (txq->sw_tx_cons != txq->sw_tx_prod) {
719 		qede_process_tx_compl(edev, txq);
720 		if (!cnt) {
721 			if (allow_drain) {
722 				DP_ERR(edev, "Tx queue[%u] is stuck, "
723 					  "requesting MCP to drain\n",
724 					  txq->queue_id);
725 				rc = qdev->ops->common->drain(edev);
726 				if (rc)
727 					return rc;
728 				return qede_drain_txq(qdev, txq, false);
729 			}
730 			DP_ERR(edev, "Timeout waiting for tx queue[%d]: "
731 				  "PROD=%d, CONS=%d\n",
732 				  txq->queue_id, txq->sw_tx_prod,
733 				  txq->sw_tx_cons);
734 			return -1;
735 		}
736 		cnt--;
737 		DELAY(1000);
738 		rte_compiler_barrier();
739 	}
740 
741 	/* FW finished processing, wait for HW to transmit all tx packets */
742 	DELAY(2000);
743 
744 	return 0;
745 }
746 
747 /* Stops a given TX queue in the HW */
748 static int qede_tx_queue_stop(struct rte_eth_dev *eth_dev, uint16_t tx_queue_id)
749 {
750 	struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
751 	struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
752 	struct ecore_hwfn *p_hwfn;
753 	struct qede_tx_queue *txq;
754 	int hwfn_index;
755 	int rc;
756 
757 	if (tx_queue_id < eth_dev->data->nb_tx_queues) {
758 		txq = eth_dev->data->tx_queues[tx_queue_id];
759 		/* Drain txq */
760 		if (qede_drain_txq(qdev, txq, true))
761 			return -1; /* For the lack of retcodes */
762 		/* Stop txq */
763 		hwfn_index = tx_queue_id % edev->num_hwfns;
764 		p_hwfn = &edev->hwfns[hwfn_index];
765 		rc = ecore_eth_tx_queue_stop(p_hwfn, txq->handle);
766 		if (rc != ECORE_SUCCESS) {
767 			DP_ERR(edev, "TX queue %u stop fails\n", tx_queue_id);
768 			return -1;
769 		}
770 		qede_tx_queue_release_mbufs(txq);
771 		qede_tx_queue_reset(qdev, txq);
772 		eth_dev->data->tx_queue_state[tx_queue_id] =
773 			RTE_ETH_QUEUE_STATE_STOPPED;
774 		DP_INFO(edev, "TX queue %u stopped\n", tx_queue_id);
775 	} else {
776 		DP_ERR(edev, "TX queue %u is not in range\n", tx_queue_id);
777 		rc = -EINVAL;
778 	}
779 
780 	return rc;
781 }
782 
783 int qede_start_queues(struct rte_eth_dev *eth_dev)
784 {
785 	struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
786 	uint8_t id;
787 	int rc = -1;
788 
789 	for_each_rss(id) {
790 		rc = qede_rx_queue_start(eth_dev, id);
791 		if (rc != ECORE_SUCCESS)
792 			return -1;
793 	}
794 
795 	for_each_tss(id) {
796 		rc = qede_tx_queue_start(eth_dev, id);
797 		if (rc != ECORE_SUCCESS)
798 			return -1;
799 	}
800 
801 	return rc;
802 }
803 
804 void qede_stop_queues(struct rte_eth_dev *eth_dev)
805 {
806 	struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
807 	uint8_t id;
808 
809 	/* Stopping RX/TX queues */
810 	for_each_tss(id) {
811 		qede_tx_queue_stop(eth_dev, id);
812 	}
813 
814 	for_each_rss(id) {
815 		qede_rx_queue_stop(eth_dev, id);
816 	}
817 }
818 
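/* The helpers below decode the parsing flags reported by the FW in the Rx
 * CQE: tunnel presence, tunnel/inner L3 header errors and whether an L4
 * checksum was calculated and failed validation.
 */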
819 static inline bool qede_tunn_exist(uint16_t flag)
820 {
821 	return !!((PARSING_AND_ERR_FLAGS_TUNNELEXIST_MASK <<
822 		    PARSING_AND_ERR_FLAGS_TUNNELEXIST_SHIFT) & flag);
823 }
824 
825 static inline uint8_t qede_check_tunn_csum_l3(uint16_t flag)
826 {
827 	return !!((PARSING_AND_ERR_FLAGS_TUNNELIPHDRERROR_MASK <<
828 		PARSING_AND_ERR_FLAGS_TUNNELIPHDRERROR_SHIFT) & flag);
829 }
830 
831 /*
832  * qede_check_tunn_csum_l4:
833  * Returns:
834  * 1 : If the tunnel L4 checksum was computed by HW AND its validation failed.
835  * 0 : Otherwise
836  */
837 static inline uint8_t qede_check_tunn_csum_l4(uint16_t flag)
838 {
839 	if ((PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMWASCALCULATED_MASK <<
840 	     PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMWASCALCULATED_SHIFT) & flag)
841 		return !!((PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMERROR_MASK <<
842 			PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMERROR_SHIFT) & flag);
843 
844 	return 0;
845 }
846 
847 static inline uint8_t qede_check_notunn_csum_l4(uint16_t flag)
848 {
849 	if ((PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_MASK <<
850 	     PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_SHIFT) & flag)
851 		return !!((PARSING_AND_ERR_FLAGS_L4CHKSMERROR_MASK <<
852 			   PARSING_AND_ERR_FLAGS_L4CHKSMERROR_SHIFT) & flag);
853 
854 	return 0;
855 }
856 
857 /* Returns outer L2, L3 and L4 packet_type for tunneled packets */
858 static inline uint32_t qede_rx_cqe_to_pkt_type_outer(struct rte_mbuf *m)
859 {
860 	uint32_t packet_type = RTE_PTYPE_UNKNOWN;
861 	struct ether_hdr *eth_hdr;
862 	struct ipv4_hdr *ipv4_hdr;
863 	struct ipv6_hdr *ipv6_hdr;
864 	struct vlan_hdr *vlan_hdr;
865 	uint16_t ethertype;
866 	bool vlan_tagged = 0;
867 	uint16_t len;
868 
869 	eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
870 	len = sizeof(struct ether_hdr);
871 	ethertype = rte_cpu_to_be_16(eth_hdr->ether_type);
872 
873 	 /* Note: Valid only if VLAN stripping is disabled */
874 	if (ethertype == ETHER_TYPE_VLAN) {
875 		vlan_tagged = 1;
876 		vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
877 		len += sizeof(struct vlan_hdr);
878 		ethertype = rte_cpu_to_be_16(vlan_hdr->eth_proto);
879 	}
880 
881 	if (ethertype == ETHER_TYPE_IPv4) {
882 		packet_type |= RTE_PTYPE_L3_IPV4;
883 		ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, len);
884 		if (ipv4_hdr->next_proto_id == IPPROTO_TCP)
885 			packet_type |= RTE_PTYPE_L4_TCP;
886 		else if (ipv4_hdr->next_proto_id == IPPROTO_UDP)
887 			packet_type |= RTE_PTYPE_L4_UDP;
888 	} else if (ethertype == ETHER_TYPE_IPv6) {
889 		packet_type |= RTE_PTYPE_L3_IPV6;
890 		ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *, len);
891 		if (ipv6_hdr->proto == IPPROTO_TCP)
892 			packet_type |= RTE_PTYPE_L4_TCP;
893 		else if (ipv6_hdr->proto == IPPROTO_UDP)
894 			packet_type |= RTE_PTYPE_L4_UDP;
895 	}
896 
897 	if (vlan_tagged)
898 		packet_type |= RTE_PTYPE_L2_ETHER_VLAN;
899 	else
900 		packet_type |= RTE_PTYPE_L2_ETHER;
901 
902 	return packet_type;
903 }
904 
905 static inline uint32_t qede_rx_cqe_to_pkt_type_inner(uint16_t flags)
906 {
907 	uint16_t val;
908 
909 	/* Lookup table */
910 	static const uint32_t
911 	ptype_lkup_tbl[QEDE_PKT_TYPE_MAX] __rte_cache_aligned = {
912 		[QEDE_PKT_TYPE_IPV4] = RTE_PTYPE_INNER_L3_IPV4		|
913 				       RTE_PTYPE_INNER_L2_ETHER,
914 		[QEDE_PKT_TYPE_IPV6] = RTE_PTYPE_INNER_L3_IPV6		|
915 				       RTE_PTYPE_INNER_L2_ETHER,
916 		[QEDE_PKT_TYPE_IPV4_TCP] = RTE_PTYPE_INNER_L3_IPV4	|
917 					   RTE_PTYPE_INNER_L4_TCP	|
918 					   RTE_PTYPE_INNER_L2_ETHER,
919 		[QEDE_PKT_TYPE_IPV6_TCP] = RTE_PTYPE_INNER_L3_IPV6	|
920 					   RTE_PTYPE_INNER_L4_TCP	|
921 					   RTE_PTYPE_INNER_L2_ETHER,
922 		[QEDE_PKT_TYPE_IPV4_UDP] = RTE_PTYPE_INNER_L3_IPV4	|
923 					   RTE_PTYPE_INNER_L4_UDP	|
924 					   RTE_PTYPE_INNER_L2_ETHER,
925 		[QEDE_PKT_TYPE_IPV6_UDP] = RTE_PTYPE_INNER_L3_IPV6	|
926 					   RTE_PTYPE_INNER_L4_UDP	|
927 					   RTE_PTYPE_INNER_L2_ETHER,
928 		/* Frags with no VLAN */
929 		[QEDE_PKT_TYPE_IPV4_FRAG] = RTE_PTYPE_INNER_L3_IPV4	|
930 					    RTE_PTYPE_INNER_L4_FRAG	|
931 					    RTE_PTYPE_INNER_L2_ETHER,
932 		[QEDE_PKT_TYPE_IPV6_FRAG] = RTE_PTYPE_INNER_L3_IPV6	|
933 					    RTE_PTYPE_INNER_L4_FRAG	|
934 					    RTE_PTYPE_INNER_L2_ETHER,
935 		/* VLANs */
936 		[QEDE_PKT_TYPE_IPV4_VLAN] = RTE_PTYPE_INNER_L3_IPV4	|
937 					    RTE_PTYPE_INNER_L2_ETHER_VLAN,
938 		[QEDE_PKT_TYPE_IPV6_VLAN] = RTE_PTYPE_INNER_L3_IPV6	|
939 					    RTE_PTYPE_INNER_L2_ETHER_VLAN,
940 		[QEDE_PKT_TYPE_IPV4_TCP_VLAN] = RTE_PTYPE_INNER_L3_IPV4	|
941 						RTE_PTYPE_INNER_L4_TCP	|
942 						RTE_PTYPE_INNER_L2_ETHER_VLAN,
943 		[QEDE_PKT_TYPE_IPV6_TCP_VLAN] = RTE_PTYPE_INNER_L3_IPV6	|
944 						RTE_PTYPE_INNER_L4_TCP	|
945 						RTE_PTYPE_INNER_L2_ETHER_VLAN,
946 		[QEDE_PKT_TYPE_IPV4_UDP_VLAN] = RTE_PTYPE_INNER_L3_IPV4	|
947 						RTE_PTYPE_INNER_L4_UDP	|
948 						RTE_PTYPE_INNER_L2_ETHER_VLAN,
949 		[QEDE_PKT_TYPE_IPV6_UDP_VLAN] = RTE_PTYPE_INNER_L3_IPV6	|
950 						RTE_PTYPE_INNER_L4_UDP	|
951 						RTE_PTYPE_INNER_L2_ETHER_VLAN,
952 		/* Frags with VLAN */
953 		[QEDE_PKT_TYPE_IPV4_VLAN_FRAG] = RTE_PTYPE_INNER_L3_IPV4 |
954 						 RTE_PTYPE_INNER_L4_FRAG |
955 						 RTE_PTYPE_INNER_L2_ETHER_VLAN,
956 		[QEDE_PKT_TYPE_IPV6_VLAN_FRAG] = RTE_PTYPE_INNER_L3_IPV6 |
957 						 RTE_PTYPE_INNER_L4_FRAG |
958 						 RTE_PTYPE_INNER_L2_ETHER_VLAN,
959 	};
960 
961 	/* Bits (0..3) provide the L3/L4 protocol type */
962 	/* Bits (4,5) provide the frag and VLAN info */
963 	val = ((PARSING_AND_ERR_FLAGS_L3TYPE_MASK <<
964 	       PARSING_AND_ERR_FLAGS_L3TYPE_SHIFT) |
965 	       (PARSING_AND_ERR_FLAGS_L4PROTOCOL_MASK <<
966 		PARSING_AND_ERR_FLAGS_L4PROTOCOL_SHIFT) |
967 	       (PARSING_AND_ERR_FLAGS_IPV4FRAG_MASK <<
968 		PARSING_AND_ERR_FLAGS_IPV4FRAG_SHIFT) |
969 		(PARSING_AND_ERR_FLAGS_TAG8021QEXIST_MASK <<
970 		 PARSING_AND_ERR_FLAGS_TAG8021QEXIST_SHIFT)) & flags;
971 
972 	if (val < QEDE_PKT_TYPE_MAX)
973 		return ptype_lkup_tbl[val];
974 
975 	return RTE_PTYPE_UNKNOWN;
976 }
977 
978 static inline uint32_t qede_rx_cqe_to_pkt_type(uint16_t flags)
979 {
980 	uint16_t val;
981 
982 	/* Lookup table */
983 	static const uint32_t
984 	ptype_lkup_tbl[QEDE_PKT_TYPE_MAX] __rte_cache_aligned = {
985 		[QEDE_PKT_TYPE_IPV4] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L2_ETHER,
986 		[QEDE_PKT_TYPE_IPV6] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L2_ETHER,
987 		[QEDE_PKT_TYPE_IPV4_TCP] = RTE_PTYPE_L3_IPV4	|
988 					   RTE_PTYPE_L4_TCP	|
989 					   RTE_PTYPE_L2_ETHER,
990 		[QEDE_PKT_TYPE_IPV6_TCP] = RTE_PTYPE_L3_IPV6	|
991 					   RTE_PTYPE_L4_TCP	|
992 					   RTE_PTYPE_L2_ETHER,
993 		[QEDE_PKT_TYPE_IPV4_UDP] = RTE_PTYPE_L3_IPV4	|
994 					   RTE_PTYPE_L4_UDP	|
995 					   RTE_PTYPE_L2_ETHER,
996 		[QEDE_PKT_TYPE_IPV6_UDP] = RTE_PTYPE_L3_IPV6	|
997 					   RTE_PTYPE_L4_UDP	|
998 					   RTE_PTYPE_L2_ETHER,
999 		/* Frags with no VLAN */
1000 		[QEDE_PKT_TYPE_IPV4_FRAG] = RTE_PTYPE_L3_IPV4	|
1001 					    RTE_PTYPE_L4_FRAG	|
1002 					    RTE_PTYPE_L2_ETHER,
1003 		[QEDE_PKT_TYPE_IPV6_FRAG] = RTE_PTYPE_L3_IPV6	|
1004 					    RTE_PTYPE_L4_FRAG	|
1005 					    RTE_PTYPE_L2_ETHER,
1006 		/* VLANs */
1007 		[QEDE_PKT_TYPE_IPV4_VLAN] = RTE_PTYPE_L3_IPV4		|
1008 					    RTE_PTYPE_L2_ETHER_VLAN,
1009 		[QEDE_PKT_TYPE_IPV6_VLAN] = RTE_PTYPE_L3_IPV6		|
1010 					    RTE_PTYPE_L2_ETHER_VLAN,
1011 		[QEDE_PKT_TYPE_IPV4_TCP_VLAN] = RTE_PTYPE_L3_IPV4	|
1012 						RTE_PTYPE_L4_TCP	|
1013 						RTE_PTYPE_L2_ETHER_VLAN,
1014 		[QEDE_PKT_TYPE_IPV6_TCP_VLAN] = RTE_PTYPE_L3_IPV6	|
1015 						RTE_PTYPE_L4_TCP	|
1016 						RTE_PTYPE_L2_ETHER_VLAN,
1017 		[QEDE_PKT_TYPE_IPV4_UDP_VLAN] = RTE_PTYPE_L3_IPV4	|
1018 						RTE_PTYPE_L4_UDP	|
1019 						RTE_PTYPE_L2_ETHER_VLAN,
1020 		[QEDE_PKT_TYPE_IPV6_UDP_VLAN] = RTE_PTYPE_L3_IPV6	|
1021 						RTE_PTYPE_L4_UDP	|
1022 						RTE_PTYPE_L2_ETHER_VLAN,
1023 		/* Frags with VLAN */
1024 		[QEDE_PKT_TYPE_IPV4_VLAN_FRAG] = RTE_PTYPE_L3_IPV4	|
1025 						 RTE_PTYPE_L4_FRAG	|
1026 						 RTE_PTYPE_L2_ETHER_VLAN,
1027 		[QEDE_PKT_TYPE_IPV6_VLAN_FRAG] = RTE_PTYPE_L3_IPV6	|
1028 						 RTE_PTYPE_L4_FRAG	|
1029 						 RTE_PTYPE_L2_ETHER_VLAN,
1030 	};
1031 
1032 	/* Bits (0..3) provide the L3/L4 protocol type */
1033 	/* Bits (4,5) provide the frag and VLAN info */
1034 	val = ((PARSING_AND_ERR_FLAGS_L3TYPE_MASK <<
1035 	       PARSING_AND_ERR_FLAGS_L3TYPE_SHIFT) |
1036 	       (PARSING_AND_ERR_FLAGS_L4PROTOCOL_MASK <<
1037 		PARSING_AND_ERR_FLAGS_L4PROTOCOL_SHIFT) |
1038 	       (PARSING_AND_ERR_FLAGS_IPV4FRAG_MASK <<
1039 		PARSING_AND_ERR_FLAGS_IPV4FRAG_SHIFT) |
1040 		(PARSING_AND_ERR_FLAGS_TAG8021QEXIST_MASK <<
1041 		 PARSING_AND_ERR_FLAGS_TAG8021QEXIST_SHIFT)) & flags;
1042 
1043 	if (val < QEDE_PKT_TYPE_MAX)
1044 		return ptype_lkup_tbl[val];
1045 
1046 	return RTE_PTYPE_UNKNOWN;
1047 }
1048 
1049 static inline uint8_t
1050 qede_check_notunn_csum_l3(struct rte_mbuf *m, uint16_t flag)
1051 {
1052 	struct ipv4_hdr *ip;
1053 	uint16_t pkt_csum;
1054 	uint16_t calc_csum;
1055 	uint16_t val;
1056 
1057 	val = ((PARSING_AND_ERR_FLAGS_IPHDRERROR_MASK <<
1058 		PARSING_AND_ERR_FLAGS_IPHDRERROR_SHIFT) & flag);
1059 
1060 	if (unlikely(val)) {
1061 		m->packet_type = qede_rx_cqe_to_pkt_type(flag);
1062 		if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) {
1063 			ip = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
1064 					   sizeof(struct ether_hdr));
1065 			pkt_csum = ip->hdr_checksum;
1066 			ip->hdr_checksum = 0;
1067 			calc_csum = rte_ipv4_cksum(ip);
1068 			ip->hdr_checksum = pkt_csum;
1069 			return (calc_csum != pkt_csum);
1070 		} else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
1071 			return 1;
1072 		}
1073 	}
1074 	return 0;
1075 }
1076 
1077 static inline void qede_rx_bd_ring_consume(struct qede_rx_queue *rxq)
1078 {
1079 	ecore_chain_consume(&rxq->rx_bd_ring);
1080 	rxq->sw_rx_cons++;
1081 }
1082 
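/* Re-post the buffer at the software consumer position onto the Rx BD ring
 * at the producer position, so the same mbuf is reused instead of
 * allocating a replacement (used when a packet has to be dropped).
 */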
1083 static inline void
1084 qede_reuse_page(__rte_unused struct qede_dev *qdev,
1085 		struct qede_rx_queue *rxq, struct qede_rx_entry *curr_cons)
1086 {
1087 	struct eth_rx_bd *rx_bd_prod = ecore_chain_produce(&rxq->rx_bd_ring);
1088 	uint16_t idx = rxq->sw_rx_cons & NUM_RX_BDS(rxq);
1089 	struct qede_rx_entry *curr_prod;
1090 	dma_addr_t new_mapping;
1091 
1092 	curr_prod = &rxq->sw_rx_ring[idx];
1093 	*curr_prod = *curr_cons;
1094 
1095 	new_mapping = rte_mbuf_data_iova_default(curr_prod->mbuf) +
1096 		      curr_prod->page_offset;
1097 
1098 	rx_bd_prod->addr.hi = rte_cpu_to_le_32(U64_HI(new_mapping));
1099 	rx_bd_prod->addr.lo = rte_cpu_to_le_32(U64_LO(new_mapping));
1100 
1101 	rxq->sw_rx_prod++;
1102 }
1103 
1104 static inline void
1105 qede_recycle_rx_bd_ring(struct qede_rx_queue *rxq,
1106 			struct qede_dev *qdev, uint8_t count)
1107 {
1108 	struct qede_rx_entry *curr_cons;
1109 
1110 	for (; count > 0; count--) {
1111 		curr_cons = &rxq->sw_rx_ring[rxq->sw_rx_cons & NUM_RX_BDS(rxq)];
1112 		qede_reuse_page(qdev, rxq, curr_cons);
1113 		qede_rx_bd_ring_consume(rxq);
1114 	}
1115 }
1116 
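/* Common handling for TPA continue and end CQEs: when len_list[0] is
 * non-zero, append the buffer it describes to the aggregation's mbuf chain,
 * consume its BD and post a replacement Rx buffer.
 */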
1117 static inline void
1118 qede_rx_process_tpa_cmn_cont_end_cqe(__rte_unused struct qede_dev *qdev,
1119 				     struct qede_rx_queue *rxq,
1120 				     uint8_t agg_index, uint16_t len)
1121 {
1122 	struct qede_agg_info *tpa_info;
1123 	struct rte_mbuf *curr_frag; /* Pointer to currently filled TPA seg */
1124 	uint16_t cons_idx;
1125 
1126 	/* Under certain conditions the FW may not consume an additional or
1127 	 * new BD, so the decision to consume the BD must be based on
1128 	 * len_list[0].
1129 	 */
1130 	if (rte_le_to_cpu_16(len)) {
1131 		tpa_info = &rxq->tpa_info[agg_index];
1132 		cons_idx = rxq->sw_rx_cons & NUM_RX_BDS(rxq);
1133 		curr_frag = rxq->sw_rx_ring[cons_idx].mbuf;
1134 		assert(curr_frag);
1135 		curr_frag->nb_segs = 1;
1136 		curr_frag->pkt_len = rte_le_to_cpu_16(len);
1137 		curr_frag->data_len = curr_frag->pkt_len;
1138 		tpa_info->tpa_tail->next = curr_frag;
1139 		tpa_info->tpa_tail = curr_frag;
1140 		qede_rx_bd_ring_consume(rxq);
1141 		if (unlikely(qede_alloc_rx_buffer(rxq) != 0)) {
1142 			PMD_RX_LOG(ERR, rxq, "mbuf allocation fails\n");
1143 			rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1144 			rxq->rx_alloc_errors++;
1145 		}
1146 	}
1147 }
1148 
1149 static inline void
1150 qede_rx_process_tpa_cont_cqe(struct qede_dev *qdev,
1151 			     struct qede_rx_queue *rxq,
1152 			     struct eth_fast_path_rx_tpa_cont_cqe *cqe)
1153 {
1154 	PMD_RX_LOG(INFO, rxq, "TPA cont[%d] - len [%d]\n",
1155 		   cqe->tpa_agg_index, rte_le_to_cpu_16(cqe->len_list[0]));
1156 	/* only len_list[0] will have value */
1157 	qede_rx_process_tpa_cmn_cont_end_cqe(qdev, rxq, cqe->tpa_agg_index,
1158 					     cqe->len_list[0]);
1159 }
1160 
1161 static inline void
1162 qede_rx_process_tpa_end_cqe(struct qede_dev *qdev,
1163 			    struct qede_rx_queue *rxq,
1164 			    struct eth_fast_path_rx_tpa_end_cqe *cqe)
1165 {
1166 	struct rte_mbuf *rx_mb; /* Pointer to head of the chained agg */
1167 
1168 	qede_rx_process_tpa_cmn_cont_end_cqe(qdev, rxq, cqe->tpa_agg_index,
1169 					     cqe->len_list[0]);
1170 	/* Update total length and frags based on end TPA */
1171 	rx_mb = rxq->tpa_info[cqe->tpa_agg_index].tpa_head;
1172 	/* TODO:  Add Sanity Checks */
1173 	rx_mb->nb_segs = cqe->num_of_bds;
1174 	rx_mb->pkt_len = cqe->total_packet_len;
1175 
1176 	PMD_RX_LOG(INFO, rxq, "TPA End[%d] reason %d cqe_len %d nb_segs %d"
1177 		   " pkt_len %d\n", cqe->tpa_agg_index, cqe->end_reason,
1178 		   rte_le_to_cpu_16(cqe->len_list[0]), rx_mb->nb_segs,
1179 		   rx_mb->pkt_len);
1180 }
1181 
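/* Map the tunnel parsing flags from the CQE (tunnel type plus next
 * protocol) to an RTE tunnel packet type using a lookup table.
 */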
1182 static inline uint32_t qede_rx_cqe_to_tunn_pkt_type(uint16_t flags)
1183 {
1184 	uint32_t val;
1185 
1186 	/* Lookup table */
1187 	static const uint32_t
1188 	ptype_tunn_lkup_tbl[QEDE_PKT_TYPE_TUNN_MAX_TYPE] __rte_cache_aligned = {
1189 		[QEDE_PKT_TYPE_UNKNOWN] = RTE_PTYPE_UNKNOWN,
1190 		[QEDE_PKT_TYPE_TUNN_GENEVE] = RTE_PTYPE_TUNNEL_GENEVE,
1191 		[QEDE_PKT_TYPE_TUNN_GRE] = RTE_PTYPE_TUNNEL_GRE,
1192 		[QEDE_PKT_TYPE_TUNN_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN,
1193 		[QEDE_PKT_TYPE_TUNN_L2_TENID_NOEXIST_GENEVE] =
1194 				RTE_PTYPE_TUNNEL_GENEVE,
1195 		[QEDE_PKT_TYPE_TUNN_L2_TENID_NOEXIST_GRE] =
1196 				RTE_PTYPE_TUNNEL_GRE,
1197 		[QEDE_PKT_TYPE_TUNN_L2_TENID_NOEXIST_VXLAN] =
1198 				RTE_PTYPE_TUNNEL_VXLAN,
1199 		[QEDE_PKT_TYPE_TUNN_L2_TENID_EXIST_GENEVE] =
1200 				RTE_PTYPE_TUNNEL_GENEVE,
1201 		[QEDE_PKT_TYPE_TUNN_L2_TENID_EXIST_GRE] =
1202 				RTE_PTYPE_TUNNEL_GRE,
1203 		[QEDE_PKT_TYPE_TUNN_L2_TENID_EXIST_VXLAN] =
1204 				RTE_PTYPE_TUNNEL_VXLAN,
1205 		[QEDE_PKT_TYPE_TUNN_IPV4_TENID_NOEXIST_GENEVE] =
1206 				RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L3_IPV4,
1207 		[QEDE_PKT_TYPE_TUNN_IPV4_TENID_NOEXIST_GRE] =
1208 				RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_L3_IPV4,
1209 		[QEDE_PKT_TYPE_TUNN_IPV4_TENID_NOEXIST_VXLAN] =
1210 				RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L3_IPV4,
1211 		[QEDE_PKT_TYPE_TUNN_IPV4_TENID_EXIST_GENEVE] =
1212 				RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L3_IPV4,
1213 		[QEDE_PKT_TYPE_TUNN_IPV4_TENID_EXIST_GRE] =
1214 				RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_L3_IPV4,
1215 		[QEDE_PKT_TYPE_TUNN_IPV4_TENID_EXIST_VXLAN] =
1216 				RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L3_IPV4,
1217 		[QEDE_PKT_TYPE_TUNN_IPV6_TENID_NOEXIST_GENEVE] =
1218 				RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L3_IPV6,
1219 		[QEDE_PKT_TYPE_TUNN_IPV6_TENID_NOEXIST_GRE] =
1220 				RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_L3_IPV6,
1221 		[QEDE_PKT_TYPE_TUNN_IPV6_TENID_NOEXIST_VXLAN] =
1222 				RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L3_IPV6,
1223 		[QEDE_PKT_TYPE_TUNN_IPV6_TENID_EXIST_GENEVE] =
1224 				RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L3_IPV6,
1225 		[QEDE_PKT_TYPE_TUNN_IPV6_TENID_EXIST_GRE] =
1226 				RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_L3_IPV6,
1227 		[QEDE_PKT_TYPE_TUNN_IPV6_TENID_EXIST_VXLAN] =
1228 				RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L3_IPV6,
1229 	};
1230 
1231 	/* Cover bits[4-0] to include tunn_type and next protocol */
1232 	val = ((ETH_TUNNEL_PARSING_FLAGS_TYPE_MASK <<
1233 		ETH_TUNNEL_PARSING_FLAGS_TYPE_SHIFT) |
1234 		(ETH_TUNNEL_PARSING_FLAGS_NEXT_PROTOCOL_MASK <<
1235 		ETH_TUNNEL_PARSING_FLAGS_NEXT_PROTOCOL_SHIFT)) & flags;
1236 
1237 	if (val < QEDE_PKT_TYPE_TUNN_MAX_TYPE)
1238 		return ptype_tunn_lkup_tbl[val];
1239 	else
1240 		return RTE_PTYPE_UNKNOWN;
1241 }
1242 
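/* Chain the remaining buffers of a multi-BD (scattered) packet onto rx_mb,
 * consuming one Rx BD per segment; on a zero-length segment the BDs are
 * recycled and the packet is dropped.
 */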
1243 static inline int
1244 qede_process_sg_pkts(void *p_rxq,  struct rte_mbuf *rx_mb,
1245 		     uint8_t num_segs, uint16_t pkt_len)
1246 {
1247 	struct qede_rx_queue *rxq = p_rxq;
1248 	struct qede_dev *qdev = rxq->qdev;
1249 	register struct rte_mbuf *seg1 = NULL;
1250 	register struct rte_mbuf *seg2 = NULL;
1251 	uint16_t sw_rx_index;
1252 	uint16_t cur_size;
1253 
1254 	seg1 = rx_mb;
1255 	while (num_segs) {
1256 		cur_size = pkt_len > rxq->rx_buf_size ? rxq->rx_buf_size :
1257 							pkt_len;
1258 		if (unlikely(!cur_size)) {
1259 			PMD_RX_LOG(ERR, rxq, "Length is 0 while %u BDs"
1260 				   " are left to map for a jumbo packet\n", num_segs);
1261 			qede_recycle_rx_bd_ring(rxq, qdev, num_segs);
1262 			return -EINVAL;
1263 		}
1264 		sw_rx_index = rxq->sw_rx_cons & NUM_RX_BDS(rxq);
1265 		seg2 = rxq->sw_rx_ring[sw_rx_index].mbuf;
1266 		qede_rx_bd_ring_consume(rxq);
1267 		pkt_len -= cur_size;
1268 		seg2->data_len = cur_size;
1269 		seg1->next = seg2;
1270 		seg1 = seg1->next;
1271 		num_segs--;
1272 		rxq->rx_segs++;
1273 	}
1274 
1275 	return 0;
1276 }
1277 
1278 #ifdef RTE_LIBRTE_QEDE_DEBUG_RX
1279 static inline void
1280 print_rx_bd_info(struct rte_mbuf *m, struct qede_rx_queue *rxq,
1281 		 uint8_t bitfield)
1282 {
1283 	PMD_RX_LOG(INFO, rxq,
1284 		"len 0x%04x bf 0x%04x hash_val 0x%x"
1285 		" ol_flags 0x%04lx l2=%s l3=%s l4=%s tunn=%s"
1286 		" inner_l2=%s inner_l3=%s inner_l4=%s\n",
1287 		m->data_len, bitfield, m->hash.rss,
1288 		(unsigned long)m->ol_flags,
1289 		rte_get_ptype_l2_name(m->packet_type),
1290 		rte_get_ptype_l3_name(m->packet_type),
1291 		rte_get_ptype_l4_name(m->packet_type),
1292 		rte_get_ptype_tunnel_name(m->packet_type),
1293 		rte_get_ptype_inner_l2_name(m->packet_type),
1294 		rte_get_ptype_inner_l3_name(m->packet_type),
1295 		rte_get_ptype_inner_l4_name(m->packet_type));
1296 }
1297 #endif
1298 
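/* Rx burst handler: walk the completion ring from the software consumer up
 * to the hardware consumer index, handling regular, TPA start/continue/end
 * and slow-path CQEs, refilling the BD ring as buffers are consumed, and
 * finally publishing the new producers to the device.
 */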
1299 uint16_t
1300 qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
1301 {
1302 	struct qede_rx_queue *rxq = p_rxq;
1303 	struct qede_dev *qdev = rxq->qdev;
1304 	struct ecore_dev *edev = &qdev->edev;
1305 	uint16_t hw_comp_cons, sw_comp_cons, sw_rx_index;
1306 	uint16_t rx_pkt = 0;
1307 	union eth_rx_cqe *cqe;
1308 	struct eth_fast_path_rx_reg_cqe *fp_cqe = NULL;
1309 	register struct rte_mbuf *rx_mb = NULL;
1310 	register struct rte_mbuf *seg1 = NULL;
1311 	enum eth_rx_cqe_type cqe_type;
1312 	uint16_t pkt_len = 0; /* Sum of all BD segments */
1313 	uint16_t len; /* Length of first BD */
1314 	uint8_t num_segs = 1;
1315 	uint16_t preload_idx;
1316 	uint16_t parse_flag;
1317 #ifdef RTE_LIBRTE_QEDE_DEBUG_RX
1318 	uint8_t bitfield_val;
1319 #endif
1320 	uint8_t tunn_parse_flag;
1321 	uint8_t j;
1322 	struct eth_fast_path_rx_tpa_start_cqe *cqe_start_tpa;
1323 	uint64_t ol_flags;
1324 	uint32_t packet_type;
1325 	uint16_t vlan_tci;
1326 	bool tpa_start_flg;
1327 	uint8_t offset, tpa_agg_idx, flags;
1328 	struct qede_agg_info *tpa_info = NULL;
1329 	uint32_t rss_hash;
1330 
1331 	hw_comp_cons = rte_le_to_cpu_16(*rxq->hw_cons_ptr);
1332 	sw_comp_cons = ecore_chain_get_cons_idx(&rxq->rx_comp_ring);
1333 
1334 	rte_rmb();
1335 
1336 	if (hw_comp_cons == sw_comp_cons)
1337 		return 0;
1338 
1339 	while (sw_comp_cons != hw_comp_cons) {
1340 		ol_flags = 0;
1341 		packet_type = RTE_PTYPE_UNKNOWN;
1342 		vlan_tci = 0;
1343 		tpa_start_flg = false;
1344 		rss_hash = 0;
1345 
1346 		/* Get the CQE from the completion ring */
1347 		cqe =
1348 		    (union eth_rx_cqe *)ecore_chain_consume(&rxq->rx_comp_ring);
1349 		cqe_type = cqe->fast_path_regular.type;
1350 		PMD_RX_LOG(INFO, rxq, "Rx CQE type %d\n", cqe_type);
1351 
1352 		switch (cqe_type) {
1353 		case ETH_RX_CQE_TYPE_REGULAR:
1354 			fp_cqe = &cqe->fast_path_regular;
1355 		break;
1356 		case ETH_RX_CQE_TYPE_TPA_START:
1357 			cqe_start_tpa = &cqe->fast_path_tpa_start;
1358 			tpa_info = &rxq->tpa_info[cqe_start_tpa->tpa_agg_index];
1359 			tpa_start_flg = true;
1360 			/* Mark it as LRO packet */
1361 			ol_flags |= PKT_RX_LRO;
1362 			/* In split mode, seg_len is the same as len_on_first_bd
1363 			 * and ext_bd_len_list will be empty since there are
1364 			 * no additional buffers
1365 			 */
1366 			PMD_RX_LOG(INFO, rxq,
1367 			    "TPA start[%d] - len_on_first_bd %d header %d"
1368 			    " [bd_list[0] %d], [seg_len %d]\n",
1369 			    cqe_start_tpa->tpa_agg_index,
1370 			    rte_le_to_cpu_16(cqe_start_tpa->len_on_first_bd),
1371 			    cqe_start_tpa->header_len,
1372 			    rte_le_to_cpu_16(cqe_start_tpa->ext_bd_len_list[0]),
1373 			    rte_le_to_cpu_16(cqe_start_tpa->seg_len));
1374 
1375 		break;
1376 		case ETH_RX_CQE_TYPE_TPA_CONT:
1377 			qede_rx_process_tpa_cont_cqe(qdev, rxq,
1378 						     &cqe->fast_path_tpa_cont);
1379 			goto next_cqe;
1380 		case ETH_RX_CQE_TYPE_TPA_END:
1381 			qede_rx_process_tpa_end_cqe(qdev, rxq,
1382 						    &cqe->fast_path_tpa_end);
1383 			tpa_agg_idx = cqe->fast_path_tpa_end.tpa_agg_index;
1384 			tpa_info = &rxq->tpa_info[tpa_agg_idx];
1385 			rx_mb = rxq->tpa_info[tpa_agg_idx].tpa_head;
1386 			goto tpa_end;
1387 		case ETH_RX_CQE_TYPE_SLOW_PATH:
1388 			PMD_RX_LOG(INFO, rxq, "Got unexpected slowpath CQE\n");
1389 			ecore_eth_cqe_completion(
1390 				&edev->hwfns[rxq->queue_id % edev->num_hwfns],
1391 				(struct eth_slow_path_rx_cqe *)cqe);
1392 			/* fall-thru */
1393 		default:
1394 			goto next_cqe;
1395 		}
1396 
1397 		/* Get the data from the SW ring */
1398 		sw_rx_index = rxq->sw_rx_cons & NUM_RX_BDS(rxq);
1399 		rx_mb = rxq->sw_rx_ring[sw_rx_index].mbuf;
1400 		assert(rx_mb != NULL);
1401 
1402 		/* Handle regular CQE or TPA start CQE */
1403 		if (!tpa_start_flg) {
1404 			parse_flag = rte_le_to_cpu_16(fp_cqe->pars_flags.flags);
1405 			offset = fp_cqe->placement_offset;
1406 			len = rte_le_to_cpu_16(fp_cqe->len_on_first_bd);
1407 			pkt_len = rte_le_to_cpu_16(fp_cqe->pkt_len);
1408 			vlan_tci = rte_le_to_cpu_16(fp_cqe->vlan_tag);
1409 			rss_hash = rte_le_to_cpu_32(fp_cqe->rss_hash);
1410 #ifdef RTE_LIBRTE_QEDE_DEBUG_RX
1411 			bitfield_val = fp_cqe->bitfields;
1412 #endif
1413 		} else {
1414 			parse_flag =
1415 			    rte_le_to_cpu_16(cqe_start_tpa->pars_flags.flags);
1416 			offset = cqe_start_tpa->placement_offset;
1417 			/* seg_len = len_on_first_bd */
1418 			len = rte_le_to_cpu_16(cqe_start_tpa->len_on_first_bd);
1419 			vlan_tci = rte_le_to_cpu_16(cqe_start_tpa->vlan_tag);
1420 #ifdef RTE_LIBRTE_QEDE_DEBUG_RX
1421 			bitfield_val = cqe_start_tpa->bitfields;
1422 #endif
1423 			rss_hash = rte_le_to_cpu_32(cqe_start_tpa->rss_hash);
1424 		}
1425 		if (qede_tunn_exist(parse_flag)) {
1426 			PMD_RX_LOG(INFO, rxq, "Rx tunneled packet\n");
1427 			if (unlikely(qede_check_tunn_csum_l4(parse_flag))) {
1428 				PMD_RX_LOG(ERR, rxq,
1429 					    "L4 csum failed, flags = 0x%x\n",
1430 					    parse_flag);
1431 				rxq->rx_hw_errors++;
1432 				ol_flags |= PKT_RX_L4_CKSUM_BAD;
1433 			} else {
1434 				ol_flags |= PKT_RX_L4_CKSUM_GOOD;
1435 			}
1436 
1437 			if (unlikely(qede_check_tunn_csum_l3(parse_flag))) {
1438 				PMD_RX_LOG(ERR, rxq,
1439 					"Outer L3 csum failed, flags = 0x%x\n",
1440 					parse_flag);
1441 				  rxq->rx_hw_errors++;
1442 				  ol_flags |= PKT_RX_EIP_CKSUM_BAD;
1443 			} else {
1444 				  ol_flags |= PKT_RX_IP_CKSUM_GOOD;
1445 			}
1446 
1447 			if (tpa_start_flg)
1448 				flags = cqe_start_tpa->tunnel_pars_flags.flags;
1449 			else
1450 				flags = fp_cqe->tunnel_pars_flags.flags;
1451 			tunn_parse_flag = flags;
1452 
1453 			/* Tunnel_type */
1454 			packet_type =
1455 				qede_rx_cqe_to_tunn_pkt_type(tunn_parse_flag);
1456 
1457 			/* Inner header */
1458 			packet_type |=
1459 			      qede_rx_cqe_to_pkt_type_inner(parse_flag);
1460 
1464 			/* Outer L3/L4 types are not available in the CQE.
1465 			 * Apply the placement offset to parse them correctly.
1466 			 */
1467 			rx_mb->data_off = offset + RTE_PKTMBUF_HEADROOM;
1468 			packet_type |= qede_rx_cqe_to_pkt_type_outer(rx_mb);
1469 		} else {
1470 			packet_type |= qede_rx_cqe_to_pkt_type(parse_flag);
1471 		}
1472 
1473 		/* Common handling for non-tunnel packets and for inner
1474 		 * headers in the case of tunnel.
1475 		 */
1476 		if (unlikely(qede_check_notunn_csum_l4(parse_flag))) {
1477 			PMD_RX_LOG(ERR, rxq,
1478 				    "L4 csum failed, flags = 0x%x\n",
1479 				    parse_flag);
1480 			rxq->rx_hw_errors++;
1481 			ol_flags |= PKT_RX_L4_CKSUM_BAD;
1482 		} else {
1483 			ol_flags |= PKT_RX_L4_CKSUM_GOOD;
1484 		}
1485 		if (unlikely(qede_check_notunn_csum_l3(rx_mb, parse_flag))) {
1486 			PMD_RX_LOG(ERR, rxq, "IP csum failed, flags = 0x%x\n",
1487 				   parse_flag);
1488 			rxq->rx_hw_errors++;
1489 			ol_flags |= PKT_RX_IP_CKSUM_BAD;
1490 		} else {
1491 			ol_flags |= PKT_RX_IP_CKSUM_GOOD;
1492 		}
1493 
1494 		if (CQE_HAS_VLAN(parse_flag) ||
1495 		    CQE_HAS_OUTER_VLAN(parse_flag)) {
1496 			/* Note: FW doesn't indicate Q-in-Q packet */
1497 			ol_flags |= PKT_RX_VLAN;
1498 			if (qdev->vlan_strip_flg) {
1499 				ol_flags |= PKT_RX_VLAN_STRIPPED;
1500 				rx_mb->vlan_tci = vlan_tci;
1501 			}
1502 		}
1503 
1504 		/* RSS Hash */
1505 		if (qdev->rss_enable) {
1506 			ol_flags |= PKT_RX_RSS_HASH;
1507 			rx_mb->hash.rss = rss_hash;
1508 		}
1509 
1510 		if (unlikely(qede_alloc_rx_buffer(rxq) != 0)) {
1511 			PMD_RX_LOG(ERR, rxq,
1512 				   "New buffer allocation failed, "
1513 				   "dropping incoming packet\n");
1514 			qede_recycle_rx_bd_ring(rxq, qdev, fp_cqe->bd_num);
1515 			rte_eth_devices[rxq->port_id].
1516 			    data->rx_mbuf_alloc_failed++;
1517 			rxq->rx_alloc_errors++;
1518 			break;
1519 		}
1520 		qede_rx_bd_ring_consume(rxq);
1521 
1522 		if (!tpa_start_flg && fp_cqe->bd_num > 1) {
1523 			PMD_RX_LOG(DEBUG, rxq, "Jumbo-over-BD packet: %02x BDs"
1524 				   " len on first: %04x Total Len: %04x",
1525 				   fp_cqe->bd_num, len, pkt_len);
1526 			num_segs = fp_cqe->bd_num - 1;
1527 			seg1 = rx_mb;
1528 			if (qede_process_sg_pkts(p_rxq, seg1, num_segs,
1529 						 pkt_len - len))
1530 				goto next_cqe;
1531 			for (j = 0; j < num_segs; j++) {
1532 				if (qede_alloc_rx_buffer(rxq)) {
1533 					PMD_RX_LOG(ERR, rxq,
1534 						"Buffer allocation failed");
1535 					rte_eth_devices[rxq->port_id].
1536 						data->rx_mbuf_alloc_failed++;
1537 					rxq->rx_alloc_errors++;
1538 					break;
1539 				}
1540 				rxq->rx_segs++;
1541 			}
1542 		}
1543 		rxq->rx_segs++; /* for the first segment */
1544 
1545 		/* Prefetch next mbuf while processing current one. */
1546 		preload_idx = rxq->sw_rx_cons & NUM_RX_BDS(rxq);
1547 		rte_prefetch0(rxq->sw_rx_ring[preload_idx].mbuf);
1548 
1549 		/* Update rest of the MBUF fields */
1550 		rx_mb->data_off = offset + RTE_PKTMBUF_HEADROOM;
1551 		rx_mb->port = rxq->port_id;
1552 		rx_mb->ol_flags = ol_flags;
1553 		rx_mb->data_len = len;
1554 		rx_mb->packet_type = packet_type;
1555 #ifdef RTE_LIBRTE_QEDE_DEBUG_RX
1556 		print_rx_bd_info(rx_mb, rxq, bitfield_val);
1557 #endif
1558 		if (!tpa_start_flg) {
1559 			rx_mb->nb_segs = fp_cqe->bd_num;
1560 			rx_mb->pkt_len = pkt_len;
1561 		} else {
1562 			/* store ref to the updated mbuf */
1563 			tpa_info->tpa_head = rx_mb;
1564 			tpa_info->tpa_tail = tpa_info->tpa_head;
1565 		}
1566 		rte_prefetch1(rte_pktmbuf_mtod(rx_mb, void *));
1567 tpa_end:
1568 		if (!tpa_start_flg) {
1569 			rx_pkts[rx_pkt] = rx_mb;
1570 			rx_pkt++;
1571 		}
1572 next_cqe:
1573 		ecore_chain_recycle_consumed(&rxq->rx_comp_ring);
1574 		sw_comp_cons = ecore_chain_get_cons_idx(&rxq->rx_comp_ring);
1575 		if (rx_pkt == nb_pkts) {
1576 			PMD_RX_LOG(DEBUG, rxq,
1577 				   "Budget reached nb_pkts=%u received=%u",
1578 				   rx_pkt, nb_pkts);
1579 			break;
1580 		}
1581 	}
1582 
1583 	qede_update_rx_prod(qdev, rxq);
1584 
1585 	rxq->rcv_pkts += rx_pkt;
1586 
1587 	PMD_RX_LOG(DEBUG, rxq, "rx_pkts=%u core=%d", rx_pkt, rte_lcore_id());
1588 
1589 	return rx_pkt;
1590 }
1591 
1592 
1593 /* Populate scatter gather buffer descriptor fields */
1594 static inline uint16_t
1595 qede_encode_sg_bd(struct qede_tx_queue *p_txq, struct rte_mbuf *m_seg,
1596 		  struct eth_tx_2nd_bd **bd2, struct eth_tx_3rd_bd **bd3,
1597 		  uint16_t start_seg)
1598 {
1599 	struct qede_tx_queue *txq = p_txq;
1600 	struct eth_tx_bd *tx_bd = NULL;
1601 	dma_addr_t mapping;
1602 	uint16_t nb_segs = 0;
1603 
1604 	/* Check for scattered buffers */
1605 	while (m_seg) {
1606 		if (start_seg == 0) {
1607 			if (!*bd2) {
1608 				*bd2 = (struct eth_tx_2nd_bd *)
1609 					ecore_chain_produce(&txq->tx_pbl);
1610 				memset(*bd2, 0, sizeof(struct eth_tx_2nd_bd));
1611 				nb_segs++;
1612 			}
1613 			mapping = rte_mbuf_data_iova(m_seg);
1614 			QEDE_BD_SET_ADDR_LEN(*bd2, mapping, m_seg->data_len);
1615 			PMD_TX_LOG(DEBUG, txq, "BD2 len %04x", m_seg->data_len);
1616 		} else if (start_seg == 1) {
1617 			if (!*bd3) {
1618 				*bd3 = (struct eth_tx_3rd_bd *)
1619 					ecore_chain_produce(&txq->tx_pbl);
1620 				memset(*bd3, 0, sizeof(struct eth_tx_3rd_bd));
1621 				nb_segs++;
1622 			}
1623 			mapping = rte_mbuf_data_iova(m_seg);
1624 			QEDE_BD_SET_ADDR_LEN(*bd3, mapping, m_seg->data_len);
1625 			PMD_TX_LOG(DEBUG, txq, "BD3 len %04x", m_seg->data_len);
1626 		} else {
1627 			tx_bd = (struct eth_tx_bd *)
1628 				ecore_chain_produce(&txq->tx_pbl);
1629 			memset(tx_bd, 0, sizeof(*tx_bd));
1630 			nb_segs++;
1631 			mapping = rte_mbuf_data_iova(m_seg);
1632 			QEDE_BD_SET_ADDR_LEN(tx_bd, mapping, m_seg->data_len);
1633 			PMD_TX_LOG(DEBUG, txq, "BD len %04x", m_seg->data_len);
1634 		}
1635 		start_seg++;
1636 		m_seg = m_seg->next;
1637 	}
1638 
1639 	/* Return total scattered buffers */
1640 	return nb_segs;
1641 }
1642 
1643 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX
1644 static inline void
1645 print_tx_bd_info(struct qede_tx_queue *txq,
1646 		 struct eth_tx_1st_bd *bd1,
1647 		 struct eth_tx_2nd_bd *bd2,
1648 		 struct eth_tx_3rd_bd *bd3,
1649 		 uint64_t tx_ol_flags)
1650 {
1651 	char ol_buf[256] = { 0 }; /* for verbose prints */
1652 
1653 	if (bd1)
1654 		PMD_TX_LOG(INFO, txq,
1655 		   "BD1: nbytes=0x%04x nbds=0x%04x bd_flags=0x%04x bf=0x%04x",
1656 		   rte_cpu_to_le_16(bd1->nbytes), bd1->data.nbds,
1657 		   bd1->data.bd_flags.bitfields,
1658 		   rte_cpu_to_le_16(bd1->data.bitfields));
1659 	if (bd2)
1660 		PMD_TX_LOG(INFO, txq,
1661 		   "BD2: nbytes=0x%04x bf1=0x%04x bf2=0x%04x tunn_ip=0x%04x\n",
1662 		   rte_cpu_to_le_16(bd2->nbytes), bd2->data.bitfields1,
1663 		   bd2->data.bitfields2, bd2->data.tunn_ip_size);
1664 	if (bd3)
1665 		PMD_TX_LOG(INFO, txq,
1666 		   "BD3: nbytes=0x%04x bf=0x%04x MSS=0x%04x "
1667 		   "tunn_l4_hdr_start_offset_w=0x%04x tunn_hdr_size=0x%04x\n",
1668 		   rte_cpu_to_le_16(bd3->nbytes),
1669 		   rte_cpu_to_le_16(bd3->data.bitfields),
1670 		   rte_cpu_to_le_16(bd3->data.lso_mss),
1671 		   bd3->data.tunn_l4_hdr_start_offset_w,
1672 		   bd3->data.tunn_hdr_size_w);
1673 
1674 	rte_get_tx_ol_flag_list(tx_ol_flags, ol_buf, sizeof(ol_buf));
1675 	PMD_TX_LOG(INFO, txq, "TX offloads = %s\n", ol_buf);
1676 }
1677 #endif
1678 
1679 /* Tx prepare: verify that each packet meets the device Tx conditions */
1680 uint16_t
1681 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX
1682 qede_xmit_prep_pkts(void *p_txq, struct rte_mbuf **tx_pkts,
1683 		    uint16_t nb_pkts)
1684 {
1685 	struct qede_tx_queue *txq = p_txq;
1686 #else
1687 qede_xmit_prep_pkts(__rte_unused void *p_txq, struct rte_mbuf **tx_pkts,
1688 		    uint16_t nb_pkts)
1689 {
1690 #endif
1691 	uint64_t ol_flags;
1692 	struct rte_mbuf *m;
1693 	uint16_t i;
1694 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
1695 	int ret;
1696 #endif
1697 
1698 	for (i = 0; i < nb_pkts; i++) {
1699 		m = tx_pkts[i];
1700 		ol_flags = m->ol_flags;
1701 		if (ol_flags & PKT_TX_TCP_SEG) {
1702 			if (m->nb_segs >= ETH_TX_MAX_BDS_PER_LSO_PACKET) {
1703 				rte_errno = EINVAL;
1704 				break;
1705 			}
1706 			/* TBD: confirm it's ~9700B for both? */
1707 			if (m->tso_segsz > ETH_TX_MAX_NON_LSO_PKT_LEN) {
1708 				rte_errno = EINVAL;
1709 				break;
1710 			}
1711 		} else {
1712 			if (m->nb_segs >= ETH_TX_MAX_BDS_PER_NON_LSO_PACKET) {
1713 				rte_errno = EINVAL;
1714 				break;
1715 			}
1716 		}
1717 		if (ol_flags & QEDE_TX_OFFLOAD_NOTSUP_MASK) {
1718 			rte_errno = ENOTSUP;
1719 			break;
1720 		}
1721 
1722 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
1723 		ret = rte_validate_tx_offload(m);
1724 		if (ret != 0) {
1725 			rte_errno = -ret;
1726 			break;
1727 		}
1728 #endif
1729 	}
1730 
1731 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX
1732 	if (unlikely(i != nb_pkts))
1733 		PMD_TX_LOG(ERR, txq, "TX prepare failed for %u\n",
1734 			   nb_pkts - i);
1735 #endif
1736 	return i;
1737 }
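
/*
 * Usage note (illustrative, not part of this driver): the prepare stage is
 * expected to run on the same queue right before the burst stage, e.g.
 *
 *	nb_prep = rte_eth_tx_prepare(port_id, queue_id, pkts, nb_pkts);
 *	nb_sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
 *
 * On failure this callback stops at the first offending mbuf and reports
 * the cause through rte_errno.
 */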
1738 
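/* MPLS-in-UDP tunnel header: 8-byte UDP header plus one 4-byte MPLS label */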
1739 #define MPLSINUDP_HDR_SIZE			(12)
1740 
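/* Debug-only sanity check: logs an error when the MPLSoUDP header geometry
 * cannot be represented in the BD bitfields; the packet is not rejected.
 */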
1741 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX
1742 static inline void
1743 qede_mpls_tunn_tx_sanity_check(struct rte_mbuf *mbuf,
1744 			       struct qede_tx_queue *txq)
1745 {
1746 	if (((mbuf->outer_l2_len + mbuf->outer_l3_len) / 2) > 0xff)
1747 		PMD_TX_LOG(ERR, txq, "tunn_l4_hdr_start_offset overflow\n");
1748 	if (((mbuf->outer_l2_len + mbuf->outer_l3_len +
1749 		MPLSINUDP_HDR_SIZE) / 2) > 0xff)
1750 		PMD_TX_LOG(ERR, txq, "tunn_hdr_size overflow\n");
1751 	if (((mbuf->l2_len - MPLSINUDP_HDR_SIZE) / 2) >
1752 		ETH_TX_DATA_2ND_BD_TUNN_INNER_L2_HDR_SIZE_W_MASK)
1753 		PMD_TX_LOG(ERR, txq, "inner_l2_hdr_size overflow\n");
1754 	if (((mbuf->l2_len - MPLSINUDP_HDR_SIZE + mbuf->l3_len) / 2) >
1755 		ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_MASK)
1756 		PMD_TX_LOG(ERR, txq, "inner_l4_hdr_offset overflow\n");
1757 }
1758 #endif
1759 
1760 uint16_t
1761 qede_xmit_pkts(void *p_txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1762 {
1763 	struct qede_tx_queue *txq = p_txq;
1764 	struct qede_dev *qdev = txq->qdev;
1765 	struct ecore_dev *edev = &qdev->edev;
1766 	struct rte_mbuf *mbuf;
1767 	struct rte_mbuf *m_seg = NULL;
1768 	uint16_t nb_tx_pkts;
1769 	uint16_t bd_prod;
1770 	uint16_t idx;
1771 	uint16_t nb_frags;
1772 	uint16_t nb_pkt_sent = 0;
1773 	uint8_t nbds;
1774 	bool lso_flg;
1775 	bool mplsoudp_flg;
1776 	__rte_unused bool tunn_flg;
1777 	bool tunn_ipv6_ext_flg;
1778 	struct eth_tx_1st_bd *bd1;
1779 	struct eth_tx_2nd_bd *bd2;
1780 	struct eth_tx_3rd_bd *bd3;
1781 	uint64_t tx_ol_flags;
1782 	uint16_t hdr_size;
1783 	/* BD1 */
1784 	uint16_t bd1_bf;
1785 	uint8_t bd1_bd_flags_bf;
1786 	uint16_t vlan;
1787 	/* BD2 */
1788 	uint16_t bd2_bf1;
1789 	uint16_t bd2_bf2;
1790 	/* BD3 */
1791 	uint16_t mss;
1792 	uint16_t bd3_bf;
1793 
1794 	uint8_t tunn_l4_hdr_start_offset;
1795 	uint8_t tunn_hdr_size;
1796 	uint8_t inner_l2_hdr_size;
1797 	uint16_t inner_l4_hdr_offset;
1798 
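	/* Reclaim completed TX BDs up front if the free count has dropped
	 * below the configured threshold.
	 */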
1799 	if (unlikely(txq->nb_tx_avail < txq->tx_free_thresh)) {
1800 		PMD_TX_LOG(DEBUG, txq, "send=%u avail=%u free_thresh=%u",
1801 			   nb_pkts, txq->nb_tx_avail, txq->tx_free_thresh);
1802 		qede_process_tx_compl(edev, txq);
1803 	}
1804 
1805 	nb_tx_pkts  = nb_pkts;
1806 	bd_prod = rte_cpu_to_le_16(ecore_chain_get_prod_idx(&txq->tx_pbl));
1807 	while (nb_tx_pkts--) {
1808 		/* Init flags/values */
1809 		tunn_flg = false;
1810 		lso_flg = false;
1811 		nbds = 0;
1812 		vlan = 0;
1813 		bd1 = NULL;
1814 		bd2 = NULL;
1815 		bd3 = NULL;
1816 		hdr_size = 0;
1817 		bd1_bf = 0;
1818 		bd1_bd_flags_bf = 0;
1819 		bd2_bf1 = 0;
1820 		bd2_bf2 = 0;
1821 		mss = 0;
1822 		bd3_bf = 0;
1823 		mplsoudp_flg = false;
1824 		tunn_ipv6_ext_flg = false;
1825 		tunn_hdr_size = 0;
1826 		tunn_l4_hdr_start_offset = 0;
1827 
1828 		mbuf = *tx_pkts++;
1829 		assert(mbuf);
1830 
1831 		/* Make sure enough TX BDs are available for every segment of this packet */
1832 		if (unlikely(txq->nb_tx_avail < mbuf->nb_segs))
1833 			break;
1834 
1835 		tx_ol_flags = mbuf->ol_flags;
1836 		bd1_bd_flags_bf |= 1 << ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT;
1837 
1838 		/* Tx prepare has already verified the supported tunnel Tx
1839 		 * offloads. Don't rely on the pkt_type marked by Rx; instead,
1840 		 * use tx_ol_flags to decide.
1841 		 */
1842 		tunn_flg = !!(tx_ol_flags & PKT_TX_TUNNEL_MASK);
1843 
1844 		if (tunn_flg) {
1845 			/* Check against the maximum, which is tunnel IPv6 + ext */
1846 			if (unlikely(txq->nb_tx_avail <
1847 				ETH_TX_MIN_BDS_PER_TUNN_IPV6_WITH_EXT_PKT))
1848 					break;
1849 
1850 			/* First, indicate it's a tunnel packet */
1851 			bd1_bf |= ETH_TX_DATA_1ST_BD_TUNN_FLAG_MASK <<
1852 				  ETH_TX_DATA_1ST_BD_TUNN_FLAG_SHIFT;
1853 			/* Legacy FW had flipped behavior with regard to this bit,
1854 			 * i.e. it needed to be set to prevent the FW from touching
1855 			 * encapsulated packets when it didn't need to.
1856 			 */
1857 			if (unlikely(txq->is_legacy)) {
1858 				bd1_bf ^= 1 <<
1859 					ETH_TX_DATA_1ST_BD_TUNN_FLAG_SHIFT;
1860 			}
1861 
1862 			/* Outer IP checksum offload */
1863 			if (tx_ol_flags & (PKT_TX_OUTER_IP_CKSUM |
1864 					   PKT_TX_OUTER_IPV4)) {
1865 				bd1_bd_flags_bf |=
1866 					ETH_TX_1ST_BD_FLAGS_TUNN_IP_CSUM_MASK <<
1867 					ETH_TX_1ST_BD_FLAGS_TUNN_IP_CSUM_SHIFT;
1868 			}
1869 
1870 			/*
1871 			 * Currently, only inner checksum offload in an MPLS-in-UDP
1872 			 * tunnel with one MPLS label is supported. Both outer
1873 			 * and inner layer lengths must be provided in the
1874 			 * mbuf.
1875 			 */
1876 			if ((tx_ol_flags & PKT_TX_TUNNEL_MASK) ==
1877 						PKT_TX_TUNNEL_MPLSINUDP) {
1878 				mplsoudp_flg = true;
1879 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX
1880 				qede_mpls_tunn_tx_sanity_check(mbuf, txq);
1881 #endif
1882 				/* Outer L4 offset in two byte words */
1883 				tunn_l4_hdr_start_offset =
1884 				  (mbuf->outer_l2_len + mbuf->outer_l3_len) / 2;
1885 				/* Tunnel header size in two byte words */
1886 				tunn_hdr_size = (mbuf->outer_l2_len +
1887 						mbuf->outer_l3_len +
1888 						MPLSINUDP_HDR_SIZE) / 2;
1889 				/* Inner L2 header size in two byte words */
1890 				inner_l2_hdr_size = (mbuf->l2_len -
1891 						MPLSINUDP_HDR_SIZE) / 2;
1892 				/* Inner L4 header offset from the beginning
1893 				 * of the inner packet, in two byte words
1894 				 */
1895 				inner_l4_hdr_offset = (mbuf->l2_len -
1896 					MPLSINUDP_HDR_SIZE + mbuf->l3_len) / 2;
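				/* Worked example (illustrative): with a 14B outer
				 * Ethernet header and a 20B outer IPv4 header,
				 * tunn_l4_hdr_start_offset = (14 + 20) / 2 = 17 words
				 * and tunn_hdr_size = (14 + 20 + 12) / 2 = 23 words.
				 */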
1897 
1898 				/* Inner L2 size and address type */
1899 				bd2_bf1 |= (inner_l2_hdr_size &
1900 					ETH_TX_DATA_2ND_BD_TUNN_INNER_L2_HDR_SIZE_W_MASK) <<
1901 					ETH_TX_DATA_2ND_BD_TUNN_INNER_L2_HDR_SIZE_W_SHIFT;
1902 				bd2_bf1 |= (UNICAST_ADDRESS &
1903 					ETH_TX_DATA_2ND_BD_TUNN_INNER_ETH_TYPE_MASK) <<
1904 					ETH_TX_DATA_2ND_BD_TUNN_INNER_ETH_TYPE_SHIFT;
1905 				/* Treated as IPv6+Ext */
1906 				bd2_bf1 |=
1907 				    1 << ETH_TX_DATA_2ND_BD_TUNN_IPV6_EXT_SHIFT;
1908 
1909 				/* Mark inner IPv6 if present */
1910 				if (tx_ol_flags & PKT_TX_IPV6)
1911 					bd2_bf1 |=
1912 						1 << ETH_TX_DATA_2ND_BD_TUNN_INNER_IPV6_SHIFT;
1913 
1914 				/* Inner L4 offsets */
1915 				if ((tx_ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6)) &&
1916 				     (tx_ol_flags & (PKT_TX_UDP_CKSUM |
1917 							PKT_TX_TCP_CKSUM))) {
1918 					/* Determines if BD3 is needed */
1919 					tunn_ipv6_ext_flg = true;
1920 					if ((tx_ol_flags & PKT_TX_L4_MASK) ==
1921 							PKT_TX_UDP_CKSUM) {
1922 						bd2_bf1 |=
1923 							1 << ETH_TX_DATA_2ND_BD_L4_UDP_SHIFT;
1924 					}
1925 
1926 					/* TODO other pseudo checksum modes are
1927 					 * not supported
1928 					 */
1929 					bd2_bf1 |=
1930 					ETH_L4_PSEUDO_CSUM_CORRECT_LENGTH <<
1931 					ETH_TX_DATA_2ND_BD_L4_PSEUDO_CSUM_MODE_SHIFT;
1932 					bd2_bf2 |= (inner_l4_hdr_offset &
1933 						ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_MASK) <<
1934 						ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_SHIFT;
1935 				}
1936 			} /* End MPLSoUDP */
1937 		} /* End Tunnel handling */
1938 
1939 		if (tx_ol_flags & PKT_TX_TCP_SEG) {
1940 			lso_flg = true;
1941 			if (unlikely(txq->nb_tx_avail <
1942 						ETH_TX_MIN_BDS_PER_LSO_PKT))
1943 				break;
1944 			/* For LSO, the packet header and payload must reside in
1945 			 * buffers pointed to by different BDs. BD1 is used for
1946 			 * the header and BD2 onwards for the data.
1947 			 */
1948 			hdr_size = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len;
1949 			if (tunn_flg)
1950 				hdr_size += mbuf->outer_l2_len +
1951 					    mbuf->outer_l3_len;
1952 
1953 			bd1_bd_flags_bf |= 1 << ETH_TX_1ST_BD_FLAGS_LSO_SHIFT;
1954 			bd1_bd_flags_bf |=
1955 					1 << ETH_TX_1ST_BD_FLAGS_IP_CSUM_SHIFT;
1956 			/* PKT_TX_TCP_SEG implies PKT_TX_TCP_CKSUM */
1957 			bd1_bd_flags_bf |=
1958 					1 << ETH_TX_1ST_BD_FLAGS_L4_CSUM_SHIFT;
1959 			mss = rte_cpu_to_le_16(mbuf->tso_segsz);
1960 			/* Using one header BD */
1961 			bd3_bf |= rte_cpu_to_le_16(1 <<
1962 					ETH_TX_DATA_3RD_BD_HDR_NBD_SHIFT);
1963 		} else {
1964 			if (unlikely(txq->nb_tx_avail <
1965 					ETH_TX_MIN_BDS_PER_NON_LSO_PKT))
1966 				break;
1967 			bd1_bf |=
1968 			       (mbuf->pkt_len & ETH_TX_DATA_1ST_BD_PKT_LEN_MASK)
1969 				<< ETH_TX_DATA_1ST_BD_PKT_LEN_SHIFT;
1970 		}
1971 
1972 		/* Descriptor based VLAN insertion */
1973 		if (tx_ol_flags & (PKT_TX_VLAN_PKT | PKT_TX_QINQ_PKT)) {
1974 			vlan = rte_cpu_to_le_16(mbuf->vlan_tci);
1975 			bd1_bd_flags_bf |=
1976 			    1 << ETH_TX_1ST_BD_FLAGS_VLAN_INSERTION_SHIFT;
1977 		}
1978 
1979 		/* Offload the IP checksum in the hardware */
1980 		if (tx_ol_flags & PKT_TX_IP_CKSUM) {
1981 			bd1_bd_flags_bf |=
1982 				1 << ETH_TX_1ST_BD_FLAGS_IP_CSUM_SHIFT;
1983 			/* There's no DPDK flag to request outer-L4 csum
1984 			 * offload. However, for tunnel packets, if inner L3 or
1985 			 * L4 csum offload is requested we must also force
1986 			 * recalculation of the tunnel (outer) L4 header csum.
1987 			 */
1988 			if (tunn_flg && ((tx_ol_flags & PKT_TX_TUNNEL_MASK) !=
1989 							PKT_TX_TUNNEL_GRE)) {
1990 				bd1_bd_flags_bf |=
1991 					ETH_TX_1ST_BD_FLAGS_TUNN_L4_CSUM_MASK <<
1992 					ETH_TX_1ST_BD_FLAGS_TUNN_L4_CSUM_SHIFT;
1993 			}
1994 		}
1995 
1996 		/* L4 checksum offload (tcp or udp) */
1997 		if ((tx_ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6)) &&
1998 		    (tx_ol_flags & (PKT_TX_UDP_CKSUM | PKT_TX_TCP_CKSUM))) {
1999 			bd1_bd_flags_bf |=
2000 				1 << ETH_TX_1ST_BD_FLAGS_L4_CSUM_SHIFT;
2001 			/* There's no DPDK flag to request outer-L4 csum
2002 			 * offload. However, for tunnel packets, if inner L3 or
2003 			 * L4 csum offload is requested we must also force
2004 			 * recalculation of the tunnel (outer) L4 header csum.
2005 			 */
2006 			if (tunn_flg) {
2007 				bd1_bd_flags_bf |=
2008 					ETH_TX_1ST_BD_FLAGS_TUNN_L4_CSUM_MASK <<
2009 					ETH_TX_1ST_BD_FLAGS_TUNN_L4_CSUM_SHIFT;
2010 			}
2011 		}
2012 
2013 		/* Fill the entry in the SW ring and the BDs in the FW ring */
2014 		idx = TX_PROD(txq);
2015 		txq->sw_tx_ring[idx].mbuf = mbuf;
2016 
2017 		/* BD1 */
2018 		bd1 = (struct eth_tx_1st_bd *)ecore_chain_produce(&txq->tx_pbl);
2019 		memset(bd1, 0, sizeof(struct eth_tx_1st_bd));
2020 		nbds++;
2021 
2022 		/* Map MBUF linear data for DMA and set in the BD1 */
2023 		QEDE_BD_SET_ADDR_LEN(bd1, rte_mbuf_data_iova(mbuf),
2024 				     mbuf->data_len);
2025 		bd1->data.bitfields = rte_cpu_to_le_16(bd1_bf);
2026 		bd1->data.bd_flags.bitfields = bd1_bd_flags_bf;
2027 		bd1->data.vlan = vlan;
2028 
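		/* For LSO and MPLSoUDP packets, BD1 is limited to hdr_size bytes
		 * and the remainder of the first segment is described by BD2;
		 * BD3 is added when LSO or IPv6+ext tunnel metadata is required.
		 */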
2029 		if (lso_flg || mplsoudp_flg) {
2030 			bd2 = (struct eth_tx_2nd_bd *)ecore_chain_produce
2031 							(&txq->tx_pbl);
2032 			memset(bd2, 0, sizeof(struct eth_tx_2nd_bd));
2033 			nbds++;
2034 
2035 			/* BD1 */
2036 			QEDE_BD_SET_ADDR_LEN(bd1, rte_mbuf_data_iova(mbuf),
2037 					     hdr_size);
2038 			/* BD2 */
2039 			QEDE_BD_SET_ADDR_LEN(bd2, (hdr_size +
2040 					     rte_mbuf_data_iova(mbuf)),
2041 					     mbuf->data_len - hdr_size);
2042 			bd2->data.bitfields1 = rte_cpu_to_le_16(bd2_bf1);
2043 			if (mplsoudp_flg) {
2044 				bd2->data.bitfields2 =
2045 					rte_cpu_to_le_16(bd2_bf2);
2046 				/* Outer L3 size */
2047 				bd2->data.tunn_ip_size =
2048 					rte_cpu_to_le_16(mbuf->outer_l3_len);
2049 			}
2050 			/* BD3 */
2051 			if (lso_flg || (mplsoudp_flg && tunn_ipv6_ext_flg)) {
2052 				bd3 = (struct eth_tx_3rd_bd *)
2053 					ecore_chain_produce(&txq->tx_pbl);
2054 				memset(bd3, 0, sizeof(struct eth_tx_3rd_bd));
2055 				nbds++;
2056 				bd3->data.bitfields = rte_cpu_to_le_16(bd3_bf);
2057 				if (lso_flg)
2058 					bd3->data.lso_mss = mss;
2059 				if (mplsoudp_flg) {
2060 					bd3->data.tunn_l4_hdr_start_offset_w =
2061 						tunn_l4_hdr_start_offset;
2062 					bd3->data.tunn_hdr_size_w =
2063 						tunn_hdr_size;
2064 				}
2065 			}
2066 		}
2067 
2068 		/* Handle fragmented MBUF */
2069 		m_seg = mbuf->next;
2070 
2071 		/* Encode scatter gather buffer descriptors if required */
2072 		nb_frags = qede_encode_sg_bd(txq, m_seg, &bd2, &bd3, nbds - 1);
2073 		bd1->data.nbds = nbds + nb_frags;
2074 
2075 		txq->nb_tx_avail -= bd1->data.nbds;
2076 		txq->sw_tx_prod++;
2077 		rte_prefetch0(txq->sw_tx_ring[TX_PROD(txq)].mbuf);
2078 		bd_prod =
2079 		    rte_cpu_to_le_16(ecore_chain_get_prod_idx(&txq->tx_pbl));
2080 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX
2081 		print_tx_bd_info(txq, bd1, bd2, bd3, tx_ol_flags);
2082 #endif
2083 		nb_pkt_sent++;
2084 		txq->xmit_pkts++;
2085 	}
2086 
2087 	/* Write the new producer index into the doorbell data */
2088 	txq->tx_db.data.bd_prod = bd_prod;
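	/* Order the BD and producer updates ahead of the doorbell write, and
	 * flush the doorbell before polling for completions below.
	 */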
2089 	rte_wmb();
2090 	rte_compiler_barrier();
2091 	DIRECT_REG_WR_RELAXED(edev, txq->doorbell_addr, txq->tx_db.raw);
2092 	rte_wmb();
2093 
2094 	/* Check again for Tx completions */
2095 	qede_process_tx_compl(edev, txq);
2096 
2097 	PMD_TX_LOG(DEBUG, txq, "to_send=%u sent=%u bd_prod=%u core=%d",
2098 		   nb_pkts, nb_pkt_sent, TX_PROD(txq), rte_lcore_id());
2099 
2100 	return nb_pkt_sent;
2101 }
2102 
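/* Dummy RX/TX burst handler: a safe no-op meant to be installed on the burst
 * hooks whenever the real handlers must not run, so that any concurrent
 * polling simply returns zero packets.
 */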
2103 uint16_t
2104 qede_rxtx_pkts_dummy(__rte_unused void *p_rxq,
2105 		     __rte_unused struct rte_mbuf **pkts,
2106 		     __rte_unused uint16_t nb_pkts)
2107 {
2108 	return 0;
2109 }
2110