xref: /dpdk/drivers/net/qede/qede_rxtx.c (revision 3cc6ecfdfe85d2577fef30e1791bb7534e3d60b3)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright (c) 2016 - 2018 Cavium Inc.
3  * All rights reserved.
4  * www.cavium.com
5  */
6 
7 #include <rte_net.h>
8 #include "qede_rxtx.h"
9 
10 static inline int qede_alloc_rx_buffer(struct qede_rx_queue *rxq)
11 {
12 	struct rte_mbuf *new_mb = NULL;
13 	struct eth_rx_bd *rx_bd;
14 	dma_addr_t mapping;
15 	uint16_t idx = rxq->sw_rx_prod & NUM_RX_BDS(rxq);
16 
17 	new_mb = rte_mbuf_raw_alloc(rxq->mb_pool);
18 	if (unlikely(!new_mb)) {
19 		PMD_RX_LOG(ERR, rxq,
20 			   "Failed to allocate rx buffer "
21 			   "sw_rx_prod %u sw_rx_cons %u mp entries %u free %u",
22 			   idx, rxq->sw_rx_cons & NUM_RX_BDS(rxq),
23 			   rte_mempool_avail_count(rxq->mb_pool),
24 			   rte_mempool_in_use_count(rxq->mb_pool));
25 		return -ENOMEM;
26 	}
27 	rxq->sw_rx_ring[idx].mbuf = new_mb;
28 	rxq->sw_rx_ring[idx].page_offset = 0;
29 	mapping = rte_mbuf_data_iova_default(new_mb);
30 	/* Advance PROD and get BD pointer */
31 	rx_bd = (struct eth_rx_bd *)ecore_chain_produce(&rxq->rx_bd_ring);
32 	rx_bd->addr.hi = rte_cpu_to_le_32(U64_HI(mapping));
33 	rx_bd->addr.lo = rte_cpu_to_le_32(U64_LO(mapping));
34 	rxq->sw_rx_prod++;
35 	return 0;
36 }
37 
38 #define QEDE_MAX_BULK_ALLOC_COUNT 512
39 
40 static inline int qede_alloc_rx_bulk_mbufs(struct qede_rx_queue *rxq, int count)
41 {
42 	void *obj_p[QEDE_MAX_BULK_ALLOC_COUNT] __rte_cache_aligned;
43 	struct rte_mbuf *mbuf = NULL;
44 	struct eth_rx_bd *rx_bd;
45 	dma_addr_t mapping;
46 	int i, ret = 0;
47 	uint16_t idx;
48 
49 	if (count > QEDE_MAX_BULK_ALLOC_COUNT)
50 		count = QEDE_MAX_BULK_ALLOC_COUNT;
51 
52 	ret = rte_mempool_get_bulk(rxq->mb_pool, obj_p, count);
53 	if (unlikely(ret)) {
54 		PMD_RX_LOG(ERR, rxq,
55 			   "Failed to allocate %d rx buffers "
56 			    "sw_rx_prod %u sw_rx_cons %u mp entries %u free %u",
57 			    count,
58 			    rxq->sw_rx_prod & NUM_RX_BDS(rxq),
59 			    rxq->sw_rx_cons & NUM_RX_BDS(rxq),
60 			    rte_mempool_avail_count(rxq->mb_pool),
61 			    rte_mempool_in_use_count(rxq->mb_pool));
62 		return -ENOMEM;
63 	}
64 
65 	for (i = 0; i < count; i++) {
66 		mbuf = obj_p[i];
67 		if (likely(i < count - 1))
68 			rte_prefetch0(obj_p[i + 1]);
69 
70 		idx = rxq->sw_rx_prod & NUM_RX_BDS(rxq);
71 		rxq->sw_rx_ring[idx].mbuf = mbuf;
72 		rxq->sw_rx_ring[idx].page_offset = 0;
73 		mapping = rte_mbuf_data_iova_default(mbuf);
74 		rx_bd = (struct eth_rx_bd *)
75 			ecore_chain_produce(&rxq->rx_bd_ring);
76 		rx_bd->addr.hi = rte_cpu_to_le_32(U64_HI(mapping));
77 		rx_bd->addr.lo = rte_cpu_to_le_32(U64_LO(mapping));
78 		rxq->sw_rx_prod++;
79 	}
80 
81 	return 0;
82 }
83 
84 /* Criteria for calculating the Rx buffer size -
85  * 1) rx_buf_size should not exceed the size of the mbuf
86  * 2) In scattered_rx mode - the minimum rx_buf_size should be
87  *    (MTU + Maximum L2 Header Size + 2) / ETH_RX_MAX_BUFF_PER_PKT
88  * 3) In regular mode - the minimum rx_buf_size should be
89  *    (MTU + Maximum L2 Header Size + 2)
90  *    In the above cases, +2 corresponds to 2 bytes of padding in front
91  *    of the L2 header.
92  * 4) rx_buf_size should be cache-line-size aligned. So, considering
93  *    criterion 1, round the size down (floor) instead of up, so that
94  *    aligning rx_buf_size never exceeds the mbuf size.
95  */
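/* Worked example (illustrative only - the numeric values below are
 * assumptions for this sketch, not constants taken from this driver):
 * with scattered_rx enabled, mbufsz = 2048, max_frame_size = 9000 and,
 * hypothetically, QEDE_ETH_OVERHEAD = 24 and ETH_RX_MAX_BUFF_PER_PKT = 5:
 *   per-BD minimum  = (9000 + 24) / 5 = 1804
 *   rx_buf_size     = RTE_MAX(2048, 1804) = 2048
 *   returned value  = QEDE_FLOOR_TO_CACHE_LINE_SIZE(2048) = 2048
 * In regular (non-scattered) mode the same inputs would need
 * rx_buf_size = 9000 + 24 = 9024, again floored to the cache-line size.
 */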
96 int
97 qede_calc_rx_buf_size(struct rte_eth_dev *dev, uint16_t mbufsz,
98 		      uint16_t max_frame_size)
99 {
100 	struct qede_dev *qdev = QEDE_INIT_QDEV(dev);
101 	struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
102 	int rx_buf_size;
103 
104 	if (dev->data->scattered_rx) {
105 		/* Per HW limitation, only ETH_RX_MAX_BUFF_PER_PKT buffers can
106 		 * be used for a single packet, so make sure the mbuf size is
107 		 * sufficient for this.
108 		 */
109 		if ((mbufsz * ETH_RX_MAX_BUFF_PER_PKT) <
110 		     (max_frame_size + QEDE_ETH_OVERHEAD)) {
111 			DP_ERR(edev, "mbuf %d size is not enough to hold max fragments (%d) for max rx packet length (%d)\n",
112 			       mbufsz, ETH_RX_MAX_BUFF_PER_PKT, max_frame_size);
113 			return -EINVAL;
114 		}
115 
116 		rx_buf_size = RTE_MAX(mbufsz,
117 				      (max_frame_size + QEDE_ETH_OVERHEAD) /
118 				       ETH_RX_MAX_BUFF_PER_PKT);
119 	} else {
120 		rx_buf_size = max_frame_size + QEDE_ETH_OVERHEAD;
121 	}
122 
123 	/* Align to cache-line size if needed */
124 	return QEDE_FLOOR_TO_CACHE_LINE_SIZE(rx_buf_size);
125 }
126 
127 static struct qede_rx_queue *
128 qede_alloc_rx_queue_mem(struct rte_eth_dev *dev,
129 			uint16_t queue_idx,
130 			uint16_t nb_desc,
131 			unsigned int socket_id,
132 			struct rte_mempool *mp,
133 			uint16_t bufsz)
134 {
135 	struct qede_dev *qdev = QEDE_INIT_QDEV(dev);
136 	struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
137 	struct qede_rx_queue *rxq;
138 	size_t size;
139 	int rc;
140 
141 	/* First allocate the rx queue data structure */
142 	rxq = rte_zmalloc_socket("qede_rx_queue", sizeof(struct qede_rx_queue),
143 				 RTE_CACHE_LINE_SIZE, socket_id);
144 
145 	if (!rxq) {
146 		DP_ERR(edev, "Unable to allocate memory for rxq on socket %u",
147 			  socket_id);
148 		return NULL;
149 	}
150 
151 	rxq->qdev = qdev;
152 	rxq->mb_pool = mp;
153 	rxq->nb_rx_desc = nb_desc;
154 	rxq->queue_id = queue_idx;
155 	rxq->port_id = dev->data->port_id;
156 
157 
158 	rxq->rx_buf_size = bufsz;
159 
160 	DP_INFO(edev, "mtu %u mbufsz %u bd_max_bytes %u scatter_mode %d\n",
161 		qdev->mtu, bufsz, rxq->rx_buf_size, dev->data->scattered_rx);
162 
163 	/* Allocate the parallel driver ring for Rx buffers */
164 	size = sizeof(*rxq->sw_rx_ring) * rxq->nb_rx_desc;
165 	rxq->sw_rx_ring = rte_zmalloc_socket("sw_rx_ring", size,
166 					     RTE_CACHE_LINE_SIZE, socket_id);
167 	if (!rxq->sw_rx_ring) {
168 		DP_ERR(edev, "Memory allocation fails for sw_rx_ring on"
169 		       " socket %u\n", socket_id);
170 		rte_free(rxq);
171 		return NULL;
172 	}
173 
174 	/* Allocate FW Rx ring  */
175 	rc = qdev->ops->common->chain_alloc(edev,
176 					    ECORE_CHAIN_USE_TO_CONSUME_PRODUCE,
177 					    ECORE_CHAIN_MODE_NEXT_PTR,
178 					    ECORE_CHAIN_CNT_TYPE_U16,
179 					    rxq->nb_rx_desc,
180 					    sizeof(struct eth_rx_bd),
181 					    &rxq->rx_bd_ring,
182 					    NULL);
183 
184 	if (rc != ECORE_SUCCESS) {
185 		DP_ERR(edev, "Memory allocation fails for RX BD ring"
186 		       " on socket %u\n", socket_id);
187 		rte_free(rxq->sw_rx_ring);
188 		rte_free(rxq);
189 		return NULL;
190 	}
191 
192 	/* Allocate FW completion ring */
193 	rc = qdev->ops->common->chain_alloc(edev,
194 					    ECORE_CHAIN_USE_TO_CONSUME,
195 					    ECORE_CHAIN_MODE_PBL,
196 					    ECORE_CHAIN_CNT_TYPE_U16,
197 					    rxq->nb_rx_desc,
198 					    sizeof(union eth_rx_cqe),
199 					    &rxq->rx_comp_ring,
200 					    NULL);
201 
202 	if (rc != ECORE_SUCCESS) {
203 		DP_ERR(edev, "Memory allocation fails for RX CQE ring"
204 		       " on socket %u\n", socket_id);
205 		qdev->ops->common->chain_free(edev, &rxq->rx_bd_ring);
206 		rte_free(rxq->sw_rx_ring);
207 		rte_free(rxq);
208 		return NULL;
209 	}
210 
211 	return rxq;
212 }
213 
214 int
215 qede_rx_queue_setup(struct rte_eth_dev *dev, uint16_t qid,
216 		    uint16_t nb_desc, unsigned int socket_id,
217 		    __rte_unused const struct rte_eth_rxconf *rx_conf,
218 		    struct rte_mempool *mp)
219 {
220 	struct qede_dev *qdev = QEDE_INIT_QDEV(dev);
221 	struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
222 	struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
223 	struct qede_rx_queue *rxq;
224 	uint16_t max_rx_pkt_len;
225 	uint16_t bufsz;
226 	int rc;
227 
228 	PMD_INIT_FUNC_TRACE(edev);
229 
230 	/* Note: Ring size/align is controlled by struct rte_eth_desc_lim */
231 	if (!rte_is_power_of_2(nb_desc)) {
232 		DP_ERR(edev, "Ring size %u is not power of 2\n",
233 			  nb_desc);
234 		return -EINVAL;
235 	}
236 
237 	/* Free memory prior to re-allocation if needed... */
238 	if (dev->data->rx_queues[qid] != NULL) {
239 		qede_rx_queue_release(dev->data->rx_queues[qid]);
240 		dev->data->rx_queues[qid] = NULL;
241 	}
242 
243 	max_rx_pkt_len = (uint16_t)rxmode->max_rx_pkt_len;
244 
245 	/* Fix up RX buffer size */
246 	bufsz = (uint16_t)rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM;
247 	/* Cache-align the mbuf size to simplify the rx_buf_size calculation */
248 	bufsz = QEDE_FLOOR_TO_CACHE_LINE_SIZE(bufsz);
249 	if ((rxmode->offloads & DEV_RX_OFFLOAD_SCATTER)	||
250 	    (max_rx_pkt_len + QEDE_ETH_OVERHEAD) > bufsz) {
251 		if (!dev->data->scattered_rx) {
252 			DP_INFO(edev, "Forcing scatter-gather mode\n");
253 			dev->data->scattered_rx = 1;
254 		}
255 	}
256 
257 	rc = qede_calc_rx_buf_size(dev, bufsz, max_rx_pkt_len);
258 	if (rc < 0)
259 		return rc;
260 
261 	bufsz = rc;
262 
263 	if (ECORE_IS_CMT(edev)) {
264 		rxq = qede_alloc_rx_queue_mem(dev, qid * 2, nb_desc,
265 					      socket_id, mp, bufsz);
266 		if (!rxq)
267 			return -ENOMEM;
268 
269 		qdev->fp_array[qid * 2].rxq = rxq;
270 		rxq = qede_alloc_rx_queue_mem(dev, qid * 2 + 1, nb_desc,
271 					      socket_id, mp, bufsz);
272 		if (!rxq)
273 			return -ENOMEM;
274 
275 		qdev->fp_array[qid * 2 + 1].rxq = rxq;
276 		/* provide per engine fp struct as rx queue */
277 		dev->data->rx_queues[qid] = &qdev->fp_array_cmt[qid];
278 	} else {
279 		rxq = qede_alloc_rx_queue_mem(dev, qid, nb_desc,
280 					      socket_id, mp, bufsz);
281 		if (!rxq)
282 			return -ENOMEM;
283 
284 		dev->data->rx_queues[qid] = rxq;
285 		qdev->fp_array[qid].rxq = rxq;
286 	}
287 
288 	DP_INFO(edev, "rxq %d num_desc %u rx_buf_size=%u socket %u\n",
289 		  qid, nb_desc, rxq->rx_buf_size, socket_id);
290 
291 	return 0;
292 }
293 
294 static void
295 qede_rx_queue_reset(__rte_unused struct qede_dev *qdev,
296 		    struct qede_rx_queue *rxq)
297 {
298 	DP_INFO(&qdev->edev, "Reset RX queue %u\n", rxq->queue_id);
299 	ecore_chain_reset(&rxq->rx_bd_ring);
300 	ecore_chain_reset(&rxq->rx_comp_ring);
301 	rxq->sw_rx_prod = 0;
302 	rxq->sw_rx_cons = 0;
303 	*rxq->hw_cons_ptr = 0;
304 }
305 
306 static void qede_rx_queue_release_mbufs(struct qede_rx_queue *rxq)
307 {
308 	uint16_t i;
309 
310 	if (rxq->sw_rx_ring) {
311 		for (i = 0; i < rxq->nb_rx_desc; i++) {
312 			if (rxq->sw_rx_ring[i].mbuf) {
313 				rte_pktmbuf_free(rxq->sw_rx_ring[i].mbuf);
314 				rxq->sw_rx_ring[i].mbuf = NULL;
315 			}
316 		}
317 	}
318 }
319 
320 static void _qede_rx_queue_release(struct qede_dev *qdev,
321 				   struct ecore_dev *edev,
322 				   struct qede_rx_queue *rxq)
323 {
324 	qede_rx_queue_release_mbufs(rxq);
325 	qdev->ops->common->chain_free(edev, &rxq->rx_bd_ring);
326 	qdev->ops->common->chain_free(edev, &rxq->rx_comp_ring);
327 	rte_free(rxq->sw_rx_ring);
328 	rte_free(rxq);
329 }
330 
331 void qede_rx_queue_release(void *rx_queue)
332 {
333 	struct qede_rx_queue *rxq = rx_queue;
334 	struct qede_fastpath_cmt *fp_cmt;
335 	struct qede_dev *qdev;
336 	struct ecore_dev *edev;
337 
338 	if (rxq) {
339 		qdev = rxq->qdev;
340 		edev = QEDE_INIT_EDEV(qdev);
341 		PMD_INIT_FUNC_TRACE(edev);
342 		if (ECORE_IS_CMT(edev)) {
343 			fp_cmt = rx_queue;
344 			_qede_rx_queue_release(qdev, edev, fp_cmt->fp0->rxq);
345 			_qede_rx_queue_release(qdev, edev, fp_cmt->fp1->rxq);
346 		} else {
347 			_qede_rx_queue_release(qdev, edev, rxq);
348 		}
349 	}
350 }
351 
352 /* Stops a given RX queue in the HW */
353 static int qede_rx_queue_stop(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id)
354 {
355 	struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
356 	struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
357 	struct ecore_hwfn *p_hwfn;
358 	struct qede_rx_queue *rxq;
359 	int hwfn_index;
360 	int rc;
361 
362 	if (rx_queue_id < qdev->num_rx_queues) {
363 		rxq = qdev->fp_array[rx_queue_id].rxq;
364 		hwfn_index = rx_queue_id % edev->num_hwfns;
365 		p_hwfn = &edev->hwfns[hwfn_index];
366 		rc = ecore_eth_rx_queue_stop(p_hwfn, rxq->handle,
367 				true, false);
368 		if (rc != ECORE_SUCCESS) {
369 			DP_ERR(edev, "RX queue %u stop fails\n", rx_queue_id);
370 			return -1;
371 		}
372 		qede_rx_queue_release_mbufs(rxq);
373 		qede_rx_queue_reset(qdev, rxq);
374 		eth_dev->data->rx_queue_state[rx_queue_id] =
375 			RTE_ETH_QUEUE_STATE_STOPPED;
376 		DP_INFO(edev, "RX queue %u stopped\n", rx_queue_id);
377 	} else {
378 		DP_ERR(edev, "RX queue %u is not in range\n", rx_queue_id);
379 		rc = -EINVAL;
380 	}
381 
382 	return rc;
383 }
384 
385 static struct qede_tx_queue *
386 qede_alloc_tx_queue_mem(struct rte_eth_dev *dev,
387 			uint16_t queue_idx,
388 			uint16_t nb_desc,
389 			unsigned int socket_id,
390 			const struct rte_eth_txconf *tx_conf)
391 {
392 	struct qede_dev *qdev = dev->data->dev_private;
393 	struct ecore_dev *edev = &qdev->edev;
394 	struct qede_tx_queue *txq;
395 	int rc;
396 
397 	txq = rte_zmalloc_socket("qede_tx_queue", sizeof(struct qede_tx_queue),
398 				 RTE_CACHE_LINE_SIZE, socket_id);
399 
400 	if (txq == NULL) {
401 		DP_ERR(edev,
402 		       "Unable to allocate memory for txq on socket %u",
403 		       socket_id);
404 		return NULL;
405 	}
406 
407 	txq->nb_tx_desc = nb_desc;
408 	txq->qdev = qdev;
409 	txq->port_id = dev->data->port_id;
410 
411 	rc = qdev->ops->common->chain_alloc(edev,
412 					    ECORE_CHAIN_USE_TO_CONSUME_PRODUCE,
413 					    ECORE_CHAIN_MODE_PBL,
414 					    ECORE_CHAIN_CNT_TYPE_U16,
415 					    txq->nb_tx_desc,
416 					    sizeof(union eth_tx_bd_types),
417 					    &txq->tx_pbl,
418 					    NULL);
419 	if (rc != ECORE_SUCCESS) {
420 		DP_ERR(edev,
421 		       "Unable to allocate memory for txbd ring on socket %u",
422 		       socket_id);
423 		qede_tx_queue_release(txq);
424 		return NULL;
425 	}
426 
427 	/* Allocate software ring */
428 	txq->sw_tx_ring = rte_zmalloc_socket("txq->sw_tx_ring",
429 					     (sizeof(struct qede_tx_entry) *
430 					      txq->nb_tx_desc),
431 					     RTE_CACHE_LINE_SIZE, socket_id);
432 
433 	if (!txq->sw_tx_ring) {
434 		DP_ERR(edev,
435 		       "Unable to allocate memory for txbd ring on socket %u",
436 		       socket_id);
437 		qdev->ops->common->chain_free(edev, &txq->tx_pbl);
438 		qede_tx_queue_release(txq);
439 		return NULL;
440 	}
441 
442 	txq->queue_id = queue_idx;
443 
444 	txq->nb_tx_avail = txq->nb_tx_desc;
445 
446 	txq->tx_free_thresh =
447 	    tx_conf->tx_free_thresh ? tx_conf->tx_free_thresh :
448 	    (txq->nb_tx_desc - QEDE_DEFAULT_TX_FREE_THRESH);
449 
450 	DP_INFO(edev,
451 		  "txq %u num_desc %u tx_free_thresh %u socket %u\n",
452 		  queue_idx, nb_desc, txq->tx_free_thresh, socket_id);
453 	return txq;
454 }
455 
456 int
457 qede_tx_queue_setup(struct rte_eth_dev *dev,
458 		    uint16_t queue_idx,
459 		    uint16_t nb_desc,
460 		    unsigned int socket_id,
461 		    const struct rte_eth_txconf *tx_conf)
462 {
463 	struct qede_dev *qdev = dev->data->dev_private;
464 	struct ecore_dev *edev = &qdev->edev;
465 	struct qede_tx_queue *txq;
466 
467 	PMD_INIT_FUNC_TRACE(edev);
468 
469 	if (!rte_is_power_of_2(nb_desc)) {
470 		DP_ERR(edev, "Ring size %u is not power of 2\n",
471 		       nb_desc);
472 		return -EINVAL;
473 	}
474 
475 	/* Free memory prior to re-allocation if needed... */
476 	if (dev->data->tx_queues[queue_idx] != NULL) {
477 		qede_tx_queue_release(dev->data->tx_queues[queue_idx]);
478 		dev->data->tx_queues[queue_idx] = NULL;
479 	}
480 
481 	if (ECORE_IS_CMT(edev)) {
482 		txq = qede_alloc_tx_queue_mem(dev, queue_idx * 2, nb_desc,
483 					      socket_id, tx_conf);
484 		if (!txq)
485 			return -ENOMEM;
486 
487 		qdev->fp_array[queue_idx * 2].txq = txq;
488 		txq = qede_alloc_tx_queue_mem(dev, (queue_idx * 2) + 1, nb_desc,
489 					      socket_id, tx_conf);
490 		if (!txq)
491 			return -ENOMEM;
492 
493 		qdev->fp_array[(queue_idx * 2) + 1].txq = txq;
494 		dev->data->tx_queues[queue_idx] =
495 					&qdev->fp_array_cmt[queue_idx];
496 	} else {
497 		txq = qede_alloc_tx_queue_mem(dev, queue_idx, nb_desc,
498 					      socket_id, tx_conf);
499 		if (!txq)
500 			return -ENOMEM;
501 
502 		dev->data->tx_queues[queue_idx] = txq;
503 		qdev->fp_array[queue_idx].txq = txq;
504 	}
505 
506 	return 0;
507 }
508 
509 static void
510 qede_tx_queue_reset(__rte_unused struct qede_dev *qdev,
511 		    struct qede_tx_queue *txq)
512 {
513 	DP_INFO(&qdev->edev, "Reset TX queue %u\n", txq->queue_id);
514 	ecore_chain_reset(&txq->tx_pbl);
515 	txq->sw_tx_cons = 0;
516 	txq->sw_tx_prod = 0;
517 	*txq->hw_cons_ptr = 0;
518 }
519 
520 static void qede_tx_queue_release_mbufs(struct qede_tx_queue *txq)
521 {
522 	uint16_t i;
523 
524 	if (txq->sw_tx_ring) {
525 		for (i = 0; i < txq->nb_tx_desc; i++) {
526 			if (txq->sw_tx_ring[i].mbuf) {
527 				rte_pktmbuf_free(txq->sw_tx_ring[i].mbuf);
528 				txq->sw_tx_ring[i].mbuf = NULL;
529 			}
530 		}
531 	}
532 }
533 
534 static void _qede_tx_queue_release(struct qede_dev *qdev,
535 				   struct ecore_dev *edev,
536 				   struct qede_tx_queue *txq)
537 {
538 	qede_tx_queue_release_mbufs(txq);
539 	qdev->ops->common->chain_free(edev, &txq->tx_pbl);
540 	rte_free(txq->sw_tx_ring);
541 	rte_free(txq);
542 }
543 
544 void qede_tx_queue_release(void *tx_queue)
545 {
546 	struct qede_tx_queue *txq = tx_queue;
547 	struct qede_fastpath_cmt *fp_cmt;
548 	struct qede_dev *qdev;
549 	struct ecore_dev *edev;
550 
551 	if (txq) {
552 		qdev = txq->qdev;
553 		edev = QEDE_INIT_EDEV(qdev);
554 		PMD_INIT_FUNC_TRACE(edev);
555 
556 		if (ECORE_IS_CMT(edev)) {
557 			fp_cmt = tx_queue;
558 			_qede_tx_queue_release(qdev, edev, fp_cmt->fp0->txq);
559 			_qede_tx_queue_release(qdev, edev, fp_cmt->fp1->txq);
560 		} else {
561 			_qede_tx_queue_release(qdev, edev, txq);
562 		}
563 	}
564 }
565 
566 /* This function allocates fast-path status block memory */
567 static int
568 qede_alloc_mem_sb(struct qede_dev *qdev, struct ecore_sb_info *sb_info,
569 		  uint16_t sb_id)
570 {
571 	struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
572 	struct status_block *sb_virt;
573 	dma_addr_t sb_phys;
574 	int rc;
575 
576 	sb_virt = OSAL_DMA_ALLOC_COHERENT(edev, &sb_phys,
577 					  sizeof(struct status_block));
578 	if (!sb_virt) {
579 		DP_ERR(edev, "Status block allocation failed\n");
580 		return -ENOMEM;
581 	}
582 	rc = qdev->ops->common->sb_init(edev, sb_info, sb_virt,
583 					sb_phys, sb_id);
584 	if (rc) {
585 		DP_ERR(edev, "Status block initialization failed\n");
586 		OSAL_DMA_FREE_COHERENT(edev, sb_virt, sb_phys,
587 				       sizeof(struct status_block));
588 		return rc;
589 	}
590 
591 	return 0;
592 }
593 
594 int qede_alloc_fp_resc(struct qede_dev *qdev)
595 {
596 	struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
597 	struct qede_fastpath *fp;
598 	uint32_t num_sbs;
599 	uint16_t sb_idx;
600 	int i;
601 
602 	PMD_INIT_FUNC_TRACE(edev);
603 
604 	if (IS_VF(edev))
605 		ecore_vf_get_num_sbs(ECORE_LEADING_HWFN(edev), &num_sbs);
606 	else
607 		num_sbs = ecore_cxt_get_proto_cid_count
608 			  (ECORE_LEADING_HWFN(edev), PROTOCOLID_ETH, NULL);
609 
610 	if (num_sbs == 0) {
611 		DP_ERR(edev, "No status blocks available\n");
612 		return -EINVAL;
613 	}
614 
615 	qdev->fp_array = rte_calloc("fp", QEDE_RXTX_MAX(qdev),
616 				sizeof(*qdev->fp_array), RTE_CACHE_LINE_SIZE);
617 
618 	if (!qdev->fp_array) {
619 		DP_ERR(edev, "fp array allocation failed\n");
620 		return -ENOMEM;
621 	}
622 
623 	memset((void *)qdev->fp_array, 0, QEDE_RXTX_MAX(qdev) *
624 			sizeof(*qdev->fp_array));
625 
626 	if (ECORE_IS_CMT(edev)) {
627 		qdev->fp_array_cmt = rte_calloc("fp_cmt",
628 						QEDE_RXTX_MAX(qdev) / 2,
629 						sizeof(*qdev->fp_array_cmt),
630 						RTE_CACHE_LINE_SIZE);
631 
632 		if (!qdev->fp_array_cmt) {
633 			DP_ERR(edev, "fp array for CMT allocation failed\n");
634 			return -ENOMEM;
635 		}
636 
637 		memset((void *)qdev->fp_array_cmt, 0,
638 		       (QEDE_RXTX_MAX(qdev) / 2) * sizeof(*qdev->fp_array_cmt));
639 
640 		/* Establish the mapping of fp_array with fp_array_cmt */
641 		for (i = 0; i < QEDE_RXTX_MAX(qdev) / 2; i++) {
642 			qdev->fp_array_cmt[i].qdev = qdev;
643 			qdev->fp_array_cmt[i].fp0 = &qdev->fp_array[i * 2];
644 			qdev->fp_array_cmt[i].fp1 = &qdev->fp_array[i * 2 + 1];
645 		}
646 	}
647 
648 	for (sb_idx = 0; sb_idx < QEDE_RXTX_MAX(qdev); sb_idx++) {
649 		fp = &qdev->fp_array[sb_idx];
650 		fp->sb_info = rte_calloc("sb", 1, sizeof(struct ecore_sb_info),
651 				RTE_CACHE_LINE_SIZE);
652 		if (!fp->sb_info) {
653 			DP_ERR(edev, "FP sb_info allocation fails\n");
654 			return -1;
655 		}
656 		if (qede_alloc_mem_sb(qdev, fp->sb_info, sb_idx)) {
657 			DP_ERR(edev, "FP status block allocation fails\n");
658 			return -1;
659 		}
660 		DP_INFO(edev, "sb_info idx 0x%x initialized\n",
661 				fp->sb_info->igu_sb_id);
662 	}
663 
664 	return 0;
665 }
666 
667 void qede_dealloc_fp_resc(struct rte_eth_dev *eth_dev)
668 {
669 	struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
670 	struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
671 	struct qede_fastpath *fp;
672 	uint16_t sb_idx;
673 	uint8_t i;
674 
675 	PMD_INIT_FUNC_TRACE(edev);
676 
677 	for (sb_idx = 0; sb_idx < QEDE_RXTX_MAX(qdev); sb_idx++) {
678 		fp = &qdev->fp_array[sb_idx];
679 		DP_INFO(edev, "Free sb_info index 0x%x\n",
680 				fp->sb_info->igu_sb_id);
681 		if (fp->sb_info) {
682 			OSAL_DMA_FREE_COHERENT(edev, fp->sb_info->sb_virt,
683 				fp->sb_info->sb_phys,
684 				sizeof(struct status_block));
685 			rte_free(fp->sb_info);
686 			fp->sb_info = NULL;
687 		}
688 	}
689 
690 	/* Free packet buffers and ring memories */
691 	for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
692 		if (eth_dev->data->rx_queues[i]) {
693 			qede_rx_queue_release(eth_dev->data->rx_queues[i]);
694 			eth_dev->data->rx_queues[i] = NULL;
695 		}
696 	}
697 
698 	for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
699 		if (eth_dev->data->tx_queues[i]) {
700 			qede_tx_queue_release(eth_dev->data->tx_queues[i]);
701 			eth_dev->data->tx_queues[i] = NULL;
702 		}
703 	}
704 
705 	if (qdev->fp_array)
706 		rte_free(qdev->fp_array);
707 	qdev->fp_array = NULL;
708 
709 	if (qdev->fp_array_cmt)
710 		rte_free(qdev->fp_array_cmt);
711 	qdev->fp_array_cmt = NULL;
712 }
713 
714 static inline void
715 qede_update_rx_prod(__rte_unused struct qede_dev *edev,
716 		    struct qede_rx_queue *rxq)
717 {
718 	uint16_t bd_prod = ecore_chain_get_prod_idx(&rxq->rx_bd_ring);
719 	uint16_t cqe_prod = ecore_chain_get_prod_idx(&rxq->rx_comp_ring);
720 	struct eth_rx_prod_data rx_prods = { 0 };
721 
722 	/* Update producers */
723 	rx_prods.bd_prod = rte_cpu_to_le_16(bd_prod);
724 	rx_prods.cqe_prod = rte_cpu_to_le_16(cqe_prod);
725 
726 	/* Make sure that the BD and SGE data is updated before updating the
727 	 * producers since FW might read the BD/SGE right after the producer
728 	 * is updated.
729 	 */
730 	rte_wmb();
731 
732 	internal_ram_wr(rxq->hw_rxq_prod_addr, sizeof(rx_prods),
733 			(uint32_t *)&rx_prods);
734 
735 	/* A second barrier (the equivalent of mmiowb in the Linux driver) is
736 	 * needed to synchronize doorbell writes issued from more than one
737 	 * processor. It guarantees that the write reaches the device before
738 	 * another core issues the next doorbell; without it, that doorbell
739 	 * could bypass this one. This is applicable to IA64/Altix systems.
740 	 */
741 	rte_wmb();
742 
743 	PMD_RX_LOG(DEBUG, rxq, "bd_prod %u  cqe_prod %u", bd_prod, cqe_prod);
744 }
745 
746 /* Starts a given RX queue in HW */
747 static int
748 qede_rx_queue_start(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id)
749 {
750 	struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
751 	struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
752 	struct ecore_queue_start_common_params params;
753 	struct ecore_rxq_start_ret_params ret_params;
754 	struct qede_rx_queue *rxq;
755 	struct qede_fastpath *fp;
756 	struct ecore_hwfn *p_hwfn;
757 	dma_addr_t p_phys_table;
758 	uint16_t page_cnt;
759 	uint16_t j;
760 	int hwfn_index;
761 	int rc;
762 
763 	if (rx_queue_id < qdev->num_rx_queues) {
764 		fp = &qdev->fp_array[rx_queue_id];
765 		rxq = fp->rxq;
766 		/* Allocate buffers for the Rx ring */
767 		for (j = 0; j < rxq->nb_rx_desc; j++) {
768 			rc = qede_alloc_rx_buffer(rxq);
769 			if (rc) {
770 				DP_ERR(edev, "RX buffer allocation failed"
771 						" for rxq = %u\n", rx_queue_id);
772 				return -ENOMEM;
773 			}
774 		}
775 		/* disable interrupts */
776 		ecore_sb_ack(fp->sb_info, IGU_INT_DISABLE, 0);
777 		/* Prepare ramrod */
778 		memset(&params, 0, sizeof(params));
779 		params.queue_id = rx_queue_id / edev->num_hwfns;
780 		params.vport_id = 0;
781 		params.stats_id = params.vport_id;
782 		params.p_sb = fp->sb_info;
783 		DP_INFO(edev, "rxq %u igu_sb_id 0x%x\n",
784 				fp->rxq->queue_id, fp->sb_info->igu_sb_id);
785 		params.sb_idx = RX_PI;
786 		hwfn_index = rx_queue_id % edev->num_hwfns;
787 		p_hwfn = &edev->hwfns[hwfn_index];
788 		p_phys_table = ecore_chain_get_pbl_phys(&fp->rxq->rx_comp_ring);
789 		page_cnt = ecore_chain_get_page_cnt(&fp->rxq->rx_comp_ring);
790 		memset(&ret_params, 0, sizeof(ret_params));
791 		rc = ecore_eth_rx_queue_start(p_hwfn,
792 				p_hwfn->hw_info.opaque_fid,
793 				&params, fp->rxq->rx_buf_size,
794 				fp->rxq->rx_bd_ring.p_phys_addr,
795 				p_phys_table, page_cnt,
796 				&ret_params);
797 		if (rc) {
798 			DP_ERR(edev, "RX queue %u could not be started, rc = %d\n",
799 					rx_queue_id, rc);
800 			return -1;
801 		}
802 		/* Update with the returned parameters */
803 		fp->rxq->hw_rxq_prod_addr = ret_params.p_prod;
804 		fp->rxq->handle = ret_params.p_handle;
805 
806 		fp->rxq->hw_cons_ptr = &fp->sb_info->sb_pi_array[RX_PI];
807 		qede_update_rx_prod(qdev, fp->rxq);
808 		eth_dev->data->rx_queue_state[rx_queue_id] =
809 			RTE_ETH_QUEUE_STATE_STARTED;
810 		DP_INFO(edev, "RX queue %u started\n", rx_queue_id);
811 	} else {
812 		DP_ERR(edev, "RX queue %u is not in range\n", rx_queue_id);
813 		rc = -EINVAL;
814 	}
815 
816 	return rc;
817 }
818 
819 static int
820 qede_tx_queue_start(struct rte_eth_dev *eth_dev, uint16_t tx_queue_id)
821 {
822 	struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
823 	struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
824 	struct ecore_queue_start_common_params params;
825 	struct ecore_txq_start_ret_params ret_params;
826 	struct ecore_hwfn *p_hwfn;
827 	dma_addr_t p_phys_table;
828 	struct qede_tx_queue *txq;
829 	struct qede_fastpath *fp;
830 	uint16_t page_cnt;
831 	int hwfn_index;
832 	int rc;
833 
834 	if (tx_queue_id < qdev->num_tx_queues) {
835 		fp = &qdev->fp_array[tx_queue_id];
836 		txq = fp->txq;
837 		memset(&params, 0, sizeof(params));
838 		params.queue_id = tx_queue_id / edev->num_hwfns;
839 		params.vport_id = 0;
840 		params.stats_id = params.vport_id;
841 		params.p_sb = fp->sb_info;
842 		DP_INFO(edev, "txq %u igu_sb_id 0x%x\n",
843 				fp->txq->queue_id, fp->sb_info->igu_sb_id);
844 		params.sb_idx = TX_PI(0); /* tc = 0 */
845 		p_phys_table = ecore_chain_get_pbl_phys(&txq->tx_pbl);
846 		page_cnt = ecore_chain_get_page_cnt(&txq->tx_pbl);
847 		hwfn_index = tx_queue_id % edev->num_hwfns;
848 		p_hwfn = &edev->hwfns[hwfn_index];
849 		if (qdev->dev_info.is_legacy)
850 			fp->txq->is_legacy = true;
851 		rc = ecore_eth_tx_queue_start(p_hwfn,
852 				p_hwfn->hw_info.opaque_fid,
853 				&params, 0 /* tc */,
854 				p_phys_table, page_cnt,
855 				&ret_params);
856 		if (rc != ECORE_SUCCESS) {
857 			DP_ERR(edev, "TX queue %u couldn't be started, rc=%d\n",
858 					tx_queue_id, rc);
859 			return -1;
860 		}
861 		txq->doorbell_addr = ret_params.p_doorbell;
862 		txq->handle = ret_params.p_handle;
863 
864 		txq->hw_cons_ptr = &fp->sb_info->sb_pi_array[TX_PI(0)];
865 		SET_FIELD(txq->tx_db.data.params, ETH_DB_DATA_DEST,
866 				DB_DEST_XCM);
867 		SET_FIELD(txq->tx_db.data.params, ETH_DB_DATA_AGG_CMD,
868 				DB_AGG_CMD_SET);
869 		SET_FIELD(txq->tx_db.data.params,
870 				ETH_DB_DATA_AGG_VAL_SEL,
871 				DQ_XCM_ETH_TX_BD_PROD_CMD);
872 		txq->tx_db.data.agg_flags = DQ_XCM_ETH_DQ_CF_CMD;
873 		eth_dev->data->tx_queue_state[tx_queue_id] =
874 			RTE_ETH_QUEUE_STATE_STARTED;
875 		DP_INFO(edev, "TX queue %u started\n", tx_queue_id);
876 	} else {
877 		DP_ERR(edev, "TX queue %u is not in range\n", tx_queue_id);
878 		rc = -EINVAL;
879 	}
880 
881 	return rc;
882 }
883 
884 static inline void
885 qede_free_tx_pkt(struct qede_tx_queue *txq)
886 {
887 	struct rte_mbuf *mbuf;
888 	uint16_t nb_segs;
889 	uint16_t idx;
890 
891 	idx = TX_CONS(txq);
892 	mbuf = txq->sw_tx_ring[idx].mbuf;
893 	if (mbuf) {
894 		nb_segs = mbuf->nb_segs;
895 		PMD_TX_LOG(DEBUG, txq, "nb_segs to free %u\n", nb_segs);
896 		while (nb_segs) {
897 			/* It's like consuming rxbuf in recv() */
898 			ecore_chain_consume(&txq->tx_pbl);
899 			txq->nb_tx_avail++;
900 			nb_segs--;
901 		}
902 		rte_pktmbuf_free(mbuf);
903 		txq->sw_tx_ring[idx].mbuf = NULL;
904 		txq->sw_tx_cons++;
905 		PMD_TX_LOG(DEBUG, txq, "Freed tx packet\n");
906 	} else {
907 		ecore_chain_consume(&txq->tx_pbl);
908 		txq->nb_tx_avail++;
909 	}
910 }
911 
912 static inline void
913 qede_process_tx_compl(__rte_unused struct ecore_dev *edev,
914 		      struct qede_tx_queue *txq)
915 {
916 	uint16_t hw_bd_cons;
917 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX
918 	uint16_t sw_tx_cons;
919 #endif
920 
921 	rte_compiler_barrier();
922 	hw_bd_cons = rte_le_to_cpu_16(*txq->hw_cons_ptr);
923 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX
924 	sw_tx_cons = ecore_chain_get_cons_idx(&txq->tx_pbl);
925 	PMD_TX_LOG(DEBUG, txq, "Tx Completions = %u\n",
926 		   abs(hw_bd_cons - sw_tx_cons));
927 #endif
928 	while (hw_bd_cons !=  ecore_chain_get_cons_idx(&txq->tx_pbl))
929 		qede_free_tx_pkt(txq);
930 }
931 
932 static int qede_drain_txq(struct qede_dev *qdev,
933 			  struct qede_tx_queue *txq, bool allow_drain)
934 {
935 	struct ecore_dev *edev = &qdev->edev;
936 	int rc, cnt = 1000;
937 
938 	while (txq->sw_tx_cons != txq->sw_tx_prod) {
939 		qede_process_tx_compl(edev, txq);
940 		if (!cnt) {
941 			if (allow_drain) {
942 				DP_ERR(edev, "Tx queue[%u] is stuck, "
943 					  "requesting MCP to drain\n",
944 					  txq->queue_id);
945 				rc = qdev->ops->common->drain(edev);
946 				if (rc)
947 					return rc;
948 				return qede_drain_txq(qdev, txq, false);
949 			}
950 			DP_ERR(edev, "Timeout waiting for tx queue[%d]: "
951 				  "PROD=%d, CONS=%d\n",
952 				  txq->queue_id, txq->sw_tx_prod,
953 				  txq->sw_tx_cons);
954 			return -1;
955 		}
956 		cnt--;
957 		DELAY(1000);
958 		rte_compiler_barrier();
959 	}
960 
961 	/* FW finished processing, wait for HW to transmit all tx packets */
962 	DELAY(2000);
963 
964 	return 0;
965 }
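
/* Timing note (a rough estimate, assuming the OSAL DELAY() used above is
 * microsecond-granular): the drain loop retries up to 1000 times with
 * DELAY(1000) between iterations, i.e. roughly one second of software
 * timeout before the MCP is asked to drain the queue, followed by a
 * final 2 ms settle time for the HW to finish transmitting.
 */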
966 
967 /* Stops a given TX queue in the HW */
968 static int qede_tx_queue_stop(struct rte_eth_dev *eth_dev, uint16_t tx_queue_id)
969 {
970 	struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
971 	struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
972 	struct ecore_hwfn *p_hwfn;
973 	struct qede_tx_queue *txq;
974 	int hwfn_index;
975 	int rc;
976 
977 	if (tx_queue_id < qdev->num_tx_queues) {
978 		txq = qdev->fp_array[tx_queue_id].txq;
979 		/* Drain txq */
980 		if (qede_drain_txq(qdev, txq, true))
981 			return -1; /* For the lack of retcodes */
982 		/* Stop txq */
983 		hwfn_index = tx_queue_id % edev->num_hwfns;
984 		p_hwfn = &edev->hwfns[hwfn_index];
985 		rc = ecore_eth_tx_queue_stop(p_hwfn, txq->handle);
986 		if (rc != ECORE_SUCCESS) {
987 			DP_ERR(edev, "TX queue %u stop fails\n", tx_queue_id);
988 			return -1;
989 		}
990 		qede_tx_queue_release_mbufs(txq);
991 		qede_tx_queue_reset(qdev, txq);
992 		eth_dev->data->tx_queue_state[tx_queue_id] =
993 			RTE_ETH_QUEUE_STATE_STOPPED;
994 		DP_INFO(edev, "TX queue %u stopped\n", tx_queue_id);
995 	} else {
996 		DP_ERR(edev, "TX queue %u is not in range\n", tx_queue_id);
997 		rc = -EINVAL;
998 	}
999 
1000 	return rc;
1001 }
1002 
1003 int qede_start_queues(struct rte_eth_dev *eth_dev)
1004 {
1005 	struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
1006 	uint8_t id;
1007 	int rc = -1;
1008 
1009 	for (id = 0; id < qdev->num_rx_queues; id++) {
1010 		rc = qede_rx_queue_start(eth_dev, id);
1011 		if (rc != ECORE_SUCCESS)
1012 			return -1;
1013 	}
1014 
1015 	for (id = 0; id < qdev->num_tx_queues; id++) {
1016 		rc = qede_tx_queue_start(eth_dev, id);
1017 		if (rc != ECORE_SUCCESS)
1018 			return -1;
1019 	}
1020 
1021 	return rc;
1022 }
1023 
1024 void qede_stop_queues(struct rte_eth_dev *eth_dev)
1025 {
1026 	struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
1027 	uint8_t id;
1028 
1029 	/* Stopping RX/TX queues */
1030 	for (id = 0; id < qdev->num_tx_queues; id++)
1031 		qede_tx_queue_stop(eth_dev, id);
1032 
1033 	for (id = 0; id < qdev->num_rx_queues; id++)
1034 		qede_rx_queue_stop(eth_dev, id);
1035 }
1036 
1037 static inline bool qede_tunn_exist(uint16_t flag)
1038 {
1039 	return !!((PARSING_AND_ERR_FLAGS_TUNNELEXIST_MASK <<
1040 		    PARSING_AND_ERR_FLAGS_TUNNELEXIST_SHIFT) & flag);
1041 }
1042 
1043 static inline uint8_t qede_check_tunn_csum_l3(uint16_t flag)
1044 {
1045 	return !!((PARSING_AND_ERR_FLAGS_TUNNELIPHDRERROR_MASK <<
1046 		PARSING_AND_ERR_FLAGS_TUNNELIPHDRERROR_SHIFT) & flag);
1047 }
1048 
1049 /*
1050  * qede_check_tunn_csum_l4:
1051  * Returns:
1052  * 1 : If L4 csum is enabled AND if the validation has failed.
1053  * 0 : Otherwise
1054  */
1055 static inline uint8_t qede_check_tunn_csum_l4(uint16_t flag)
1056 {
1057 	if ((PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMWASCALCULATED_MASK <<
1058 	     PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMWASCALCULATED_SHIFT) & flag)
1059 		return !!((PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMERROR_MASK <<
1060 			PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMERROR_SHIFT) & flag);
1061 
1062 	return 0;
1063 }
1064 
1065 static inline uint8_t qede_check_notunn_csum_l4(uint16_t flag)
1066 {
1067 	if ((PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_MASK <<
1068 	     PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_SHIFT) & flag)
1069 		return !!((PARSING_AND_ERR_FLAGS_L4CHKSMERROR_MASK <<
1070 			   PARSING_AND_ERR_FLAGS_L4CHKSMERROR_SHIFT) & flag);
1071 
1072 	return 0;
1073 }
1074 
1075 /* Returns outer L2, L3 and L4 packet_type for tunneled packets */
1076 static inline uint32_t qede_rx_cqe_to_pkt_type_outer(struct rte_mbuf *m)
1077 {
1078 	uint32_t packet_type = RTE_PTYPE_UNKNOWN;
1079 	struct rte_ether_hdr *eth_hdr;
1080 	struct rte_ipv4_hdr *ipv4_hdr;
1081 	struct rte_ipv6_hdr *ipv6_hdr;
1082 	struct rte_vlan_hdr *vlan_hdr;
1083 	uint16_t ethertype;
1084 	bool vlan_tagged = 0;
1085 	uint16_t len;
1086 
1087 	eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
1088 	len = sizeof(struct rte_ether_hdr);
1089 	ethertype = rte_cpu_to_be_16(eth_hdr->ether_type);
1090 
1091 	 /* Note: Valid only if VLAN stripping is disabled */
1092 	if (ethertype == RTE_ETHER_TYPE_VLAN) {
1093 		vlan_tagged = 1;
1094 		vlan_hdr = (struct rte_vlan_hdr *)(eth_hdr + 1);
1095 		len += sizeof(struct rte_vlan_hdr);
1096 		ethertype = rte_cpu_to_be_16(vlan_hdr->eth_proto);
1097 	}
1098 
1099 	if (ethertype == RTE_ETHER_TYPE_IPV4) {
1100 		packet_type |= RTE_PTYPE_L3_IPV4;
1101 		ipv4_hdr = rte_pktmbuf_mtod_offset(m,
1102 					struct rte_ipv4_hdr *, len);
1103 		if (ipv4_hdr->next_proto_id == IPPROTO_TCP)
1104 			packet_type |= RTE_PTYPE_L4_TCP;
1105 		else if (ipv4_hdr->next_proto_id == IPPROTO_UDP)
1106 			packet_type |= RTE_PTYPE_L4_UDP;
1107 	} else if (ethertype == RTE_ETHER_TYPE_IPV6) {
1108 		packet_type |= RTE_PTYPE_L3_IPV6;
1109 		ipv6_hdr = rte_pktmbuf_mtod_offset(m,
1110 						struct rte_ipv6_hdr *, len);
1111 		if (ipv6_hdr->proto == IPPROTO_TCP)
1112 			packet_type |= RTE_PTYPE_L4_TCP;
1113 		else if (ipv6_hdr->proto == IPPROTO_UDP)
1114 			packet_type |= RTE_PTYPE_L4_UDP;
1115 	}
1116 
1117 	if (vlan_tagged)
1118 		packet_type |= RTE_PTYPE_L2_ETHER_VLAN;
1119 	else
1120 		packet_type |= RTE_PTYPE_L2_ETHER;
1121 
1122 	return packet_type;
1123 }
1124 
1125 static inline uint32_t qede_rx_cqe_to_pkt_type_inner(uint16_t flags)
1126 {
1127 	uint16_t val;
1128 
1129 	/* Lookup table */
1130 	static const uint32_t
1131 	ptype_lkup_tbl[QEDE_PKT_TYPE_MAX] __rte_cache_aligned = {
1132 		[QEDE_PKT_TYPE_IPV4] = RTE_PTYPE_INNER_L3_IPV4		|
1133 				       RTE_PTYPE_INNER_L2_ETHER,
1134 		[QEDE_PKT_TYPE_IPV6] = RTE_PTYPE_INNER_L3_IPV6		|
1135 				       RTE_PTYPE_INNER_L2_ETHER,
1136 		[QEDE_PKT_TYPE_IPV4_TCP] = RTE_PTYPE_INNER_L3_IPV4	|
1137 					   RTE_PTYPE_INNER_L4_TCP	|
1138 					   RTE_PTYPE_INNER_L2_ETHER,
1139 		[QEDE_PKT_TYPE_IPV6_TCP] = RTE_PTYPE_INNER_L3_IPV6	|
1140 					   RTE_PTYPE_INNER_L4_TCP	|
1141 					   RTE_PTYPE_INNER_L2_ETHER,
1142 		[QEDE_PKT_TYPE_IPV4_UDP] = RTE_PTYPE_INNER_L3_IPV4	|
1143 					   RTE_PTYPE_INNER_L4_UDP	|
1144 					   RTE_PTYPE_INNER_L2_ETHER,
1145 		[QEDE_PKT_TYPE_IPV6_UDP] = RTE_PTYPE_INNER_L3_IPV6	|
1146 					   RTE_PTYPE_INNER_L4_UDP	|
1147 					   RTE_PTYPE_INNER_L2_ETHER,
1148 		/* Frags with no VLAN */
1149 		[QEDE_PKT_TYPE_IPV4_FRAG] = RTE_PTYPE_INNER_L3_IPV4	|
1150 					    RTE_PTYPE_INNER_L4_FRAG	|
1151 					    RTE_PTYPE_INNER_L2_ETHER,
1152 		[QEDE_PKT_TYPE_IPV6_FRAG] = RTE_PTYPE_INNER_L3_IPV6	|
1153 					    RTE_PTYPE_INNER_L4_FRAG	|
1154 					    RTE_PTYPE_INNER_L2_ETHER,
1155 		/* VLANs */
1156 		[QEDE_PKT_TYPE_IPV4_VLAN] = RTE_PTYPE_INNER_L3_IPV4	|
1157 					    RTE_PTYPE_INNER_L2_ETHER_VLAN,
1158 		[QEDE_PKT_TYPE_IPV6_VLAN] = RTE_PTYPE_INNER_L3_IPV6	|
1159 					    RTE_PTYPE_INNER_L2_ETHER_VLAN,
1160 		[QEDE_PKT_TYPE_IPV4_TCP_VLAN] = RTE_PTYPE_INNER_L3_IPV4	|
1161 						RTE_PTYPE_INNER_L4_TCP	|
1162 						RTE_PTYPE_INNER_L2_ETHER_VLAN,
1163 		[QEDE_PKT_TYPE_IPV6_TCP_VLAN] = RTE_PTYPE_INNER_L3_IPV6	|
1164 						RTE_PTYPE_INNER_L4_TCP	|
1165 						RTE_PTYPE_INNER_L2_ETHER_VLAN,
1166 		[QEDE_PKT_TYPE_IPV4_UDP_VLAN] = RTE_PTYPE_INNER_L3_IPV4	|
1167 						RTE_PTYPE_INNER_L4_UDP	|
1168 						RTE_PTYPE_INNER_L2_ETHER_VLAN,
1169 		[QEDE_PKT_TYPE_IPV6_UDP_VLAN] = RTE_PTYPE_INNER_L3_IPV6	|
1170 						RTE_PTYPE_INNER_L4_UDP	|
1171 						RTE_PTYPE_INNER_L2_ETHER_VLAN,
1172 		/* Frags with VLAN */
1173 		[QEDE_PKT_TYPE_IPV4_VLAN_FRAG] = RTE_PTYPE_INNER_L3_IPV4 |
1174 						 RTE_PTYPE_INNER_L4_FRAG |
1175 						 RTE_PTYPE_INNER_L2_ETHER_VLAN,
1176 		[QEDE_PKT_TYPE_IPV6_VLAN_FRAG] = RTE_PTYPE_INNER_L3_IPV6 |
1177 						 RTE_PTYPE_INNER_L4_FRAG |
1178 						 RTE_PTYPE_INNER_L2_ETHER_VLAN,
1179 	};
1180 
1181 	/* Bits (0..3) provide the L3/L4 protocol type */
1182 	/* Bits (4, 5) provide fragmentation and VLAN info */
1183 	val = ((PARSING_AND_ERR_FLAGS_L3TYPE_MASK <<
1184 	       PARSING_AND_ERR_FLAGS_L3TYPE_SHIFT) |
1185 	       (PARSING_AND_ERR_FLAGS_L4PROTOCOL_MASK <<
1186 		PARSING_AND_ERR_FLAGS_L4PROTOCOL_SHIFT) |
1187 	       (PARSING_AND_ERR_FLAGS_IPV4FRAG_MASK <<
1188 		PARSING_AND_ERR_FLAGS_IPV4FRAG_SHIFT) |
1189 		(PARSING_AND_ERR_FLAGS_TAG8021QEXIST_MASK <<
1190 		 PARSING_AND_ERR_FLAGS_TAG8021QEXIST_SHIFT)) & flags;
1191 
1192 	if (val < QEDE_PKT_TYPE_MAX)
1193 		return ptype_lkup_tbl[val];
1194 
1195 	return RTE_PTYPE_UNKNOWN;
1196 }
1197 
1198 static inline uint32_t qede_rx_cqe_to_pkt_type(uint16_t flags)
1199 {
1200 	uint16_t val;
1201 
1202 	/* Lookup table */
1203 	static const uint32_t
1204 	ptype_lkup_tbl[QEDE_PKT_TYPE_MAX] __rte_cache_aligned = {
1205 		[QEDE_PKT_TYPE_IPV4] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L2_ETHER,
1206 		[QEDE_PKT_TYPE_IPV6] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L2_ETHER,
1207 		[QEDE_PKT_TYPE_IPV4_TCP] = RTE_PTYPE_L3_IPV4	|
1208 					   RTE_PTYPE_L4_TCP	|
1209 					   RTE_PTYPE_L2_ETHER,
1210 		[QEDE_PKT_TYPE_IPV6_TCP] = RTE_PTYPE_L3_IPV6	|
1211 					   RTE_PTYPE_L4_TCP	|
1212 					   RTE_PTYPE_L2_ETHER,
1213 		[QEDE_PKT_TYPE_IPV4_UDP] = RTE_PTYPE_L3_IPV4	|
1214 					   RTE_PTYPE_L4_UDP	|
1215 					   RTE_PTYPE_L2_ETHER,
1216 		[QEDE_PKT_TYPE_IPV6_UDP] = RTE_PTYPE_L3_IPV6	|
1217 					   RTE_PTYPE_L4_UDP	|
1218 					   RTE_PTYPE_L2_ETHER,
1219 		/* Frags with no VLAN */
1220 		[QEDE_PKT_TYPE_IPV4_FRAG] = RTE_PTYPE_L3_IPV4	|
1221 					    RTE_PTYPE_L4_FRAG	|
1222 					    RTE_PTYPE_L2_ETHER,
1223 		[QEDE_PKT_TYPE_IPV6_FRAG] = RTE_PTYPE_L3_IPV6	|
1224 					    RTE_PTYPE_L4_FRAG	|
1225 					    RTE_PTYPE_L2_ETHER,
1226 		/* VLANs */
1227 		[QEDE_PKT_TYPE_IPV4_VLAN] = RTE_PTYPE_L3_IPV4		|
1228 					    RTE_PTYPE_L2_ETHER_VLAN,
1229 		[QEDE_PKT_TYPE_IPV6_VLAN] = RTE_PTYPE_L3_IPV6		|
1230 					    RTE_PTYPE_L2_ETHER_VLAN,
1231 		[QEDE_PKT_TYPE_IPV4_TCP_VLAN] = RTE_PTYPE_L3_IPV4	|
1232 						RTE_PTYPE_L4_TCP	|
1233 						RTE_PTYPE_L2_ETHER_VLAN,
1234 		[QEDE_PKT_TYPE_IPV6_TCP_VLAN] = RTE_PTYPE_L3_IPV6	|
1235 						RTE_PTYPE_L4_TCP	|
1236 						RTE_PTYPE_L2_ETHER_VLAN,
1237 		[QEDE_PKT_TYPE_IPV4_UDP_VLAN] = RTE_PTYPE_L3_IPV4	|
1238 						RTE_PTYPE_L4_UDP	|
1239 						RTE_PTYPE_L2_ETHER_VLAN,
1240 		[QEDE_PKT_TYPE_IPV6_UDP_VLAN] = RTE_PTYPE_L3_IPV6	|
1241 						RTE_PTYPE_L4_UDP	|
1242 						RTE_PTYPE_L2_ETHER_VLAN,
1243 		/* Frags with VLAN */
1244 		[QEDE_PKT_TYPE_IPV4_VLAN_FRAG] = RTE_PTYPE_L3_IPV4	|
1245 						 RTE_PTYPE_L4_FRAG	|
1246 						 RTE_PTYPE_L2_ETHER_VLAN,
1247 		[QEDE_PKT_TYPE_IPV6_VLAN_FRAG] = RTE_PTYPE_L3_IPV6	|
1248 						 RTE_PTYPE_L4_FRAG	|
1249 						 RTE_PTYPE_L2_ETHER_VLAN,
1250 	};
1251 
1252 	/* Bits (0..3) provide the L3/L4 protocol type */
1253 	/* Bits (4, 5) provide fragmentation and VLAN info */
1254 	val = ((PARSING_AND_ERR_FLAGS_L3TYPE_MASK <<
1255 	       PARSING_AND_ERR_FLAGS_L3TYPE_SHIFT) |
1256 	       (PARSING_AND_ERR_FLAGS_L4PROTOCOL_MASK <<
1257 		PARSING_AND_ERR_FLAGS_L4PROTOCOL_SHIFT) |
1258 	       (PARSING_AND_ERR_FLAGS_IPV4FRAG_MASK <<
1259 		PARSING_AND_ERR_FLAGS_IPV4FRAG_SHIFT) |
1260 		(PARSING_AND_ERR_FLAGS_TAG8021QEXIST_MASK <<
1261 		 PARSING_AND_ERR_FLAGS_TAG8021QEXIST_SHIFT)) & flags;
1262 
1263 	if (val < QEDE_PKT_TYPE_MAX)
1264 		return ptype_lkup_tbl[val];
1265 
1266 	return RTE_PTYPE_UNKNOWN;
1267 }
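
/* Illustrative note for the two lookup helpers above (this assumes the
 * QEDE_PKT_TYPE_* enum values mirror the masked parse-flag bits, which
 * is what the designated initializers rely on): a non-fragmented,
 * untagged IPv4/TCP frame yields val == QEDE_PKT_TYPE_IPV4_TCP and thus
 * RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP, while the
 * same frame with an 802.1Q tag sets the VLAN bit and selects the
 * corresponding QEDE_PKT_TYPE_IPV4_TCP_VLAN entry instead.
 */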
1268 
1269 static inline uint8_t
1270 qede_check_notunn_csum_l3(struct rte_mbuf *m, uint16_t flag)
1271 {
1272 	struct rte_ipv4_hdr *ip;
1273 	uint16_t pkt_csum;
1274 	uint16_t calc_csum;
1275 	uint16_t val;
1276 
1277 	val = ((PARSING_AND_ERR_FLAGS_IPHDRERROR_MASK <<
1278 		PARSING_AND_ERR_FLAGS_IPHDRERROR_SHIFT) & flag);
1279 
1280 	if (unlikely(val)) {
1281 		m->packet_type = qede_rx_cqe_to_pkt_type(flag);
1282 		if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) {
1283 			ip = rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *,
1284 					   sizeof(struct rte_ether_hdr));
1285 			pkt_csum = ip->hdr_checksum;
1286 			ip->hdr_checksum = 0;
1287 			calc_csum = rte_ipv4_cksum(ip);
1288 			ip->hdr_checksum = pkt_csum;
1289 			return (calc_csum != pkt_csum);
1290 		} else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
1291 			return 1;
1292 		}
1293 	}
1294 	return 0;
1295 }
1296 
1297 static inline void qede_rx_bd_ring_consume(struct qede_rx_queue *rxq)
1298 {
1299 	ecore_chain_consume(&rxq->rx_bd_ring);
1300 	rxq->sw_rx_cons++;
1301 }
1302 
1303 static inline void
1304 qede_reuse_page(__rte_unused struct qede_dev *qdev,
1305 		struct qede_rx_queue *rxq, struct qede_rx_entry *curr_cons)
1306 {
1307 	struct eth_rx_bd *rx_bd_prod = ecore_chain_produce(&rxq->rx_bd_ring);
1308 	uint16_t idx = rxq->sw_rx_prod & NUM_RX_BDS(rxq);
1309 	struct qede_rx_entry *curr_prod;
1310 	dma_addr_t new_mapping;
1311 
1312 	curr_prod = &rxq->sw_rx_ring[idx];
1313 	*curr_prod = *curr_cons;
1314 
1315 	new_mapping = rte_mbuf_data_iova_default(curr_prod->mbuf) +
1316 		      curr_prod->page_offset;
1317 
1318 	rx_bd_prod->addr.hi = rte_cpu_to_le_32(U64_HI(new_mapping));
1319 	rx_bd_prod->addr.lo = rte_cpu_to_le_32(U64_LO(new_mapping));
1320 
1321 	rxq->sw_rx_prod++;
1322 }
1323 
1324 static inline void
1325 qede_recycle_rx_bd_ring(struct qede_rx_queue *rxq,
1326 			struct qede_dev *qdev, uint8_t count)
1327 {
1328 	struct qede_rx_entry *curr_cons;
1329 
1330 	for (; count > 0; count--) {
1331 		curr_cons = &rxq->sw_rx_ring[rxq->sw_rx_cons & NUM_RX_BDS(rxq)];
1332 		qede_reuse_page(qdev, rxq, curr_cons);
1333 		qede_rx_bd_ring_consume(rxq);
1334 	}
1335 }
1336 
1337 static inline void
1338 qede_rx_process_tpa_cmn_cont_end_cqe(__rte_unused struct qede_dev *qdev,
1339 				     struct qede_rx_queue *rxq,
1340 				     uint8_t agg_index, uint16_t len)
1341 {
1342 	struct qede_agg_info *tpa_info;
1343 	struct rte_mbuf *curr_frag; /* Pointer to currently filled TPA seg */
1344 	uint16_t cons_idx;
1345 
1346 	/* Under certain conditions the FW may not consume an additional or
1347 	 * new BD, so the decision to consume the BD must be made based on
1348 	 * len_list[0].
1349 	 */
1350 	if (rte_le_to_cpu_16(len)) {
1351 		tpa_info = &rxq->tpa_info[agg_index];
1352 		cons_idx = rxq->sw_rx_cons & NUM_RX_BDS(rxq);
1353 		curr_frag = rxq->sw_rx_ring[cons_idx].mbuf;
1354 		assert(curr_frag);
1355 		curr_frag->nb_segs = 1;
1356 		curr_frag->pkt_len = rte_le_to_cpu_16(len);
1357 		curr_frag->data_len = curr_frag->pkt_len;
1358 		tpa_info->tpa_tail->next = curr_frag;
1359 		tpa_info->tpa_tail = curr_frag;
1360 		qede_rx_bd_ring_consume(rxq);
1361 		if (unlikely(qede_alloc_rx_buffer(rxq) != 0)) {
1362 			PMD_RX_LOG(ERR, rxq, "mbuf allocation fails\n");
1363 			rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1364 			rxq->rx_alloc_errors++;
1365 		}
1366 	}
1367 }
1368 
1369 static inline void
1370 qede_rx_process_tpa_cont_cqe(struct qede_dev *qdev,
1371 			     struct qede_rx_queue *rxq,
1372 			     struct eth_fast_path_rx_tpa_cont_cqe *cqe)
1373 {
1374 	PMD_RX_LOG(INFO, rxq, "TPA cont[%d] - len [%d]\n",
1375 		   cqe->tpa_agg_index, rte_le_to_cpu_16(cqe->len_list[0]));
1376 	/* only len_list[0] will have value */
1377 	qede_rx_process_tpa_cmn_cont_end_cqe(qdev, rxq, cqe->tpa_agg_index,
1378 					     cqe->len_list[0]);
1379 }
1380 
1381 static inline void
1382 qede_rx_process_tpa_end_cqe(struct qede_dev *qdev,
1383 			    struct qede_rx_queue *rxq,
1384 			    struct eth_fast_path_rx_tpa_end_cqe *cqe)
1385 {
1386 	struct rte_mbuf *rx_mb; /* Pointer to head of the chained agg */
1387 
1388 	qede_rx_process_tpa_cmn_cont_end_cqe(qdev, rxq, cqe->tpa_agg_index,
1389 					     cqe->len_list[0]);
1390 	/* Update total length and frags based on end TPA */
1391 	rx_mb = rxq->tpa_info[cqe->tpa_agg_index].tpa_head;
1392 	/* TODO:  Add Sanity Checks */
1393 	rx_mb->nb_segs = cqe->num_of_bds;
1394 	rx_mb->pkt_len = cqe->total_packet_len;
1395 
1396 	PMD_RX_LOG(INFO, rxq, "TPA End[%d] reason %d cqe_len %d nb_segs %d"
1397 		   " pkt_len %d\n", cqe->tpa_agg_index, cqe->end_reason,
1398 		   rte_le_to_cpu_16(cqe->len_list[0]), rx_mb->nb_segs,
1399 		   rx_mb->pkt_len);
1400 }
1401 
1402 static inline uint32_t qede_rx_cqe_to_tunn_pkt_type(uint16_t flags)
1403 {
1404 	uint32_t val;
1405 
1406 	/* Lookup table */
1407 	static const uint32_t
1408 	ptype_tunn_lkup_tbl[QEDE_PKT_TYPE_TUNN_MAX_TYPE] __rte_cache_aligned = {
1409 		[QEDE_PKT_TYPE_UNKNOWN] = RTE_PTYPE_UNKNOWN,
1410 		[QEDE_PKT_TYPE_TUNN_GENEVE] = RTE_PTYPE_TUNNEL_GENEVE,
1411 		[QEDE_PKT_TYPE_TUNN_GRE] = RTE_PTYPE_TUNNEL_GRE,
1412 		[QEDE_PKT_TYPE_TUNN_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN,
1413 		[QEDE_PKT_TYPE_TUNN_L2_TENID_NOEXIST_GENEVE] =
1414 				RTE_PTYPE_TUNNEL_GENEVE,
1415 		[QEDE_PKT_TYPE_TUNN_L2_TENID_NOEXIST_GRE] =
1416 				RTE_PTYPE_TUNNEL_GRE,
1417 		[QEDE_PKT_TYPE_TUNN_L2_TENID_NOEXIST_VXLAN] =
1418 				RTE_PTYPE_TUNNEL_VXLAN,
1419 		[QEDE_PKT_TYPE_TUNN_L2_TENID_EXIST_GENEVE] =
1420 				RTE_PTYPE_TUNNEL_GENEVE,
1421 		[QEDE_PKT_TYPE_TUNN_L2_TENID_EXIST_GRE] =
1422 				RTE_PTYPE_TUNNEL_GRE,
1423 		[QEDE_PKT_TYPE_TUNN_L2_TENID_EXIST_VXLAN] =
1424 				RTE_PTYPE_TUNNEL_VXLAN,
1425 		[QEDE_PKT_TYPE_TUNN_IPV4_TENID_NOEXIST_GENEVE] =
1426 				RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L3_IPV4,
1427 		[QEDE_PKT_TYPE_TUNN_IPV4_TENID_NOEXIST_GRE] =
1428 				RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_L3_IPV4,
1429 		[QEDE_PKT_TYPE_TUNN_IPV4_TENID_NOEXIST_VXLAN] =
1430 				RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L3_IPV4,
1431 		[QEDE_PKT_TYPE_TUNN_IPV4_TENID_EXIST_GENEVE] =
1432 				RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L3_IPV4,
1433 		[QEDE_PKT_TYPE_TUNN_IPV4_TENID_EXIST_GRE] =
1434 				RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_L3_IPV4,
1435 		[QEDE_PKT_TYPE_TUNN_IPV4_TENID_EXIST_VXLAN] =
1436 				RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L3_IPV4,
1437 		[QEDE_PKT_TYPE_TUNN_IPV6_TENID_NOEXIST_GENEVE] =
1438 				RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L3_IPV6,
1439 		[QEDE_PKT_TYPE_TUNN_IPV6_TENID_NOEXIST_GRE] =
1440 				RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_L3_IPV6,
1441 		[QEDE_PKT_TYPE_TUNN_IPV6_TENID_NOEXIST_VXLAN] =
1442 				RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L3_IPV6,
1443 		[QEDE_PKT_TYPE_TUNN_IPV6_TENID_EXIST_GENEVE] =
1444 				RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L3_IPV6,
1445 		[QEDE_PKT_TYPE_TUNN_IPV6_TENID_EXIST_GRE] =
1446 				RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_L3_IPV6,
1447 		[QEDE_PKT_TYPE_TUNN_IPV6_TENID_EXIST_VXLAN] =
1448 				RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L3_IPV6,
1449 	};
1450 
1451 	/* Cover bits[4-0] to include tunn_type and next protocol */
1452 	val = ((ETH_TUNNEL_PARSING_FLAGS_TYPE_MASK <<
1453 		ETH_TUNNEL_PARSING_FLAGS_TYPE_SHIFT) |
1454 		(ETH_TUNNEL_PARSING_FLAGS_NEXT_PROTOCOL_MASK <<
1455 		ETH_TUNNEL_PARSING_FLAGS_NEXT_PROTOCOL_SHIFT)) & flags;
1456 
1457 	if (val < QEDE_PKT_TYPE_TUNN_MAX_TYPE)
1458 		return ptype_tunn_lkup_tbl[val];
1459 	else
1460 		return RTE_PTYPE_UNKNOWN;
1461 }
1462 
1463 static inline int
1464 qede_process_sg_pkts(void *p_rxq,  struct rte_mbuf *rx_mb,
1465 		     uint8_t num_segs, uint16_t pkt_len)
1466 {
1467 	struct qede_rx_queue *rxq = p_rxq;
1468 	struct qede_dev *qdev = rxq->qdev;
1469 	register struct rte_mbuf *seg1 = NULL;
1470 	register struct rte_mbuf *seg2 = NULL;
1471 	uint16_t sw_rx_index;
1472 	uint16_t cur_size;
1473 
1474 	seg1 = rx_mb;
1475 	while (num_segs) {
1476 		cur_size = pkt_len > rxq->rx_buf_size ? rxq->rx_buf_size :
1477 							pkt_len;
1478 		if (unlikely(!cur_size)) {
1479 			PMD_RX_LOG(ERR, rxq, "Length is 0 while %u BDs"
1480 				   " left for mapping jumbo\n", num_segs);
1481 			qede_recycle_rx_bd_ring(rxq, qdev, num_segs);
1482 			return -EINVAL;
1483 		}
1484 		sw_rx_index = rxq->sw_rx_cons & NUM_RX_BDS(rxq);
1485 		seg2 = rxq->sw_rx_ring[sw_rx_index].mbuf;
1486 		qede_rx_bd_ring_consume(rxq);
1487 		pkt_len -= cur_size;
1488 		seg2->data_len = cur_size;
1489 		seg1->next = seg2;
1490 		seg1 = seg1->next;
1491 		num_segs--;
1492 		rxq->rx_segs++;
1493 	}
1494 
1495 	return 0;
1496 }
1497 
1498 #ifdef RTE_LIBRTE_QEDE_DEBUG_RX
1499 static inline void
1500 print_rx_bd_info(struct rte_mbuf *m, struct qede_rx_queue *rxq,
1501 		 uint8_t bitfield)
1502 {
1503 	PMD_RX_LOG(INFO, rxq,
1504 		"len 0x%04x bf 0x%04x hash_val 0x%x"
1505 		" ol_flags 0x%04lx l2=%s l3=%s l4=%s tunn=%s"
1506 		" inner_l2=%s inner_l3=%s inner_l4=%s\n",
1507 		m->data_len, bitfield, m->hash.rss,
1508 		(unsigned long)m->ol_flags,
1509 		rte_get_ptype_l2_name(m->packet_type),
1510 		rte_get_ptype_l3_name(m->packet_type),
1511 		rte_get_ptype_l4_name(m->packet_type),
1512 		rte_get_ptype_tunnel_name(m->packet_type),
1513 		rte_get_ptype_inner_l2_name(m->packet_type),
1514 		rte_get_ptype_inner_l3_name(m->packet_type),
1515 		rte_get_ptype_inner_l4_name(m->packet_type));
1516 }
1517 #endif
1518 
1519 uint16_t
1520 qede_recv_pkts_regular(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
1521 {
1522 	struct eth_fast_path_rx_reg_cqe *fp_cqe = NULL;
1523 	register struct rte_mbuf *rx_mb = NULL;
1524 	struct qede_rx_queue *rxq = p_rxq;
1525 	struct qede_dev *qdev = rxq->qdev;
1526 	struct ecore_dev *edev = &qdev->edev;
1527 	union eth_rx_cqe *cqe;
1528 	uint64_t ol_flags;
1529 	enum eth_rx_cqe_type cqe_type;
1530 	int rss_enable = qdev->rss_enable;
1531 	int rx_alloc_count = 0;
1532 	uint32_t packet_type;
1533 	uint32_t rss_hash;
1534 	uint16_t vlan_tci, port_id;
1535 	uint16_t hw_comp_cons, sw_comp_cons, sw_rx_index, num_rx_bds;
1536 	uint16_t rx_pkt = 0;
1537 	uint16_t pkt_len = 0;
1538 	uint16_t len; /* Length of first BD */
1539 	uint16_t preload_idx;
1540 	uint16_t parse_flag;
1541 #ifdef RTE_LIBRTE_QEDE_DEBUG_RX
1542 	uint8_t bitfield_val;
1543 #endif
1544 	uint8_t offset, flags, bd_num;
1545 
1546 
1547 	/* Allocate buffers that we used in previous loop */
1548 	if (rxq->rx_alloc_count) {
1549 		if (unlikely(qede_alloc_rx_bulk_mbufs(rxq,
1550 			     rxq->rx_alloc_count))) {
1551 			struct rte_eth_dev *dev;
1552 
1553 			PMD_RX_LOG(ERR, rxq,
1554 				   "New buffer allocation failed, "
1555 				   "dropping incoming packet\n");
1556 			dev = &rte_eth_devices[rxq->port_id];
1557 			dev->data->rx_mbuf_alloc_failed +=
1558 							rxq->rx_alloc_count;
1559 			rxq->rx_alloc_errors += rxq->rx_alloc_count;
1560 			return 0;
1561 		}
1562 		qede_update_rx_prod(qdev, rxq);
1563 		rxq->rx_alloc_count = 0;
1564 	}
1565 
1566 	hw_comp_cons = rte_le_to_cpu_16(*rxq->hw_cons_ptr);
1567 	sw_comp_cons = ecore_chain_get_cons_idx(&rxq->rx_comp_ring);
1568 
1569 	rte_rmb();
1570 
1571 	if (hw_comp_cons == sw_comp_cons)
1572 		return 0;
1573 
1574 	num_rx_bds =  NUM_RX_BDS(rxq);
1575 	port_id = rxq->port_id;
1576 
1577 	while (sw_comp_cons != hw_comp_cons) {
1578 		ol_flags = 0;
1579 		packet_type = RTE_PTYPE_UNKNOWN;
1580 		vlan_tci = 0;
1581 		rss_hash = 0;
1582 
1583 		/* Get the CQE from the completion ring */
1584 		cqe =
1585 		    (union eth_rx_cqe *)ecore_chain_consume(&rxq->rx_comp_ring);
1586 		cqe_type = cqe->fast_path_regular.type;
1587 		PMD_RX_LOG(INFO, rxq, "Rx CQE type %d\n", cqe_type);
1588 
1589 		if (likely(cqe_type == ETH_RX_CQE_TYPE_REGULAR)) {
1590 			fp_cqe = &cqe->fast_path_regular;
1591 		} else {
1592 			if (cqe_type == ETH_RX_CQE_TYPE_SLOW_PATH) {
1593 				PMD_RX_LOG(INFO, rxq, "Got unexpected slowpath CQE\n");
1594 				ecore_eth_cqe_completion
1595 					(&edev->hwfns[rxq->queue_id %
1596 						      edev->num_hwfns],
1597 					 (struct eth_slow_path_rx_cqe *)cqe);
1598 			}
1599 			goto next_cqe;
1600 		}
1601 
1602 		/* Get the data from the SW ring */
1603 		sw_rx_index = rxq->sw_rx_cons & num_rx_bds;
1604 		rx_mb = rxq->sw_rx_ring[sw_rx_index].mbuf;
1605 		assert(rx_mb != NULL);
1606 
1607 		parse_flag = rte_le_to_cpu_16(fp_cqe->pars_flags.flags);
1608 		offset = fp_cqe->placement_offset;
1609 		len = rte_le_to_cpu_16(fp_cqe->len_on_first_bd);
1610 		pkt_len = rte_le_to_cpu_16(fp_cqe->pkt_len);
1611 		vlan_tci = rte_le_to_cpu_16(fp_cqe->vlan_tag);
1612 		rss_hash = rte_le_to_cpu_32(fp_cqe->rss_hash);
1613 		bd_num = fp_cqe->bd_num;
1614 #ifdef RTE_LIBRTE_QEDE_DEBUG_RX
1615 		bitfield_val = fp_cqe->bitfields;
1616 #endif
1617 
1618 		if (unlikely(qede_tunn_exist(parse_flag))) {
1619 			PMD_RX_LOG(INFO, rxq, "Rx tunneled packet\n");
1620 			if (unlikely(qede_check_tunn_csum_l4(parse_flag))) {
1621 				PMD_RX_LOG(ERR, rxq,
1622 					    "L4 csum failed, flags = 0x%x\n",
1623 					    parse_flag);
1624 				rxq->rx_hw_errors++;
1625 				ol_flags |= PKT_RX_L4_CKSUM_BAD;
1626 			} else {
1627 				ol_flags |= PKT_RX_L4_CKSUM_GOOD;
1628 			}
1629 
1630 			if (unlikely(qede_check_tunn_csum_l3(parse_flag))) {
1631 				PMD_RX_LOG(ERR, rxq,
1632 					"Outer L3 csum failed, flags = 0x%x\n",
1633 					parse_flag);
1634 				rxq->rx_hw_errors++;
1635 				ol_flags |= PKT_RX_EIP_CKSUM_BAD;
1636 			} else {
1637 				ol_flags |= PKT_RX_IP_CKSUM_GOOD;
1638 			}
1639 
1640 			flags = fp_cqe->tunnel_pars_flags.flags;
1641 
1642 			/* Tunnel_type */
1643 			packet_type =
1644 				qede_rx_cqe_to_tunn_pkt_type(flags);
1645 
1646 			/* Inner header */
1647 			packet_type |=
1648 			      qede_rx_cqe_to_pkt_type_inner(parse_flag);
1649 
1653 			/* Outer L3/L4 types are not available in the CQE.
1654 			 * Add the placement offset to parse them correctly.
1655 			 */
1656 			rx_mb->data_off = offset + RTE_PKTMBUF_HEADROOM;
1657 			packet_type |= qede_rx_cqe_to_pkt_type_outer(rx_mb);
1658 		} else {
1659 			packet_type |= qede_rx_cqe_to_pkt_type(parse_flag);
1660 		}
1661 
1662 		/* Common handling for non-tunnel packets and for inner
1663 		 * headers in the case of tunnel.
1664 		 */
1665 		if (unlikely(qede_check_notunn_csum_l4(parse_flag))) {
1666 			PMD_RX_LOG(ERR, rxq,
1667 				    "L4 csum failed, flags = 0x%x\n",
1668 				    parse_flag);
1669 			rxq->rx_hw_errors++;
1670 			ol_flags |= PKT_RX_L4_CKSUM_BAD;
1671 		} else {
1672 			ol_flags |= PKT_RX_L4_CKSUM_GOOD;
1673 		}
1674 		if (unlikely(qede_check_notunn_csum_l3(rx_mb, parse_flag))) {
1675 			PMD_RX_LOG(ERR, rxq, "IP csum failed, flags = 0x%x\n",
1676 				   parse_flag);
1677 			rxq->rx_hw_errors++;
1678 			ol_flags |= PKT_RX_IP_CKSUM_BAD;
1679 		} else {
1680 			ol_flags |= PKT_RX_IP_CKSUM_GOOD;
1681 		}
1682 
1683 		if (unlikely(CQE_HAS_VLAN(parse_flag) ||
1684 			     CQE_HAS_OUTER_VLAN(parse_flag))) {
1685 			/* Note: FW doesn't indicate Q-in-Q packet */
1686 			ol_flags |= PKT_RX_VLAN;
1687 			if (qdev->vlan_strip_flg) {
1688 				ol_flags |= PKT_RX_VLAN_STRIPPED;
1689 				rx_mb->vlan_tci = vlan_tci;
1690 			}
1691 		}
1692 
1693 		if (rss_enable) {
1694 			ol_flags |= PKT_RX_RSS_HASH;
1695 			rx_mb->hash.rss = rss_hash;
1696 		}
1697 
1698 		rx_alloc_count++;
1699 		qede_rx_bd_ring_consume(rxq);
1700 
1701 		/* Prefetch next mbuf while processing current one. */
1702 		preload_idx = rxq->sw_rx_cons & num_rx_bds;
1703 		rte_prefetch0(rxq->sw_rx_ring[preload_idx].mbuf);
1704 
1705 		/* Update rest of the MBUF fields */
1706 		rx_mb->data_off = offset + RTE_PKTMBUF_HEADROOM;
1707 		rx_mb->port = port_id;
1708 		rx_mb->ol_flags = ol_flags;
1709 		rx_mb->data_len = len;
1710 		rx_mb->packet_type = packet_type;
1711 #ifdef RTE_LIBRTE_QEDE_DEBUG_RX
1712 		print_rx_bd_info(rx_mb, rxq, bitfield_val);
1713 #endif
1714 		rx_mb->nb_segs = bd_num;
1715 		rx_mb->pkt_len = pkt_len;
1716 
1717 		rx_pkts[rx_pkt] = rx_mb;
1718 		rx_pkt++;
1719 
1720 next_cqe:
1721 		ecore_chain_recycle_consumed(&rxq->rx_comp_ring);
1722 		sw_comp_cons = ecore_chain_get_cons_idx(&rxq->rx_comp_ring);
1723 		if (rx_pkt == nb_pkts) {
1724 			PMD_RX_LOG(DEBUG, rxq,
1725 				   "Budget reached nb_pkts=%u received=%u",
1726 				   rx_pkt, nb_pkts);
1727 			break;
1728 		}
1729 	}
1730 
1731 	/* Request number of buffers to be allocated in the next loop */
1732 	rxq->rx_alloc_count = rx_alloc_count;
1733 
1734 	rxq->rcv_pkts += rx_pkt;
1735 	rxq->rx_segs += rx_pkt;
1736 	PMD_RX_LOG(DEBUG, rxq, "rx_pkts=%u core=%d", rx_pkt, rte_lcore_id());
1737 
1738 	return rx_pkt;
1739 }
1740 
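/* Full-featured Rx burst handler: in addition to regular CQEs it handles
 * LRO/TPA aggregation CQEs (TPA_START/CONT/END) and scattered packets that
 * span multiple BDs (see qede_process_sg_pkts()).
 */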
1741 uint16_t
1742 qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
1743 {
1744 	struct qede_rx_queue *rxq = p_rxq;
1745 	struct qede_dev *qdev = rxq->qdev;
1746 	struct ecore_dev *edev = &qdev->edev;
1747 	uint16_t hw_comp_cons, sw_comp_cons, sw_rx_index;
1748 	uint16_t rx_pkt = 0;
1749 	union eth_rx_cqe *cqe;
1750 	struct eth_fast_path_rx_reg_cqe *fp_cqe = NULL;
1751 	register struct rte_mbuf *rx_mb = NULL;
1752 	register struct rte_mbuf *seg1 = NULL;
1753 	enum eth_rx_cqe_type cqe_type;
1754 	uint16_t pkt_len = 0; /* Sum of all BD segments */
1755 	uint16_t len; /* Length of first BD */
1756 	uint8_t num_segs = 1;
1757 	uint16_t preload_idx;
1758 	uint16_t parse_flag;
1759 #ifdef RTE_LIBRTE_QEDE_DEBUG_RX
1760 	uint8_t bitfield_val;
1761 #endif
1762 	uint8_t tunn_parse_flag;
1763 	struct eth_fast_path_rx_tpa_start_cqe *cqe_start_tpa;
1764 	uint64_t ol_flags;
1765 	uint32_t packet_type;
1766 	uint16_t vlan_tci;
1767 	bool tpa_start_flg;
1768 	uint8_t offset, tpa_agg_idx, flags;
1769 	struct qede_agg_info *tpa_info = NULL;
1770 	uint32_t rss_hash;
1771 	int rx_alloc_count = 0;
1772 
1773 
1774 	/* Allocate buffers that we used in previous loop */
1775 	if (rxq->rx_alloc_count) {
1776 		if (unlikely(qede_alloc_rx_bulk_mbufs(rxq,
1777 			     rxq->rx_alloc_count))) {
1778 			struct rte_eth_dev *dev;
1779 
1780 			PMD_RX_LOG(ERR, rxq,
1781 				   "New buffer allocation failed, "
1782 				   "dropping incoming packet\n");
1783 			dev = &rte_eth_devices[rxq->port_id];
1784 			dev->data->rx_mbuf_alloc_failed +=
1785 							rxq->rx_alloc_count;
1786 			rxq->rx_alloc_errors += rxq->rx_alloc_count;
1787 			return 0;
1788 		}
1789 		qede_update_rx_prod(qdev, rxq);
1790 		rxq->rx_alloc_count = 0;
1791 	}
1792 
1793 	hw_comp_cons = rte_le_to_cpu_16(*rxq->hw_cons_ptr);
1794 	sw_comp_cons = ecore_chain_get_cons_idx(&rxq->rx_comp_ring);
1795 
1796 	rte_rmb();
1797 
1798 	if (hw_comp_cons == sw_comp_cons)
1799 		return 0;
1800 
1801 	while (sw_comp_cons != hw_comp_cons) {
1802 		ol_flags = 0;
1803 		packet_type = RTE_PTYPE_UNKNOWN;
1804 		vlan_tci = 0;
1805 		tpa_start_flg = false;
1806 		rss_hash = 0;
1807 
1808 		/* Get the CQE from the completion ring */
1809 		cqe =
1810 		    (union eth_rx_cqe *)ecore_chain_consume(&rxq->rx_comp_ring);
1811 		cqe_type = cqe->fast_path_regular.type;
1812 		PMD_RX_LOG(INFO, rxq, "Rx CQE type %d\n", cqe_type);
1813 
1814 		switch (cqe_type) {
1815 		case ETH_RX_CQE_TYPE_REGULAR:
1816 			fp_cqe = &cqe->fast_path_regular;
1817 		break;
1818 		case ETH_RX_CQE_TYPE_TPA_START:
1819 			cqe_start_tpa = &cqe->fast_path_tpa_start;
1820 			tpa_info = &rxq->tpa_info[cqe_start_tpa->tpa_agg_index];
1821 			tpa_start_flg = true;
1822 			/* Mark it as LRO packet */
1823 			ol_flags |= PKT_RX_LRO;
1824 			/* In split mode, seg_len is the same as len_on_first_bd
1825 			 * and bw_ext_bd_len_list will be empty since there are
1826 			 * no additional buffers
1827 			 */
1828 			PMD_RX_LOG(INFO, rxq,
1829 			 "TPA start[%d] - len_on_first_bd %d header %d"
1830 			 " [bd_list[0] %d], [seg_len %d]\n",
1831 			 cqe_start_tpa->tpa_agg_index,
1832 			 rte_le_to_cpu_16(cqe_start_tpa->len_on_first_bd),
1833 			 cqe_start_tpa->header_len,
1834 			 rte_le_to_cpu_16(cqe_start_tpa->bw_ext_bd_len_list[0]),
1835 			 rte_le_to_cpu_16(cqe_start_tpa->seg_len));
1836 
1837 		break;
1838 		case ETH_RX_CQE_TYPE_TPA_CONT:
1839 			qede_rx_process_tpa_cont_cqe(qdev, rxq,
1840 						     &cqe->fast_path_tpa_cont);
1841 			goto next_cqe;
1842 		case ETH_RX_CQE_TYPE_TPA_END:
1843 			qede_rx_process_tpa_end_cqe(qdev, rxq,
1844 						    &cqe->fast_path_tpa_end);
1845 			tpa_agg_idx = cqe->fast_path_tpa_end.tpa_agg_index;
1846 			tpa_info = &rxq->tpa_info[tpa_agg_idx];
1847 			rx_mb = rxq->tpa_info[tpa_agg_idx].tpa_head;
1848 			goto tpa_end;
1849 		case ETH_RX_CQE_TYPE_SLOW_PATH:
1850 			PMD_RX_LOG(INFO, rxq, "Got unexpected slowpath CQE\n");
1851 			ecore_eth_cqe_completion(
1852 				&edev->hwfns[rxq->queue_id % edev->num_hwfns],
1853 				(struct eth_slow_path_rx_cqe *)cqe);
1854 			/* fall-thru */
1855 		default:
1856 			goto next_cqe;
1857 		}
1858 
1859 		/* Get the data from the SW ring */
1860 		sw_rx_index = rxq->sw_rx_cons & NUM_RX_BDS(rxq);
1861 		rx_mb = rxq->sw_rx_ring[sw_rx_index].mbuf;
1862 		assert(rx_mb != NULL);
1863 
1864 		/* Handle regular CQE or TPA start CQE */
1865 		if (!tpa_start_flg) {
1866 			parse_flag = rte_le_to_cpu_16(fp_cqe->pars_flags.flags);
1867 			offset = fp_cqe->placement_offset;
1868 			len = rte_le_to_cpu_16(fp_cqe->len_on_first_bd);
1869 			pkt_len = rte_le_to_cpu_16(fp_cqe->pkt_len);
1870 			vlan_tci = rte_le_to_cpu_16(fp_cqe->vlan_tag);
1871 			rss_hash = rte_le_to_cpu_32(fp_cqe->rss_hash);
1872 #ifdef RTE_LIBRTE_QEDE_DEBUG_RX
1873 			bitfield_val = fp_cqe->bitfields;
1874 #endif
1875 		} else {
1876 			parse_flag =
1877 			    rte_le_to_cpu_16(cqe_start_tpa->pars_flags.flags);
1878 			offset = cqe_start_tpa->placement_offset;
1879 			/* seg_len = len_on_first_bd */
1880 			len = rte_le_to_cpu_16(cqe_start_tpa->len_on_first_bd);
1881 			vlan_tci = rte_le_to_cpu_16(cqe_start_tpa->vlan_tag);
1882 #ifdef RTE_LIBRTE_QEDE_DEBUG_RX
1883 			bitfield_val = cqe_start_tpa->bitfields;
1884 #endif
1885 			rss_hash = rte_le_to_cpu_32(cqe_start_tpa->rss_hash);
1886 		}
1887 		if (qede_tunn_exist(parse_flag)) {
1888 			PMD_RX_LOG(INFO, rxq, "Rx tunneled packet\n");
1889 			if (unlikely(qede_check_tunn_csum_l4(parse_flag))) {
1890 				PMD_RX_LOG(ERR, rxq,
1891 					    "L4 csum failed, flags = 0x%x\n",
1892 					    parse_flag);
1893 				rxq->rx_hw_errors++;
1894 				ol_flags |= PKT_RX_L4_CKSUM_BAD;
1895 			} else {
1896 				ol_flags |= PKT_RX_L4_CKSUM_GOOD;
1897 			}
1898 
1899 			if (unlikely(qede_check_tunn_csum_l3(parse_flag))) {
1900 				PMD_RX_LOG(ERR, rxq,
1901 					"Outer L3 csum failed, flags = 0x%x\n",
1902 					parse_flag);
1903 				rxq->rx_hw_errors++;
1904 				ol_flags |= PKT_RX_EIP_CKSUM_BAD;
1905 			} else {
1906 				ol_flags |= PKT_RX_IP_CKSUM_GOOD;
1907 			}
1908 
1909 			if (tpa_start_flg)
1910 				flags = cqe_start_tpa->tunnel_pars_flags.flags;
1911 			else
1912 				flags = fp_cqe->tunnel_pars_flags.flags;
1913 			tunn_parse_flag = flags;
1914 
1915 			/* Tunnel_type */
1916 			packet_type =
1917 				qede_rx_cqe_to_tunn_pkt_type(tunn_parse_flag);
1918 
1919 			/* Inner header */
1920 			packet_type |=
1921 			      qede_rx_cqe_to_pkt_type_inner(parse_flag);
1922 
1924 			/* Outer L3/L4 types are not available in the CQE, so
1925 			 * they are parsed from the packet data itself; apply
1926 			 * the placement offset first so parsing starts at the
1927 			 * right location.
1928 			 */
1929 			rx_mb->data_off = offset + RTE_PKTMBUF_HEADROOM;
1930 			packet_type |= qede_rx_cqe_to_pkt_type_outer(rx_mb);
1931 		} else {
1932 			packet_type |= qede_rx_cqe_to_pkt_type(parse_flag);
1933 		}
1934 
1935 		/* Common handling for non-tunnel packets and for inner
1936 		 * headers in the case of a tunnel.
1937 		 */
1938 		if (unlikely(qede_check_notunn_csum_l4(parse_flag))) {
1939 			PMD_RX_LOG(ERR, rxq,
1940 				    "L4 csum failed, flags = 0x%x\n",
1941 				    parse_flag);
1942 			rxq->rx_hw_errors++;
1943 			ol_flags |= PKT_RX_L4_CKSUM_BAD;
1944 		} else {
1945 			ol_flags |= PKT_RX_L4_CKSUM_GOOD;
1946 		}
1947 		if (unlikely(qede_check_notunn_csum_l3(rx_mb, parse_flag))) {
1948 			PMD_RX_LOG(ERR, rxq, "IP csum failed, flags = 0x%x\n",
1949 				   parse_flag);
1950 			rxq->rx_hw_errors++;
1951 			ol_flags |= PKT_RX_IP_CKSUM_BAD;
1952 		} else {
1953 			ol_flags |= PKT_RX_IP_CKSUM_GOOD;
1954 		}
1955 
1956 		if (CQE_HAS_VLAN(parse_flag) ||
1957 		    CQE_HAS_OUTER_VLAN(parse_flag)) {
1958 			/* Note: FW doesn't indicate Q-in-Q packet */
1959 			ol_flags |= PKT_RX_VLAN;
1960 			if (qdev->vlan_strip_flg) {
1961 				ol_flags |= PKT_RX_VLAN_STRIPPED;
1962 				rx_mb->vlan_tci = vlan_tci;
1963 			}
1964 		}
1965 
1966 		/* RSS Hash */
1967 		if (qdev->rss_enable) {
1968 			ol_flags |= PKT_RX_RSS_HASH;
1969 			rx_mb->hash.rss = rss_hash;
1970 		}
1971 
1972 		rx_alloc_count++;
1973 		qede_rx_bd_ring_consume(rxq);
1974 
1975 		if (!tpa_start_flg && fp_cqe->bd_num > 1) {
1976 			PMD_RX_LOG(DEBUG, rxq, "Jumbo-over-BD packet: %02x BDs"
1977 				   " len on first: %04x Total Len: %04x",
1978 				   fp_cqe->bd_num, len, pkt_len);
1979 			num_segs = fp_cqe->bd_num - 1;
1980 			seg1 = rx_mb;
1981 			if (qede_process_sg_pkts(p_rxq, seg1, num_segs,
1982 						 pkt_len - len))
1983 				goto next_cqe;
1984 
1985 			rx_alloc_count += num_segs;
1986 			rxq->rx_segs += num_segs;
1987 		}
1988 		rxq->rx_segs++; /* for the first segment */
1989 
1990 		/* Prefetch next mbuf while processing current one. */
1991 		preload_idx = rxq->sw_rx_cons & NUM_RX_BDS(rxq);
1992 		rte_prefetch0(rxq->sw_rx_ring[preload_idx].mbuf);
1993 
1994 		/* Update rest of the MBUF fields */
1995 		rx_mb->data_off = offset + RTE_PKTMBUF_HEADROOM;
1996 		rx_mb->port = rxq->port_id;
1997 		rx_mb->ol_flags = ol_flags;
1998 		rx_mb->data_len = len;
1999 		rx_mb->packet_type = packet_type;
2000 #ifdef RTE_LIBRTE_QEDE_DEBUG_RX
2001 		print_rx_bd_info(rx_mb, rxq, bitfield_val);
2002 #endif
2003 		if (!tpa_start_flg) {
2004 			rx_mb->nb_segs = fp_cqe->bd_num;
2005 			rx_mb->pkt_len = pkt_len;
2006 		} else {
2007 			/* store ref to the updated mbuf */
2008 			tpa_info->tpa_head = rx_mb;
2009 			tpa_info->tpa_tail = tpa_info->tpa_head;
2010 		}
2011 		rte_prefetch1(rte_pktmbuf_mtod(rx_mb, void *));
2012 tpa_end:
2013 		if (!tpa_start_flg) {
2014 			rx_pkts[rx_pkt] = rx_mb;
2015 			rx_pkt++;
2016 		}
2017 next_cqe:
2018 		ecore_chain_recycle_consumed(&rxq->rx_comp_ring);
2019 		sw_comp_cons = ecore_chain_get_cons_idx(&rxq->rx_comp_ring);
2020 		if (rx_pkt == nb_pkts) {
2021 			PMD_RX_LOG(DEBUG, rxq,
2022 				   "Budget reached nb_pkts=%u received=%u",
2023 				   rx_pkt, nb_pkts);
2024 			break;
2025 		}
2026 	}
2027 
2028 	/* Request number of buffers to be allocated in the next loop */
2029 	rxq->rx_alloc_count = rx_alloc_count;
2030 
2031 	rxq->rcv_pkts += rx_pkt;
2032 
2033 	PMD_RX_LOG(DEBUG, rxq, "rx_pkts=%u core=%d", rx_pkt, rte_lcore_id());
2034 
2035 	return rx_pkt;
2036 }
2037 
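/* Rx burst handler for CMT devices with two engines: the caller's budget is
 * split between the per-engine Rx queues (fp0/fp1) and the packets from both
 * are returned in a single array.
 */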
2038 uint16_t
2039 qede_recv_pkts_cmt(void *p_fp_cmt, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
2040 {
2041 	struct qede_fastpath_cmt *fp_cmt = p_fp_cmt;
2042 	uint16_t eng0_pkts, eng1_pkts;
2043 
2044 	eng0_pkts = nb_pkts / 2;
2045 
2046 	eng0_pkts = qede_recv_pkts(fp_cmt->fp0->rxq, rx_pkts, eng0_pkts);
2047 
2048 	eng1_pkts = nb_pkts - eng0_pkts;
2049 
2050 	eng1_pkts = qede_recv_pkts(fp_cmt->fp1->rxq, rx_pkts + eng0_pkts,
2051 				   eng1_pkts);
2052 
2053 	return eng0_pkts + eng1_pkts;
2054 }
2055 
2056 /* Populate scatter gather buffer descriptor fields */
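/* The first BD is always built by the caller; based on start_seg (how many
 * BDs the caller has already filled), the remaining mbuf segments are placed
 * into BD2, BD3 and then plain Tx BDs. The number of BDs produced here is
 * returned so the caller can account for them in bd1->data.nbds.
 */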
2057 static inline uint16_t
2058 qede_encode_sg_bd(struct qede_tx_queue *p_txq, struct rte_mbuf *m_seg,
2059 		  struct eth_tx_2nd_bd **bd2, struct eth_tx_3rd_bd **bd3,
2060 		  uint16_t start_seg)
2061 {
2062 	struct qede_tx_queue *txq = p_txq;
2063 	struct eth_tx_bd *tx_bd = NULL;
2064 	dma_addr_t mapping;
2065 	uint16_t nb_segs = 0;
2066 
2067 	/* Check for scattered buffers */
2068 	while (m_seg) {
2069 		if (start_seg == 0) {
2070 			if (!*bd2) {
2071 				*bd2 = (struct eth_tx_2nd_bd *)
2072 					ecore_chain_produce(&txq->tx_pbl);
2073 				memset(*bd2, 0, sizeof(struct eth_tx_2nd_bd));
2074 				nb_segs++;
2075 			}
2076 			mapping = rte_mbuf_data_iova(m_seg);
2077 			QEDE_BD_SET_ADDR_LEN(*bd2, mapping, m_seg->data_len);
2078 			PMD_TX_LOG(DEBUG, txq, "BD2 len %04x", m_seg->data_len);
2079 		} else if (start_seg == 1) {
2080 			if (!*bd3) {
2081 				*bd3 = (struct eth_tx_3rd_bd *)
2082 					ecore_chain_produce(&txq->tx_pbl);
2083 				memset(*bd3, 0, sizeof(struct eth_tx_3rd_bd));
2084 				nb_segs++;
2085 			}
2086 			mapping = rte_mbuf_data_iova(m_seg);
2087 			QEDE_BD_SET_ADDR_LEN(*bd3, mapping, m_seg->data_len);
2088 			PMD_TX_LOG(DEBUG, txq, "BD3 len %04x", m_seg->data_len);
2089 		} else {
2090 			tx_bd = (struct eth_tx_bd *)
2091 				ecore_chain_produce(&txq->tx_pbl);
2092 			memset(tx_bd, 0, sizeof(*tx_bd));
2093 			nb_segs++;
2094 			mapping = rte_mbuf_data_iova(m_seg);
2095 			QEDE_BD_SET_ADDR_LEN(tx_bd, mapping, m_seg->data_len);
2096 			PMD_TX_LOG(DEBUG, txq, "BD len %04x", m_seg->data_len);
2097 		}
2098 		start_seg++;
2099 		m_seg = m_seg->next;
2100 	}
2101 
2102 	/* Return total scattered buffers */
2103 	return nb_segs;
2104 }
2105 
2106 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX
2107 static inline void
2108 print_tx_bd_info(struct qede_tx_queue *txq,
2109 		 struct eth_tx_1st_bd *bd1,
2110 		 struct eth_tx_2nd_bd *bd2,
2111 		 struct eth_tx_3rd_bd *bd3,
2112 		 uint64_t tx_ol_flags)
2113 {
2114 	char ol_buf[256] = { 0 }; /* for verbose prints */
2115 
2116 	if (bd1)
2117 		PMD_TX_LOG(INFO, txq,
2118 		   "BD1: nbytes=0x%04x nbds=0x%04x bd_flags=0x%04x bf=0x%04x",
2119 		   rte_cpu_to_le_16(bd1->nbytes), bd1->data.nbds,
2120 		   bd1->data.bd_flags.bitfields,
2121 		   rte_cpu_to_le_16(bd1->data.bitfields));
2122 	if (bd2)
2123 		PMD_TX_LOG(INFO, txq,
2124 		   "BD2: nbytes=0x%04x bf1=0x%04x bf2=0x%04x tunn_ip=0x%04x\n",
2125 		   rte_cpu_to_le_16(bd2->nbytes), bd2->data.bitfields1,
2126 		   bd2->data.bitfields2, bd2->data.tunn_ip_size);
2127 	if (bd3)
2128 		PMD_TX_LOG(INFO, txq,
2129 		   "BD3: nbytes=0x%04x bf=0x%04x MSS=0x%04x "
2130 		   "tunn_l4_hdr_start_offset_w=0x%04x tunn_hdr_size=0x%04x\n",
2131 		   rte_cpu_to_le_16(bd3->nbytes),
2132 		   rte_cpu_to_le_16(bd3->data.bitfields),
2133 		   rte_cpu_to_le_16(bd3->data.lso_mss),
2134 		   bd3->data.tunn_l4_hdr_start_offset_w,
2135 		   bd3->data.tunn_hdr_size_w);
2136 
2137 	rte_get_tx_ol_flag_list(tx_ol_flags, ol_buf, sizeof(ol_buf));
2138 	PMD_TX_LOG(INFO, txq, "TX offloads = %s\n", ol_buf);
2139 }
2140 #endif
2141 
2142 /* TX prepare to check that packets meet TX conditions */
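/* Illustrative usage from an application (a sketch assuming the standard
 * ethdev burst flow; port_id/queue_id and handle_bad_pkt() are placeholders):
 * running rte_eth_tx_prepare() on the same array before rte_eth_tx_burst()
 * lets this callback reject unsupported offload/segment combinations via
 * rte_errno instead of passing them to the hardware:
 *
 *	uint16_t nb_ok = rte_eth_tx_prepare(port_id, queue_id, pkts, nb_pkts);
 *
 *	if (nb_ok < nb_pkts)
 *		handle_bad_pkt(pkts[nb_ok], rte_errno);
 *	rte_eth_tx_burst(port_id, queue_id, pkts, nb_ok);
 */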
2143 uint16_t
2144 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX
2145 qede_xmit_prep_pkts(void *p_txq, struct rte_mbuf **tx_pkts,
2146 		    uint16_t nb_pkts)
2147 {
2148 	struct qede_tx_queue *txq = p_txq;
2149 #else
2150 qede_xmit_prep_pkts(__rte_unused void *p_txq, struct rte_mbuf **tx_pkts,
2151 		    uint16_t nb_pkts)
2152 {
2153 #endif
2154 	uint64_t ol_flags;
2155 	struct rte_mbuf *m;
2156 	uint16_t i;
2157 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
2158 	int ret;
2159 #endif
2160 
2161 	for (i = 0; i < nb_pkts; i++) {
2162 		m = tx_pkts[i];
2163 		ol_flags = m->ol_flags;
2164 		if (ol_flags & PKT_TX_TCP_SEG) {
2165 			if (m->nb_segs >= ETH_TX_MAX_BDS_PER_LSO_PACKET) {
2166 				rte_errno = EINVAL;
2167 				break;
2168 			}
2169 			/* TBD: confirm it's ~9700B for both? */
2170 			if (m->tso_segsz > ETH_TX_MAX_NON_LSO_PKT_LEN) {
2171 				rte_errno = EINVAL;
2172 				break;
2173 			}
2174 		} else {
2175 			if (m->nb_segs >= ETH_TX_MAX_BDS_PER_NON_LSO_PACKET) {
2176 				rte_errno = EINVAL;
2177 				break;
2178 			}
2179 		}
2180 		if (ol_flags & QEDE_TX_OFFLOAD_NOTSUP_MASK) {
2181 			/* We support only limited tunnel protocols */
2182 			if (ol_flags & PKT_TX_TUNNEL_MASK) {
2183 				uint64_t temp;
2184 
2185 				temp = ol_flags & PKT_TX_TUNNEL_MASK;
2186 				if (temp == PKT_TX_TUNNEL_VXLAN ||
2187 				    temp == PKT_TX_TUNNEL_GENEVE ||
2188 				    temp == PKT_TX_TUNNEL_MPLSINUDP ||
2189 				    temp == PKT_TX_TUNNEL_GRE)
2190 					continue;
2191 			}
2192 
2193 			rte_errno = ENOTSUP;
2194 			break;
2195 		}
2196 
2197 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
2198 		ret = rte_validate_tx_offload(m);
2199 		if (ret != 0) {
2200 			rte_errno = -ret;
2201 			break;
2202 		}
2203 #endif
2204 	}
2205 
2206 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX
2207 	if (unlikely(i != nb_pkts))
2208 		PMD_TX_LOG(ERR, txq, "TX prepare failed for %u\n",
2209 			   nb_pkts - i);
2210 #endif
2211 	return i;
2212 }
2213 
2214 #define MPLSINUDP_HDR_SIZE			(12)
2215 
2216 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX
2217 static inline void
2218 qede_mpls_tunn_tx_sanity_check(struct rte_mbuf *mbuf,
2219 			       struct qede_tx_queue *txq)
2220 {
2221 	if (((mbuf->outer_l2_len + mbuf->outer_l3_len) / 2) > 0xff)
2222 		PMD_TX_LOG(ERR, txq, "tunn_l4_hdr_start_offset overflow\n");
2223 	if (((mbuf->outer_l2_len + mbuf->outer_l3_len +
2224 		MPLSINUDP_HDR_SIZE) / 2) > 0xff)
2225 		PMD_TX_LOG(ERR, txq, "tunn_hdr_size overflow\n");
2226 	if (((mbuf->l2_len - MPLSINUDP_HDR_SIZE) / 2) >
2227 		ETH_TX_DATA_2ND_BD_TUNN_INNER_L2_HDR_SIZE_W_MASK)
2228 		PMD_TX_LOG(ERR, txq, "inner_l2_hdr_size overflow\n");
2229 	if (((mbuf->l2_len - MPLSINUDP_HDR_SIZE + mbuf->l3_len) / 2) >
2230 		ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_MASK)
2231 		PMD_TX_LOG(ERR, txq, "inner_l4_hdr_offset overflow\n");
2232 }
2233 #endif
2234 
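/* Simplified Tx burst handler: it programs only packet length and IP/L4
 * checksum flags in BD1 and chains extra BDs for multi-segment mbufs. LSO,
 * tunnelling and VLAN insertion are not handled here; qede_xmit_pkts() below
 * covers those cases.
 */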
2235 uint16_t
2236 qede_xmit_pkts_regular(void *p_txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
2237 {
2238 	struct qede_tx_queue *txq = p_txq;
2239 	struct qede_dev *qdev = txq->qdev;
2240 	struct ecore_dev *edev = &qdev->edev;
2241 	struct eth_tx_1st_bd *bd1;
2242 	struct eth_tx_2nd_bd *bd2;
2243 	struct eth_tx_3rd_bd *bd3;
2244 	struct rte_mbuf *m_seg = NULL;
2245 	struct rte_mbuf *mbuf;
2246 	struct qede_tx_entry *sw_tx_ring;
2247 	uint16_t nb_tx_pkts;
2248 	uint16_t bd_prod;
2249 	uint16_t idx;
2250 	uint16_t nb_frags = 0;
2251 	uint16_t nb_pkt_sent = 0;
2252 	uint8_t nbds;
2253 	uint64_t tx_ol_flags;
2254 	/* BD1 */
2255 	uint16_t bd1_bf;
2256 	uint8_t bd1_bd_flags_bf;
2257 
2258 	if (unlikely(txq->nb_tx_avail < txq->tx_free_thresh)) {
2259 		PMD_TX_LOG(DEBUG, txq, "send=%u avail=%u free_thresh=%u",
2260 			   nb_pkts, txq->nb_tx_avail, txq->tx_free_thresh);
2261 		qede_process_tx_compl(edev, txq);
2262 	}
2263 
2264 	nb_tx_pkts  = nb_pkts;
2265 	bd_prod = rte_cpu_to_le_16(ecore_chain_get_prod_idx(&txq->tx_pbl));
2266 	sw_tx_ring = txq->sw_tx_ring;
2267 
2268 	while (nb_tx_pkts--) {
2269 		/* Init flags/values */
2270 		nbds = 0;
2271 		bd1 = NULL;
2272 		bd2 = NULL;
2273 		bd3 = NULL;
2274 		bd1_bf = 0;
2275 		bd1_bd_flags_bf = 0;
2276 		nb_frags = 0;
2277 
2278 		mbuf = *tx_pkts++;
2279 		assert(mbuf);
2280 
2281 
2282 		/* Check that enough TX BDs are available for all mbuf segments */
2283 		if (unlikely(txq->nb_tx_avail < mbuf->nb_segs))
2284 			break;
2285 
2286 		tx_ol_flags = mbuf->ol_flags;
2287 		bd1_bd_flags_bf |= 1 << ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT;
2288 
2289 		if (unlikely(txq->nb_tx_avail <
2290 				ETH_TX_MIN_BDS_PER_NON_LSO_PKT))
2291 			break;
2292 		bd1_bf |=
2293 		       (mbuf->pkt_len & ETH_TX_DATA_1ST_BD_PKT_LEN_MASK)
2294 			<< ETH_TX_DATA_1ST_BD_PKT_LEN_SHIFT;
2295 
2296 		/* Offload the IP checksum in the hardware */
2297 		if (tx_ol_flags & PKT_TX_IP_CKSUM)
2298 			bd1_bd_flags_bf |=
2299 				1 << ETH_TX_1ST_BD_FLAGS_IP_CSUM_SHIFT;
2300 
2301 		/* L4 checksum offload (tcp or udp) */
2302 		if ((tx_ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6)) &&
2303 		    (tx_ol_flags & (PKT_TX_UDP_CKSUM | PKT_TX_TCP_CKSUM)))
2304 			bd1_bd_flags_bf |=
2305 				1 << ETH_TX_1ST_BD_FLAGS_L4_CSUM_SHIFT;
2306 
2307 		/* Fill the entry in the SW ring and the BDs in the FW ring */
2308 		idx = TX_PROD(txq);
2309 		sw_tx_ring[idx].mbuf = mbuf;
2310 
2311 		/* BD1 */
2312 		bd1 = (struct eth_tx_1st_bd *)ecore_chain_produce(&txq->tx_pbl);
2313 		memset(bd1, 0, sizeof(struct eth_tx_1st_bd));
2314 		nbds++;
2315 
2316 		/* Map MBUF linear data for DMA and set in the BD1 */
2317 		QEDE_BD_SET_ADDR_LEN(bd1, rte_mbuf_data_iova(mbuf),
2318 				     mbuf->data_len);
2319 		bd1->data.bitfields = rte_cpu_to_le_16(bd1_bf);
2320 		bd1->data.bd_flags.bitfields = bd1_bd_flags_bf;
2321 
2322 		/* Handle fragmented MBUF */
2323 		if (unlikely(mbuf->nb_segs > 1)) {
2324 			m_seg = mbuf->next;
2325 
2326 			/* Encode scatter gather buffer descriptors */
2327 			nb_frags = qede_encode_sg_bd(txq, m_seg, &bd2, &bd3,
2328 						     nbds - 1);
2329 		}
2330 
2331 		bd1->data.nbds = nbds + nb_frags;
2332 
2333 		txq->nb_tx_avail -= bd1->data.nbds;
2334 		txq->sw_tx_prod++;
2335 		bd_prod =
2336 		    rte_cpu_to_le_16(ecore_chain_get_prod_idx(&txq->tx_pbl));
2337 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX
2338 		print_tx_bd_info(txq, bd1, bd2, bd3, tx_ol_flags);
2339 #endif
2340 		nb_pkt_sent++;
2341 		txq->xmit_pkts++;
2342 	}
2343 
2344 	/* Write value of prod idx into bd_prod */
2345 	txq->tx_db.data.bd_prod = bd_prod;
2346 	rte_wmb();
2347 	rte_compiler_barrier();
2348 	DIRECT_REG_WR_RELAXED(edev, txq->doorbell_addr, txq->tx_db.raw);
2349 	rte_wmb();
2350 
2351 	/* Check again for Tx completions */
2352 	qede_process_tx_compl(edev, txq);
2353 
2354 	PMD_TX_LOG(DEBUG, txq, "to_send=%u sent=%u bd_prod=%u core=%d",
2355 		   nb_pkts, nb_pkt_sent, TX_PROD(txq), rte_lcore_id());
2356 
2357 	return nb_pkt_sent;
2358 }
2359 
2360 uint16_t
2361 qede_xmit_pkts(void *p_txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
2362 {
2363 	struct qede_tx_queue *txq = p_txq;
2364 	struct qede_dev *qdev = txq->qdev;
2365 	struct ecore_dev *edev = &qdev->edev;
2366 	struct rte_mbuf *mbuf;
2367 	struct rte_mbuf *m_seg = NULL;
2368 	uint16_t nb_tx_pkts;
2369 	uint16_t bd_prod;
2370 	uint16_t idx;
2371 	uint16_t nb_frags;
2372 	uint16_t nb_pkt_sent = 0;
2373 	uint8_t nbds;
2374 	bool lso_flg;
2375 	bool mplsoudp_flg;
2376 	__rte_unused bool tunn_flg;
2377 	bool tunn_ipv6_ext_flg;
2378 	struct eth_tx_1st_bd *bd1;
2379 	struct eth_tx_2nd_bd *bd2;
2380 	struct eth_tx_3rd_bd *bd3;
2381 	uint64_t tx_ol_flags;
2382 	uint16_t hdr_size;
2383 	/* BD1 */
2384 	uint16_t bd1_bf;
2385 	uint8_t bd1_bd_flags_bf;
2386 	uint16_t vlan;
2387 	/* BD2 */
2388 	uint16_t bd2_bf1;
2389 	uint16_t bd2_bf2;
2390 	/* BD3 */
2391 	uint16_t mss;
2392 	uint16_t bd3_bf;
2393 
2394 	uint8_t tunn_l4_hdr_start_offset;
2395 	uint8_t tunn_hdr_size;
2396 	uint8_t inner_l2_hdr_size;
2397 	uint16_t inner_l4_hdr_offset;
2398 
2399 	if (unlikely(txq->nb_tx_avail < txq->tx_free_thresh)) {
2400 		PMD_TX_LOG(DEBUG, txq, "send=%u avail=%u free_thresh=%u",
2401 			   nb_pkts, txq->nb_tx_avail, txq->tx_free_thresh);
2402 		qede_process_tx_compl(edev, txq);
2403 	}
2404 
2405 	nb_tx_pkts  = nb_pkts;
2406 	bd_prod = rte_cpu_to_le_16(ecore_chain_get_prod_idx(&txq->tx_pbl));
2407 	while (nb_tx_pkts--) {
2408 		/* Init flags/values */
2409 		tunn_flg = false;
2410 		lso_flg = false;
2411 		nbds = 0;
2412 		vlan = 0;
2413 		bd1 = NULL;
2414 		bd2 = NULL;
2415 		bd3 = NULL;
2416 		hdr_size = 0;
2417 		bd1_bf = 0;
2418 		bd1_bd_flags_bf = 0;
2419 		bd2_bf1 = 0;
2420 		bd2_bf2 = 0;
2421 		mss = 0;
2422 		bd3_bf = 0;
2423 		mplsoudp_flg = false;
2424 		tunn_ipv6_ext_flg = false;
2425 		tunn_hdr_size = 0;
2426 		tunn_l4_hdr_start_offset = 0;
2427 
2428 		mbuf = *tx_pkts++;
2429 		assert(mbuf);
2430 
2431 		/* Check that enough TX BDs are available for all mbuf segments */
2432 		if (unlikely(txq->nb_tx_avail < mbuf->nb_segs))
2433 			break;
2434 
2435 		tx_ol_flags = mbuf->ol_flags;
2436 		bd1_bd_flags_bf |= 1 << ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT;
2437 
2438 		/* TX prepare would have already checked supported tunnel Tx
2439 		 * offloads. Don't rely on pkt_type marked by Rx, instead use
2440 		 * tx_ol_flags to decide.
2441 		 */
2442 		tunn_flg = !!(tx_ol_flags & PKT_TX_TUNNEL_MASK);
2443 
2444 		if (tunn_flg) {
2445 			/* Check against max which is Tunnel IPv6 + ext */
2446 			if (unlikely(txq->nb_tx_avail <
2447 				ETH_TX_MIN_BDS_PER_TUNN_IPV6_WITH_EXT_PKT))
2448 					break;
2449 
2450 			/* First indicate its a tunnel pkt */
2451 			/* First indicate it's a tunnel pkt */
2452 				  ETH_TX_DATA_1ST_BD_TUNN_FLAG_SHIFT;
2453 			/* Legacy FW had flipped behavior for this bit, i.e. the
2454 			 * bit had to be set to prevent the FW from touching
2455 			 * encapsulated packets when it didn't need to.
2456 			 */
2457 			if (unlikely(txq->is_legacy)) {
2458 				bd1_bf ^= 1 <<
2459 					ETH_TX_DATA_1ST_BD_TUNN_FLAG_SHIFT;
2460 			}
2461 
2462 			/* Outer IP checksum offload */
2463 			if (tx_ol_flags & (PKT_TX_OUTER_IP_CKSUM |
2464 					   PKT_TX_OUTER_IPV4)) {
2465 				bd1_bd_flags_bf |=
2466 					ETH_TX_1ST_BD_FLAGS_TUNN_IP_CSUM_MASK <<
2467 					ETH_TX_1ST_BD_FLAGS_TUNN_IP_CSUM_SHIFT;
2468 			}
2469 
2470 			/**
2471 			 * Currently, only inner checksum offload in MPLS-in-UDP
2472 			 * tunnel with one MPLS label is supported. Both outer
2473 			 * and inner layer lengths need to be provided in the
2474 			 * mbuf.
2475 			 */
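			/* Illustrative field layout (a sketch; the numbers are
			 * examples, not requirements of the code below):
			 * MPLSINUDP_HDR_SIZE (12) is the 8-byte UDP header
			 * plus one 4-byte MPLS label, so for MPLSoUDP the
			 * mbuf is expected to carry e.g.
			 *	outer_l2_len = 14 (outer Ethernet)
			 *	outer_l3_len = 20 (outer IPv4)
			 *	l2_len = 12 + 14  (UDP+MPLS + inner Ethernet)
			 *	l3_len/l4_len    = inner IP/L4 header lengths
			 * matching the offset arithmetic below.
			 */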
2476 			if ((tx_ol_flags & PKT_TX_TUNNEL_MASK) ==
2477 						PKT_TX_TUNNEL_MPLSINUDP) {
2478 				mplsoudp_flg = true;
2479 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX
2480 				qede_mpls_tunn_tx_sanity_check(mbuf, txq);
2481 #endif
2482 				/* Outer L4 offset in two byte words */
2483 				tunn_l4_hdr_start_offset =
2484 				  (mbuf->outer_l2_len + mbuf->outer_l3_len) / 2;
2485 				/* Tunnel header size in two byte words */
2486 				tunn_hdr_size = (mbuf->outer_l2_len +
2487 						mbuf->outer_l3_len +
2488 						MPLSINUDP_HDR_SIZE) / 2;
2489 				/* Inner L2 header size in two byte words */
2490 				inner_l2_hdr_size = (mbuf->l2_len -
2491 						MPLSINUDP_HDR_SIZE) / 2;
2492 				/* Inner L4 header offset from the beginning
2493 				 * of the inner packet in two byte words
2494 				 */
2495 				inner_l4_hdr_offset = (mbuf->l2_len -
2496 					MPLSINUDP_HDR_SIZE + mbuf->l3_len) / 2;
2497 
2498 				/* Inner L2 size and address type */
2499 				bd2_bf1 |= (inner_l2_hdr_size &
2500 					ETH_TX_DATA_2ND_BD_TUNN_INNER_L2_HDR_SIZE_W_MASK) <<
2501 					ETH_TX_DATA_2ND_BD_TUNN_INNER_L2_HDR_SIZE_W_SHIFT;
2502 				bd2_bf1 |= (UNICAST_ADDRESS &
2503 					ETH_TX_DATA_2ND_BD_TUNN_INNER_ETH_TYPE_MASK) <<
2504 					ETH_TX_DATA_2ND_BD_TUNN_INNER_ETH_TYPE_SHIFT;
2505 				/* Treated as IPv6+Ext */
2506 				bd2_bf1 |=
2507 				    1 << ETH_TX_DATA_2ND_BD_TUNN_IPV6_EXT_SHIFT;
2508 
2509 				/* Mark inner IPv6 if present */
2510 				if (tx_ol_flags & PKT_TX_IPV6)
2511 					bd2_bf1 |=
2512 						1 << ETH_TX_DATA_2ND_BD_TUNN_INNER_IPV6_SHIFT;
2513 
2514 				/* Inner L4 offsets */
2515 				if ((tx_ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6)) &&
2516 				     (tx_ol_flags & (PKT_TX_UDP_CKSUM |
2517 							PKT_TX_TCP_CKSUM))) {
2518 					/* Determines if BD3 is needed */
2519 					tunn_ipv6_ext_flg = true;
2520 					if ((tx_ol_flags & PKT_TX_L4_MASK) ==
2521 							PKT_TX_UDP_CKSUM) {
2522 						bd2_bf1 |=
2523 							1 << ETH_TX_DATA_2ND_BD_L4_UDP_SHIFT;
2524 					}
2525 
2526 					/* TODO other pseudo checksum modes are
2527 					 * not supported
2528 					 */
2529 					bd2_bf1 |=
2530 					ETH_L4_PSEUDO_CSUM_CORRECT_LENGTH <<
2531 					ETH_TX_DATA_2ND_BD_L4_PSEUDO_CSUM_MODE_SHIFT;
2532 					bd2_bf2 |= (inner_l4_hdr_offset &
2533 						ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_MASK) <<
2534 						ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_SHIFT;
2535 				}
2536 			} /* End MPLSoUDP */
2537 		} /* End Tunnel handling */
2538 
2539 		if (tx_ol_flags & PKT_TX_TCP_SEG) {
2540 			lso_flg = true;
2541 			if (unlikely(txq->nb_tx_avail <
2542 						ETH_TX_MIN_BDS_PER_LSO_PKT))
2543 				break;
2544 			/* For LSO, packet header and payload must reside on
2545 			 * buffers pointed by different BDs. Using BD1 for HDR
2546 			 * and BD2 onwards for data.
2547 			 */
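			/* For example (illustrative numbers): a TSO TCP/IPv4
			 * mbuf with l2_len 14, l3_len 20 and l4_len 20 gives
			 * hdr_size = 54, so BD1 below maps the first 54 bytes
			 * and BD2 maps the remaining data_len - 54 bytes
			 * starting at iova + 54.
			 */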
2548 			hdr_size = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len;
2549 			if (tunn_flg)
2550 				hdr_size += mbuf->outer_l2_len +
2551 					    mbuf->outer_l3_len;
2552 
2553 			bd1_bd_flags_bf |= 1 << ETH_TX_1ST_BD_FLAGS_LSO_SHIFT;
2554 			bd1_bd_flags_bf |=
2555 					1 << ETH_TX_1ST_BD_FLAGS_IP_CSUM_SHIFT;
2556 			/* PKT_TX_TCP_SEG implies PKT_TX_TCP_CKSUM */
2557 			bd1_bd_flags_bf |=
2558 					1 << ETH_TX_1ST_BD_FLAGS_L4_CSUM_SHIFT;
2559 			mss = rte_cpu_to_le_16(mbuf->tso_segsz);
2560 			/* Using one header BD */
2561 			bd3_bf |= rte_cpu_to_le_16(1 <<
2562 					ETH_TX_DATA_3RD_BD_HDR_NBD_SHIFT);
2563 		} else {
2564 			if (unlikely(txq->nb_tx_avail <
2565 					ETH_TX_MIN_BDS_PER_NON_LSO_PKT))
2566 				break;
2567 			bd1_bf |=
2568 			       (mbuf->pkt_len & ETH_TX_DATA_1ST_BD_PKT_LEN_MASK)
2569 				<< ETH_TX_DATA_1ST_BD_PKT_LEN_SHIFT;
2570 		}
2571 
2572 		/* Descriptor based VLAN insertion */
2573 		if (tx_ol_flags & PKT_TX_VLAN_PKT) {
2574 			vlan = rte_cpu_to_le_16(mbuf->vlan_tci);
2575 			bd1_bd_flags_bf |=
2576 			    1 << ETH_TX_1ST_BD_FLAGS_VLAN_INSERTION_SHIFT;
2577 		}
2578 
2579 		/* Offload the IP checksum in the hardware */
2580 		if (tx_ol_flags & PKT_TX_IP_CKSUM) {
2581 			bd1_bd_flags_bf |=
2582 				1 << ETH_TX_1ST_BD_FLAGS_IP_CSUM_SHIFT;
2583 			/* There's no DPDK flag to request outer-L4 csum
2584 			 * offload. But in the case of a tunnel, if inner L3 or
2585 			 * L4 csum offload is requested, then we need to force
2586 			 * recalculation of the tunnel L4 header csum as well.
2587 			 */
2588 			if (tunn_flg && ((tx_ol_flags & PKT_TX_TUNNEL_MASK) !=
2589 							PKT_TX_TUNNEL_GRE)) {
2590 				bd1_bd_flags_bf |=
2591 					ETH_TX_1ST_BD_FLAGS_TUNN_L4_CSUM_MASK <<
2592 					ETH_TX_1ST_BD_FLAGS_TUNN_L4_CSUM_SHIFT;
2593 			}
2594 		}
2595 
2596 		/* L4 checksum offload (tcp or udp) */
2597 		if ((tx_ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6)) &&
2598 		    (tx_ol_flags & (PKT_TX_UDP_CKSUM | PKT_TX_TCP_CKSUM))) {
2599 			bd1_bd_flags_bf |=
2600 				1 << ETH_TX_1ST_BD_FLAGS_L4_CSUM_SHIFT;
2601 			/* There's no DPDK flag to request outer-L4 csum
2602 			 * offload. But in the case of a tunnel, if inner L3 or
2603 			 * L4 csum offload is requested, then we need to force
2604 			 * recalculation of the tunnel L4 header csum as well.
2605 			 */
2606 			if (tunn_flg) {
2607 				bd1_bd_flags_bf |=
2608 					ETH_TX_1ST_BD_FLAGS_TUNN_L4_CSUM_MASK <<
2609 					ETH_TX_1ST_BD_FLAGS_TUNN_L4_CSUM_SHIFT;
2610 			}
2611 		}
2612 
2613 		/* Fill the entry in the SW ring and the BDs in the FW ring */
2614 		idx = TX_PROD(txq);
2615 		txq->sw_tx_ring[idx].mbuf = mbuf;
2616 
2617 		/* BD1 */
2618 		bd1 = (struct eth_tx_1st_bd *)ecore_chain_produce(&txq->tx_pbl);
2619 		memset(bd1, 0, sizeof(struct eth_tx_1st_bd));
2620 		nbds++;
2621 
2622 		/* Map MBUF linear data for DMA and set in the BD1 */
2623 		QEDE_BD_SET_ADDR_LEN(bd1, rte_mbuf_data_iova(mbuf),
2624 				     mbuf->data_len);
2625 		bd1->data.bitfields = rte_cpu_to_le_16(bd1_bf);
2626 		bd1->data.bd_flags.bitfields = bd1_bd_flags_bf;
2627 		bd1->data.vlan = vlan;
2628 
2629 		if (lso_flg || mplsoudp_flg) {
2630 			bd2 = (struct eth_tx_2nd_bd *)ecore_chain_produce
2631 							(&txq->tx_pbl);
2632 			memset(bd2, 0, sizeof(struct eth_tx_2nd_bd));
2633 			nbds++;
2634 
2635 			/* BD1 */
2636 			QEDE_BD_SET_ADDR_LEN(bd1, rte_mbuf_data_iova(mbuf),
2637 					     hdr_size);
2638 			/* BD2 */
2639 			QEDE_BD_SET_ADDR_LEN(bd2, (hdr_size +
2640 					     rte_mbuf_data_iova(mbuf)),
2641 					     mbuf->data_len - hdr_size);
2642 			bd2->data.bitfields1 = rte_cpu_to_le_16(bd2_bf1);
2643 			if (mplsoudp_flg) {
2644 				bd2->data.bitfields2 =
2645 					rte_cpu_to_le_16(bd2_bf2);
2646 				/* Outer L3 size */
2647 				bd2->data.tunn_ip_size =
2648 					rte_cpu_to_le_16(mbuf->outer_l3_len);
2649 			}
2650 			/* BD3 */
2651 			if (lso_flg || (mplsoudp_flg && tunn_ipv6_ext_flg)) {
2652 				bd3 = (struct eth_tx_3rd_bd *)
2653 					ecore_chain_produce(&txq->tx_pbl);
2654 				memset(bd3, 0, sizeof(struct eth_tx_3rd_bd));
2655 				nbds++;
2656 				bd3->data.bitfields = rte_cpu_to_le_16(bd3_bf);
2657 				if (lso_flg)
2658 					bd3->data.lso_mss = mss;
2659 				if (mplsoudp_flg) {
2660 					bd3->data.tunn_l4_hdr_start_offset_w =
2661 						tunn_l4_hdr_start_offset;
2662 					bd3->data.tunn_hdr_size_w =
2663 						tunn_hdr_size;
2664 				}
2665 			}
2666 		}
2667 
2668 		/* Handle fragmented MBUF */
2669 		m_seg = mbuf->next;
2670 
2671 		/* Encode scatter gather buffer descriptors if required */
2672 		nb_frags = qede_encode_sg_bd(txq, m_seg, &bd2, &bd3, nbds - 1);
2673 		bd1->data.nbds = nbds + nb_frags;
2674 
2675 		txq->nb_tx_avail -= bd1->data.nbds;
2676 		txq->sw_tx_prod++;
2677 		bd_prod =
2678 		    rte_cpu_to_le_16(ecore_chain_get_prod_idx(&txq->tx_pbl));
2679 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX
2680 		print_tx_bd_info(txq, bd1, bd2, bd3, tx_ol_flags);
2681 #endif
2682 		nb_pkt_sent++;
2683 		txq->xmit_pkts++;
2684 	}
2685 
2686 	/* Write value of prod idx into bd_prod */
2687 	txq->tx_db.data.bd_prod = bd_prod;
2688 	rte_wmb();
2689 	rte_compiler_barrier();
2690 	DIRECT_REG_WR_RELAXED(edev, txq->doorbell_addr, txq->tx_db.raw);
2691 	rte_wmb();
2692 
2693 	/* Check again for Tx completions */
2694 	qede_process_tx_compl(edev, txq);
2695 
2696 	PMD_TX_LOG(DEBUG, txq, "to_send=%u sent=%u bd_prod=%u core=%d",
2697 		   nb_pkts, nb_pkt_sent, TX_PROD(txq), rte_lcore_id());
2698 
2699 	return nb_pkt_sent;
2700 }
2701 
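/* Tx counterpart of qede_recv_pkts_cmt(): the burst is split between the two
 * per-engine Tx queues (fp0/fp1) and the per-engine results are summed.
 */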
2702 uint16_t
2703 qede_xmit_pkts_cmt(void *p_fp_cmt, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
2704 {
2705 	struct qede_fastpath_cmt *fp_cmt = p_fp_cmt;
2706 	uint16_t eng0_pkts, eng1_pkts;
2707 
2708 	eng0_pkts = nb_pkts / 2;
2709 
2710 	eng0_pkts = qede_xmit_pkts(fp_cmt->fp0->txq, tx_pkts, eng0_pkts);
2711 
2712 	eng1_pkts = nb_pkts - eng0_pkts;
2713 
2714 	eng1_pkts = qede_xmit_pkts(fp_cmt->fp1->txq, tx_pkts + eng0_pkts,
2715 				   eng1_pkts);
2716 
2717 	return eng0_pkts + eng1_pkts;
2718 }
2719 
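/* No-op Rx/Tx burst handler; presumably installed in place of the real
 * callbacks while the port cannot pass traffic (e.g. stopped or being
 * reconfigured) so burst calls simply return 0.
 */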
2720 uint16_t
2721 qede_rxtx_pkts_dummy(__rte_unused void *p_rxq,
2722 		     __rte_unused struct rte_mbuf **pkts,
2723 		     __rte_unused uint16_t nb_pkts)
2724 {
2725 	return 0;
2726 }
2727 
2728 
2729 /* This function does a dry-run walk over the completion queue
2730  * to calculate the number of BDs used by the HW, and at the end
2731  * restores the completion queue to its original state.
2732  */
2733 static uint16_t
2734 qede_parse_fp_cqe(struct qede_rx_queue *rxq)
2735 {
2736 	uint16_t hw_comp_cons, sw_comp_cons, bd_count = 0;
2737 	union eth_rx_cqe *cqe, *orig_cqe = NULL;
2738 
2739 	hw_comp_cons = rte_le_to_cpu_16(*rxq->hw_cons_ptr);
2740 	sw_comp_cons = ecore_chain_get_cons_idx(&rxq->rx_comp_ring);
2741 
2742 	if (hw_comp_cons == sw_comp_cons)
2743 		return 0;
2744 
2745 	/* Get the CQE from the completion ring */
2746 	cqe = (union eth_rx_cqe *)ecore_chain_consume(&rxq->rx_comp_ring);
2747 	orig_cqe = cqe;
2748 
2749 	while (sw_comp_cons != hw_comp_cons) {
2750 		switch (cqe->fast_path_regular.type) {
2751 		case ETH_RX_CQE_TYPE_REGULAR:
2752 			bd_count += cqe->fast_path_regular.bd_num;
2753 			break;
2754 		case ETH_RX_CQE_TYPE_TPA_END:
2755 			bd_count += cqe->fast_path_tpa_end.num_of_bds;
2756 			break;
2757 		default:
2758 			break;
2759 		}
2760 
2761 		cqe =
2762 		(union eth_rx_cqe *)ecore_chain_consume(&rxq->rx_comp_ring);
2763 		sw_comp_cons = ecore_chain_get_cons_idx(&rxq->rx_comp_ring);
2764 	}
2765 
2766 	/* revert comp_ring to original state */
2767 	ecore_chain_set_cons(&rxq->rx_comp_ring, sw_comp_cons, orig_cqe);
2768 
2769 	return bd_count;
2770 }
2771 
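/* Illustrative usage (a sketch assuming the standard ethdev API; port_id and
 * queue_id are placeholders): applications reach this callback through
 * rte_eth_rx_descriptor_status(), e.g.
 *
 *	if (rte_eth_rx_descriptor_status(port_id, queue_id, offset) ==
 *	    RTE_ETH_RX_DESC_DONE)
 *		nb_done++;
 *
 * DONE means the descriptor at 'offset' has already been filled by the HW,
 * AVAIL means it is still posted to the HW waiting for a packet, and UNAVAIL
 * means it is outside the currently produced range.
 */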
2772 int
2773 qede_rx_descriptor_status(void *p_rxq, uint16_t offset)
2774 {
2775 	uint16_t hw_bd_cons, sw_bd_cons, sw_bd_prod;
2776 	uint16_t produced, consumed;
2777 	struct qede_rx_queue *rxq = p_rxq;
2778 
2779 	if (offset > rxq->nb_rx_desc)
2780 		return -EINVAL;
2781 
2782 	sw_bd_cons = ecore_chain_get_cons_idx(&rxq->rx_bd_ring);
2783 	sw_bd_prod = ecore_chain_get_prod_idx(&rxq->rx_bd_ring);
2784 
2785 	/* find BDs used by HW from completion queue elements */
2786 	hw_bd_cons = sw_bd_cons + qede_parse_fp_cqe(rxq);
2787 
2788 	if (hw_bd_cons < sw_bd_cons)
2789 		/* wraparound case */
2790 		consumed = (0xffff - sw_bd_cons) + hw_bd_cons;
2791 	else
2792 		consumed = hw_bd_cons - sw_bd_cons;
2793 
2794 	if (offset <= consumed)
2795 		return RTE_ETH_RX_DESC_DONE;
2796 
2797 	if (sw_bd_prod < sw_bd_cons)
2798 		/* wraparound case */
2799 		produced = (0xffff - sw_bd_cons) + sw_bd_prod;
2800 	else
2801 		produced = sw_bd_prod - sw_bd_cons;
2802 
2803 	if (offset <= produced)
2804 		return RTE_ETH_RX_DESC_AVAIL;
2805 
2806 	return RTE_ETH_RX_DESC_UNAVAIL;
2807 }
2808