1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2021 6WIND S.A.
3  * Copyright 2021 Mellanox Technologies, Ltd
4  */
5 
6 #include <stdint.h>
7 #include <string.h>
8 #include <stdlib.h>
9 
10 #include <rte_mbuf.h>
11 #include <rte_mempool.h>
12 #include <rte_prefetch.h>
13 #include <rte_common.h>
14 #include <rte_branch_prediction.h>
15 #include <rte_ether.h>
16 #include <rte_cycles.h>
17 #include <rte_flow.h>
18 
19 #include <mlx5_prm.h>
20 #include <mlx5_common.h>
21 #include <mlx5_common_mr.h>
22 #include <rte_pmd_mlx5.h>
23 
24 #include "mlx5_autoconf.h"
25 #include "mlx5_defs.h"
26 #include "mlx5.h"
27 #include "mlx5_utils.h"
28 #include "mlx5_rxtx.h"
29 #include "mlx5_devx.h"
30 #include "mlx5_rx.h"
31 #ifdef HAVE_MLX5_MSTFLINT
32 #include <mstflint/mtcr.h>
33 #endif
34 
35 
36 static __rte_always_inline uint32_t
37 rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
38 		   volatile struct mlx5_mini_cqe8 *mcqe);
39 
40 static __rte_always_inline int
41 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
42 		 uint16_t cqe_n, uint16_t cqe_mask,
43 		 volatile struct mlx5_mini_cqe8 **mcqe,
44 		 uint16_t *skip_cnt, bool mprq);
45 
46 static __rte_always_inline uint32_t
47 rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe);
48 
49 static __rte_always_inline void
50 rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt,
51 	       volatile struct mlx5_cqe *cqe,
52 	       volatile struct mlx5_mini_cqe8 *mcqe);
53 
54 static inline void
55 mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp,
56 			volatile struct mlx5_cqe *__rte_restrict cqe,
57 			uint32_t phcsum, uint8_t l4_type);
58 
59 static inline void
60 mlx5_lro_update_hdr(uint8_t *__rte_restrict padd,
61 		    volatile struct mlx5_cqe *__rte_restrict cqe,
62 		    volatile struct mlx5_mini_cqe8 *mcqe,
63 		    struct mlx5_rxq_data *rxq, uint32_t len);
64 
65 
66 /**
67  * Internal function to compute the number of used descriptors in an RX queue.
68  *
69  * @param rxq
70  *   The Rx queue.
71  *
72  * @return
73  *   The number of used Rx descriptors.
74  */
75 static uint32_t
76 rx_queue_count(struct mlx5_rxq_data *rxq)
77 {
78 	struct rxq_zip *zip = &rxq->zip;
79 	volatile struct mlx5_cqe *cqe;
80 	const unsigned int cqe_n = (1 << rxq->cqe_n);
81 	const unsigned int sges_n = (1 << rxq->sges_n);
82 	const unsigned int elts_n = (1 << rxq->elts_n);
83 	const unsigned int strd_n = RTE_BIT32(rxq->log_strd_num);
84 	const unsigned int cqe_cnt = cqe_n - 1;
85 	unsigned int cq_ci, used;
86 
87 	/* if we are processing a compressed cqe */
88 	if (zip->ai) {
89 		used = zip->cqe_cnt - zip->ai;
90 		cq_ci = zip->cq_ci;
91 	} else {
92 		used = 0;
93 		cq_ci = rxq->cq_ci;
94 	}
95 	cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
96 	while (check_cqe(cqe, cqe_n, cq_ci) != MLX5_CQE_STATUS_HW_OWN) {
97 		int8_t op_own;
98 		unsigned int n;
99 
100 		op_own = cqe->op_own;
101 		if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED)
102 			n = rte_be_to_cpu_32(cqe->byte_cnt);
103 		else
104 			n = 1;
105 		cq_ci += n;
106 		used += n;
107 		cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
108 	}
109 	used = RTE_MIN(used * sges_n, elts_n * strd_n);
110 	return used;
111 }
112 
113 /**
114  * DPDK callback to check the status of a Rx descriptor.
115  *
116  * @param rx_queue
117  *   The Rx queue.
118  * @param[in] offset
119  *   The index of the descriptor in the ring.
120  *
121  * @return
122  *   The status of the Rx descriptor.
123  */
124 int
125 mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
126 {
127 	struct mlx5_rxq_data *rxq = rx_queue;
128 
129 	if (offset >= (1 << rxq->cqe_n)) {
130 		rte_errno = EINVAL;
131 		return -rte_errno;
132 	}
133 	if (offset < rx_queue_count(rxq))
134 		return RTE_ETH_RX_DESC_DONE;
135 	return RTE_ETH_RX_DESC_AVAIL;
136 }
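
/*
 * Illustrative usage sketch (not part of the driver): applications reach
 * this handler through the generic ethdev API. The port, queue and offset
 * values below are arbitrary examples.
 *
 *	uint16_t done = 0;
 *
 *	while (rte_eth_rx_descriptor_status(0, 0, done) ==
 *	       RTE_ETH_RX_DESC_DONE)
 *		done++;
 *
 * "done" then counts descriptors already completed by the HW and waiting
 * to be received by the next rte_eth_rx_burst() call.
 */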
137 
138 /* Get the Rx queue LWM percentage according to the LWM number. */
139 static uint8_t
140 mlx5_rxq_lwm_to_percentage(struct mlx5_rxq_priv *rxq)
141 {
142 	struct mlx5_rxq_data *rxq_data = &rxq->ctrl->rxq;
143 	uint32_t wqe_cnt = 1 << (rxq_data->elts_n - rxq_data->sges_n);
144 
145 	return rxq->lwm * 100 / wqe_cnt;
146 }
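
/*
 * Worked example (illustrative only, values are arbitrary): with
 * elts_n = 10 and sges_n = 0, wqe_cnt = 1 << (10 - 0) = 1024 WQEs.
 * An LWM of 512 WQEs then reports 512 * 100 / 1024 = 50 percent.
 * The division is integer, so small LWM values round down to 0;
 * mlx5_rx_queue_lwm_set() below compensates by rounding the stored
 * WQE count up when it is derived from a percentage.
 */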
147 
148 /**
149  * DPDK callback to get the RX queue information.
150  *
151  * @param dev
152  *   Pointer to the device structure.
153  *
154  * @param rx_queue_id
155  *   Rx queue identifier.
156  *
157  * @param qinfo
158  *   Pointer to the RX queue information structure.
159  *
160  * @return
161  *   None.
162  */
163 
164 void
165 mlx5_rxq_info_get(struct rte_eth_dev *dev, uint16_t rx_queue_id,
166 		  struct rte_eth_rxq_info *qinfo)
167 {
168 	struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_ctrl_get(dev, rx_queue_id);
169 	struct mlx5_rxq_data *rxq = mlx5_rxq_data_get(dev, rx_queue_id);
170 	struct mlx5_rxq_priv *rxq_priv = mlx5_rxq_get(dev, rx_queue_id);
171 
172 	if (!rxq)
173 		return;
174 	qinfo->mp = mlx5_rxq_mprq_enabled(rxq) ?
175 					rxq->mprq_mp : rxq->mp;
176 	qinfo->conf.rx_thresh.pthresh = 0;
177 	qinfo->conf.rx_thresh.hthresh = 0;
178 	qinfo->conf.rx_thresh.wthresh = 0;
179 	qinfo->conf.rx_free_thresh = rxq->rq_repl_thresh;
180 	qinfo->conf.rx_drop_en = 1;
181 	if (rxq_ctrl == NULL || rxq_ctrl->obj == NULL)
182 		qinfo->conf.rx_deferred_start = 0;
183 	else
184 		qinfo->conf.rx_deferred_start = 1;
185 	qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads;
186 	qinfo->scattered_rx = dev->data->scattered_rx;
187 	qinfo->nb_desc = mlx5_rxq_mprq_enabled(rxq) ?
188 		RTE_BIT32(rxq->elts_n) * RTE_BIT32(rxq->log_strd_num) :
189 		RTE_BIT32(rxq->elts_n);
190 	qinfo->avail_thresh = rxq_priv ?
191 		mlx5_rxq_lwm_to_percentage(rxq_priv) : 0;
192 }
193 
194 /**
195  * DPDK callback to get the RX packet burst mode information.
196  *
197  * @param dev
198  *   Pointer to the device structure.
199  *
200  * @param rx_queue_id
201  *   Rx queue identifier.
202  *
203  * @param mode
204  *   Pointer to the burst mode information.
205  *
206  * @return
207  *   0 on success, -EINVAL on failure.
208  */
209 int
210 mlx5_rx_burst_mode_get(struct rte_eth_dev *dev,
211 		       uint16_t rx_queue_id __rte_unused,
212 		       struct rte_eth_burst_mode *mode)
213 {
214 	eth_rx_burst_t pkt_burst = dev->rx_pkt_burst;
215 	struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, rx_queue_id);
216 
217 	if (!rxq) {
218 		rte_errno = EINVAL;
219 		return -rte_errno;
220 	}
221 	if (pkt_burst == mlx5_rx_burst) {
222 		snprintf(mode->info, sizeof(mode->info), "%s", "Scalar");
223 	} else if (pkt_burst == mlx5_rx_burst_mprq) {
224 		snprintf(mode->info, sizeof(mode->info), "%s", "Multi-Packet RQ");
225 	} else if (pkt_burst == mlx5_rx_burst_vec) {
226 #if defined RTE_ARCH_X86_64
227 		snprintf(mode->info, sizeof(mode->info), "%s", "Vector SSE");
228 #elif defined RTE_ARCH_ARM64
229 		snprintf(mode->info, sizeof(mode->info), "%s", "Vector Neon");
230 #elif defined RTE_ARCH_PPC_64
231 		snprintf(mode->info, sizeof(mode->info), "%s", "Vector AltiVec");
232 #else
233 		return -EINVAL;
234 #endif
235 	} else if (pkt_burst == mlx5_rx_burst_mprq_vec) {
236 #if defined RTE_ARCH_X86_64
237 		snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector SSE");
238 #elif defined RTE_ARCH_ARM64
239 		snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector Neon");
240 #elif defined RTE_ARCH_PPC_64
241 		snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector AltiVec");
242 #else
243 		return -EINVAL;
244 #endif
245 	} else {
246 		return -EINVAL;
247 	}
248 	return 0;
249 }
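
/*
 * Illustrative usage sketch (not part of the driver): the selected Rx
 * burst flavor can be queried from an application through the generic
 * ethdev API; the port and queue numbers are arbitrary examples.
 *
 *	struct rte_eth_burst_mode mode;
 *
 *	if (rte_eth_rx_burst_mode_get(0, 0, &mode) == 0)
 *		printf("Rx burst mode: %s\n", mode.info);
 */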
250 
251 /**
252  * DPDK callback to get the number of used descriptors in an RX queue.
253  *
254  * @param rx_queue
255  *   The Rx queue pointer.
256  *
257  * @return
258  *   The number of used Rx descriptors.
259  *   -EINVAL if the queue is invalid.
260  */
261 uint32_t
262 mlx5_rx_queue_count(void *rx_queue)
263 {
264 	struct mlx5_rxq_data *rxq = rx_queue;
265 	struct rte_eth_dev *dev;
266 
267 	if (!rxq) {
268 		rte_errno = EINVAL;
269 		return -rte_errno;
270 	}
271 
272 	dev = &rte_eth_devices[rxq->port_id];
273 
274 	if (dev->rx_pkt_burst == NULL ||
275 	    dev->rx_pkt_burst == rte_eth_pkt_burst_dummy) {
276 		rte_errno = ENOTSUP;
277 		return -rte_errno;
278 	}
279 
280 	return rx_queue_count(rxq);
281 }
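
/*
 * Illustrative usage sketch (not part of the driver): the used descriptor
 * count is exposed through the generic ethdev API; the port and queue
 * numbers are arbitrary examples.
 *
 *	int used = rte_eth_rx_queue_count(0, 0);
 *
 *	if (used >= 0)
 *		printf("Rx queue 0: %d descriptors completed\n", used);
 */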
282 
283 #define CLB_VAL_IDX 0
284 #define CLB_MSK_IDX 1
285 static int
286 mlx5_monitor_callback(const uint64_t value,
287 		const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ])
288 {
289 	const uint64_t m = opaque[CLB_MSK_IDX];
290 	const uint64_t v = opaque[CLB_VAL_IDX];
291 
292 	return (value & m) == v ? -1 : 0;
293 }
294 
295 int mlx5_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
296 {
297 	struct mlx5_rxq_data *rxq = rx_queue;
298 	const unsigned int cqe_num = 1 << rxq->cqe_n;
299 	const unsigned int cqe_mask = cqe_num - 1;
300 	const uint16_t idx = rxq->cq_ci & cqe_num;
301 	const uint8_t vic = rxq->cq_ci >> rxq->cqe_n;
302 	volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask];
303 
304 	if (unlikely(rxq->cqes == NULL)) {
305 		rte_errno = EINVAL;
306 		return -rte_errno;
307 	}
308 	if (rxq->cqe_comp_layout) {
309 		pmc->addr = &cqe->validity_iteration_count;
310 		pmc->opaque[CLB_VAL_IDX] = vic;
311 		pmc->opaque[CLB_MSK_IDX] = MLX5_CQE_VIC_INIT;
312 	} else {
313 		pmc->addr = &cqe->op_own;
314 		pmc->opaque[CLB_VAL_IDX] = !!idx;
315 		pmc->opaque[CLB_MSK_IDX] = MLX5_CQE_OWNER_MASK;
316 	}
317 	pmc->fn = mlx5_monitor_callback;
318 	pmc->size = sizeof(uint8_t);
319 	return 0;
320 }
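
/*
 * Illustrative usage sketch (not part of the driver): the monitor
 * condition filled in above is normally consumed by the power management
 * library; the port/queue numbers and the timeout are arbitrary examples.
 *
 *	struct rte_power_monitor_cond pmc;
 *
 *	if (rte_eth_get_monitor_addr(0, 0, &pmc) == 0)
 *		rte_power_monitor(&pmc, rte_get_tsc_cycles() + 1000000);
 *
 * The core then sleeps on the CQE ownership/validity byte until it
 * changes or the TSC deadline expires.
 */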
321 
322 /**
323  * Translate RX completion flags to packet type.
324  *
325  * @param[in] rxq
326  *   Pointer to RX queue structure.
327  * @param[in] cqe
328  *   Pointer to CQE.
329  *
330  * @note: update mlx5_dev_supported_ptypes_get() if anything changes here.
331  *
332  * @return
333  *   Packet type for struct rte_mbuf.
334  */
335 static inline uint32_t
336 rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
337 				   volatile struct mlx5_mini_cqe8 *mcqe)
338 {
339 	uint8_t idx;
340 	uint8_t ptype;
341 	uint8_t pinfo = (cqe->pkt_info & 0x3) << 6;
342 
343 	/* Get the L3/L4 header type from the mini-CQE in case of L3/L4 format. */
344 	if (mcqe == NULL ||
345 	    rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX)
346 		ptype = (cqe->hdr_type_etc & 0xfc00) >> 10;
347 	else
348 		ptype = mcqe->hdr_type >> 2;
349 	/*
350 	 * The index to the array should have:
351 	 * bit[1:0] = l3_hdr_type
352 	 * bit[4:2] = l4_hdr_type
353 	 * bit[5] = ip_frag
354 	 * bit[6] = tunneled
355 	 * bit[7] = outer_l3_type
356 	 */
357 	idx = pinfo | ptype;
358 	return mlx5_ptype_table[idx] | rxq->tunnel * !!(idx & (1 << 6));
359 }
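
/*
 * Worked example of the index composition above (illustrative values):
 * with cqe->pkt_info low bits equal to 0x3, pinfo = 0x3 << 6 = 0xc0;
 * with cqe->hdr_type_etc = 0x2400, ptype = (0x2400 & 0xfc00) >> 10 = 0x09.
 * The table index is then 0xc0 | 0x09 = 0xc9, and since bit 6 (tunneled)
 * is set, rxq->tunnel is OR-ed into the returned packet type.
 */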
360 
361 /**
362  * Initialize Rx WQ and indexes.
363  *
364  * @param[in] rxq
365  *   Pointer to RX queue structure.
366  */
367 void
368 mlx5_rxq_initialize(struct mlx5_rxq_data *rxq)
369 {
370 	const unsigned int wqe_n = 1 << rxq->elts_n;
371 	unsigned int i;
372 
373 	for (i = 0; (i != wqe_n); ++i) {
374 		volatile struct mlx5_wqe_data_seg *scat;
375 		uintptr_t addr;
376 		uint32_t byte_count;
377 		uint32_t lkey;
378 
379 		if (mlx5_rxq_mprq_enabled(rxq)) {
380 			struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[i];
381 
382 			scat = &((volatile struct mlx5_wqe_mprq *)
383 				rxq->wqes)[i].dseg;
384 			addr = (uintptr_t)mlx5_mprq_buf_addr
385 					(buf, RTE_BIT32(rxq->log_strd_num));
386 			byte_count = RTE_BIT32(rxq->log_strd_sz) *
387 				     RTE_BIT32(rxq->log_strd_num);
388 			lkey = mlx5_rx_addr2mr(rxq, addr);
389 		} else {
390 			struct rte_mbuf *buf = (*rxq->elts)[i];
391 
392 			scat = &((volatile struct mlx5_wqe_data_seg *)
393 					rxq->wqes)[i];
394 			addr = rte_pktmbuf_mtod(buf, uintptr_t);
395 			byte_count = DATA_LEN(buf);
396 			lkey = mlx5_rx_mb2mr(rxq, buf);
397 		}
398 		/* scat->addr must be able to store a pointer. */
399 		MLX5_ASSERT(sizeof(scat->addr) >= sizeof(uintptr_t));
400 		*scat = (struct mlx5_wqe_data_seg){
401 			.addr = rte_cpu_to_be_64(addr),
402 			.byte_count = rte_cpu_to_be_32(byte_count),
403 			.lkey = lkey,
404 		};
405 	}
406 	rxq->consumed_strd = 0;
407 	rxq->decompressed = 0;
408 	rxq->rq_pi = 0;
409 	rxq->zip = (struct rxq_zip){
410 		.ai = 0,
411 	};
412 	rxq->elts_ci = mlx5_rxq_mprq_enabled(rxq) ?
413 		(wqe_n >> rxq->sges_n) * RTE_BIT32(rxq->log_strd_num) : 0;
414 	/* Update doorbell counter. */
415 	rxq->rq_ci = wqe_n >> rxq->sges_n;
416 	rte_io_wmb();
417 	*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
418 }
419 
420 #define MLX5_ERROR_CQE_MASK 0x40000000
421 /* Must be negative. */
422 #define MLX5_REGULAR_ERROR_CQE_RET (-5)
423 #define MLX5_CRITICAL_ERROR_CQE_RET (-4)
424 /* Must not be negative. */
425 #define MLX5_RECOVERY_ERROR_RET 0
426 #define MLX5_RECOVERY_IGNORE_RET 1
427 #define MLX5_RECOVERY_COMPLETED_RET 2
428 
429 /**
430  * Handle a Rx error.
431  * The function moves the RQ state to RESET when the first error CQE is
432  * seen; the CQ is then drained by the caller's loop. When the CQ is empty,
433  * it moves the RQ state back to READY and reinitializes the RQ.
434  * Identifying the next CQE and counting errors remain the caller's responsibility.
435  *
436  * @param[in] rxq
437  *   Pointer to RX queue structure.
438  * @param[in] vec
439  *   1 when called from a vectorized Rx burst; mbufs need to be prepared for the RQ.
440  *   0 when called from non-vectorized Rx burst.
441  * @param[in] err_n
442  *   Number of CQEs to check for an error.
443  *
444  * @return
445  *   MLX5_RECOVERY_ERROR_RET in case of recovery error,
446  *   MLX5_RECOVERY_IGNORE_RET in case of non-critical error syndrome,
447  *   MLX5_RECOVERY_COMPLETED_RET in case recovery is completed,
448  *   otherwise the CQE status after ignored error syndrome or queue reset.
449  */
450 int
451 mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec,
452 		   uint16_t err_n, uint16_t *skip_cnt)
453 {
454 	const uint16_t cqe_n = 1 << rxq->cqe_n;
455 	const uint16_t cqe_mask = cqe_n - 1;
456 	const uint16_t wqe_n = 1 << rxq->elts_n;
457 	const uint16_t strd_n = RTE_BIT32(rxq->log_strd_num);
458 	struct mlx5_rxq_ctrl *rxq_ctrl =
459 			container_of(rxq, struct mlx5_rxq_ctrl, rxq);
460 	union {
461 		volatile struct mlx5_cqe *cqe;
462 		volatile struct mlx5_error_cqe *err_cqe;
463 	} u = {
464 		.cqe = &(*rxq->cqes)[(rxq->cq_ci - vec) & cqe_mask],
465 	};
466 	struct mlx5_mp_arg_queue_state_modify sm;
467 	bool critical_syndrome = false;
468 	int ret, i;
469 
470 	switch (rxq->err_state) {
471 	case MLX5_RXQ_ERR_STATE_IGNORE:
472 		ret = check_cqe(u.cqe, cqe_n, rxq->cq_ci - vec);
473 		if (ret != MLX5_CQE_STATUS_ERR) {
474 			rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR;
475 			return ret;
476 		}
477 		/* Fall-through */
478 	case MLX5_RXQ_ERR_STATE_NO_ERROR:
479 		for (i = 0; i < (int)err_n; i++) {
480 			u.cqe = &(*rxq->cqes)[(rxq->cq_ci - vec - i) & cqe_mask];
481 			if (MLX5_CQE_OPCODE(u.cqe->op_own) == MLX5_CQE_RESP_ERR) {
482 				if (u.err_cqe->syndrome == MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR ||
483 				    u.err_cqe->syndrome == MLX5_CQE_SYNDROME_LOCAL_PROT_ERR ||
484 				    u.err_cqe->syndrome == MLX5_CQE_SYNDROME_WR_FLUSH_ERR)
485 					critical_syndrome = true;
486 				break;
487 			}
488 		}
489 		if (!critical_syndrome) {
490 			if (rxq->err_state == MLX5_RXQ_ERR_STATE_NO_ERROR) {
491 				*skip_cnt = 0;
492 				if (i == err_n)
493 					rxq->err_state = MLX5_RXQ_ERR_STATE_IGNORE;
494 			}
495 			return MLX5_RECOVERY_IGNORE_RET;
496 		}
497 		rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_RESET;
498 		/* Fall-through */
499 	case MLX5_RXQ_ERR_STATE_NEED_RESET:
500 		sm.is_wq = 1;
501 		sm.queue_id = rxq->idx;
502 		sm.state = IBV_WQS_RESET;
503 		if (mlx5_queue_state_modify(RXQ_DEV(rxq_ctrl), &sm))
504 			return MLX5_RECOVERY_ERROR_RET;
505 		if (rxq_ctrl->dump_file_n <
506 		    RXQ_PORT(rxq_ctrl)->config.max_dump_files_num) {
507 			MKSTR(err_str, "Unexpected CQE error syndrome "
508 			      "0x%02x CQN = %u RQN = %u wqe_counter = %u"
509 			      " rq_ci = %u cq_ci = %u", u.err_cqe->syndrome,
510 			      rxq->cqn, rxq_ctrl->wqn,
511 			      rte_be_to_cpu_16(u.err_cqe->wqe_counter),
512 			      rxq->rq_ci << rxq->sges_n, rxq->cq_ci);
513 			MKSTR(name, "dpdk_mlx5_port_%u_rxq_%u_%u",
514 			      rxq->port_id, rxq->idx, (uint32_t)rte_rdtsc());
515 			mlx5_dump_debug_information(name, NULL, err_str, 0);
516 			mlx5_dump_debug_information(name, "MLX5 Error CQ:",
517 						    (const void *)((uintptr_t)
518 								    rxq->cqes),
519 						    sizeof(*u.cqe) * cqe_n);
520 			mlx5_dump_debug_information(name, "MLX5 Error RQ:",
521 						    (const void *)((uintptr_t)
522 								    rxq->wqes),
523 						    16 * wqe_n);
524 			rxq_ctrl->dump_file_n++;
525 		}
526 		rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_READY;
527 		/* Fall-through */
528 	case MLX5_RXQ_ERR_STATE_NEED_READY:
529 		ret = check_cqe(u.cqe, cqe_n, rxq->cq_ci);
530 		if (ret == MLX5_CQE_STATUS_HW_OWN) {
531 			rte_io_wmb();
532 			*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
533 			rte_io_wmb();
534 			/*
535 			 * The RQ consumer index must be zeroed while moving
536 			 * from RESET state to RDY state.
537 			 */
538 			*rxq->rq_db = rte_cpu_to_be_32(0);
539 			rte_io_wmb();
540 			sm.is_wq = 1;
541 			sm.queue_id = rxq->idx;
542 			sm.state = IBV_WQS_RDY;
543 			if (mlx5_queue_state_modify(RXQ_DEV(rxq_ctrl), &sm))
544 				return MLX5_RECOVERY_ERROR_RET;
545 			if (vec) {
546 				const uint32_t elts_n =
547 					mlx5_rxq_mprq_enabled(rxq) ?
548 					wqe_n * strd_n : wqe_n;
549 				const uint32_t e_mask = elts_n - 1;
550 				uint32_t elts_ci =
551 					mlx5_rxq_mprq_enabled(rxq) ?
552 					rxq->elts_ci : rxq->rq_ci;
553 				uint32_t elt_idx;
554 				struct rte_mbuf **elt;
555 				unsigned int n = elts_n - (elts_ci -
556 							  rxq->rq_pi);
557 
558 				for (i = 0; i < (int)n; ++i) {
559 					elt_idx = (elts_ci + i) & e_mask;
560 					elt = &(*rxq->elts)[elt_idx];
561 					*elt = rte_mbuf_raw_alloc(rxq->mp);
562 					if (!*elt) {
563 						for (i--; i >= 0; --i) {
564 							elt_idx = (elts_ci +
565 								   i) & elts_n;
566 							elt = &(*rxq->elts)
567 								[elt_idx];
568 							rte_pktmbuf_free_seg
569 								(*elt);
570 						}
571 						return MLX5_RECOVERY_ERROR_RET;
572 					}
573 				}
574 				for (i = 0; i < (int)elts_n; ++i) {
575 					elt = &(*rxq->elts)[i];
576 					DATA_LEN(*elt) =
577 						(uint16_t)((*elt)->buf_len -
578 						rte_pktmbuf_headroom(*elt));
579 				}
580 				/* Padding with a fake mbuf for vec Rx. */
581 				for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
582 					(*rxq->elts)[elts_n + i] =
583 								&rxq->fake_mbuf;
584 			}
585 			mlx5_rxq_initialize(rxq);
586 			rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR;
587 			return MLX5_RECOVERY_COMPLETED_RET;
588 		}
589 		return ret;
590 	default:
591 		return MLX5_RECOVERY_ERROR_RET;
592 	}
593 }
594 
595 /**
596  * Get the size of the next packet for a given CQE. For compressed CQEs, the
597  * consumer index is updated only once all packets of the current compressed
598  * session have been processed.
599  *
600  * @param rxq
601  *   Pointer to RX queue.
602  * @param cqe
603  *   CQE to process.
604  * @param cqe_n
605  *   Completion queue count.
606  * @param cqe_mask
607  *   Completion queue mask.
608  * @param[out] mcqe
609  *   Store pointer to mini-CQE if compressed. Otherwise, the pointer is not
610  *   written.
611  * @param[out] skip_cnt
612  *   Number of packets skipped due to recoverable errors.
613  * @param mprq
614  *   Indication whether it is called from the MPRQ Rx burst.
615  * @return
616  *   0 in case of empty CQE,
617  *   MLX5_REGULAR_ERROR_CQE_RET in case of error CQE,
618  *   MLX5_CRITICAL_ERROR_CQE_RET in case of an error CQE leading to an Rx queue reset,
619  *   otherwise the packet size in the regular Rx queue case,
620  *   and the striding byte count format in the MPRQ case.
621  */
622 static inline int
623 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
624 		 uint16_t cqe_n, uint16_t cqe_mask,
625 		 volatile struct mlx5_mini_cqe8 **mcqe,
626 		 uint16_t *skip_cnt, bool mprq)
627 {
628 	struct rxq_zip *zip = &rxq->zip;
629 	int len = 0, ret = 0;
630 	uint32_t idx, end;
631 
632 	do {
633 		len = 0;
634 		/* Process compressed data in the CQE and mini arrays. */
635 		if (zip->ai) {
636 			volatile struct mlx5_mini_cqe8 (*mc)[8] =
637 				(volatile struct mlx5_mini_cqe8 (*)[8])
638 				(uintptr_t)(&(*rxq->cqes)[zip->ca &
639 							cqe_mask].pkt_info);
640 			len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt &
641 						rxq->byte_mask);
642 			*mcqe = &(*mc)[zip->ai & 7];
643 			if (rxq->cqe_comp_layout) {
644 				zip->ai++;
645 				if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) {
646 					rxq->cq_ci = zip->cq_ci;
647 					zip->ai = 0;
648 				}
649 			} else {
650 				if ((++zip->ai & 7) == 0) {
651 					/* Invalidate consumed CQEs */
652 					idx = zip->ca;
653 					end = zip->na;
654 					while (idx != end) {
655 						(*rxq->cqes)[idx & cqe_mask].op_own =
656 							MLX5_CQE_INVALIDATE;
657 						++idx;
658 					}
659 					/*
660 					 * Increment consumer index to skip the number
661 					 * of CQEs consumed. Hardware leaves holes in
662 					 * the CQ ring for software use.
663 					 */
664 					zip->ca = zip->na;
665 					zip->na += 8;
666 				}
667 				if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) {
668 					/* Invalidate the rest */
669 					idx = zip->ca;
670 					end = zip->cq_ci;
671 
672 					while (idx != end) {
673 						(*rxq->cqes)[idx & cqe_mask].op_own =
674 							MLX5_CQE_INVALIDATE;
675 						++idx;
676 					}
677 					rxq->cq_ci = zip->cq_ci;
678 					zip->ai = 0;
679 				}
680 			}
681 		/*
682 		 * No compressed data, get next CQE and verify if it is
683 		 * compressed.
684 		 */
685 		} else {
686 			int8_t op_own;
687 			uint32_t cq_ci;
688 
689 			ret = (rxq->cqe_comp_layout) ?
690 				check_cqe_iteration(cqe, rxq->cqe_n, rxq->cq_ci) :
691 				check_cqe(cqe, cqe_n, rxq->cq_ci);
692 			if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
693 				if (unlikely(ret == MLX5_CQE_STATUS_ERR ||
694 					     rxq->err_state)) {
695 					ret = mlx5_rx_err_handle(rxq, 0, 1, skip_cnt);
696 					if (ret == MLX5_CQE_STATUS_HW_OWN)
697 						return MLX5_ERROR_CQE_MASK;
698 					if (ret == MLX5_RECOVERY_ERROR_RET ||
699 						ret == MLX5_RECOVERY_COMPLETED_RET)
700 						return MLX5_CRITICAL_ERROR_CQE_RET;
701 					if (!mprq && ret == MLX5_RECOVERY_IGNORE_RET) {
702 						*skip_cnt = 1;
703 						++rxq->cq_ci;
704 						return MLX5_ERROR_CQE_MASK;
705 					}
706 				} else {
707 					return 0;
708 				}
709 			}
710 			/*
711 			 * Introduce the local variable to have queue cq_ci
712 			 * index in queue structure always consistent with
713 			 * actual CQE boundary (not pointing to the middle
714 			 * of compressed CQE session).
715 			 */
716 			cq_ci = rxq->cq_ci + !rxq->cqe_comp_layout;
717 			op_own = cqe->op_own;
718 			if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) {
719 				volatile struct mlx5_mini_cqe8 (*mc)[8] =
720 					(volatile struct mlx5_mini_cqe8 (*)[8])
721 					(uintptr_t)(&(*rxq->cqes)
722 						[cq_ci & cqe_mask].pkt_info);
723 
724 				/* Fix endianness. */
725 				zip->cqe_cnt = rxq->cqe_comp_layout ?
726 					(MLX5_CQE_NUM_MINIS(op_own) + 1U) :
727 					rte_be_to_cpu_32(cqe->byte_cnt);
728 				/*
729 				 * Current mini array position is the one
730 				 * returned by check_cqe64().
731 				 *
732 				 * If completion comprises several mini arrays,
733 				 * as a special case the second one is located
734 				 * 7 CQEs after the initial CQE instead of 8
735 				 * for subsequent ones.
736 				 */
737 				zip->ca = cq_ci;
738 				zip->na = zip->ca + 7;
739 				/* Compute the next non compressed CQE. */
740 				zip->cq_ci = rxq->cq_ci + zip->cqe_cnt;
741 				/* Get packet size to return. */
742 				len = rte_be_to_cpu_32((*mc)[0].byte_cnt &
743 							rxq->byte_mask);
744 				*mcqe = &(*mc)[0];
745 				if (rxq->cqe_comp_layout) {
746 					if (MLX5_CQE_NUM_MINIS(op_own))
747 						zip->ai = 1;
748 					else
749 						rxq->cq_ci = zip->cq_ci;
750 				} else {
751 					zip->ai = 1;
752 					/* Prefetch all to be invalidated */
753 					idx = zip->ca;
754 					end = zip->cq_ci;
755 					while (idx != end) {
756 						rte_prefetch0(&(*rxq->cqes)[(idx) & cqe_mask]);
757 						++idx;
758 					}
759 				}
760 			} else {
761 				++rxq->cq_ci;
762 				len = rte_be_to_cpu_32(cqe->byte_cnt);
763 				if (rxq->cqe_comp_layout) {
764 					volatile struct mlx5_cqe *next;
765 
766 					next = &(*rxq->cqes)[rxq->cq_ci & cqe_mask];
767 					ret = check_cqe_iteration(next, rxq->cqe_n, rxq->cq_ci);
768 					if (ret != MLX5_CQE_STATUS_SW_OWN ||
769 					    MLX5_CQE_FORMAT(next->op_own) == MLX5_COMPRESSED)
770 						rte_memcpy(&rxq->title_cqe,
771 							   (const void *)(uintptr_t)cqe,
772 							   sizeof(struct mlx5_cqe));
773 				}
774 			}
775 		}
776 		if (unlikely(rxq->err_state)) {
777 			if (rxq->err_state == MLX5_RXQ_ERR_STATE_IGNORE &&
778 			    ret == MLX5_CQE_STATUS_SW_OWN) {
779 				rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR;
780 				return len & MLX5_ERROR_CQE_MASK;
781 			}
782 			cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask];
783 			++rxq->stats.idropped;
784 			(*skip_cnt) += mprq ? (len & MLX5_MPRQ_STRIDE_NUM_MASK) >>
785 				MLX5_MPRQ_STRIDE_NUM_SHIFT : 1;
786 		} else {
787 			return len;
788 		}
789 	} while (1);
790 }
791 
792 /**
793  * Translate RX completion flags to offload flags.
794  *
795  * @param[in] cqe
796  *   Pointer to CQE.
797  *
798  * @return
799  *   Offload flags (ol_flags) for struct rte_mbuf.
800  */
801 static inline uint32_t
802 rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe)
803 {
804 	uint32_t ol_flags = 0;
805 	uint16_t flags = rte_be_to_cpu_16(cqe->hdr_type_etc);
806 
807 	ol_flags =
808 		TRANSPOSE(flags,
809 			  MLX5_CQE_RX_L3_HDR_VALID,
810 			  RTE_MBUF_F_RX_IP_CKSUM_GOOD) |
811 		TRANSPOSE(flags,
812 			  MLX5_CQE_RX_L4_HDR_VALID,
813 			  RTE_MBUF_F_RX_L4_CKSUM_GOOD);
814 	return ol_flags;
815 }
816 
817 /**
818  * Fill in mbuf fields from RX completion flags.
819  * Note that pkt->ol_flags should be initialized outside of this function.
820  *
821  * @param rxq
822  *   Pointer to RX queue.
823  * @param pkt
824  *   mbuf to fill.
825  * @param cqe
826  *   CQE to process.
827  * @param rss_hash_res
828  *   Packet RSS Hash result.
829  */
830 static inline void
831 rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt,
832 	       volatile struct mlx5_cqe *cqe,
833 	       volatile struct mlx5_mini_cqe8 *mcqe)
834 {
835 	/* Update packet information. */
836 	pkt->packet_type = rxq_cq_to_pkt_type(rxq, cqe, mcqe);
837 	pkt->port = unlikely(rxq->shared) ? cqe->user_index_low : rxq->port_id;
838 
839 	if (rxq->rss_hash) {
840 		uint32_t rss_hash_res = 0;
841 
842 		/* If compressed, take hash result from mini-CQE. */
843 		if (mcqe == NULL ||
844 		    rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_HASH)
845 			rss_hash_res = rte_be_to_cpu_32(cqe->rx_hash_res);
846 		else
847 			rss_hash_res = rte_be_to_cpu_32(mcqe->rx_hash_result);
848 		if (rss_hash_res) {
849 			pkt->hash.rss = rss_hash_res;
850 			pkt->ol_flags |= RTE_MBUF_F_RX_RSS_HASH;
851 		}
852 	}
853 	if (rxq->mark) {
854 		uint32_t mark = 0;
855 
856 		/* If compressed, take flow tag from mini-CQE. */
857 		if (mcqe == NULL ||
858 		    rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_FTAG_STRIDX)
859 			mark = cqe->sop_drop_qpn;
860 		else
861 			mark = ((mcqe->byte_cnt_flow & 0xff) << 8) |
862 				(mcqe->flow_tag_high << 16);
863 		if (MLX5_FLOW_MARK_IS_VALID(mark)) {
864 			pkt->ol_flags |= RTE_MBUF_F_RX_FDIR;
865 			if (mark != RTE_BE32(MLX5_FLOW_MARK_DEFAULT)) {
866 				pkt->ol_flags |= rxq->mark_flag;
867 				pkt->hash.fdir.hi = mlx5_flow_mark_get(mark);
868 			}
869 		}
870 	}
871 	if (rxq->dynf_meta) {
872 		uint32_t meta = rte_be_to_cpu_32(cqe->flow_table_metadata) &
873 			rxq->flow_meta_port_mask;
874 
875 		if (meta) {
876 			pkt->ol_flags |= rxq->flow_meta_mask;
877 			*RTE_MBUF_DYNFIELD(pkt, rxq->flow_meta_offset,
878 						uint32_t *) = meta;
879 		}
880 	}
881 	if (rxq->csum)
882 		pkt->ol_flags |= rxq_cq_to_ol_flags(cqe);
883 	if (rxq->vlan_strip) {
884 		bool vlan_strip;
885 
886 		if (mcqe == NULL ||
887 		    rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX)
888 			vlan_strip = cqe->hdr_type_etc &
889 				     RTE_BE16(MLX5_CQE_VLAN_STRIPPED);
890 		else
891 			vlan_strip = mcqe->hdr_type &
892 				     RTE_BE16(MLX5_CQE_VLAN_STRIPPED);
893 		if (vlan_strip) {
894 			pkt->ol_flags |= RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED;
895 			pkt->vlan_tci = rte_be_to_cpu_16(cqe->vlan_info);
896 		}
897 	}
898 	if (rxq->hw_timestamp) {
899 		uint64_t ts = rte_be_to_cpu_64(cqe->timestamp);
900 
901 		if (rxq->rt_timestamp)
902 			ts = mlx5_txpp_convert_rx_ts(rxq->sh, ts);
903 		mlx5_timestamp_set(pkt, rxq->timestamp_offset, ts);
904 		pkt->ol_flags |= rxq->timestamp_rx_flag;
905 	}
906 }
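
/*
 * Illustrative sketch (not part of the driver): the fields filled in
 * above are what an application later reads from the received mbuf, e.g.:
 *
 *	if (pkt->ol_flags & RTE_MBUF_F_RX_RSS_HASH)
 *		printf("RSS hash 0x%08x\n", pkt->hash.rss);
 *	if (pkt->ol_flags & RTE_MBUF_F_RX_VLAN_STRIPPED)
 *		printf("stripped VLAN TCI %u\n", pkt->vlan_tci);
 */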
907 
908 /**
909  * DPDK callback for RX.
910  *
911  * @param dpdk_rxq
912  *   Generic pointer to RX queue structure.
913  * @param[out] pkts
914  *   Array to store received packets.
915  * @param pkts_n
916  *   Maximum number of packets in array.
917  *
918  * @return
919  *   Number of packets successfully received (<= pkts_n).
920  */
921 uint16_t
922 mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
923 {
924 	struct mlx5_rxq_data *rxq = dpdk_rxq;
925 	const uint32_t wqe_n = 1 << rxq->elts_n;
926 	const uint32_t wqe_mask = wqe_n - 1;
927 	const uint32_t cqe_n = 1 << rxq->cqe_n;
928 	const uint32_t cqe_mask = cqe_n - 1;
929 	const unsigned int sges_n = rxq->sges_n;
930 	struct rte_mbuf *pkt = NULL;
931 	struct rte_mbuf *seg = NULL;
932 	volatile struct mlx5_cqe *cqe =
933 		&(*rxq->cqes)[rxq->cq_ci & cqe_mask];
934 	unsigned int i = 0;
935 	unsigned int rq_ci = rxq->rq_ci << sges_n;
936 	int len = 0; /* keep its value across iterations. */
937 
938 	while (pkts_n) {
939 		uint16_t skip_cnt;
940 		unsigned int idx = rq_ci & wqe_mask;
941 		volatile struct mlx5_wqe_data_seg *wqe =
942 			&((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx];
943 		struct rte_mbuf *rep = (*rxq->elts)[idx];
944 		volatile struct mlx5_mini_cqe8 *mcqe = NULL;
945 
946 		if (pkt)
947 			NEXT(seg) = rep;
948 		seg = rep;
949 		rte_prefetch0(seg);
950 		rte_prefetch0(cqe);
951 		rte_prefetch0(wqe);
952 		/* Allocate the buf from the same pool. */
953 		rep = rte_mbuf_raw_alloc(seg->pool);
954 		if (unlikely(rep == NULL)) {
955 			++rxq->stats.rx_nombuf;
956 			if (!pkt) {
957 				/*
958 				 * no buffers before we even started,
959 				 * bail out silently.
960 				 */
961 				break;
962 			}
963 			while (pkt != seg) {
964 				MLX5_ASSERT(pkt != (*rxq->elts)[idx]);
965 				rep = NEXT(pkt);
966 				NEXT(pkt) = NULL;
967 				NB_SEGS(pkt) = 1;
968 				rte_mbuf_raw_free(pkt);
969 				pkt = rep;
970 			}
971 			rq_ci >>= sges_n;
972 			++rq_ci;
973 			rq_ci <<= sges_n;
974 			break;
975 		}
976 		if (!pkt) {
977 			cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask];
978 			len = mlx5_rx_poll_len(rxq, cqe, cqe_n, cqe_mask, &mcqe, &skip_cnt, false);
979 			if (unlikely(len & MLX5_ERROR_CQE_MASK)) {
980 				/* We drop packets with non-critical errors */
981 				rte_mbuf_raw_free(rep);
982 				if (len == MLX5_CRITICAL_ERROR_CQE_RET) {
983 					rq_ci = rxq->rq_ci << sges_n;
984 					break;
985 				}
986 				/* Skip the specified number of packets in error CQEs. */
987 				rq_ci >>= sges_n;
988 				rq_ci += skip_cnt;
989 				rq_ci <<= sges_n;
990 				MLX5_ASSERT(!pkt);
991 				continue;
992 			}
993 			if (len == 0) {
994 				rte_mbuf_raw_free(rep);
995 				break;
996 			}
997 			pkt = seg;
998 			MLX5_ASSERT(len >= (rxq->crc_present << 2));
999 			pkt->ol_flags &= RTE_MBUF_F_EXTERNAL;
1000 			if (rxq->cqe_comp_layout && mcqe)
1001 				cqe = &rxq->title_cqe;
1002 			rxq_cq_to_mbuf(rxq, pkt, cqe, mcqe);
1003 			if (rxq->crc_present)
1004 				len -= RTE_ETHER_CRC_LEN;
1005 			PKT_LEN(pkt) = len;
1006 			if (cqe->lro_num_seg > 1) {
1007 				mlx5_lro_update_hdr
1008 					(rte_pktmbuf_mtod(pkt, uint8_t *), cqe,
1009 					 mcqe, rxq, len);
1010 				pkt->ol_flags |= RTE_MBUF_F_RX_LRO;
1011 				pkt->tso_segsz = len / cqe->lro_num_seg;
1012 			}
1013 		}
1014 		DATA_LEN(rep) = DATA_LEN(seg);
1015 		PKT_LEN(rep) = PKT_LEN(seg);
1016 		SET_DATA_OFF(rep, DATA_OFF(seg));
1017 		PORT(rep) = PORT(seg);
1018 		(*rxq->elts)[idx] = rep;
1019 		/*
1020 		 * Fill NIC descriptor with the new buffer. The lkey and size
1021 		 * of the buffers are already known, only the buffer address
1022 		 * changes.
1023 		 */
1024 		wqe->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(rep, uintptr_t));
1025 		/* If there's only one MR, no need to replace LKey in WQE. */
1026 		if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1))
1027 			wqe->lkey = mlx5_rx_mb2mr(rxq, rep);
1028 		if (len > DATA_LEN(seg)) {
1029 			len -= DATA_LEN(seg);
1030 			++NB_SEGS(pkt);
1031 			++rq_ci;
1032 			continue;
1033 		}
1034 		DATA_LEN(seg) = len;
1035 #ifdef MLX5_PMD_SOFT_COUNTERS
1036 		/* Increment bytes counter. */
1037 		rxq->stats.ibytes += PKT_LEN(pkt);
1038 #endif
1039 		/* Return packet. */
1040 		*(pkts++) = pkt;
1041 		pkt = NULL;
1042 		--pkts_n;
1043 		++i;
1044 		/* Align consumer index to the next stride. */
1045 		rq_ci >>= sges_n;
1046 		++rq_ci;
1047 		rq_ci <<= sges_n;
1048 	}
1049 	if (unlikely(i == 0 && ((rq_ci >> sges_n) == rxq->rq_ci)))
1050 		return 0;
1051 	/* Update the consumer index. */
1052 	rxq->rq_ci = rq_ci >> sges_n;
1053 	rte_io_wmb();
1054 	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
1055 	rte_io_wmb();
1056 	*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
1057 #ifdef MLX5_PMD_SOFT_COUNTERS
1058 	/* Increment packets counter. */
1059 	rxq->stats.ipackets += i;
1060 #endif
1061 	return i;
1062 }
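
/*
 * Illustrative usage sketch (not part of the driver): this routine is
 * reached indirectly through the generic polling API; the port, queue and
 * burst size are arbitrary examples.
 *
 *	struct rte_mbuf *bufs[32];
 *	uint16_t nb, j;
 *
 *	nb = rte_eth_rx_burst(0, 0, bufs, 32);
 *	for (j = 0; j < nb; j++)
 *		rte_pktmbuf_free(bufs[j]);
 */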
1063 
1064 /**
1065  * Update LRO packet TCP header.
1066  * The HW LRO feature doesn't update the TCP header after coalescing the
1067  * TCP segments but supplies the information in the CQE for SW to fill it in.
1068  *
1069  * @param tcp
1070  *   Pointer to the TCP header.
1071  * @param cqe
1072  *   Pointer to the completion entry.
1073  * @param phcsum
1074  *   The L3 pseudo-header checksum.
1075  */
1076 static inline void
1077 mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp,
1078 			volatile struct mlx5_cqe *__rte_restrict cqe,
1079 			uint32_t phcsum, uint8_t l4_type)
1080 {
1081 	/*
1082 	 * The HW calculates only the TCP payload checksum, need to complete
1083 	 * the TCP header checksum and the L3 pseudo-header checksum.
1084 	 */
1085 	uint32_t csum = phcsum + cqe->csum;
1086 
1087 	if (l4_type == MLX5_L4_HDR_TYPE_TCP_EMPTY_ACK ||
1088 	    l4_type == MLX5_L4_HDR_TYPE_TCP_WITH_ACL) {
1089 		tcp->tcp_flags |= RTE_TCP_ACK_FLAG;
1090 		tcp->recv_ack = cqe->lro_ack_seq_num;
1091 		tcp->rx_win = cqe->lro_tcp_win;
1092 	}
1093 	if (cqe->lro_tcppsh_abort_dupack & MLX5_CQE_LRO_PUSH_MASK)
1094 		tcp->tcp_flags |= RTE_TCP_PSH_FLAG;
1095 	tcp->cksum = 0;
1096 	csum += rte_raw_cksum(tcp, (tcp->data_off >> 4) * 4);
1097 	csum = ((csum & 0xffff0000) >> 16) + (csum & 0xffff);
1098 	csum = ((csum & 0xffff0000) >> 16) + (csum & 0xffff);
1099 	csum = (~csum) & 0xffff;
1100 	if (csum == 0)
1101 		csum = 0xffff;
1102 	tcp->cksum = csum;
1103 }
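
/*
 * Worked example of the checksum folding above (illustrative value):
 * if the 32-bit accumulator reaches 0x2b3c4, the first fold gives
 * 0x2 + 0xb3c4 = 0xb3c6, the second fold leaves it unchanged, and the
 * one's complement is ~0xb3c6 & 0xffff = 0x4c39, which becomes the TCP
 * checksum (a zero result would be replaced by 0xffff).
 */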
1104 
1105 /**
1106  * Update LRO packet headers.
1107  * The HW LRO feature doesn't update the L3/TCP headers after coalescing the
1108  * TCP segments but supply information in CQE to fill it by SW.
1109  *
1110  * @param padd
1111  *   The packet address.
1112  * @param cqe
1113  *   Pointer to the completion entry.
1114  * @param len
1115  *   The packet length.
1116  */
1117 static inline void
1118 mlx5_lro_update_hdr(uint8_t *__rte_restrict padd,
1119 		    volatile struct mlx5_cqe *__rte_restrict cqe,
1120 		    volatile struct mlx5_mini_cqe8 *mcqe,
1121 		    struct mlx5_rxq_data *rxq, uint32_t len)
1122 {
1123 	union {
1124 		struct rte_ether_hdr *eth;
1125 		struct rte_vlan_hdr *vlan;
1126 		struct rte_ipv4_hdr *ipv4;
1127 		struct rte_ipv6_hdr *ipv6;
1128 		struct rte_tcp_hdr *tcp;
1129 		uint8_t *hdr;
1130 	} h = {
1131 		.hdr = padd,
1132 	};
1133 	uint16_t proto = h.eth->ether_type;
1134 	uint32_t phcsum;
1135 	uint8_t l4_type;
1136 
1137 	h.eth++;
1138 	while (proto == RTE_BE16(RTE_ETHER_TYPE_VLAN) ||
1139 	       proto == RTE_BE16(RTE_ETHER_TYPE_QINQ)) {
1140 		proto = h.vlan->eth_proto;
1141 		h.vlan++;
1142 	}
1143 	if (proto == RTE_BE16(RTE_ETHER_TYPE_IPV4)) {
1144 		h.ipv4->time_to_live = cqe->lro_min_ttl;
1145 		h.ipv4->total_length = rte_cpu_to_be_16(len - (h.hdr - padd));
1146 		h.ipv4->hdr_checksum = 0;
1147 		h.ipv4->hdr_checksum = rte_ipv4_cksum(h.ipv4);
1148 		phcsum = rte_ipv4_phdr_cksum(h.ipv4, 0);
1149 		h.ipv4++;
1150 	} else {
1151 		h.ipv6->hop_limits = cqe->lro_min_ttl;
1152 		h.ipv6->payload_len = rte_cpu_to_be_16(len - (h.hdr - padd) -
1153 						       sizeof(*h.ipv6));
1154 		phcsum = rte_ipv6_phdr_cksum(h.ipv6, 0);
1155 		h.ipv6++;
1156 	}
1157 	if (mcqe == NULL ||
1158 	    rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX)
1159 		l4_type = (rte_be_to_cpu_16(cqe->hdr_type_etc) &
1160 			   MLX5_CQE_L4_TYPE_MASK) >> MLX5_CQE_L4_TYPE_SHIFT;
1161 	else
1162 		l4_type = (rte_be_to_cpu_16(mcqe->hdr_type) &
1163 			   MLX5_CQE_L4_TYPE_MASK) >> MLX5_CQE_L4_TYPE_SHIFT;
1164 	mlx5_lro_update_tcp_hdr(h.tcp, cqe, phcsum, l4_type);
1165 }
1166 
1167 void
1168 mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf)
1169 {
1170 	mlx5_mprq_buf_free_cb(NULL, buf);
1171 }
1172 
1173 /**
1174  * DPDK callback for RX with Multi-Packet RQ support.
1175  *
1176  * @param dpdk_rxq
1177  *   Generic pointer to RX queue structure.
1178  * @param[out] pkts
1179  *   Array to store received packets.
1180  * @param pkts_n
1181  *   Maximum number of packets in array.
1182  *
1183  * @return
1184  *   Number of packets successfully received (<= pkts_n).
1185  */
1186 uint16_t
1187 mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
1188 {
1189 	struct mlx5_rxq_data *rxq = dpdk_rxq;
1190 	const uint32_t strd_n = RTE_BIT32(rxq->log_strd_num);
1191 	const uint32_t strd_sz = RTE_BIT32(rxq->log_strd_sz);
1192 	const uint32_t cqe_n = 1 << rxq->cqe_n;
1193 	const uint32_t cq_mask = cqe_n - 1;
1194 	const uint32_t wqe_n = 1 << rxq->elts_n;
1195 	const uint32_t wq_mask = wqe_n - 1;
1196 	volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
1197 	unsigned int i = 0;
1198 	uint32_t rq_ci = rxq->rq_ci;
1199 	uint16_t consumed_strd = rxq->consumed_strd;
1200 	struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
1201 
1202 	while (i < pkts_n) {
1203 		struct rte_mbuf *pkt;
1204 		int ret;
1205 		uint32_t len;
1206 		uint16_t strd_cnt;
1207 		uint16_t strd_idx;
1208 		uint32_t byte_cnt;
1209 		uint16_t skip_cnt;
1210 		volatile struct mlx5_mini_cqe8 *mcqe = NULL;
1211 		enum mlx5_rqx_code rxq_code;
1212 
1213 		if (consumed_strd == strd_n) {
1214 			/* Replace WQE if the buffer is still in use. */
1215 			mprq_buf_replace(rxq, rq_ci & wq_mask);
1216 			/* Advance to the next WQE. */
1217 			consumed_strd = 0;
1218 			++rq_ci;
1219 			buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
1220 		}
1221 		cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
1222 		ret = mlx5_rx_poll_len(rxq, cqe, cqe_n, cq_mask, &mcqe, &skip_cnt, true);
1223 		if (unlikely(ret & MLX5_ERROR_CQE_MASK)) {
1224 			if (ret == MLX5_CRITICAL_ERROR_CQE_RET) {
1225 				rq_ci = rxq->rq_ci;
1226 				consumed_strd = rxq->consumed_strd;
1227 				break;
1228 			}
1229 			consumed_strd += skip_cnt;
1230 			while (consumed_strd >= strd_n) {
1231 				/* Replace WQE if the buffer is still in use. */
1232 				mprq_buf_replace(rxq, rq_ci & wq_mask);
1233 				/* Advance to the next WQE. */
1234 				consumed_strd -= strd_n;
1235 				++rq_ci;
1236 				buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
1237 			}
1238 			cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
1239 		}
1240 		if (ret == 0)
1241 			break;
1242 		byte_cnt = ret;
1243 		len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT;
1244 		MLX5_ASSERT((int)len >= (rxq->crc_present << 2));
1245 		if (rxq->crc_present)
1246 			len -= RTE_ETHER_CRC_LEN;
1247 		if (mcqe &&
1248 		    rxq->mcqe_format == MLX5_CQE_RESP_FORMAT_FTAG_STRIDX)
1249 			strd_cnt = (len / strd_sz) + !!(len % strd_sz);
1250 		else
1251 			strd_cnt = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >>
1252 				   MLX5_MPRQ_STRIDE_NUM_SHIFT;
1253 		MLX5_ASSERT(strd_cnt);
1254 		consumed_strd += strd_cnt;
1255 		if (byte_cnt & MLX5_MPRQ_FILLER_MASK)
1256 			continue;
1257 		if (rxq->cqe_comp_layout && mcqe)
1258 			cqe = &rxq->title_cqe;
1259 		strd_idx = rte_be_to_cpu_16(mcqe == NULL ?
1260 					cqe->wqe_counter :
1261 					mcqe->stride_idx);
1262 		MLX5_ASSERT(strd_idx < strd_n);
1263 		MLX5_ASSERT(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) &
1264 			    wq_mask));
1265 		pkt = rte_pktmbuf_alloc(rxq->mp);
1266 		if (unlikely(pkt == NULL)) {
1267 			++rxq->stats.rx_nombuf;
1268 			break;
1269 		}
1270 		len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT;
1271 		MLX5_ASSERT((int)len >= (rxq->crc_present << 2));
1272 		if (rxq->crc_present)
1273 			len -= RTE_ETHER_CRC_LEN;
1274 		rxq_code = mprq_buf_to_pkt(rxq, pkt, len, buf,
1275 					   strd_idx, strd_cnt);
1276 		if (unlikely(rxq_code != MLX5_RXQ_CODE_EXIT)) {
1277 			rte_pktmbuf_free_seg(pkt);
1278 			if (rxq_code == MLX5_RXQ_CODE_DROPPED) {
1279 				++rxq->stats.idropped;
1280 				continue;
1281 			}
1282 			if (rxq_code == MLX5_RXQ_CODE_NOMBUF) {
1283 				++rxq->stats.rx_nombuf;
1284 				break;
1285 			}
1286 		}
1287 		rxq_cq_to_mbuf(rxq, pkt, cqe, mcqe);
1288 		if (cqe->lro_num_seg > 1) {
1289 			mlx5_lro_update_hdr(rte_pktmbuf_mtod(pkt, uint8_t *),
1290 					    cqe, mcqe, rxq, len);
1291 			pkt->ol_flags |= RTE_MBUF_F_RX_LRO;
1292 			pkt->tso_segsz = len / cqe->lro_num_seg;
1293 		}
1294 		PKT_LEN(pkt) = len;
1295 		PORT(pkt) = rxq->port_id;
1296 #ifdef MLX5_PMD_SOFT_COUNTERS
1297 		/* Increment bytes counter. */
1298 		rxq->stats.ibytes += PKT_LEN(pkt);
1299 #endif
1300 		/* Return packet. */
1301 		*(pkts++) = pkt;
1302 		++i;
1303 	}
1304 	/* Update the consumer indexes. */
1305 	rxq->consumed_strd = consumed_strd;
1306 	rte_io_wmb();
1307 	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
1308 	if (rq_ci != rxq->rq_ci) {
1309 		rxq->rq_ci = rq_ci;
1310 		rte_io_wmb();
1311 		*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
1312 	}
1313 #ifdef MLX5_PMD_SOFT_COUNTERS
1314 	/* Increment packets counter. */
1315 	rxq->stats.ipackets += i;
1316 #endif
1317 	return i;
1318 }
1319 
1320 int
1321 mlx5_rx_queue_lwm_query(struct rte_eth_dev *dev,
1322 			uint16_t *queue_id, uint8_t *lwm)
1323 {
1324 	struct mlx5_priv *priv = dev->data->dev_private;
1325 	unsigned int rxq_id, found = 0, n;
1326 	struct mlx5_rxq_priv *rxq;
1327 
1328 	if (!queue_id)
1329 		return -EINVAL;
1330 	/* Query all the Rx queues of the port in a circular way. */
1331 	for (rxq_id = *queue_id, n = 0; n < priv->rxqs_n; n++) {
1332 		rxq = mlx5_rxq_get(dev, rxq_id);
1333 		if (rxq && rxq->lwm_event_pending) {
1334 			pthread_mutex_lock(&priv->sh->lwm_config_lock);
1335 			rxq->lwm_event_pending = 0;
1336 			pthread_mutex_unlock(&priv->sh->lwm_config_lock);
1337 			*queue_id = rxq_id;
1338 			found = 1;
1339 			if (lwm)
1340 				*lwm = mlx5_rxq_lwm_to_percentage(rxq);
1341 			break;
1342 		}
1343 		rxq_id = (rxq_id + 1) % priv->rxqs_n;
1344 	}
1345 	return found;
1346 }
1347 
1348 /**
1349  * RTE interrupt handler for the LWM event.
1350  * It first checks whether the event has arrived and, if so, processes the
1351  * callback for RTE_ETH_EVENT_RX_AVAIL_THRESH.
1352  *
1353  * @param args
1354  *   Generic pointer to mlx5_priv.
1355  */
1356 void
1357 mlx5_dev_interrupt_handler_lwm(void *args)
1358 {
1359 	struct mlx5_priv *priv = args;
1360 	struct mlx5_rxq_priv *rxq;
1361 	struct rte_eth_dev *dev;
1362 	int ret, rxq_idx = 0, port_id = 0;
1363 
1364 	ret = priv->obj_ops.rxq_event_get_lwm(priv, &rxq_idx, &port_id);
1365 	if (unlikely(ret < 0)) {
1366 		DRV_LOG(WARNING, "Cannot get LWM event context.");
1367 		return;
1368 	}
1369 	DRV_LOG(INFO, "%s get LWM event, port_id:%d rxq_id:%d.", __func__,
1370 		port_id, rxq_idx);
1371 	dev = &rte_eth_devices[port_id];
1372 	rxq = mlx5_rxq_get(dev, rxq_idx);
1373 	if (rxq) {
1374 		pthread_mutex_lock(&priv->sh->lwm_config_lock);
1375 		rxq->lwm_event_pending = 1;
1376 		pthread_mutex_unlock(&priv->sh->lwm_config_lock);
1377 	}
1378 	rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_RX_AVAIL_THRESH, NULL);
1379 }
1380 
1381 /**
1382  * DPDK callback to arm an Rx queue LWM (limit watermark) event.
1383  * When the Rx queue fullness reaches the LWM limit, the driver catches
1384  * an HW event and invokes the user event callback.
1385  * After the last event is handled, the user needs to call this API again
1386  * to arm an additional event.
1387  *
1388  * @param dev
1389  *   Pointer to the device structure.
1390  * @param[in] rx_queue_id
1391  *   Rx queue identifier.
1392  * @param[in] lwm
1393  *   The LWM value, defined as a percentage of the Rx queue size.
1394  *   [1-99] to set a new LWM (update the old value).
1395  *   0 to unarm the event.
1396  *
1397  * @return
1398  *   0 : operation success.
1399  *   Otherwise:
1400  *   - ENOMEM - not enough memory to create LWM event channel.
1401  *   - EINVAL - the input Rxq is not created by devx.
1402  *   - E2BIG  - lwm is bigger than 99.
1403  */
1404 int
1405 mlx5_rx_queue_lwm_set(struct rte_eth_dev *dev, uint16_t rx_queue_id,
1406 		      uint8_t lwm)
1407 {
1408 	struct mlx5_priv *priv = dev->data->dev_private;
1409 	uint16_t port_id = PORT_ID(priv);
1410 	struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, rx_queue_id);
1411 	uint16_t event_nums[1] = {MLX5_EVENT_TYPE_SRQ_LIMIT_REACHED};
1412 	struct mlx5_rxq_data *rxq_data;
1413 	uint32_t wqe_cnt;
1414 	uint64_t cookie;
1415 	int ret = 0;
1416 
1417 	if (!rxq) {
1418 		rte_errno = EINVAL;
1419 		return -rte_errno;
1420 	}
1421 	rxq_data = &rxq->ctrl->rxq;
1422 	/* Ensure the RQ is created by DevX. */
1423 	if (priv->obj_ops.rxq_obj_new != devx_obj_ops.rxq_obj_new) {
1424 		rte_errno = EINVAL;
1425 		return -rte_errno;
1426 	}
1427 	if (lwm > 99) {
1428 		DRV_LOG(WARNING, "Too big LWM configuration.");
1429 		rte_errno = E2BIG;
1430 		return -rte_errno;
1431 	}
1432 	/* Start configuring the LWM. */
1433 	pthread_mutex_lock(&priv->sh->lwm_config_lock);
1434 	if (rxq->lwm == 0 && lwm == 0) {
1435 		/* Both old/new values are 0, do nothing. */
1436 		ret = 0;
1437 		goto end;
1438 	}
1439 	wqe_cnt = 1 << (rxq_data->elts_n - rxq_data->sges_n);
1440 	if (lwm) {
1441 		if (!priv->sh->devx_channel_lwm) {
1442 			ret = mlx5_lwm_setup(priv);
1443 			if (ret) {
1444 				DRV_LOG(WARNING,
1445 					"Failed to create shared_lwm.");
1446 				rte_errno = ENOMEM;
1447 				ret = -rte_errno;
1448 				goto end;
1449 			}
1450 		}
1451 		if (!rxq->lwm_devx_subscribed) {
1452 			cookie = ((uint32_t)
1453 				  (port_id << LWM_COOKIE_PORTID_OFFSET)) |
1454 				(rx_queue_id << LWM_COOKIE_RXQID_OFFSET);
1455 			ret = mlx5_os_devx_subscribe_devx_event
1456 				(priv->sh->devx_channel_lwm,
1457 				 rxq->devx_rq.rq->obj,
1458 				 sizeof(event_nums),
1459 				 event_nums,
1460 				 cookie);
1461 			if (ret) {
1462 				rte_errno = rte_errno ? rte_errno : EINVAL;
1463 				ret = -rte_errno;
1464 				goto end;
1465 			}
1466 			rxq->lwm_devx_subscribed = 1;
1467 		}
1468 	}
1469 	/* Save LWM to rxq and send modify_rq devx command. */
1470 	rxq->lwm = lwm * wqe_cnt / 100;
1471 	/* Prevent integer division loss when converting the LWM number to a percentage. */
1472 	if (lwm && (lwm * wqe_cnt % 100)) {
1473 		rxq->lwm = ((uint32_t)(rxq->lwm + 1) >= wqe_cnt) ?
1474 			rxq->lwm : (rxq->lwm + 1);
1475 	}
1476 	if (lwm && !rxq->lwm) {
1477 		/* With mprq, wqe_cnt may be < 100. */
1478 		DRV_LOG(WARNING, "Too small LWM configuration.");
1479 		rte_errno = EINVAL;
1480 		ret = -rte_errno;
1481 		goto end;
1482 	}
1483 	ret = mlx5_devx_modify_rq(rxq, MLX5_RXQ_MOD_RDY2RDY);
1484 end:
1485 	pthread_mutex_unlock(&priv->sh->lwm_config_lock);
1486 	return ret;
1487 }
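
/*
 * Illustrative usage sketch (not part of the driver): this handler is
 * reached through the generic availability-threshold ethdev API, usually
 * together with an event callback; the port, queue and threshold values
 * are arbitrary examples.
 *
 *	static int
 *	avail_thresh_event(uint16_t port, enum rte_eth_event_type ev,
 *			   void *cb_arg, void *ret)
 *	{
 *		RTE_SET_USED(ev);
 *		RTE_SET_USED(cb_arg);
 *		RTE_SET_USED(ret);
 *		printf("Rx availability below threshold on port %u\n", port);
 *		return 0;
 *	}
 *
 *	rte_eth_dev_callback_register(0, RTE_ETH_EVENT_RX_AVAIL_THRESH,
 *				      avail_thresh_event, NULL);
 *	rte_eth_rx_avail_thresh_set(0, 0, 70);
 */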
1488 
1489 /**
1490  * Mlx5 access register function to configure the host shaper.
1491  * It calls the libmtcr_ul API to access the QSHR (QoS Shaper Host Register)
1492  * in firmware.
1493  *
1494  * @param dev
1495  *   Pointer to rte_eth_dev.
1496  * @param lwm_triggered
1497  *   Flag to enable/disable lwm_triggered bit in QSHR.
1498  * @param rate
1499  *   Host shaper rate in units of 100 Mbps; setting it to 0 disables the shaper.
1500  * @return
1501  *   0 : operation success.
1502  *   Otherwise:
1503  *   - ENOENT - no ibdev interface.
1504  *   - EBUSY  - the register access unit is busy.
1505  *   - EIO    - the register access command encountered an I/O error.
1506  */
1507 static int
1508 mlxreg_host_shaper_config(struct rte_eth_dev *dev,
1509 			  bool lwm_triggered, uint8_t rate)
1510 {
1511 #ifdef HAVE_MLX5_MSTFLINT
1512 	struct mlx5_priv *priv = dev->data->dev_private;
1513 	uint32_t data[MLX5_ST_SZ_DW(register_qshr)] = {0};
1514 	int rc, retry_count = 3;
1515 	mfile *mf = NULL;
1516 	int status;
1517 	void *ptr;
1518 
1519 	mf = mopen(priv->sh->ibdev_name);
1520 	if (!mf) {
1521 		DRV_LOG(WARNING, "mopen failed\n");
1522 		rte_errno = ENOENT;
1523 		return -rte_errno;
1524 	}
1525 	MLX5_SET(register_qshr, data, connected_host, 1);
1526 	MLX5_SET(register_qshr, data, fast_response, lwm_triggered ? 1 : 0);
1527 	MLX5_SET(register_qshr, data, local_port, 1);
1528 	ptr = MLX5_ADDR_OF(register_qshr, data, global_config);
1529 	MLX5_SET(ets_global_config_register, ptr, rate_limit_update, 1);
1530 	MLX5_SET(ets_global_config_register, ptr, max_bw_units,
1531 		 rate ? ETS_GLOBAL_CONFIG_BW_UNIT_HUNDREDS_MBPS :
1532 		 ETS_GLOBAL_CONFIG_BW_UNIT_DISABLED);
1533 	MLX5_SET(ets_global_config_register, ptr, max_bw_value, rate);
1534 	do {
1535 		rc = maccess_reg(mf,
1536 				 MLX5_QSHR_REGISTER_ID,
1537 				 MACCESS_REG_METHOD_SET,
1538 				 (u_int32_t *)&data[0],
1539 				 sizeof(data),
1540 				 sizeof(data),
1541 				 sizeof(data),
1542 				 &status);
1543 		if ((rc != ME_ICMD_STATUS_IFC_BUSY &&
1544 		     status != ME_REG_ACCESS_BAD_PARAM) ||
1545 		    !(mf->flags & MDEVS_REM)) {
1546 			break;
1547 		}
1548 		DRV_LOG(WARNING, "%s retry.", __func__);
1549 		usleep(10000);
1550 	} while (retry_count-- > 0);
1551 	mclose(mf);
1552 	rte_errno = (rc == ME_REG_ACCESS_DEV_BUSY) ? EBUSY : EIO;
1553 	return rc ? -rte_errno : 0;
1554 #else
1555 	(void)dev;
1556 	(void)lwm_triggered;
1557 	(void)rate;
1558 	return -1;
1559 #endif
1560 }
1561 
1562 int rte_pmd_mlx5_host_shaper_config(int port_id, uint8_t rate,
1563 				    uint32_t flags)
1564 {
1565 	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
1566 	struct mlx5_priv *priv = dev->data->dev_private;
1567 	bool lwm_triggered =
1568 	     !!(flags & RTE_BIT32(RTE_PMD_MLX5_HOST_SHAPER_FLAG_AVAIL_THRESH_TRIGGERED));
1569 
1570 	if (!lwm_triggered) {
1571 		priv->sh->host_shaper_rate = rate;
1572 	} else {
1573 		switch (rate) {
1574 		case 0:
1575 			/* Rate 0 means disable lwm_triggered. */
1576 			priv->sh->lwm_triggered = 0;
1577 			break;
1578 		case 1:
1579 			/* Rate 1 means enable lwm_triggered. */
1580 			priv->sh->lwm_triggered = 1;
1581 			break;
1582 		default:
1583 			return -ENOTSUP;
1584 		}
1585 	}
1586 	return mlxreg_host_shaper_config(dev, priv->sh->lwm_triggered,
1587 					 priv->sh->host_shaper_rate);
1588 }
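
/*
 * Illustrative usage sketch (not part of the driver): an application can
 * cap the host port to 1 Gb/s (the rate is in 100 Mb/s units) and, in a
 * separate call, enable the threshold-triggered mode; the port number is
 * an arbitrary example.
 *
 *	rte_pmd_mlx5_host_shaper_config(0, 10, 0);
 *	rte_pmd_mlx5_host_shaper_config(0, 1,
 *		RTE_BIT32(RTE_PMD_MLX5_HOST_SHAPER_FLAG_AVAIL_THRESH_TRIGGERED));
 */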
1589 
1590 /**
1591  * Dump RQ/CQ Context to a file.
1592  *
1593  * @param[in] port_id
1594  *   Port ID
1595  * @param[in] queue_id
1596  *   Queue ID
1597  * @param[in] filename
1598  *   Name of file to dump the Rx Queue Context
1599  *
1600  * @return
1601  *   0 on success, non-zero value depending on the failure type
1602  */
1603 int rte_pmd_mlx5_rxq_dump_contexts(uint16_t port_id, uint16_t queue_id, const char *filename)
1604 {
1605 	struct rte_eth_dev *dev;
1606 	struct mlx5_rxq_priv *rxq;
1607 	struct mlx5_rxq_ctrl *rxq_ctrl;
1608 	struct mlx5_rxq_obj *rxq_obj;
1609 	struct mlx5_devx_rq *rq;
1610 	struct mlx5_devx_cq *cq;
1611 	struct mlx5_devx_obj *rq_devx_obj;
1612 	struct mlx5_devx_obj *cq_devx_obj;
1613 
1614 	uint32_t rq_out[MLX5_ST_SZ_DW(query_rq_out)] = {0};
1615 	uint32_t cq_out[MLX5_ST_SZ_DW(query_cq_out)] = {0};
1616 
1617 	int ret;
1618 	FILE *fd;
1619 	MKSTR(path, "./%s", filename);
1620 
1621 	if (!rte_eth_dev_is_valid_port(port_id))
1622 		return -ENODEV;
1623 
1624 	if (rte_eth_rx_queue_is_valid(port_id, queue_id))
1625 		return -EINVAL;
1626 
1627 	fd = fopen(path, "w");
1628 	if (!fd) {
1629 		rte_errno = errno;
1630 		return -EIO;
1631 	}
1632 
1633 	dev = &rte_eth_devices[port_id];
1634 	rxq = mlx5_rxq_ref(dev, queue_id);
1635 	rxq_ctrl = rxq->ctrl;
1636 	rxq_obj = rxq_ctrl->obj;
1637 	rq = &rxq->devx_rq;
1638 	cq = &rxq_obj->cq_obj;
1639 	rq_devx_obj = rq->rq;
1640 	cq_devx_obj = cq->cq;
1641 
1642 	do {
1643 		ret = mlx5_devx_cmd_query_rq(rq_devx_obj, rq_out, sizeof(rq_out));
1644 		if (ret)
1645 			break;
1646 
1647 		/* Dump rq query output to file */
1648 		MKSTR(rq_headline, "RQ DevX ID = %u Port = %u Queue index = %u ",
1649 					rq_devx_obj->id, port_id, queue_id);
1650 		mlx5_dump_to_file(fd, NULL, rq_headline, 0);
1651 		mlx5_dump_to_file(fd, "Query RQ Dump:",
1652 					(const void *)((uintptr_t)rq_out),
1653 					sizeof(rq_out));
1654 
1655 		ret = mlx5_devx_cmd_query_cq(cq_devx_obj, cq_out, sizeof(cq_out));
1656 		if (ret)
1657 			break;
1658 
1659 		/* Dump cq query output to file */
1660 		MKSTR(cq_headline, "CQ DevX ID = %u Port = %u Queue index = %u ",
1661 					cq_devx_obj->id, port_id, queue_id);
1662 		mlx5_dump_to_file(fd, NULL, cq_headline, 0);
1663 		mlx5_dump_to_file(fd, "Query CQ Dump:",
1664 					(const void *)((uintptr_t)cq_out),
1665 					sizeof(cq_out));
1666 	} while (false);
1667 
1668 	fclose(fd);
1669 	return ret;
1670 }
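
/*
 * Illustrative usage sketch (not part of the driver): dumping the RQ/CQ
 * contexts of queue 0 on port 0 to a file in the current directory; the
 * values and file name are arbitrary examples.
 *
 *	int rc = rte_pmd_mlx5_rxq_dump_contexts(0, 0, "rxq0_ctx.dump");
 *
 *	if (rc != 0)
 *		printf("context dump failed: %d\n", rc);
 */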
1671