1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2015 6WIND S.A.
3  * Copyright 2015 Mellanox Technologies, Ltd
4  */
5 
6 #include <stddef.h>
7 #include <errno.h>
8 #include <string.h>
9 #include <stdint.h>
10 #include <fcntl.h>
11 #include <sys/queue.h>
12 
13 #include <rte_mbuf.h>
14 #include <rte_malloc.h>
15 #include <rte_ethdev_driver.h>
16 #include <rte_common.h>
17 #include <rte_interrupts.h>
18 #include <rte_debug.h>
19 #include <rte_io.h>
20 #include <rte_eal_paging.h>
21 
22 #include <mlx5_glue.h>
23 #include <mlx5_devx_cmds.h>
24 #include <mlx5_malloc.h>
25 
26 #include "mlx5_defs.h"
27 #include "mlx5.h"
28 #include "mlx5_common_os.h"
29 #include "mlx5_rxtx.h"
30 #include "mlx5_utils.h"
31 #include "mlx5_autoconf.h"
32 #include "mlx5_flow.h"
33 
34 
35 /* Default RSS hash key also used for ConnectX-3. */
36 uint8_t rss_hash_default_key[] = {
37 	0x2c, 0xc6, 0x81, 0xd1,
38 	0x5b, 0xdb, 0xf4, 0xf7,
39 	0xfc, 0xa2, 0x83, 0x19,
40 	0xdb, 0x1a, 0x3e, 0x94,
41 	0x6b, 0x9e, 0x38, 0xd9,
42 	0x2c, 0x9c, 0x03, 0xd1,
43 	0xad, 0x99, 0x44, 0xa7,
44 	0xd9, 0x56, 0x3d, 0x59,
45 	0x06, 0x3c, 0x25, 0xf3,
46 	0xfc, 0x1f, 0xdc, 0x2a,
47 };
48 
49 /* Length of the default RSS hash key. */
50 static_assert(MLX5_RSS_HASH_KEY_LEN ==
51 	      (unsigned int)sizeof(rss_hash_default_key),
52 	      "wrong RSS default key size.");
53 
54 /**
55  * Check whether Multi-Packet RQ can be enabled for the device.
56  *
57  * @param dev
58  *   Pointer to Ethernet device.
59  *
60  * @return
61  *   1 if supported, negative errno value if not.
62  */
63 inline int
64 mlx5_check_mprq_support(struct rte_eth_dev *dev)
65 {
66 	struct mlx5_priv *priv = dev->data->dev_private;
67 
68 	if (priv->config.mprq.enabled &&
69 	    priv->rxqs_n >= priv->config.mprq.min_rxqs_num)
70 		return 1;
71 	return -ENOTSUP;
72 }
73 
74 /**
75  * Check whether Multi-Packet RQ is enabled for the Rx queue.
76  *
77  * @param rxq
78  *   Pointer to receive queue structure.
79  *
80  * @return
81  *   0 if disabled, otherwise enabled.
82  */
83 inline int
84 mlx5_rxq_mprq_enabled(struct mlx5_rxq_data *rxq)
85 {
86 	return rxq->strd_num_n > 0;
87 }
88 
89 /**
90  * Check whether Multi-Packet RQ is enabled for the device.
91  *
92  * @param dev
93  *   Pointer to Ethernet device.
94  *
95  * @return
96  *   0 if disabled, otherwise enabled.
97  */
98 inline int
99 mlx5_mprq_enabled(struct rte_eth_dev *dev)
100 {
101 	struct mlx5_priv *priv = dev->data->dev_private;
102 	uint32_t i;
103 	uint16_t n = 0;
104 	uint16_t n_ibv = 0;
105 
106 	if (mlx5_check_mprq_support(dev) < 0)
107 		return 0;
108 	/* All the configured queues should be enabled. */
109 	for (i = 0; i < priv->rxqs_n; ++i) {
110 		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
111 		struct mlx5_rxq_ctrl *rxq_ctrl = container_of
112 			(rxq, struct mlx5_rxq_ctrl, rxq);
113 
114 		if (rxq == NULL || rxq_ctrl->type != MLX5_RXQ_TYPE_STANDARD)
115 			continue;
116 		n_ibv++;
117 		if (mlx5_rxq_mprq_enabled(rxq))
118 			++n;
119 	}
120 	/* Multi-Packet RQ can't be partially configured. */
121 	MLX5_ASSERT(n == 0 || n == n_ibv);
122 	return n == n_ibv;
123 }
124 
125 /**
126  * Allocate RX queue elements for Multi-Packet RQ.
127  *
128  * @param rxq_ctrl
129  *   Pointer to RX queue structure.
130  *
131  * @return
132  *   0 on success, a negative errno value otherwise and rte_errno is set.
133  */
134 static int
135 rxq_alloc_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
136 {
137 	struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
138 	unsigned int wqe_n = 1 << rxq->elts_n;
139 	unsigned int i;
140 	int err;
141 
142 	/* Iterate on segments. */
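	/*
	 * wqe_n buffers fill the ring and one extra buffer is kept aside
	 * as the MPRQ replacement buffer (mprq_repl), hence i <= wqe_n.
	 */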
143 	for (i = 0; i <= wqe_n; ++i) {
144 		struct mlx5_mprq_buf *buf;
145 
146 		if (rte_mempool_get(rxq->mprq_mp, (void **)&buf) < 0) {
147 			DRV_LOG(ERR, "port %u empty mbuf pool", rxq->port_id);
148 			rte_errno = ENOMEM;
149 			goto error;
150 		}
151 		if (i < wqe_n)
152 			(*rxq->mprq_bufs)[i] = buf;
153 		else
154 			rxq->mprq_repl = buf;
155 	}
156 	DRV_LOG(DEBUG,
157 		"port %u Rx queue %u allocated and configured %u segments",
158 		rxq->port_id, rxq->idx, wqe_n);
159 	return 0;
160 error:
161 	err = rte_errno; /* Save rte_errno before cleanup. */
162 	wqe_n = i;
163 	for (i = 0; (i != wqe_n); ++i) {
164 		if ((*rxq->mprq_bufs)[i] != NULL)
165 			rte_mempool_put(rxq->mprq_mp,
166 					(*rxq->mprq_bufs)[i]);
167 		(*rxq->mprq_bufs)[i] = NULL;
168 	}
169 	DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything",
170 		rxq->port_id, rxq->idx);
171 	rte_errno = err; /* Restore rte_errno. */
172 	return -rte_errno;
173 }
174 
175 /**
176  * Allocate RX queue elements for Single-Packet RQ.
177  *
178  * @param rxq_ctrl
179  *   Pointer to RX queue structure.
180  *
181  * @return
182  *   0 on success, a negative errno value otherwise and rte_errno is set.
183  */
184 static int
185 rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
186 {
187 	const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n;
188 	unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n;
189 	unsigned int i;
190 	int err;
191 
192 	/* Iterate on segments. */
193 	for (i = 0; (i != elts_n); ++i) {
194 		struct rte_mbuf *buf;
195 
196 		buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp);
197 		if (buf == NULL) {
198 			DRV_LOG(ERR, "port %u empty mbuf pool",
199 				PORT_ID(rxq_ctrl->priv));
200 			rte_errno = ENOMEM;
201 			goto error;
202 		}
203 		/* Headroom is reserved by rte_pktmbuf_alloc(). */
204 		MLX5_ASSERT(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
205 		/* Buffer is supposed to be empty. */
206 		MLX5_ASSERT(rte_pktmbuf_data_len(buf) == 0);
207 		MLX5_ASSERT(rte_pktmbuf_pkt_len(buf) == 0);
208 		MLX5_ASSERT(!buf->next);
209 		/* Only the first segment keeps headroom. */
210 		if (i % sges_n)
211 			SET_DATA_OFF(buf, 0);
212 		PORT(buf) = rxq_ctrl->rxq.port_id;
213 		DATA_LEN(buf) = rte_pktmbuf_tailroom(buf);
214 		PKT_LEN(buf) = DATA_LEN(buf);
215 		NB_SEGS(buf) = 1;
216 		(*rxq_ctrl->rxq.elts)[i] = buf;
217 	}
218 	/* If Rx vector is activated. */
219 	if (mlx5_rxq_check_vec_support(&rxq_ctrl->rxq) > 0) {
220 		struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
221 		struct rte_mbuf *mbuf_init = &rxq->fake_mbuf;
222 		struct rte_pktmbuf_pool_private *priv =
223 			(struct rte_pktmbuf_pool_private *)
224 				rte_mempool_get_priv(rxq_ctrl->rxq.mp);
225 		int j;
226 
227 		/* Initialize default rearm_data for vPMD. */
228 		mbuf_init->data_off = RTE_PKTMBUF_HEADROOM;
229 		rte_mbuf_refcnt_set(mbuf_init, 1);
230 		mbuf_init->nb_segs = 1;
231 		mbuf_init->port = rxq->port_id;
232 		if (priv->flags & RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF)
233 			mbuf_init->ol_flags = EXT_ATTACHED_MBUF;
234 		/*
235 		 * prevent compiler reordering:
236 		 * rearm_data covers previous fields.
237 		 */
238 		rte_compiler_barrier();
239 		rxq->mbuf_initializer =
240 			*(rte_xmm_t *)&mbuf_init->rearm_data;
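		/*
		 * The vectorized Rx burst copies this 128-bit template into
		 * each mbuf's rearm_data instead of setting fields one by one.
		 */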
241 		/* Padding with a fake mbuf for vectorized Rx. */
242 		for (j = 0; j < MLX5_VPMD_DESCS_PER_LOOP; ++j)
243 			(*rxq->elts)[elts_n + j] = &rxq->fake_mbuf;
244 	}
245 	DRV_LOG(DEBUG,
246 		"port %u Rx queue %u allocated and configured %u segments"
247 		" (max %u packets)",
248 		PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx, elts_n,
249 		elts_n / (1 << rxq_ctrl->rxq.sges_n));
250 	return 0;
251 error:
252 	err = rte_errno; /* Save rte_errno before cleanup. */
253 	elts_n = i;
254 	for (i = 0; (i != elts_n); ++i) {
255 		if ((*rxq_ctrl->rxq.elts)[i] != NULL)
256 			rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]);
257 		(*rxq_ctrl->rxq.elts)[i] = NULL;
258 	}
259 	DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything",
260 		PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx);
261 	rte_errno = err; /* Restore rte_errno. */
262 	return -rte_errno;
263 }
264 
265 /**
266  * Allocate RX queue elements.
267  *
268  * @param rxq_ctrl
269  *   Pointer to RX queue structure.
270  *
271  * @return
272  *   0 on success, a negative errno value otherwise and rte_errno is set.
273  */
274 int
275 rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
276 {
277 	return mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ?
278 	       rxq_alloc_elts_mprq(rxq_ctrl) : rxq_alloc_elts_sprq(rxq_ctrl);
279 }
280 
281 /**
282  * Free RX queue elements for Multi-Packet RQ.
283  *
284  * @param rxq_ctrl
285  *   Pointer to RX queue structure.
286  */
287 static void
288 rxq_free_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
289 {
290 	struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
291 	uint16_t i;
292 
293 	DRV_LOG(DEBUG, "port %u Multi-Packet Rx queue %u freeing WRs",
294 		rxq->port_id, rxq->idx);
295 	if (rxq->mprq_bufs == NULL)
296 		return;
297 	MLX5_ASSERT(mlx5_rxq_check_vec_support(rxq) < 0);
298 	for (i = 0; (i != (1u << rxq->elts_n)); ++i) {
299 		if ((*rxq->mprq_bufs)[i] != NULL)
300 			mlx5_mprq_buf_free((*rxq->mprq_bufs)[i]);
301 		(*rxq->mprq_bufs)[i] = NULL;
302 	}
303 	if (rxq->mprq_repl != NULL) {
304 		mlx5_mprq_buf_free(rxq->mprq_repl);
305 		rxq->mprq_repl = NULL;
306 	}
307 }
308 
309 /**
310  * Free RX queue elements for Single-Packet RQ.
311  *
312  * @param rxq_ctrl
313  *   Pointer to RX queue structure.
314  */
315 static void
316 rxq_free_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
317 {
318 	struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
319 	const uint16_t q_n = (1 << rxq->elts_n);
320 	const uint16_t q_mask = q_n - 1;
321 	uint16_t used = q_n - (rxq->rq_ci - rxq->rq_pi);
322 	uint16_t i;
323 
324 	DRV_LOG(DEBUG, "port %u Rx queue %u freeing WRs",
325 		PORT_ID(rxq_ctrl->priv), rxq->idx);
326 	if (rxq->elts == NULL)
327 		return;
328 	/*
329 	 * Some mbufs in the ring belong to the application; they cannot
330 	 * be freed.
331 	 */
332 	if (mlx5_rxq_check_vec_support(rxq) > 0) {
333 		for (i = 0; i < used; ++i)
334 			(*rxq->elts)[(rxq->rq_ci + i) & q_mask] = NULL;
335 		rxq->rq_pi = rxq->rq_ci;
336 	}
337 	for (i = 0; (i != (1u << rxq->elts_n)); ++i) {
338 		if ((*rxq->elts)[i] != NULL)
339 			rte_pktmbuf_free_seg((*rxq->elts)[i]);
340 		(*rxq->elts)[i] = NULL;
341 	}
342 }
343 
344 /**
345  * Free RX queue elements.
346  *
347  * @param rxq_ctrl
348  *   Pointer to RX queue structure.
349  */
350 static void
351 rxq_free_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
352 {
353 	if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq))
354 		rxq_free_elts_mprq(rxq_ctrl);
355 	else
356 		rxq_free_elts_sprq(rxq_ctrl);
357 }
358 
359 /**
360  * Returns the per-queue supported offloads.
361  *
362  * @param dev
363  *   Pointer to Ethernet device.
364  *
365  * @return
366  *   Supported Rx offloads.
367  */
368 uint64_t
369 mlx5_get_rx_queue_offloads(struct rte_eth_dev *dev)
370 {
371 	struct mlx5_priv *priv = dev->data->dev_private;
372 	struct mlx5_dev_config *config = &priv->config;
373 	uint64_t offloads = (DEV_RX_OFFLOAD_SCATTER |
374 			     DEV_RX_OFFLOAD_TIMESTAMP |
375 			     DEV_RX_OFFLOAD_JUMBO_FRAME |
376 			     DEV_RX_OFFLOAD_RSS_HASH);
377 
378 	if (config->hw_fcs_strip)
379 		offloads |= DEV_RX_OFFLOAD_KEEP_CRC;
380 
381 	if (config->hw_csum)
382 		offloads |= (DEV_RX_OFFLOAD_IPV4_CKSUM |
383 			     DEV_RX_OFFLOAD_UDP_CKSUM |
384 			     DEV_RX_OFFLOAD_TCP_CKSUM);
385 	if (config->hw_vlan_strip)
386 		offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
387 	if (MLX5_LRO_SUPPORTED(dev))
388 		offloads |= DEV_RX_OFFLOAD_TCP_LRO;
389 	return offloads;
390 }
391 
392 
393 /**
394  * Returns the per-port supported offloads.
395  *
396  * @return
397  *   Supported Rx offloads.
398  */
399 uint64_t
400 mlx5_get_rx_port_offloads(void)
401 {
402 	uint64_t offloads = DEV_RX_OFFLOAD_VLAN_FILTER;
403 
404 	return offloads;
405 }
406 
407 /**
408  * Verify if the queue can be released.
409  *
410  * @param dev
411  *   Pointer to Ethernet device.
412  * @param idx
413  *   RX queue index.
414  *
415  * @return
416  *   1 if the queue can be released
417  *   0 if the queue cannot be released because there are references to it.
418  *   A negative errno value and rte_errno is set if the queue doesn't exist.
419  */
420 static int
421 mlx5_rxq_releasable(struct rte_eth_dev *dev, uint16_t idx)
422 {
423 	struct mlx5_priv *priv = dev->data->dev_private;
424 	struct mlx5_rxq_ctrl *rxq_ctrl;
425 
426 	if (!(*priv->rxqs)[idx]) {
427 		rte_errno = EINVAL;
428 		return -rte_errno;
429 	}
430 	rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
431 	return (rte_atomic32_read(&rxq_ctrl->refcnt) == 1);
432 }
433 
434 /* Fetches and drops all SW-owned and error CQEs to synchronize CQ. */
435 static void
436 rxq_sync_cq(struct mlx5_rxq_data *rxq)
437 {
438 	const uint16_t cqe_n = 1 << rxq->cqe_n;
439 	const uint16_t cqe_mask = cqe_n - 1;
440 	volatile struct mlx5_cqe *cqe;
441 	int ret, i;
442 
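	/* At most cqe_n CQEs can be owned by SW; bound the drain loop by the CQ size. */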
443 	i = cqe_n;
444 	do {
445 		cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask];
446 		ret = check_cqe(cqe, cqe_n, rxq->cq_ci);
447 		if (ret == MLX5_CQE_STATUS_HW_OWN)
448 			break;
449 		if (ret == MLX5_CQE_STATUS_ERR) {
450 			rxq->cq_ci++;
451 			continue;
452 		}
453 		MLX5_ASSERT(ret == MLX5_CQE_STATUS_SW_OWN);
454 		if (MLX5_CQE_FORMAT(cqe->op_own) != MLX5_COMPRESSED) {
455 			rxq->cq_ci++;
456 			continue;
457 		}
458 		/* Compute the next non-compressed CQE. */
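		/*
		 * In a compressed session the title CQE's byte_cnt holds the
		 * number of mini-CQEs, so this skips the whole session.
		 */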
459 		rxq->cq_ci += rte_be_to_cpu_32(cqe->byte_cnt);
460 
461 	} while (--i);
462 	/* Move all CQEs to HW ownership, including possible MiniCQEs. */
463 	for (i = 0; i < cqe_n; i++) {
464 		cqe = &(*rxq->cqes)[i];
465 		cqe->op_own = MLX5_CQE_INVALIDATE;
466 	}
467 	/* Resync CQE and WQE (WQ in RESET state). */
468 	rte_cio_wmb();
469 	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
470 	rte_cio_wmb();
471 	*rxq->rq_db = rte_cpu_to_be_32(0);
472 	rte_cio_wmb();
473 }
474 
475 /**
476  * Rx queue stop. Device queue goes to the RESET state,
477  * all involved mbufs are freed from WQ.
478  *
479  * @param dev
480  *   Pointer to Ethernet device structure.
481  * @param idx
482  *   RX queue index.
483  *
484  * @return
485  *   0 on success, a negative errno value otherwise and rte_errno is set.
486  */
487 int
488 mlx5_rx_queue_stop_primary(struct rte_eth_dev *dev, uint16_t idx)
489 {
490 	struct mlx5_priv *priv = dev->data->dev_private;
491 	struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
492 	struct mlx5_rxq_ctrl *rxq_ctrl =
493 			container_of(rxq, struct mlx5_rxq_ctrl, rxq);
494 	int ret;
495 
496 	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
497 	if (rxq_ctrl->obj->type == MLX5_RXQ_OBJ_TYPE_IBV) {
498 		struct ibv_wq_attr mod = {
499 			.attr_mask = IBV_WQ_ATTR_STATE,
500 			.wq_state = IBV_WQS_RESET,
501 		};
502 
503 		ret = mlx5_glue->modify_wq(rxq_ctrl->obj->wq, &mod);
504 	} else { /* rxq_ctrl->obj->type == MLX5_RXQ_OBJ_TYPE_DEVX_RQ. */
505 		struct mlx5_devx_modify_rq_attr rq_attr;
506 
507 		memset(&rq_attr, 0, sizeof(rq_attr));
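		/* rq_state is the current state, state the requested one: RDY -> RST stops the RQ. */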
508 		rq_attr.rq_state = MLX5_RQC_STATE_RDY;
509 		rq_attr.state = MLX5_RQC_STATE_RST;
510 		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
511 	}
512 	if (ret) {
513 		DRV_LOG(ERR, "Cannot change Rx WQ state to RESET:  %s",
514 			strerror(errno));
515 		rte_errno = errno;
516 		return ret;
517 	}
518 	/* Remove all processed CQEs. */
519 	rxq_sync_cq(rxq);
520 	/* Free all involved mbufs. */
521 	rxq_free_elts(rxq_ctrl);
522 	/* Set the actual queue state. */
523 	dev->data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STOPPED;
524 	return 0;
525 }
526 
527 /**
528  * Rx queue stop. Device queue goes to the RESET state,
529  * all involved mbufs are freed from WQ.
530  *
531  * @param dev
532  *   Pointer to Ethernet device structure.
533  * @param idx
534  *   RX queue index.
535  *
536  * @return
537  *   0 on success, a negative errno value otherwise and rte_errno is set.
538  */
539 int
540 mlx5_rx_queue_stop(struct rte_eth_dev *dev, uint16_t idx)
541 {
542 	eth_rx_burst_t pkt_burst = dev->rx_pkt_burst;
543 	int ret;
544 
545 	if (dev->data->rx_queue_state[idx] == RTE_ETH_QUEUE_STATE_HAIRPIN) {
546 		DRV_LOG(ERR, "Hairpin queue can't be stopped");
547 		rte_errno = EINVAL;
548 		return -EINVAL;
549 	}
550 	if (dev->data->rx_queue_state[idx] == RTE_ETH_QUEUE_STATE_STOPPED)
551 		return 0;
552 	/*
553 	 * Vectorized Rx burst requires the CQ and RQ indices to stay
554 	 * synchronized; that synchronization might be broken on RQ
555 	 * restart and cause Rx malfunction, so queue stopping is not
556 	 * supported while the vectorized Rx burst is engaged.
557 	 * The burst routine pointer depends on the process type,
558 	 * so the check is performed here, in each process.
559 	 */
560 	if (pkt_burst == mlx5_rx_burst_vec) {
561 		DRV_LOG(ERR, "Rx queue stop is not supported "
562 			"for vectorized Rx");
563 		rte_errno = EINVAL;
564 		return -EINVAL;
565 	}
566 	if (rte_eal_process_type() ==  RTE_PROC_SECONDARY) {
567 		ret = mlx5_mp_os_req_queue_control(dev, idx,
568 						   MLX5_MP_REQ_QUEUE_RX_STOP);
569 	} else {
570 		ret = mlx5_rx_queue_stop_primary(dev, idx);
571 	}
572 	return ret;
573 }
574 
575 /**
576  * Rx queue start. Device queue goes to the ready state,
577  * all required mbufs are allocated and WQ is replenished.
578  *
579  * @param dev
580  *   Pointer to Ethernet device structure.
581  * @param idx
582  *   RX queue index.
583  *
584  * @return
585  *   0 on success, a negative errno value otherwise and rte_errno is set.
586  */
587 int
588 mlx5_rx_queue_start_primary(struct rte_eth_dev *dev, uint16_t idx)
589 {
590 	struct mlx5_priv *priv = dev->data->dev_private;
591 	struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
592 	struct mlx5_rxq_ctrl *rxq_ctrl =
593 			container_of(rxq, struct mlx5_rxq_ctrl, rxq);
594 	int ret;
595 
596 	MLX5_ASSERT(rte_eal_process_type() ==  RTE_PROC_PRIMARY);
597 	/* Allocate needed buffers. */
598 	ret = rxq_alloc_elts(rxq_ctrl);
599 	if (ret) {
600 		DRV_LOG(ERR, "Cannot reallocate buffers for Rx WQ");
601 		rte_errno = errno;
602 		return ret;
603 	}
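	/* Publish the current CQ consumer index to its doorbell record before resetting the RQ doorbell. */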
604 	rte_cio_wmb();
605 	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
606 	rte_cio_wmb();
607 	/* Reset RQ consumer before moving queue to READY state. */
608 	*rxq->rq_db = rte_cpu_to_be_32(0);
609 	rte_cio_wmb();
610 	if (rxq_ctrl->obj->type == MLX5_RXQ_OBJ_TYPE_IBV) {
611 		struct ibv_wq_attr mod = {
612 			.attr_mask = IBV_WQ_ATTR_STATE,
613 			.wq_state = IBV_WQS_RDY,
614 		};
615 
616 		ret = mlx5_glue->modify_wq(rxq_ctrl->obj->wq, &mod);
617 	} else { /* rxq_ctrl->obj->type == MLX5_RXQ_OBJ_TYPE_DEVX_RQ. */
618 		struct mlx5_devx_modify_rq_attr rq_attr;
619 
620 		memset(&rq_attr, 0, sizeof(rq_attr));
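		/* rq_state is the current state, state the requested one: RST -> RDY starts the RQ. */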
621 		rq_attr.rq_state = MLX5_RQC_STATE_RST;
622 		rq_attr.state = MLX5_RQC_STATE_RDY;
623 		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
624 	}
625 	if (ret) {
626 		DRV_LOG(ERR, "Cannot change Rx WQ state to READY:  %s",
627 			strerror(errno));
628 		rte_errno = errno;
629 		return ret;
630 	}
631 	/* Reinitialize RQ - set WQEs. */
632 	mlx5_rxq_initialize(rxq);
633 	rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR;
634 	/* Set actual queue state. */
635 	dev->data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED;
636 	return 0;
637 }
638 
639 /**
640  * Rx queue start. Device queue goes to the ready state,
641  * all required mbufs are allocated and WQ is replenished.
642  *
643  * @param dev
644  *   Pointer to Ethernet device structure.
645  * @param idx
646  *   RX queue index.
647  *
648  * @return
649  *   0 on success, a negative errno value otherwise and rte_errno is set.
650  */
651 int
652 mlx5_rx_queue_start(struct rte_eth_dev *dev, uint16_t idx)
653 {
654 	int ret;
655 
656 	if (dev->data->rx_queue_state[idx] == RTE_ETH_QUEUE_STATE_HAIRPIN) {
657 		DRV_LOG(ERR, "Hairpin queue can't be started");
658 		rte_errno = EINVAL;
659 		return -EINVAL;
660 	}
661 	if (dev->data->rx_queue_state[idx] == RTE_ETH_QUEUE_STATE_STARTED)
662 		return 0;
663 	if (rte_eal_process_type() ==  RTE_PROC_SECONDARY) {
664 		ret = mlx5_mp_os_req_queue_control(dev, idx,
665 						   MLX5_MP_REQ_QUEUE_RX_START);
666 	} else {
667 		ret = mlx5_rx_queue_start_primary(dev, idx);
668 	}
669 	return ret;
670 }
671 
672 /**
673  * Rx queue presetup checks.
674  *
675  * @param dev
676  *   Pointer to Ethernet device structure.
677  * @param idx
678  *   RX queue index.
679  * @param desc
680  *   Number of descriptors to configure in queue.
681  *
682  * @return
683  *   0 on success, a negative errno value otherwise and rte_errno is set.
684  */
685 static int
686 mlx5_rx_queue_pre_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t *desc)
687 {
688 	struct mlx5_priv *priv = dev->data->dev_private;
689 
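	/*
	 * The PMD manages the Rx ring with power-of-two masks and programs
	 * queue sizes as log2 values, so round the descriptor count up.
	 */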
690 	if (!rte_is_power_of_2(*desc)) {
691 		*desc = 1 << log2above(*desc);
692 		DRV_LOG(WARNING,
693 			"port %u increased number of descriptors in Rx queue %u"
694 			" to the next power of two (%d)",
695 			dev->data->port_id, idx, *desc);
696 	}
697 	DRV_LOG(DEBUG, "port %u configuring Rx queue %u for %u descriptors",
698 		dev->data->port_id, idx, *desc);
699 	if (idx >= priv->rxqs_n) {
700 		DRV_LOG(ERR, "port %u Rx queue index out of range (%u >= %u)",
701 			dev->data->port_id, idx, priv->rxqs_n);
702 		rte_errno = EOVERFLOW;
703 		return -rte_errno;
704 	}
705 	if (!mlx5_rxq_releasable(dev, idx)) {
706 		DRV_LOG(ERR, "port %u unable to release queue index %u",
707 			dev->data->port_id, idx);
708 		rte_errno = EBUSY;
709 		return -rte_errno;
710 	}
711 	mlx5_rxq_release(dev, idx);
712 	return 0;
713 }
714 
715 /**
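 * DPDK callback to configure a Rx queue.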
716  *
717  * @param dev
718  *   Pointer to Ethernet device structure.
719  * @param idx
720  *   RX queue index.
721  * @param desc
722  *   Number of descriptors to configure in queue.
723  * @param socket
724  *   NUMA socket on which memory must be allocated.
725  * @param[in] conf
726  *   Thresholds parameters.
727  * @param mp
728  *   Memory pool for buffer allocations.
729  *
730  * @return
731  *   0 on success, a negative errno value otherwise and rte_errno is set.
732  */
733 int
734 mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
735 		    unsigned int socket, const struct rte_eth_rxconf *conf,
736 		    struct rte_mempool *mp)
737 {
738 	struct mlx5_priv *priv = dev->data->dev_private;
739 	struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
740 	struct mlx5_rxq_ctrl *rxq_ctrl =
741 		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
742 	int res;
743 
744 	res = mlx5_rx_queue_pre_setup(dev, idx, &desc);
745 	if (res)
746 		return res;
747 	rxq_ctrl = mlx5_rxq_new(dev, idx, desc, socket, conf, mp);
748 	if (!rxq_ctrl) {
749 		DRV_LOG(ERR, "port %u unable to allocate queue index %u",
750 			dev->data->port_id, idx);
751 		rte_errno = ENOMEM;
752 		return -rte_errno;
753 	}
754 	DRV_LOG(DEBUG, "port %u adding Rx queue %u to list",
755 		dev->data->port_id, idx);
756 	(*priv->rxqs)[idx] = &rxq_ctrl->rxq;
757 	return 0;
758 }
759 
760 /**
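 * DPDK callback to configure a hairpin Rx queue.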
761  *
762  * @param dev
763  *   Pointer to Ethernet device structure.
764  * @param idx
765  *   RX queue index.
766  * @param desc
767  *   Number of descriptors to configure in queue.
768  * @param hairpin_conf
769  *   Hairpin configuration parameters.
770  *
771  * @return
772  *   0 on success, a negative errno value otherwise and rte_errno is set.
773  */
774 int
775 mlx5_rx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
776 			    uint16_t desc,
777 			    const struct rte_eth_hairpin_conf *hairpin_conf)
778 {
779 	struct mlx5_priv *priv = dev->data->dev_private;
780 	struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
781 	struct mlx5_rxq_ctrl *rxq_ctrl =
782 		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
783 	int res;
784 
785 	res = mlx5_rx_queue_pre_setup(dev, idx, &desc);
786 	if (res)
787 		return res;
788 	if (hairpin_conf->peer_count != 1 ||
789 	    hairpin_conf->peers[0].port != dev->data->port_id ||
790 	    hairpin_conf->peers[0].queue >= priv->txqs_n) {
791 		DRV_LOG(ERR, "port %u unable to setup hairpin queue index %u:"
792 			" invalid hairpin configuration", dev->data->port_id,
793 			idx);
794 		rte_errno = EINVAL;
795 		return -rte_errno;
796 	}
797 	rxq_ctrl = mlx5_rxq_hairpin_new(dev, idx, desc, hairpin_conf);
798 	if (!rxq_ctrl) {
799 		DRV_LOG(ERR, "port %u unable to allocate queue index %u",
800 			dev->data->port_id, idx);
801 		rte_errno = ENOMEM;
802 		return -rte_errno;
803 	}
804 	DRV_LOG(DEBUG, "port %u adding Rx queue %u to list",
805 		dev->data->port_id, idx);
806 	(*priv->rxqs)[idx] = &rxq_ctrl->rxq;
807 	return 0;
808 }
809 
810 /**
811  * DPDK callback to release a RX queue.
812  *
813  * @param dpdk_rxq
814  *   Generic RX queue pointer.
815  */
816 void
817 mlx5_rx_queue_release(void *dpdk_rxq)
818 {
819 	struct mlx5_rxq_data *rxq = (struct mlx5_rxq_data *)dpdk_rxq;
820 	struct mlx5_rxq_ctrl *rxq_ctrl;
821 	struct mlx5_priv *priv;
822 
823 	if (rxq == NULL)
824 		return;
825 	rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
826 	priv = rxq_ctrl->priv;
827 	if (!mlx5_rxq_releasable(ETH_DEV(priv), rxq_ctrl->rxq.idx))
828 		rte_panic("port %u Rx queue %u is still used by a flow and"
829 			  " cannot be removed\n",
830 			  PORT_ID(priv), rxq->idx);
831 	mlx5_rxq_release(ETH_DEV(priv), rxq_ctrl->rxq.idx);
832 }
833 
834 /**
835  * Get an Rx queue Verbs/DevX object.
836  *
837  * @param dev
838  *   Pointer to Ethernet device.
839  * @param idx
840  *   Queue index in DPDK Rx queue array
841  *
842  * @return
843  *   The Verbs/DevX object if it exists.
844  */
845 static struct mlx5_rxq_obj *
846 mlx5_rxq_obj_get(struct rte_eth_dev *dev, uint16_t idx)
847 {
848 	struct mlx5_priv *priv = dev->data->dev_private;
849 	struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
850 	struct mlx5_rxq_ctrl *rxq_ctrl;
851 
852 	if (idx >= priv->rxqs_n)
853 		return NULL;
854 	if (!rxq_data)
855 		return NULL;
856 	rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
857 	if (rxq_ctrl->obj)
858 		rte_atomic32_inc(&rxq_ctrl->obj->refcnt);
859 	return rxq_ctrl->obj;
860 }
861 
862 /**
863  * Release the resources allocated for an RQ DevX object.
864  *
865  * @param rxq_ctrl
866  *   DevX Rx queue object.
867  */
868 static void
869 rxq_release_devx_rq_resources(struct mlx5_rxq_ctrl *rxq_ctrl)
870 {
871 	if (rxq_ctrl->rxq.wqes) {
872 		mlx5_free((void *)(uintptr_t)rxq_ctrl->rxq.wqes);
873 		rxq_ctrl->rxq.wqes = NULL;
874 	}
875 	if (rxq_ctrl->wq_umem) {
876 		mlx5_glue->devx_umem_dereg(rxq_ctrl->wq_umem);
877 		rxq_ctrl->wq_umem = NULL;
878 	}
879 }
880 
881 /**
882  * Release the resources allocated for the Rx CQ DevX object.
883  *
884  * @param rxq_ctrl
885  *   DevX Rx queue object.
886  */
887 static void
888 rxq_release_devx_cq_resources(struct mlx5_rxq_ctrl *rxq_ctrl)
889 {
890 	if (rxq_ctrl->rxq.cqes) {
891 		rte_free((void *)(uintptr_t)rxq_ctrl->rxq.cqes);
892 		rxq_ctrl->rxq.cqes = NULL;
893 	}
894 	if (rxq_ctrl->cq_umem) {
895 		mlx5_glue->devx_umem_dereg(rxq_ctrl->cq_umem);
896 		rxq_ctrl->cq_umem = NULL;
897 	}
898 }
899 
900 /**
901  * Release Rx hairpin related resources.
902  *
903  * @param rxq_obj
904  *   Hairpin Rx queue object.
905  */
906 static void
907 rxq_obj_hairpin_release(struct mlx5_rxq_obj *rxq_obj)
908 {
909 	struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
910 
911 	MLX5_ASSERT(rxq_obj);
912 	rq_attr.state = MLX5_RQC_STATE_RST;
913 	rq_attr.rq_state = MLX5_RQC_STATE_RDY;
914 	mlx5_devx_cmd_modify_rq(rxq_obj->rq, &rq_attr);
915 	claim_zero(mlx5_devx_cmd_destroy(rxq_obj->rq));
916 }
917 
918 /**
919  * Release an Rx verbs/DevX queue object.
920  *
921  * @param rxq_obj
922  *   Verbs/DevX Rx queue object.
923  *
924  * @return
925  *   1 while a reference on it exists, 0 when freed.
926  */
927 static int
928 mlx5_rxq_obj_release(struct mlx5_rxq_obj *rxq_obj)
929 {
930 	MLX5_ASSERT(rxq_obj);
931 	if (rte_atomic32_dec_and_test(&rxq_obj->refcnt)) {
932 		switch (rxq_obj->type) {
933 		case MLX5_RXQ_OBJ_TYPE_IBV:
934 			MLX5_ASSERT(rxq_obj->wq);
935 			MLX5_ASSERT(rxq_obj->ibv_cq);
936 			rxq_free_elts(rxq_obj->rxq_ctrl);
937 			claim_zero(mlx5_glue->destroy_wq(rxq_obj->wq));
938 			claim_zero(mlx5_glue->destroy_cq(rxq_obj->ibv_cq));
939 			if (rxq_obj->ibv_channel)
940 				claim_zero(mlx5_glue->destroy_comp_channel
941 					   (rxq_obj->ibv_channel));
942 			break;
943 		case MLX5_RXQ_OBJ_TYPE_DEVX_RQ:
944 			MLX5_ASSERT(rxq_obj->rq);
945 			MLX5_ASSERT(rxq_obj->devx_cq);
946 			rxq_free_elts(rxq_obj->rxq_ctrl);
947 			claim_zero(mlx5_devx_cmd_destroy(rxq_obj->rq));
948 			claim_zero(mlx5_devx_cmd_destroy(rxq_obj->devx_cq));
949 			if (rxq_obj->devx_channel)
950 				mlx5_glue->devx_destroy_event_channel
951 							(rxq_obj->devx_channel);
952 			rxq_release_devx_rq_resources(rxq_obj->rxq_ctrl);
953 			rxq_release_devx_cq_resources(rxq_obj->rxq_ctrl);
954 			break;
955 		case MLX5_RXQ_OBJ_TYPE_DEVX_HAIRPIN:
956 			MLX5_ASSERT(rxq_obj->rq);
957 			rxq_obj_hairpin_release(rxq_obj);
958 			break;
959 		}
960 		LIST_REMOVE(rxq_obj, next);
961 		mlx5_free(rxq_obj);
962 		return 0;
963 	}
964 	return 1;
965 }
966 
967 /**
968  * Allocate queue vector and fill epoll fd list for Rx interrupts.
969  *
970  * @param dev
971  *   Pointer to Ethernet device.
972  *
973  * @return
974  *   0 on success, a negative errno value otherwise and rte_errno is set.
975  */
976 int
977 mlx5_rx_intr_vec_enable(struct rte_eth_dev *dev)
978 {
979 	struct mlx5_priv *priv = dev->data->dev_private;
980 	unsigned int i;
981 	unsigned int rxqs_n = priv->rxqs_n;
982 	unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
983 	unsigned int count = 0;
984 	struct rte_intr_handle *intr_handle = dev->intr_handle;
985 
986 	if (!dev->data->dev_conf.intr_conf.rxq)
987 		return 0;
988 	mlx5_rx_intr_vec_disable(dev);
989 	intr_handle->intr_vec = mlx5_malloc(0,
990 				n * sizeof(intr_handle->intr_vec[0]),
991 				0, SOCKET_ID_ANY);
992 	if (intr_handle->intr_vec == NULL) {
993 		DRV_LOG(ERR,
994 			"port %u failed to allocate memory for interrupt"
995 			" vector, Rx interrupts will not be supported",
996 			dev->data->port_id);
997 		rte_errno = ENOMEM;
998 		return -rte_errno;
999 	}
1000 	intr_handle->type = RTE_INTR_HANDLE_EXT;
1001 	for (i = 0; i != n; ++i) {
1002 		/* This rxq obj must not be released in this function. */
1003 		struct mlx5_rxq_obj *rxq_obj = mlx5_rxq_obj_get(dev, i);
1004 		int rc;
1005 
1006 		/* Skip queues that cannot request interrupts. */
1007 		if (!rxq_obj || (!rxq_obj->ibv_channel &&
1008 				 !rxq_obj->devx_channel)) {
1009 			/* Use invalid intr_vec[] index to disable entry. */
1010 			intr_handle->intr_vec[i] =
1011 				RTE_INTR_VEC_RXTX_OFFSET +
1012 				RTE_MAX_RXTX_INTR_VEC_ID;
1013 			continue;
1014 		}
1015 		if (count >= RTE_MAX_RXTX_INTR_VEC_ID) {
1016 			DRV_LOG(ERR,
1017 				"port %u too many Rx queues for interrupt"
1018 				" vector size (%d), Rx interrupts cannot be"
1019 				" enabled",
1020 				dev->data->port_id, RTE_MAX_RXTX_INTR_VEC_ID);
1021 			mlx5_rx_intr_vec_disable(dev);
1022 			rte_errno = ENOMEM;
1023 			return -rte_errno;
1024 		}
1025 		rc = mlx5_os_set_nonblock_channel_fd(rxq_obj->fd);
1026 		if (rc < 0) {
1027 			rte_errno = errno;
1028 			DRV_LOG(ERR,
1029 				"port %u failed to make Rx interrupt file"
1030 				" descriptor %d non-blocking for queue index"
1031 				" %d",
1032 				dev->data->port_id, rxq_obj->fd, i);
1033 			mlx5_rx_intr_vec_disable(dev);
1034 			return -rte_errno;
1035 		}
1036 		intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + count;
1037 		intr_handle->efds[count] = rxq_obj->fd;
1038 		count++;
1039 	}
1040 	if (!count)
1041 		mlx5_rx_intr_vec_disable(dev);
1042 	else
1043 		intr_handle->nb_efd = count;
1044 	return 0;
1045 }
1046 
1047 /**
1048  * Clean up Rx interrupts handler.
1049  *
1050  * @param dev
1051  *   Pointer to Ethernet device.
1052  */
1053 void
1054 mlx5_rx_intr_vec_disable(struct rte_eth_dev *dev)
1055 {
1056 	struct mlx5_priv *priv = dev->data->dev_private;
1057 	struct rte_intr_handle *intr_handle = dev->intr_handle;
1058 	unsigned int i;
1059 	unsigned int rxqs_n = priv->rxqs_n;
1060 	unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
1061 
1062 	if (!dev->data->dev_conf.intr_conf.rxq)
1063 		return;
1064 	if (!intr_handle->intr_vec)
1065 		goto free;
1066 	for (i = 0; i != n; ++i) {
1067 		struct mlx5_rxq_ctrl *rxq_ctrl;
1068 		struct mlx5_rxq_data *rxq_data;
1069 
1070 		if (intr_handle->intr_vec[i] == RTE_INTR_VEC_RXTX_OFFSET +
1071 		    RTE_MAX_RXTX_INTR_VEC_ID)
1072 			continue;
1073 		/*
1074 		 * The queue must be accessed directly to release the
1075 		 * reference kept in mlx5_rx_intr_vec_enable().
1076 		 */
1077 		rxq_data = (*priv->rxqs)[i];
1078 		rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
1079 		if (rxq_ctrl->obj)
1080 			mlx5_rxq_obj_release(rxq_ctrl->obj);
1081 	}
1082 free:
1083 	rte_intr_free_epoll_fd(intr_handle);
1084 	if (intr_handle->intr_vec)
1085 		mlx5_free(intr_handle->intr_vec);
1086 	intr_handle->nb_efd = 0;
1087 	intr_handle->intr_vec = NULL;
1088 }
1089 
1090 /**
1091  * MLX5 CQ notification.
1092  *
1093  * @param rxq
1094  *   Pointer to receive queue structure.
1095  * @param sq_n_rxq
1096  *   Sequence number per receive queue.
1097  */
1098 static inline void
1099 mlx5_arm_cq(struct mlx5_rxq_data *rxq, int sq_n_rxq)
1100 {
1101 	int sq_n = 0;
1102 	uint32_t doorbell_hi;
1103 	uint64_t doorbell;
1104 	void *cq_db_reg = (char *)rxq->cq_uar + MLX5_CQ_DOORBELL;
1105 
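	/*
	 * The arm sequence number and current consumer index are stored in
	 * the CQ doorbell record first, then the full 64-bit doorbell
	 * (sn/ci in the high word, CQ number in the low word) is written
	 * to the UAR register to arm the CQ for the next completion event.
	 */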
1106 	sq_n = sq_n_rxq & MLX5_CQ_SQN_MASK;
1107 	doorbell_hi = sq_n << MLX5_CQ_SQN_OFFSET | (rxq->cq_ci & MLX5_CI_MASK);
1108 	doorbell = (uint64_t)doorbell_hi << 32;
1109 	doorbell |= rxq->cqn;
1110 	rxq->cq_db[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(doorbell_hi);
1111 	mlx5_uar_write64(rte_cpu_to_be_64(doorbell),
1112 			 cq_db_reg, rxq->uar_lock_cq);
1113 }
1114 
1115 /**
1116  * DPDK callback for Rx queue interrupt enable.
1117  *
1118  * @param dev
1119  *   Pointer to Ethernet device structure.
1120  * @param rx_queue_id
1121  *   Rx queue number.
1122  *
1123  * @return
1124  *   0 on success, a negative errno value otherwise and rte_errno is set.
1125  */
1126 int
1127 mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
1128 {
1129 	struct mlx5_priv *priv = dev->data->dev_private;
1130 	struct mlx5_rxq_data *rxq_data;
1131 	struct mlx5_rxq_ctrl *rxq_ctrl;
1132 
1133 	rxq_data = (*priv->rxqs)[rx_queue_id];
1134 	if (!rxq_data) {
1135 		rte_errno = EINVAL;
1136 		return -rte_errno;
1137 	}
1138 	rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
1139 	if (rxq_ctrl->irq) {
1140 		struct mlx5_rxq_obj *rxq_obj;
1141 
1142 		rxq_obj = mlx5_rxq_obj_get(dev, rx_queue_id);
1143 		if (!rxq_obj) {
1144 			rte_errno = EINVAL;
1145 			return -rte_errno;
1146 		}
1147 		mlx5_arm_cq(rxq_data, rxq_data->cq_arm_sn);
1148 		mlx5_rxq_obj_release(rxq_obj);
1149 	}
1150 	return 0;
1151 }
1152 
1153 /**
1154  * DPDK callback for Rx queue interrupt disable.
1155  *
1156  * @param dev
1157  *   Pointer to Ethernet device structure.
1158  * @param rx_queue_id
1159  *   Rx queue number.
1160  *
1161  * @return
1162  *   0 on success, a negative errno value otherwise and rte_errno is set.
1163  */
1164 int
1165 mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
1166 {
1167 	struct mlx5_priv *priv = dev->data->dev_private;
1168 	struct mlx5_rxq_data *rxq_data;
1169 	struct mlx5_rxq_ctrl *rxq_ctrl;
1170 	struct mlx5_rxq_obj *rxq_obj = NULL;
1171 	struct ibv_cq *ev_cq;
1172 	void *ev_ctx;
1173 	int ret;
1174 
1175 	rxq_data = (*priv->rxqs)[rx_queue_id];
1176 	if (!rxq_data) {
1177 		rte_errno = EINVAL;
1178 		return -rte_errno;
1179 	}
1180 	rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
1181 	if (!rxq_ctrl->irq)
1182 		return 0;
1183 	rxq_obj = mlx5_rxq_obj_get(dev, rx_queue_id);
1184 	if (!rxq_obj) {
1185 		rte_errno = EINVAL;
1186 		return -rte_errno;
1187 	}
1188 	if (rxq_obj->type == MLX5_RXQ_OBJ_TYPE_IBV) {
1189 		ret = mlx5_glue->get_cq_event(rxq_obj->ibv_channel, &ev_cq,
1190 					      &ev_ctx);
1191 		if (ret < 0 || ev_cq != rxq_obj->ibv_cq)
1192 			goto exit;
1193 		mlx5_glue->ack_cq_events(rxq_obj->ibv_cq, 1);
1194 	} else if (rxq_obj->type == MLX5_RXQ_OBJ_TYPE_DEVX_RQ) {
1195 #ifdef HAVE_IBV_DEVX_EVENT
1196 		union {
1197 			struct mlx5dv_devx_async_event_hdr event_resp;
1198 			uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr)
1199 				    + 128];
1200 		} out;
1201 
1202 		ret = mlx5_glue->devx_get_event
1203 				(rxq_obj->devx_channel, &out.event_resp,
1204 				 sizeof(out.buf));
1205 		if (ret < 0 || out.event_resp.cookie !=
1206 				(uint64_t)(uintptr_t)rxq_obj->devx_cq)
1207 			goto exit;
1208 #endif /* HAVE_IBV_DEVX_EVENT */
1209 	}
1210 	rxq_data->cq_arm_sn++;
1211 	mlx5_rxq_obj_release(rxq_obj);
1212 	return 0;
1213 exit:
1214 	/*
1215 	 * For ret < 0, save the errno (it may be EAGAIN, which means the
1216 	 * get_event function was called before an event was received).
1217 	 */
1218 	if (ret < 0)
1219 		rte_errno = errno;
1220 	else
1221 		rte_errno = EINVAL;
1222 	ret = rte_errno; /* Save rte_errno before cleanup. */
1223 	if (rxq_obj)
1224 		mlx5_rxq_obj_release(rxq_obj);
1225 	if (ret != EAGAIN)
1226 		DRV_LOG(WARNING, "port %u unable to disable interrupt on Rx queue %d",
1227 			dev->data->port_id, rx_queue_id);
1228 	rte_errno = ret; /* Restore rte_errno. */
1229 	return -rte_errno;
1230 }
1231 
1232 /**
1233  * Create a CQ Verbs object.
1234  *
1235  * @param dev
1236  *   Pointer to Ethernet device.
1237  * @param priv
1238  *   Pointer to device private data.
1239  * @param rxq_data
1240  *   Pointer to Rx queue data.
1241  * @param cqe_n
1242  *   Number of CQEs in CQ.
1243  * @param rxq_obj
1244  *   Pointer to Rx queue object data.
1245  *
1246  * @return
1247  *   The Verbs object initialised, NULL otherwise and rte_errno is set.
1248  */
1249 static struct ibv_cq *
1250 mlx5_ibv_cq_new(struct rte_eth_dev *dev, struct mlx5_priv *priv,
1251 		struct mlx5_rxq_data *rxq_data,
1252 		unsigned int cqe_n, struct mlx5_rxq_obj *rxq_obj)
1253 {
1254 	struct {
1255 		struct ibv_cq_init_attr_ex ibv;
1256 		struct mlx5dv_cq_init_attr mlx5;
1257 	} cq_attr;
1258 
1259 	cq_attr.ibv = (struct ibv_cq_init_attr_ex){
1260 		.cqe = cqe_n,
1261 		.channel = rxq_obj->ibv_channel,
1262 		.comp_mask = 0,
1263 	};
1264 	cq_attr.mlx5 = (struct mlx5dv_cq_init_attr){
1265 		.comp_mask = 0,
1266 	};
1267 	if (priv->config.cqe_comp && !rxq_data->hw_timestamp &&
1268 	    !rxq_data->lro) {
1269 		cq_attr.mlx5.comp_mask |=
1270 				MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;
1271 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
1272 		cq_attr.mlx5.cqe_comp_res_format =
1273 				mlx5_rxq_mprq_enabled(rxq_data) ?
1274 				MLX5DV_CQE_RES_FORMAT_CSUM_STRIDX :
1275 				MLX5DV_CQE_RES_FORMAT_HASH;
1276 #else
1277 		cq_attr.mlx5.cqe_comp_res_format = MLX5DV_CQE_RES_FORMAT_HASH;
1278 #endif
1279 		/*
1280 		 * For vectorized Rx, it must not be doubled in order to
1281 		 * make cq_ci and rq_ci aligned.
1282 		 */
1283 		if (mlx5_rxq_check_vec_support(rxq_data) < 0)
1284 			cq_attr.ibv.cqe *= 2;
1285 	} else if (priv->config.cqe_comp && rxq_data->hw_timestamp) {
1286 		DRV_LOG(DEBUG,
1287 			"port %u Rx CQE compression is disabled for HW"
1288 			" timestamp",
1289 			dev->data->port_id);
1290 	} else if (priv->config.cqe_comp && rxq_data->lro) {
1291 		DRV_LOG(DEBUG,
1292 			"port %u Rx CQE compression is disabled for LRO",
1293 			dev->data->port_id);
1294 	}
1295 #ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD
1296 	if (priv->config.cqe_pad) {
1297 		cq_attr.mlx5.comp_mask |= MLX5DV_CQ_INIT_ATTR_MASK_FLAGS;
1298 		cq_attr.mlx5.flags |= MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD;
1299 	}
1300 #endif
1301 	return mlx5_glue->cq_ex_to_cq(mlx5_glue->dv_create_cq(priv->sh->ctx,
1302 							      &cq_attr.ibv,
1303 							      &cq_attr.mlx5));
1304 }
1305 
1306 /**
1307  * Create a WQ Verbs object.
1308  *
1309  * @param dev
1310  *   Pointer to Ethernet device.
1311  * @param priv
1312  *   Pointer to device private data.
1313  * @param rxq_data
1314  *   Pointer to Rx queue data.
1315  * @param idx
1316  *   Queue index in DPDK Rx queue array
1317  * @param wqe_n
1318  *   Number of WQEs in WQ.
1319  * @param rxq_obj
1320  *   Pointer to Rx queue object data.
1321  *
1322  * @return
1323  *   The Verbs object initialised, NULL otherwise and rte_errno is set.
1324  */
1325 static struct ibv_wq *
1326 mlx5_ibv_wq_new(struct rte_eth_dev *dev, struct mlx5_priv *priv,
1327 		struct mlx5_rxq_data *rxq_data, uint16_t idx,
1328 		unsigned int wqe_n, struct mlx5_rxq_obj *rxq_obj)
1329 {
1330 	struct {
1331 		struct ibv_wq_init_attr ibv;
1332 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
1333 		struct mlx5dv_wq_init_attr mlx5;
1334 #endif
1335 	} wq_attr;
1336 
1337 	wq_attr.ibv = (struct ibv_wq_init_attr){
1338 		.wq_context = NULL, /* Could be useful in the future. */
1339 		.wq_type = IBV_WQT_RQ,
1340 		/* Max number of outstanding WRs. */
1341 		.max_wr = wqe_n >> rxq_data->sges_n,
1342 		/* Max number of scatter/gather elements in a WR. */
1343 		.max_sge = 1 << rxq_data->sges_n,
1344 		.pd = priv->sh->pd,
1345 		.cq = rxq_obj->ibv_cq,
1346 		.comp_mask = IBV_WQ_FLAGS_CVLAN_STRIPPING | 0,
1347 		.create_flags = (rxq_data->vlan_strip ?
1348 				 IBV_WQ_FLAGS_CVLAN_STRIPPING : 0),
1349 	};
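	/*
	 * For example, with desc = 1024 and sges_n = 2 the WQ is created
	 * with 256 WRs of 4 SGEs each (an illustrative combination, not a
	 * driver default).
	 */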
1350 	/* By default, FCS (CRC) is stripped by hardware. */
1351 	if (rxq_data->crc_present) {
1352 		wq_attr.ibv.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
1353 		wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
1354 	}
1355 	if (priv->config.hw_padding) {
1356 #if defined(HAVE_IBV_WQ_FLAG_RX_END_PADDING)
1357 		wq_attr.ibv.create_flags |= IBV_WQ_FLAG_RX_END_PADDING;
1358 		wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
1359 #elif defined(HAVE_IBV_WQ_FLAGS_PCI_WRITE_END_PADDING)
1360 		wq_attr.ibv.create_flags |= IBV_WQ_FLAGS_PCI_WRITE_END_PADDING;
1361 		wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
1362 #endif
1363 	}
1364 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
1365 	wq_attr.mlx5 = (struct mlx5dv_wq_init_attr){
1366 		.comp_mask = 0,
1367 	};
1368 	if (mlx5_rxq_mprq_enabled(rxq_data)) {
1369 		struct mlx5dv_striding_rq_init_attr *mprq_attr =
1370 						&wq_attr.mlx5.striding_rq_attrs;
1371 
1372 		wq_attr.mlx5.comp_mask |= MLX5DV_WQ_INIT_ATTR_MASK_STRIDING_RQ;
1373 		*mprq_attr = (struct mlx5dv_striding_rq_init_attr){
1374 			.single_stride_log_num_of_bytes = rxq_data->strd_sz_n,
1375 			.single_wqe_log_num_of_strides = rxq_data->strd_num_n,
1376 			.two_byte_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT,
1377 		};
1378 	}
1379 	rxq_obj->wq = mlx5_glue->dv_create_wq(priv->sh->ctx, &wq_attr.ibv,
1380 					      &wq_attr.mlx5);
1381 #else
1382 	rxq_obj->wq = mlx5_glue->create_wq(priv->sh->ctx, &wq_attr.ibv);
1383 #endif
1384 	if (rxq_obj->wq) {
1385 		/*
1386 		 * Make sure number of WRs*SGEs match expectations since a queue
1387 		 * Make sure the number of WRs*SGEs matches expectations since a queue
1388 		 */
1389 		if (wq_attr.ibv.max_wr != (wqe_n >> rxq_data->sges_n) ||
1390 		    wq_attr.ibv.max_sge != (1u << rxq_data->sges_n)) {
1391 			DRV_LOG(ERR,
1392 				"port %u Rx queue %u requested %u*%u but got"
1393 				" %u*%u WRs*SGEs",
1394 				dev->data->port_id, idx,
1395 				wqe_n >> rxq_data->sges_n,
1396 				(1 << rxq_data->sges_n),
1397 				wq_attr.ibv.max_wr, wq_attr.ibv.max_sge);
1398 			claim_zero(mlx5_glue->destroy_wq(rxq_obj->wq));
1399 			rxq_obj->wq = NULL;
1400 			rte_errno = EINVAL;
1401 		}
1402 	}
1403 	return rxq_obj->wq;
1404 }
1405 
1406 /**
1407  * Fill common fields of create RQ attributes structure.
1408  *
1409  * @param rxq_data
1410  *   Pointer to Rx queue data.
1411  * @param cqn
1412  *   CQ number to use with this RQ.
1413  * @param rq_attr
1414  *   RQ attributes structure to fill.
1415  */
1416 static void
1417 mlx5_devx_create_rq_attr_fill(struct mlx5_rxq_data *rxq_data, uint32_t cqn,
1418 			      struct mlx5_devx_create_rq_attr *rq_attr)
1419 {
1420 	rq_attr->state = MLX5_RQC_STATE_RST;
1421 	rq_attr->vsd = (rxq_data->vlan_strip) ? 0 : 1;
1422 	rq_attr->cqn = cqn;
1423 	rq_attr->scatter_fcs = (rxq_data->crc_present) ? 1 : 0;
1424 }
1425 
1426 /**
1427  * Fill common fields of DevX WQ attributes structure.
1428  *
1429  * @param priv
1430  *   Pointer to device private data.
1431  * @param rxq_ctrl
1432  *   Pointer to Rx queue control structure.
1433  * @param wq_attr
1434  *   WQ attributes structure to fill.
1435  */
1436 static void
1437 mlx5_devx_wq_attr_fill(struct mlx5_priv *priv, struct mlx5_rxq_ctrl *rxq_ctrl,
1438 		       struct mlx5_devx_wq_attr *wq_attr)
1439 {
1440 	wq_attr->end_padding_mode = priv->config.cqe_pad ?
1441 					MLX5_WQ_END_PAD_MODE_ALIGN :
1442 					MLX5_WQ_END_PAD_MODE_NONE;
1443 	wq_attr->pd = priv->sh->pdn;
1444 	wq_attr->dbr_addr = rxq_ctrl->rq_dbr_offset;
1445 	wq_attr->dbr_umem_id = rxq_ctrl->rq_dbr_umem_id;
1446 	wq_attr->dbr_umem_valid = 1;
1447 	wq_attr->wq_umem_id = rxq_ctrl->wq_umem->umem_id;
1448 	wq_attr->wq_umem_valid = 1;
1449 }
1450 
1451 /**
1452  * Create a RQ object using DevX.
1453  *
1454  * @param dev
1455  *   Pointer to Ethernet device.
1456  * @param idx
1457  *   Queue index in DPDK Rx queue array
1458  * @param cqn
1459  *   CQ number to use with this RQ.
1460  *
1461  * @return
1462  *   The DevX object initialised, NULL otherwise and rte_errno is set.
1463  */
1464 static struct mlx5_devx_obj *
1465 mlx5_devx_rq_new(struct rte_eth_dev *dev, uint16_t idx, uint32_t cqn)
1466 {
1467 	struct mlx5_priv *priv = dev->data->dev_private;
1468 	struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
1469 	struct mlx5_rxq_ctrl *rxq_ctrl =
1470 		container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
1471 	struct mlx5_devx_create_rq_attr rq_attr = { 0 };
1472 	uint32_t wqe_n = 1 << (rxq_data->elts_n - rxq_data->sges_n);
1473 	uint32_t wq_size = 0;
1474 	uint32_t wqe_size = 0;
1475 	uint32_t log_wqe_size = 0;
1476 	void *buf = NULL;
1477 	struct mlx5_devx_obj *rq;
1478 
1479 	/* Fill RQ attributes. */
1480 	rq_attr.mem_rq_type = MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE;
1481 	rq_attr.flush_in_error_en = 1;
1482 	mlx5_devx_create_rq_attr_fill(rxq_data, cqn, &rq_attr);
1483 	/* Fill WQ attributes for this RQ. */
1484 	if (mlx5_rxq_mprq_enabled(rxq_data)) {
1485 		rq_attr.wq_attr.wq_type = MLX5_WQ_TYPE_CYCLIC_STRIDING_RQ;
1486 		/*
1487 		 * Number of strides in each WQE:
1488 		 * 512*2^single_wqe_log_num_of_strides.
1489 		 */
1490 		rq_attr.wq_attr.single_wqe_log_num_of_strides =
1491 				rxq_data->strd_num_n -
1492 				MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES;
1493 		/* Stride size = (2^single_stride_log_num_of_bytes)*64B. */
1494 		rq_attr.wq_attr.single_stride_log_num_of_bytes =
1495 				rxq_data->strd_sz_n -
1496 				MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES;
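		/*
		 * For example, strd_num_n = 9 and strd_sz_n = 11 describe
		 * 512 strides of 2 KB each, i.e. 1 MB of receive buffer per
		 * WQE (an illustrative combination, not a fixed default).
		 */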
1497 		wqe_size = sizeof(struct mlx5_wqe_mprq);
1498 	} else {
1499 		rq_attr.wq_attr.wq_type = MLX5_WQ_TYPE_CYCLIC;
1500 		wqe_size = sizeof(struct mlx5_wqe_data_seg);
1501 	}
1502 	log_wqe_size = log2above(wqe_size) + rxq_data->sges_n;
1503 	rq_attr.wq_attr.log_wq_stride = log_wqe_size;
1504 	rq_attr.wq_attr.log_wq_sz = rxq_data->elts_n - rxq_data->sges_n;
1505 	/* Calculate and allocate WQ memory space. */
1506 	wqe_size = 1 << log_wqe_size; /* Round up to a power of two. */
1507 	wq_size = wqe_n * wqe_size;
1508 	size_t alignment = MLX5_WQE_BUF_ALIGNMENT;
1509 	if (alignment == (size_t)-1) {
1510 		DRV_LOG(ERR, "Failed to get mem page size");
1511 		rte_errno = ENOMEM;
1512 		return NULL;
1513 	}
1514 	buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, wq_size,
1515 			  alignment, rxq_ctrl->socket);
1516 	if (!buf)
1517 		return NULL;
1518 	rxq_data->wqes = buf;
1519 	rxq_ctrl->wq_umem = mlx5_glue->devx_umem_reg(priv->sh->ctx,
1520 						     buf, wq_size, 0);
1521 	if (!rxq_ctrl->wq_umem) {
1522 		mlx5_free(buf);
1523 		return NULL;
1524 	}
1525 	mlx5_devx_wq_attr_fill(priv, rxq_ctrl, &rq_attr.wq_attr);
1526 	rq = mlx5_devx_cmd_create_rq(priv->sh->ctx, &rq_attr, rxq_ctrl->socket);
1527 	if (!rq)
1528 		rxq_release_devx_rq_resources(rxq_ctrl);
1529 	return rq;
1530 }
1531 
1532 /**
1533  * Create a DevX CQ object for an Rx queue.
1534  *
1535  * @param dev
1536  *   Pointer to Ethernet device.
1537  * @param cqe_n
1538  *   Number of CQEs in CQ.
1539  * @param idx
1540  *   Queue index in DPDK Rx queue array
1541  * @param rxq_obj
1542  *   Pointer to Rx queue object data.
1543  *
1544  * @return
1545  *   The DevX object initialised, NULL otherwise and rte_errno is set.
1546  */
1547 static struct mlx5_devx_obj *
1548 mlx5_devx_cq_new(struct rte_eth_dev *dev, unsigned int cqe_n, uint16_t idx,
1549 		 struct mlx5_rxq_obj *rxq_obj)
1550 {
1551 	struct mlx5_devx_obj *cq_obj = 0;
1552 	struct mlx5_devx_cq_attr cq_attr = { 0 };
1553 	struct mlx5_priv *priv = dev->data->dev_private;
1554 	struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
1555 	struct mlx5_rxq_ctrl *rxq_ctrl =
1556 		container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
1557 	size_t page_size = rte_mem_page_size();
1558 	uint32_t lcore = (uint32_t)rte_lcore_to_cpu_id(-1);
1559 	uint32_t eqn = 0;
1560 	void *buf = NULL;
1561 	uint16_t event_nums[1] = {0};
1562 	uint32_t log_cqe_n;
1563 	uint32_t cq_size;
1564 	int ret = 0;
1565 
1566 	if (page_size == (size_t)-1) {
1567 		DRV_LOG(ERR, "Failed to get page_size.");
1568 		goto error;
1569 	}
1570 	if (priv->config.cqe_comp && !rxq_data->hw_timestamp &&
1571 	    !rxq_data->lro) {
1572 		cq_attr.cqe_comp_en = MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;
1573 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
1574 		cq_attr.mini_cqe_res_format =
1575 				mlx5_rxq_mprq_enabled(rxq_data) ?
1576 				MLX5DV_CQE_RES_FORMAT_CSUM_STRIDX :
1577 				MLX5DV_CQE_RES_FORMAT_HASH;
1578 #else
1579 		cq_attr.mini_cqe_res_format = MLX5DV_CQE_RES_FORMAT_HASH;
1580 #endif
1581 		/*
1582 		 * For vectorized Rx, it must not be doubled in order to
1583 		 * make cq_ci and rq_ci aligned.
1584 		 */
1585 		if (mlx5_rxq_check_vec_support(rxq_data) < 0)
1586 			cqe_n *= 2;
1587 	} else if (priv->config.cqe_comp && rxq_data->hw_timestamp) {
1588 		DRV_LOG(DEBUG,
1589 			"port %u Rx CQE compression is disabled for HW"
1590 			" timestamp",
1591 			dev->data->port_id);
1592 	} else if (priv->config.cqe_comp && rxq_data->lro) {
1593 		DRV_LOG(DEBUG,
1594 			"port %u Rx CQE compression is disabled for LRO",
1595 			dev->data->port_id);
1596 	}
1597 #ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD
1598 	if (priv->config.cqe_pad)
1599 		cq_attr.cqe_size = MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD;
1600 #endif
1601 	log_cqe_n = log2above(cqe_n);
1602 	cq_size = sizeof(struct mlx5_cqe) * (1 << log_cqe_n);
1603 	/* Query the EQN for this core. */
1604 	if (mlx5_glue->devx_query_eqn(priv->sh->ctx, lcore, &eqn)) {
1605 		DRV_LOG(ERR, "Failed to query EQN for CQ.");
1606 		goto error;
1607 	}
1608 	cq_attr.eqn = eqn;
1609 	buf = rte_calloc_socket(__func__, 1, cq_size, page_size,
1610 				rxq_ctrl->socket);
1611 	if (!buf) {
1612 		DRV_LOG(ERR, "Failed to allocate memory for CQ.");
1613 		goto error;
1614 	}
1615 	rxq_data->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)buf;
1616 	rxq_ctrl->cq_umem = mlx5_glue->devx_umem_reg(priv->sh->ctx, buf,
1617 						     cq_size,
1618 						     IBV_ACCESS_LOCAL_WRITE);
1619 	if (!rxq_ctrl->cq_umem) {
1620 		DRV_LOG(ERR, "Failed to register umem for CQ.");
1621 		goto error;
1622 	}
1623 	cq_attr.uar_page_id = priv->sh->devx_rx_uar->page_id;
1624 	cq_attr.q_umem_id = rxq_ctrl->cq_umem->umem_id;
1625 	cq_attr.q_umem_valid = 1;
1626 	cq_attr.log_cq_size = log_cqe_n;
1627 	cq_attr.log_page_size = rte_log2_u32(page_size);
1628 	cq_attr.db_umem_offset = rxq_ctrl->cq_dbr_offset;
1629 	cq_attr.db_umem_id = rxq_ctrl->cq_dbr_umem_id;
1630 	cq_attr.db_umem_valid = rxq_ctrl->cq_dbr_umem_id_valid;
1631 	cq_obj = mlx5_devx_cmd_create_cq(priv->sh->ctx, &cq_attr);
1632 	if (!cq_obj)
1633 		goto error;
1634 	rxq_data->cqe_n = log_cqe_n;
1635 	rxq_data->cqn = cq_obj->id;
1636 	if (rxq_obj->devx_channel) {
1637 		ret = mlx5_glue->devx_subscribe_devx_event
1638 						(rxq_obj->devx_channel,
1639 						 cq_obj->obj,
1640 						 sizeof(event_nums),
1641 						 event_nums,
1642 						 (uint64_t)(uintptr_t)cq_obj);
1643 		if (ret) {
1644 			DRV_LOG(ERR, "Fail to subscribe CQ to event channel.");
1645 			rte_errno = errno;
1646 			goto error;
1647 		}
1648 	}
1649 	/* Initialise CQ to 1's to mark HW ownership for all CQEs. */
1650 	memset((void *)(uintptr_t)rxq_data->cqes, 0xFF, cq_size);
1651 	return cq_obj;
1652 error:
1653 	if (cq_obj)
1654 		mlx5_devx_cmd_destroy(cq_obj);
1655 	rxq_release_devx_cq_resources(rxq_ctrl);
1656 	return NULL;
1657 }
1658 
1659 /**
1660  * Create the Rx hairpin queue object.
1661  *
1662  * @param dev
1663  *   Pointer to Ethernet device.
1664  * @param idx
1665  *   Queue index in DPDK Rx queue array
1666  *
1667  * @return
1668  *   The hairpin DevX object initialised, NULL otherwise and rte_errno is set.
1669  */
1670 static struct mlx5_rxq_obj *
1671 mlx5_rxq_obj_hairpin_new(struct rte_eth_dev *dev, uint16_t idx)
1672 {
1673 	struct mlx5_priv *priv = dev->data->dev_private;
1674 	struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
1675 	struct mlx5_rxq_ctrl *rxq_ctrl =
1676 		container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
1677 	struct mlx5_devx_create_rq_attr attr = { 0 };
1678 	struct mlx5_rxq_obj *tmpl = NULL;
1679 	uint32_t max_wq_data;
1680 
1681 	MLX5_ASSERT(rxq_data);
1682 	MLX5_ASSERT(!rxq_ctrl->obj);
1683 	tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl), 0,
1684 			   rxq_ctrl->socket);
1685 	if (!tmpl) {
1686 		DRV_LOG(ERR,
1687 			"port %u Rx queue %u cannot allocate verbs resources",
1688 			dev->data->port_id, rxq_data->idx);
1689 		rte_errno = ENOMEM;
1690 		return NULL;
1691 	}
1692 	tmpl->type = MLX5_RXQ_OBJ_TYPE_DEVX_HAIRPIN;
1693 	tmpl->rxq_ctrl = rxq_ctrl;
1694 	attr.hairpin = 1;
1695 	max_wq_data = priv->config.hca_attr.log_max_hairpin_wq_data_sz;
1696 	/* Jumbo frames > 9 KB and a large number of packets should be supported. */
1697 	if (priv->config.log_hp_size != (uint32_t)MLX5_ARG_UNSET) {
1698 		if (priv->config.log_hp_size > max_wq_data) {
1699 			DRV_LOG(ERR, "total data size %u power of 2 is "
1700 				"too large for hairpin",
1701 				priv->config.log_hp_size);
1702 			mlx5_free(tmpl);
1703 			rte_errno = ERANGE;
1704 			return NULL;
1705 		}
1706 		attr.wq_attr.log_hairpin_data_sz = priv->config.log_hp_size;
1707 	} else {
1708 		attr.wq_attr.log_hairpin_data_sz =
1709 				(max_wq_data < MLX5_HAIRPIN_JUMBO_LOG_SIZE) ?
1710 				 max_wq_data : MLX5_HAIRPIN_JUMBO_LOG_SIZE;
1711 	}
1712 	/* Set the packets number to the maximum value for performance. */
1713 	attr.wq_attr.log_hairpin_num_packets =
1714 			attr.wq_attr.log_hairpin_data_sz -
1715 			MLX5_HAIRPIN_QUEUE_STRIDE;
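	/*
	 * log_hairpin_num_packets = log_hairpin_data_sz -
	 * MLX5_HAIRPIN_QUEUE_STRIDE, i.e. the data size divided by a fixed
	 * per-packet stride.
	 */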
1716 	tmpl->rq = mlx5_devx_cmd_create_rq(priv->sh->ctx, &attr,
1717 					   rxq_ctrl->socket);
1718 	if (!tmpl->rq) {
1719 		DRV_LOG(ERR,
1720 			"port %u Rx hairpin queue %u can't create rq object",
1721 			dev->data->port_id, idx);
1722 		mlx5_free(tmpl);
1723 		rte_errno = errno;
1724 		return NULL;
1725 	}
1726 	DRV_LOG(DEBUG, "port %u rxq %u updated with %p", dev->data->port_id,
1727 		idx, (void *)&tmpl);
1728 	rte_atomic32_inc(&tmpl->refcnt);
1729 	LIST_INSERT_HEAD(&priv->rxqsobj, tmpl, next);
1730 	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
1731 	dev->data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_HAIRPIN;
1732 	return tmpl;
1733 }
1734 
1735 /**
1736  * Create the Rx queue Verbs/DevX object.
1737  *
1738  * @param dev
1739  *   Pointer to Ethernet device.
1740  * @param idx
1741  *   Queue index in DPDK Rx queue array
1742  * @param type
1743  *   Type of Rx queue object to create.
1744  *
1745  * @return
1746  *   The Verbs/DevX object initialised, NULL otherwise and rte_errno is set.
1747  */
1748 struct mlx5_rxq_obj *
1749 mlx5_rxq_obj_new(struct rte_eth_dev *dev, uint16_t idx,
1750 		 enum mlx5_rxq_obj_type type)
1751 {
1752 	struct mlx5_priv *priv = dev->data->dev_private;
1753 	struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
1754 	struct mlx5_rxq_ctrl *rxq_ctrl =
1755 		container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
1756 	struct ibv_wq_attr mod;
1757 	unsigned int cqe_n;
1758 	unsigned int wqe_n = 1 << rxq_data->elts_n;
1759 	struct mlx5_rxq_obj *tmpl = NULL;
1760 	struct mlx5dv_cq cq_info;
1761 	struct mlx5dv_rwq rwq;
1762 	int ret = 0;
1763 	struct mlx5dv_obj obj;
1764 
1765 	MLX5_ASSERT(rxq_data);
1766 	MLX5_ASSERT(!rxq_ctrl->obj);
1767 	if (type == MLX5_RXQ_OBJ_TYPE_DEVX_HAIRPIN)
1768 		return mlx5_rxq_obj_hairpin_new(dev, idx);
1769 	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_RX_QUEUE;
1770 	priv->verbs_alloc_ctx.obj = rxq_ctrl;
1771 	tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl), 0,
1772 			   rxq_ctrl->socket);
1773 	if (!tmpl) {
1774 		DRV_LOG(ERR,
1775 			"port %u Rx queue %u cannot allocate resources",
1776 			dev->data->port_id, rxq_data->idx);
1777 		rte_errno = ENOMEM;
1778 		goto error;
1779 	}
1780 	tmpl->type = type;
1781 	tmpl->rxq_ctrl = rxq_ctrl;
1782 	if (rxq_ctrl->irq) {
1783 		if (tmpl->type == MLX5_RXQ_OBJ_TYPE_IBV) {
1784 			tmpl->ibv_channel =
1785 				mlx5_glue->create_comp_channel(priv->sh->ctx);
1786 			if (!tmpl->ibv_channel) {
1787 				DRV_LOG(ERR, "port %u: comp channel creation "
1788 					"failure", dev->data->port_id);
1789 				rte_errno = ENOMEM;
1790 				goto error;
1791 			}
1792 			tmpl->fd = tmpl->ibv_channel->fd;
1793 		} else if (tmpl->type == MLX5_RXQ_OBJ_TYPE_DEVX_RQ) {
1794 			int devx_ev_flag =
1795 			  MLX5DV_DEVX_CREATE_EVENT_CHANNEL_FLAGS_OMIT_EV_DATA;
1796 
1797 			tmpl->devx_channel =
1798 				mlx5_glue->devx_create_event_channel
1799 								(priv->sh->ctx,
1800 								 devx_ev_flag);
1801 			if (!tmpl->devx_channel) {
1802 				rte_errno = errno;
1803 				DRV_LOG(ERR,
1804 					"Failed to create event channel %d.",
1805 					rte_errno);
1806 				goto error;
1807 			}
1808 			tmpl->fd = tmpl->devx_channel->fd;
1809 		}
1810 	}
1811 	if (mlx5_rxq_mprq_enabled(rxq_data))
1812 		cqe_n = wqe_n * (1 << rxq_data->strd_num_n) - 1;
1813 	else
1814 		cqe_n = wqe_n - 1;
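	/*
	 * With MPRQ each WQE carries (1 << strd_num_n) strides and every
	 * stride may complete with its own CQE, hence the larger CQ;
	 * otherwise one CQE per WQE is enough. Requesting one entry less
	 * presumably keeps the CQ at the same power-of-two depth once the
	 * provider rounds the requested size up.
	 */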
1815 	DRV_LOG(DEBUG, "port %u device_attr.max_qp_wr is %d",
1816 		dev->data->port_id, priv->sh->device_attr.max_qp_wr);
1817 	DRV_LOG(DEBUG, "port %u device_attr.max_sge is %d",
1818 		dev->data->port_id, priv->sh->device_attr.max_sge);
1819 	if (tmpl->type == MLX5_RXQ_OBJ_TYPE_IBV) {
1820 		/* Create CQ using Verbs API. */
1821 		tmpl->ibv_cq = mlx5_ibv_cq_new(dev, priv, rxq_data, cqe_n,
1822 					       tmpl);
1823 		if (!tmpl->ibv_cq) {
1824 			DRV_LOG(ERR, "port %u Rx queue %u CQ creation failure",
1825 				dev->data->port_id, idx);
1826 			rte_errno = ENOMEM;
1827 			goto error;
1828 		}
1829 		obj.cq.in = tmpl->ibv_cq;
1830 		obj.cq.out = &cq_info;
1831 		ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ);
1832 		if (ret) {
1833 			rte_errno = ret;
1834 			goto error;
1835 		}
1836 		if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
1837 			DRV_LOG(ERR,
1838 				"port %u wrong MLX5_CQE_SIZE environment "
1839 				"variable value: it should be set to %u",
1840 				dev->data->port_id, RTE_CACHE_LINE_SIZE);
1841 			rte_errno = EINVAL;
1842 			goto error;
1843 		}
1844 		/* Fill the rings. */
1845 		rxq_data->cqe_n = log2above(cq_info.cqe_cnt);
1846 		rxq_data->cq_db = cq_info.dbrec;
1847 		rxq_data->cqes =
1848 			(volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf;
1849 		rxq_data->cq_uar = cq_info.cq_uar;
1850 		rxq_data->cqn = cq_info.cqn;
1851 		/* Create WQ (RQ) using Verbs API. */
1852 		tmpl->wq = mlx5_ibv_wq_new(dev, priv, rxq_data, idx, wqe_n,
1853 					   tmpl);
1854 		if (!tmpl->wq) {
1855 			DRV_LOG(ERR, "port %u Rx queue %u WQ creation failure",
1856 				dev->data->port_id, idx);
1857 			rte_errno = ENOMEM;
1858 			goto error;
1859 		}
1860 		/* Change queue state to ready. */
1861 		mod = (struct ibv_wq_attr){
1862 			.attr_mask = IBV_WQ_ATTR_STATE,
1863 			.wq_state = IBV_WQS_RDY,
1864 		};
1865 		ret = mlx5_glue->modify_wq(tmpl->wq, &mod);
1866 		if (ret) {
1867 			DRV_LOG(ERR,
1868 				"port %u Rx queue %u WQ state to IBV_WQS_RDY"
1869 				" failed", dev->data->port_id, idx);
1870 			rte_errno = ret;
1871 			goto error;
1872 		}
1873 		obj.rwq.in = tmpl->wq;
1874 		obj.rwq.out = &rwq;
1875 		ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_RWQ);
1876 		if (ret) {
1877 			rte_errno = ret;
1878 			goto error;
1879 		}
1880 		rxq_data->wqes = rwq.buf;
1881 		rxq_data->rq_db = rwq.dbrec;
1882 	} else if (tmpl->type == MLX5_RXQ_OBJ_TYPE_DEVX_RQ) {
1883 		struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
1884 		struct mlx5_devx_dbr_page *dbr_page;
1885 		int64_t dbr_offset;
1886 
1887 		/* Allocate CQ door-bell. */
1888 		dbr_offset = mlx5_get_dbr(priv->sh->ctx, &priv->dbrpgs,
1889 					  &dbr_page);
1890 		if (dbr_offset < 0) {
1891 			DRV_LOG(ERR, "Failed to allocate CQ door-bell.");
1892 			goto error;
1893 		}
1894 		rxq_ctrl->cq_dbr_offset = dbr_offset;
1895 		rxq_ctrl->cq_dbr_umem_id = mlx5_os_get_umem_id(dbr_page->umem);
1896 		rxq_ctrl->cq_dbr_umem_id_valid = 1;
1897 		rxq_data->cq_db =
1898 			(uint32_t *)((uintptr_t)dbr_page->dbrs +
1899 				     (uintptr_t)rxq_ctrl->cq_dbr_offset);
1900 		rxq_data->cq_uar = priv->sh->devx_rx_uar->base_addr;
1901 		/* Create CQ using DevX API. */
1902 		tmpl->devx_cq = mlx5_devx_cq_new(dev, cqe_n, idx, tmpl);
1903 		if (!tmpl->devx_cq) {
1904 			DRV_LOG(ERR, "Failed to create CQ.");
1905 			goto error;
1906 		}
1907 		/* Allocate RQ door-bell. */
1908 		dbr_offset = mlx5_get_dbr(priv->sh->ctx, &priv->dbrpgs,
1909 					  &dbr_page);
1910 		if (dbr_offset < 0) {
1911 			DRV_LOG(ERR, "Failed to allocate RQ door-bell.");
1912 			goto error;
1913 		}
1914 		rxq_ctrl->rq_dbr_offset = dbr_offset;
1915 		rxq_ctrl->rq_dbr_umem_id = mlx5_os_get_umem_id(dbr_page->umem);
1916 		rxq_ctrl->rq_dbr_umem_id_valid = 1;
1917 		rxq_data->rq_db =
1918 			(uint32_t *)((uintptr_t)dbr_page->dbrs +
1919 				     (uintptr_t)rxq_ctrl->rq_dbr_offset);
1920 		/* Create RQ using DevX API. */
1921 		tmpl->rq = mlx5_devx_rq_new(dev, idx, tmpl->devx_cq->id);
1922 		if (!tmpl->rq) {
1923 			DRV_LOG(ERR, "port %u Rx queue %u RQ creation failure",
1924 				dev->data->port_id, idx);
1925 			rte_errno = ENOMEM;
1926 			goto error;
1927 		}
1928 		/* Change queue state to ready. */
1929 		rq_attr.rq_state = MLX5_RQC_STATE_RST;
1930 		rq_attr.state = MLX5_RQC_STATE_RDY;
1931 		ret = mlx5_devx_cmd_modify_rq(tmpl->rq, &rq_attr);
1932 		if (ret)
1933 			goto error;
1934 	}
1935 	rxq_data->cq_arm_sn = 0;
1936 	mlx5_rxq_initialize(rxq_data);
1937 	rxq_data->cq_ci = 0;
1938 	DRV_LOG(DEBUG, "port %u rxq %u updated with %p", dev->data->port_id,
1939 		idx, (void *)tmpl);
1940 	rte_atomic32_inc(&tmpl->refcnt);
1941 	LIST_INSERT_HEAD(&priv->rxqsobj, tmpl, next);
1942 	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
1943 	dev->data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED;
1944 	return tmpl;
1945 error:
1946 	if (tmpl) {
1947 		ret = rte_errno; /* Save rte_errno before cleanup. */
1948 		if (tmpl->type == MLX5_RXQ_OBJ_TYPE_IBV) {
1949 			if (tmpl->wq)
1950 				claim_zero(mlx5_glue->destroy_wq(tmpl->wq));
1951 			if (tmpl->ibv_cq)
1952 				claim_zero(mlx5_glue->destroy_cq(tmpl->ibv_cq));
1953 			if (tmpl->ibv_channel)
1954 				claim_zero(mlx5_glue->destroy_comp_channel
1955 							(tmpl->ibv_channel));
1956 		} else if (tmpl->type == MLX5_RXQ_OBJ_TYPE_DEVX_RQ) {
1957 			if (tmpl->rq)
1958 				claim_zero(mlx5_devx_cmd_destroy(tmpl->rq));
1959 			if (tmpl->devx_cq)
1960 				claim_zero(mlx5_devx_cmd_destroy
1961 							(tmpl->devx_cq));
1962 			if (tmpl->devx_channel)
1963 				mlx5_glue->devx_destroy_event_channel
1964 							(tmpl->devx_channel);
1965 		}
1966 		mlx5_free(tmpl);
1967 		rte_errno = ret; /* Restore rte_errno. */
1968 	}
1969 	if (type == MLX5_RXQ_OBJ_TYPE_DEVX_RQ) {
1970 		rxq_release_devx_rq_resources(rxq_ctrl);
1971 		rxq_release_devx_cq_resources(rxq_ctrl);
1972 	}
1973 	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
1974 	return NULL;
1975 }
1976 
1977 /**
1978  * Verify the Rx queue objects list is empty
1979  *
1980  * @param dev
1981  *   Pointer to Ethernet device.
1982  *
1983  * @return
1984  *   The number of objects not released.
1985  */
1986 int
1987 mlx5_rxq_obj_verify(struct rte_eth_dev *dev)
1988 {
1989 	struct mlx5_priv *priv = dev->data->dev_private;
1990 	int ret = 0;
1991 	struct mlx5_rxq_obj *rxq_obj;
1992 
1993 	LIST_FOREACH(rxq_obj, &priv->rxqsobj, next) {
1994 		DRV_LOG(DEBUG, "port %u Rx queue %u still referenced",
1995 			dev->data->port_id, rxq_obj->rxq_ctrl->rxq.idx);
1996 		++ret;
1997 	}
1998 	return ret;
1999 }
2000 
2001 /**
2002  * Callback function to initialize mbufs for Multi-Packet RQ.
2003  */
2004 static inline void
2005 mlx5_mprq_buf_init(struct rte_mempool *mp, void *opaque_arg,
2006 		    void *_m, unsigned int i __rte_unused)
2007 {
2008 	struct mlx5_mprq_buf *buf = _m;
2009 	struct rte_mbuf_ext_shared_info *shinfo;
2010 	unsigned int strd_n = (unsigned int)(uintptr_t)opaque_arg;
2011 	unsigned int j;
2012 
2013 	memset(_m, 0, sizeof(*buf));
2014 	buf->mp = mp;
2015 	rte_atomic16_set(&buf->refcnt, 1);
2016 	for (j = 0; j != strd_n; ++j) {
2017 		shinfo = &buf->shinfos[j];
2018 		shinfo->free_cb = mlx5_mprq_buf_free_cb;
2019 		shinfo->fcb_opaque = buf;
2020 	}
2021 }
2022 
2023 /**
2024  * Free mempool of Multi-Packet RQ.
2025  *
2026  * @param dev
2027  *   Pointer to Ethernet device.
2028  *
2029  * @return
2030  *   0 on success, negative errno value on failure.
2031  */
2032 int
2033 mlx5_mprq_free_mp(struct rte_eth_dev *dev)
2034 {
2035 	struct mlx5_priv *priv = dev->data->dev_private;
2036 	struct rte_mempool *mp = priv->mprq_mp;
2037 	unsigned int i;
2038 
2039 	if (mp == NULL)
2040 		return 0;
2041 	DRV_LOG(DEBUG, "port %u freeing mempool (%s) for Multi-Packet RQ",
2042 		dev->data->port_id, mp->name);
2043 	/*
2044 	 * If a buffer in the pool has been externally attached to an mbuf and is
2045 	 * still in use by the application, destroying the Rx queue can spoil the
2046 	 * packet. It is unlikely to happen, but it can if the application
2047 	 * dynamically creates and destroys queues while holding Rx packets.
2048 	 *
2049 	 * TODO: It is unavoidable for now because the mempool for Multi-Packet
2050 	 * RQ isn't provided by the application but managed by the PMD.
2051 	 */
2052 	if (!rte_mempool_full(mp)) {
2053 		DRV_LOG(ERR,
2054 			"port %u mempool for Multi-Packet RQ is still in use",
2055 			dev->data->port_id);
2056 		rte_errno = EBUSY;
2057 		return -rte_errno;
2058 	}
2059 	rte_mempool_free(mp);
2060 	/* Unset mempool for each Rx queue. */
2061 	for (i = 0; i != priv->rxqs_n; ++i) {
2062 		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
2063 
2064 		if (rxq == NULL)
2065 			continue;
2066 		rxq->mprq_mp = NULL;
2067 	}
2068 	priv->mprq_mp = NULL;
2069 	return 0;
2070 }
2071 
2072 /**
2073  * Allocate a mempool for Multi-Packet RQ. All configured Rx queues share the
2074  * mempool. If already allocated, reuse it if there are enough elements.
2075  * Otherwise, resize it.
2076  *
2077  * @param dev
2078  *   Pointer to Ethernet device.
2079  *
2080  * @return
2081  *   0 on success, negative errno value on failure.
2082  */
2083 int
2084 mlx5_mprq_alloc_mp(struct rte_eth_dev *dev)
2085 {
2086 	struct mlx5_priv *priv = dev->data->dev_private;
2087 	struct rte_mempool *mp = priv->mprq_mp;
2088 	char name[RTE_MEMPOOL_NAMESIZE];
2089 	unsigned int desc = 0;
2090 	unsigned int buf_len;
2091 	unsigned int obj_num;
2092 	unsigned int obj_size;
2093 	unsigned int strd_num_n = 0;
2094 	unsigned int strd_sz_n = 0;
2095 	unsigned int i;
2096 	unsigned int n_ibv = 0;
2097 
2098 	if (!mlx5_mprq_enabled(dev))
2099 		return 0;
2100 	/* Count the total number of descriptors configured. */
2101 	for (i = 0; i != priv->rxqs_n; ++i) {
2102 		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
2103 		struct mlx5_rxq_ctrl *rxq_ctrl = container_of
2104 			(rxq, struct mlx5_rxq_ctrl, rxq);
2105 
2106 		if (rxq == NULL || rxq_ctrl->type != MLX5_RXQ_TYPE_STANDARD)
2107 			continue;
2108 		n_ibv++;
2109 		desc += 1 << rxq->elts_n;
2110 		/* Get the max number of strides. */
2111 		if (strd_num_n < rxq->strd_num_n)
2112 			strd_num_n = rxq->strd_num_n;
2113 		/* Get the max size of a stride. */
2114 		if (strd_sz_n < rxq->strd_sz_n)
2115 			strd_sz_n = rxq->strd_sz_n;
2116 	}
2117 	MLX5_ASSERT(strd_num_n && strd_sz_n);
2118 	buf_len = (1 << strd_num_n) * (1 << strd_sz_n);
2119 	obj_size = sizeof(struct mlx5_mprq_buf) + buf_len + (1 << strd_num_n) *
2120 		sizeof(struct rte_mbuf_ext_shared_info) + RTE_PKTMBUF_HEADROOM;
2121 	/*
2122 	 * Received packets can be either memcpy'd or externally referenced. When
2123 	 * a packet is attached to an mbuf as an external buffer, it isn't
2124 	 * possible to predict how the buffers will be queued by the application,
2125 	 * so there is no way to pre-allocate exactly the needed buffers in
2126 	 * advance; enough buffers have to be prepared speculatively.
2127 	 *
2128 	 * In the data path, if this mempool is depleted, the PMD will try to
2129 	 * memcpy received packets to buffers provided by the application
2130 	 * (rxq->mp) until this mempool becomes available again.
2131 	 */
2132 	desc *= 4;
2133 	obj_num = desc + MLX5_MPRQ_MP_CACHE_SZ * n_ibv;
2134 	/*
2135 	 * rte_mempool_create_empty() has a sanity check that refuses a cache
2136 	 * size which is too large compared to the number of elements.
2137 	 * CACHE_FLUSHTHRESH_MULTIPLIER is defined in a C file, so the constant
2138 	 * number 2 is used here instead.
2139 	 */
2140 	obj_num = RTE_MAX(obj_num, MLX5_MPRQ_MP_CACHE_SZ * 2);
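	/*
	 * Sizing example with illustrative values only: strd_num_n = 6 and
	 * strd_sz_n = 11 give buf_len = 64 * 2048 B = 128 KiB per object,
	 * plus 64 shared-info records and headroom. Four queues of 512
	 * descriptors give desc = 4 * 512 * 4 = 8192 objects, to which the
	 * per-queue cache reserve is then added.
	 */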
2141 	/* Check whether a mempool is already allocated and can be reused. */
2142 	if (mp != NULL && mp->elt_size >= obj_size && mp->size >= obj_num) {
2143 		DRV_LOG(DEBUG, "port %u mempool %s is being reused",
2144 			dev->data->port_id, mp->name);
2145 		/* Reuse. */
2146 		goto exit;
2147 	} else if (mp != NULL) {
2148 		DRV_LOG(DEBUG, "port %u mempool %s should be resized, freeing it",
2149 			dev->data->port_id, mp->name);
2150 		/*
2151 		 * If freeing fails, the mempool may still be in use and there is
2152 		 * no way but to keep using the existing one. On buffer underrun,
2153 		 * packets will be memcpy'd instead of attached as external
2154 		 * buffers.
2155 		 */
2156 		if (mlx5_mprq_free_mp(dev)) {
2157 			if (mp->elt_size >= obj_size)
2158 				goto exit;
2159 			else
2160 				return -rte_errno;
2161 		}
2162 	}
2163 	snprintf(name, sizeof(name), "port-%u-mprq", dev->data->port_id);
2164 	mp = rte_mempool_create(name, obj_num, obj_size, MLX5_MPRQ_MP_CACHE_SZ,
2165 				0, NULL, NULL, mlx5_mprq_buf_init,
2166 				(void *)(uintptr_t)(1 << strd_num_n),
2167 				dev->device->numa_node, 0);
2168 	if (mp == NULL) {
2169 		DRV_LOG(ERR,
2170 			"port %u failed to allocate a mempool for"
2171 			" Multi-Packet RQ, count=%u, size=%u",
2172 			dev->data->port_id, obj_num, obj_size);
2173 		rte_errno = ENOMEM;
2174 		return -rte_errno;
2175 	}
2176 	priv->mprq_mp = mp;
2177 exit:
2178 	/* Set mempool for each Rx queue. */
2179 	for (i = 0; i != priv->rxqs_n; ++i) {
2180 		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
2181 		struct mlx5_rxq_ctrl *rxq_ctrl = container_of
2182 			(rxq, struct mlx5_rxq_ctrl, rxq);
2183 
2184 		if (rxq == NULL || rxq_ctrl->type != MLX5_RXQ_TYPE_STANDARD)
2185 			continue;
2186 		rxq->mprq_mp = mp;
2187 	}
2188 	DRV_LOG(INFO, "port %u Multi-Packet RQ is configured",
2189 		dev->data->port_id);
2190 	return 0;
2191 }
2192 
2193 #define MLX5_MAX_TCP_HDR_OFFSET ((unsigned int)(sizeof(struct rte_ether_hdr) + \
2194 					sizeof(struct rte_vlan_hdr) * 2 + \
2195 					sizeof(struct rte_ipv6_hdr)))
2196 #define MAX_TCP_OPTION_SIZE 40u
2197 #define MLX5_MAX_LRO_HEADER_FIX ((unsigned int)(MLX5_MAX_TCP_HDR_OFFSET + \
2198 				 sizeof(struct rte_tcp_hdr) + \
2199 				 MAX_TCP_OPTION_SIZE))
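/*
 * With the standard DPDK header sizes (14 B Ethernet, 2 x 4 B VLAN, 40 B
 * IPv6) MLX5_MAX_TCP_HDR_OFFSET evaluates to 62 bytes; adding the 20 B TCP
 * header and up to 40 B of TCP options, MLX5_MAX_LRO_HEADER_FIX evaluates
 * to 122 bytes.
 */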
2200 
2201 /**
2202  * Adjust the maximum LRO message size.
2203  *
2204  * @param dev
2205  *   Pointer to Ethernet device.
2206  * @param idx
2207  *   RX queue index.
2208  * @param max_lro_size
2209  *   The maximum size for LRO packet.
2210  */
2211 static void
2212 mlx5_max_lro_msg_size_adjust(struct rte_eth_dev *dev, uint16_t idx,
2213 			     uint32_t max_lro_size)
2214 {
2215 	struct mlx5_priv *priv = dev->data->dev_private;
2216 
2217 	if (priv->config.hca_attr.lro_max_msg_sz_mode ==
2218 	    MLX5_LRO_MAX_MSG_SIZE_START_FROM_L4 && max_lro_size >
2219 	    MLX5_MAX_TCP_HDR_OFFSET)
2220 		max_lro_size -= MLX5_MAX_TCP_HDR_OFFSET;
2221 	max_lro_size = RTE_MIN(max_lro_size, MLX5_MAX_LRO_SIZE);
2222 	MLX5_ASSERT(max_lro_size >= MLX5_LRO_SEG_CHUNK_SIZE);
2223 	max_lro_size /= MLX5_LRO_SEG_CHUNK_SIZE;
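	/*
	 * From this point max_lro_size is expressed in units of
	 * MLX5_LRO_SEG_CHUNK_SIZE; it is converted back to bytes only for
	 * the debug log below.
	 */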
2224 	if (priv->max_lro_msg_size)
2225 		priv->max_lro_msg_size =
2226 			RTE_MIN((uint32_t)priv->max_lro_msg_size, max_lro_size);
2227 	else
2228 		priv->max_lro_msg_size = max_lro_size;
2229 	DRV_LOG(DEBUG,
2230 		"port %u Rx Queue %u max LRO message size adjusted to %u bytes",
2231 		dev->data->port_id, idx,
2232 		priv->max_lro_msg_size * MLX5_LRO_SEG_CHUNK_SIZE);
2233 }
2234 
2235 /**
2236  * Create a DPDK Rx queue.
2237  *
2238  * @param dev
2239  *   Pointer to Ethernet device.
2240  * @param idx
2241  *   RX queue index.
2242  * @param desc
2243  *   Number of descriptors to configure in queue.
2244  * @param socket
2245  *   NUMA socket on which memory must be allocated.
2246  *
2247  * @return
2248  *   A DPDK queue object on success, NULL otherwise and rte_errno is set.
2249  */
2250 struct mlx5_rxq_ctrl *
2251 mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
2252 	     unsigned int socket, const struct rte_eth_rxconf *conf,
2253 	     struct rte_mempool *mp)
2254 {
2255 	struct mlx5_priv *priv = dev->data->dev_private;
2256 	struct mlx5_rxq_ctrl *tmpl;
2257 	unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
2258 	unsigned int mprq_stride_nums;
2259 	unsigned int mprq_stride_size;
2260 	unsigned int mprq_stride_cap;
2261 	struct mlx5_dev_config *config = &priv->config;
2262 	/*
2263 	 * Always allocate extra slots, even if eventually
2264 	 * the vector Rx will not be used.
2265 	 */
2266 	uint16_t desc_n =
2267 		desc + config->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
2268 	uint64_t offloads = conf->offloads |
2269 			   dev->data->dev_conf.rxmode.offloads;
2270 	unsigned int lro_on_queue = !!(offloads & DEV_RX_OFFLOAD_TCP_LRO);
2271 	const int mprq_en = mlx5_check_mprq_support(dev) > 0;
2272 	unsigned int max_rx_pkt_len = lro_on_queue ?
2273 			dev->data->dev_conf.rxmode.max_lro_pkt_size :
2274 			dev->data->dev_conf.rxmode.max_rx_pkt_len;
2275 	unsigned int non_scatter_min_mbuf_size = max_rx_pkt_len +
2276 							RTE_PKTMBUF_HEADROOM;
2277 	unsigned int max_lro_size = 0;
2278 	unsigned int first_mb_free_size = mb_len - RTE_PKTMBUF_HEADROOM;
2279 
2280 	if (non_scatter_min_mbuf_size > mb_len && !(offloads &
2281 						    DEV_RX_OFFLOAD_SCATTER)) {
2282 		DRV_LOG(ERR, "port %u Rx queue %u: Scatter offload is not"
2283 			" configured and not enough mbuf space (%u) to contain "
2284 			"the maximum RX packet length (%u) with head-room (%u)",
2285 			dev->data->port_id, idx, mb_len, max_rx_pkt_len,
2286 			RTE_PKTMBUF_HEADROOM);
2287 		rte_errno = ENOSPC;
2288 		return NULL;
2289 	}
2290 	tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl) +
2291 			   desc_n * sizeof(struct rte_mbuf *), 0, socket);
2292 	if (!tmpl) {
2293 		rte_errno = ENOMEM;
2294 		return NULL;
2295 	}
2296 	tmpl->type = MLX5_RXQ_TYPE_STANDARD;
2297 	if (mlx5_mr_btree_init(&tmpl->rxq.mr_ctrl.cache_bh,
2298 			       MLX5_MR_BTREE_CACHE_N, socket)) {
2299 		/* rte_errno is already set. */
2300 		goto error;
2301 	}
2302 	tmpl->socket = socket;
2303 	if (dev->data->dev_conf.intr_conf.rxq)
2304 		tmpl->irq = 1;
2305 	mprq_stride_nums = config->mprq.stride_num_n ?
2306 		config->mprq.stride_num_n : MLX5_MPRQ_STRIDE_NUM_N;
2307 	mprq_stride_size = non_scatter_min_mbuf_size <=
2308 		(1U << config->mprq.max_stride_size_n) ?
2309 		log2above(non_scatter_min_mbuf_size) : MLX5_MPRQ_STRIDE_SIZE_N;
2310 	mprq_stride_cap = (config->mprq.stride_num_n ?
2311 		(1U << config->mprq.stride_num_n) : (1U << mprq_stride_nums)) *
2312 			(config->mprq.stride_size_n ?
2313 		(1U << config->mprq.stride_size_n) : (1U << mprq_stride_size));
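	/*
	 * mprq_stride_cap is the byte capacity of a single MPRQ buffer:
	 * stride count times stride size, taking the devargs when set and
	 * the values computed above otherwise. For illustration only, 2^6
	 * strides of 2^11 bytes give a 128 KiB capacity.
	 */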
2314 	/*
2315 	 * This Rx queue can be configured as a Multi-Packet RQ if all of the
2316 	 * following conditions are met:
2317 	 *  - MPRQ is enabled.
2318 	 *  - The number of descs is more than the number of strides.
2319 	 *  - max_rx_pkt_len plus overhead is less than the max size
2320 	 *    of a stride or mprq_stride_size is specified by a user.
2321 	 *    Need to nake sure that there are enough stides to encap
2322 	 *    Make sure that there are enough strides to fit
2323 	 *    the maximum packet size in case mprq_stride_size is set.
2324 	 */
2325 	if (mprq_en && desc > (1U << mprq_stride_nums) &&
2326 	    (non_scatter_min_mbuf_size <=
2327 	     (1U << config->mprq.max_stride_size_n) ||
2328 	     (config->mprq.stride_size_n &&
2329 	      non_scatter_min_mbuf_size <= mprq_stride_cap))) {
2330 		/* TODO: Rx scatter isn't supported yet. */
2331 		tmpl->rxq.sges_n = 0;
2332 		/* Trim the number of descs needed. */
2333 		desc >>= mprq_stride_nums;
2334 		tmpl->rxq.strd_num_n = config->mprq.stride_num_n ?
2335 			config->mprq.stride_num_n : mprq_stride_nums;
2336 		tmpl->rxq.strd_sz_n = config->mprq.stride_size_n ?
2337 			config->mprq.stride_size_n : mprq_stride_size;
2338 		tmpl->rxq.strd_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT;
2339 		tmpl->rxq.strd_scatter_en =
2340 				!!(offloads & DEV_RX_OFFLOAD_SCATTER);
2341 		tmpl->rxq.mprq_max_memcpy_len = RTE_MIN(first_mb_free_size,
2342 				config->mprq.max_memcpy_len);
2343 		max_lro_size = RTE_MIN(max_rx_pkt_len,
2344 				       (1u << tmpl->rxq.strd_num_n) *
2345 				       (1u << tmpl->rxq.strd_sz_n));
2346 		DRV_LOG(DEBUG,
2347 			"port %u Rx queue %u: Multi-Packet RQ is enabled"
2348 			" strd_num_n = %u, strd_sz_n = %u",
2349 			dev->data->port_id, idx,
2350 			tmpl->rxq.strd_num_n, tmpl->rxq.strd_sz_n);
2351 	} else if (max_rx_pkt_len <= first_mb_free_size) {
2352 		tmpl->rxq.sges_n = 0;
2353 		max_lro_size = max_rx_pkt_len;
2354 	} else if (offloads & DEV_RX_OFFLOAD_SCATTER) {
2355 		unsigned int size = non_scatter_min_mbuf_size;
2356 		unsigned int sges_n;
2357 
2358 		if (lro_on_queue && first_mb_free_size <
2359 		    MLX5_MAX_LRO_HEADER_FIX) {
2360 			DRV_LOG(ERR, "Not enough space in the first segment(%u)"
2361 				" to include the max header size(%u) for LRO",
2362 				first_mb_free_size, MLX5_MAX_LRO_HEADER_FIX);
2363 			rte_errno = ENOTSUP;
2364 			goto error;
2365 		}
2366 		/*
2367 		 * Determine the number of SGEs needed for a full packet
2368 		 * and round it to the next power of two.
2369 		 */
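		/*
		 * Illustrative numbers: a 9000 B maximum Rx packet plus
		 * 128 B of headroom spread over mbufs with a 2048 B data
		 * room needs ceil(9128 / 2048) = 5 buffers, so
		 * sges_n = log2above(5) = 3 and 8 SGEs are programmed.
		 */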
2370 		sges_n = log2above((size / mb_len) + !!(size % mb_len));
2371 		if (sges_n > MLX5_MAX_LOG_RQ_SEGS) {
2372 			DRV_LOG(ERR,
2373 				"port %u too many SGEs (%u) needed to handle"
2374 				" requested maximum packet size %u, the maximum"
2375 				" supported are %u", dev->data->port_id,
2376 				1 << sges_n, max_rx_pkt_len,
2377 				1u << MLX5_MAX_LOG_RQ_SEGS);
2378 			rte_errno = ENOTSUP;
2379 			goto error;
2380 		}
2381 		tmpl->rxq.sges_n = sges_n;
2382 		max_lro_size = max_rx_pkt_len;
2383 	}
2384 	if (config->mprq.enabled && !mlx5_rxq_mprq_enabled(&tmpl->rxq))
2385 		DRV_LOG(WARNING,
2386 			"port %u MPRQ is requested but cannot be enabled\n"
2387 			" (requested: pkt_sz = %u, desc_num = %u,"
2388 			" rxq_num = %u, stride_sz = %u, stride_num = %u\n"
2389 			"  supported: min_rxqs_num = %u,"
2390 			" min_stride_sz = %u, max_stride_sz = %u).",
2391 			dev->data->port_id, non_scatter_min_mbuf_size,
2392 			desc, priv->rxqs_n,
2393 			config->mprq.stride_size_n ?
2394 				(1U << config->mprq.stride_size_n) :
2395 				(1U << mprq_stride_size),
2396 			config->mprq.stride_num_n ?
2397 				(1U << config->mprq.stride_num_n) :
2398 				(1U << mprq_stride_nums),
2399 			config->mprq.min_rxqs_num,
2400 			(1U << config->mprq.min_stride_size_n),
2401 			(1U << config->mprq.max_stride_size_n));
2402 	DRV_LOG(DEBUG, "port %u maximum number of segments per packet: %u",
2403 		dev->data->port_id, 1 << tmpl->rxq.sges_n);
2404 	if (desc % (1 << tmpl->rxq.sges_n)) {
2405 		DRV_LOG(ERR,
2406 			"port %u number of Rx queue descriptors (%u) is not a"
2407 			" multiple of SGEs per packet (%u)",
2408 			dev->data->port_id,
2409 			desc,
2410 			1 << tmpl->rxq.sges_n);
2411 		rte_errno = EINVAL;
2412 		goto error;
2413 	}
2414 	mlx5_max_lro_msg_size_adjust(dev, idx, max_lro_size);
2415 	/* Toggle RX checksum offload if hardware supports it. */
2416 	tmpl->rxq.csum = !!(offloads & DEV_RX_OFFLOAD_CHECKSUM);
2417 	tmpl->rxq.hw_timestamp = !!(offloads & DEV_RX_OFFLOAD_TIMESTAMP);
2418 	/* Configure VLAN stripping. */
2419 	tmpl->rxq.vlan_strip = !!(offloads & DEV_RX_OFFLOAD_VLAN_STRIP);
2420 	/* By default, FCS (CRC) is stripped by hardware. */
2421 	tmpl->rxq.crc_present = 0;
2422 	tmpl->rxq.lro = lro_on_queue;
2423 	if (offloads & DEV_RX_OFFLOAD_KEEP_CRC) {
2424 		if (config->hw_fcs_strip) {
2425 			/*
2426 			 * RQs used for LRO-enabled TIRs should not be
2427 			 * configured to scatter the FCS.
2428 			 */
2429 			if (lro_on_queue)
2430 				DRV_LOG(WARNING,
2431 					"port %u CRC stripping has been "
2432 					"disabled but will still be performed "
2433 					"by hardware, because LRO is enabled",
2434 					dev->data->port_id);
2435 			else
2436 				tmpl->rxq.crc_present = 1;
2437 		} else {
2438 			DRV_LOG(WARNING,
2439 				"port %u CRC stripping has been disabled but will"
2440 				" still be performed by hardware, make sure MLNX_OFED"
2441 				" and firmware are up to date",
2442 				dev->data->port_id);
2443 		}
2444 	}
2445 	DRV_LOG(DEBUG,
2446 		"port %u CRC stripping is %s, %u bytes will be subtracted from"
2447 		" incoming frames to hide it",
2448 		dev->data->port_id,
2449 		tmpl->rxq.crc_present ? "disabled" : "enabled",
2450 		tmpl->rxq.crc_present << 2);
2451 	/* Save port ID. */
2452 	tmpl->rxq.rss_hash = !!priv->rss_conf.rss_hf &&
2453 		(!!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS));
2454 	tmpl->rxq.port_id = dev->data->port_id;
2455 	tmpl->priv = priv;
2456 	tmpl->rxq.mp = mp;
2457 	tmpl->rxq.elts_n = log2above(desc);
2458 	tmpl->rxq.rq_repl_thresh =
2459 		MLX5_VPMD_RXQ_RPLNSH_THRESH(1 << tmpl->rxq.elts_n);
2460 	tmpl->rxq.elts =
2461 		(struct rte_mbuf *(*)[1 << tmpl->rxq.elts_n])(tmpl + 1);
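	/*
	 * The elts[] array lives in the same allocation, right behind the
	 * control structure: desc_n extra slots were reserved in the
	 * mlx5_malloc() call above, so (tmpl + 1) is its first entry.
	 */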
2462 #ifndef RTE_ARCH_64
2463 	tmpl->rxq.uar_lock_cq = &priv->sh->uar_lock_cq;
2464 #endif
2465 	tmpl->rxq.idx = idx;
2466 	rte_atomic32_inc(&tmpl->refcnt);
2467 	LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
2468 	return tmpl;
2469 error:
2470 	mlx5_free(tmpl);
2471 	return NULL;
2472 }
2473 
2474 /**
2475  * Create a DPDK Rx hairpin queue.
2476  *
2477  * @param dev
2478  *   Pointer to Ethernet device.
2479  * @param idx
2480  *   RX queue index.
2481  * @param desc
2482  *   Number of descriptors to configure in queue.
2483  * @param hairpin_conf
2484  *   The hairpin binding configuration.
2485  *
2486  * @return
2487  *   A DPDK queue object on success, NULL otherwise and rte_errno is set.
2488  */
2489 struct mlx5_rxq_ctrl *
2490 mlx5_rxq_hairpin_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
2491 		     const struct rte_eth_hairpin_conf *hairpin_conf)
2492 {
2493 	struct mlx5_priv *priv = dev->data->dev_private;
2494 	struct mlx5_rxq_ctrl *tmpl;
2495 
2496 	tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl), 0,
2497 			   SOCKET_ID_ANY);
2498 	if (!tmpl) {
2499 		rte_errno = ENOMEM;
2500 		return NULL;
2501 	}
2502 	tmpl->type = MLX5_RXQ_TYPE_HAIRPIN;
2503 	tmpl->socket = SOCKET_ID_ANY;
2504 	tmpl->rxq.rss_hash = 0;
2505 	tmpl->rxq.port_id = dev->data->port_id;
2506 	tmpl->priv = priv;
2507 	tmpl->rxq.mp = NULL;
2508 	tmpl->rxq.elts_n = log2above(desc);
2509 	tmpl->rxq.elts = NULL;
2510 	tmpl->rxq.mr_ctrl.cache_bh = (struct mlx5_mr_btree) { 0 };
2511 	tmpl->hairpin_conf = *hairpin_conf;
2512 	tmpl->rxq.idx = idx;
2513 	rte_atomic32_inc(&tmpl->refcnt);
2514 	LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
2515 	return tmpl;
2516 }
2517 
2518 /**
2519  * Get a Rx queue.
2520  *
2521  * @param dev
2522  *   Pointer to Ethernet device.
2523  * @param idx
2524  *   RX queue index.
2525  *
2526  * @return
2527  *   A pointer to the queue if it exists, NULL otherwise.
2528  */
2529 struct mlx5_rxq_ctrl *
2530 mlx5_rxq_get(struct rte_eth_dev *dev, uint16_t idx)
2531 {
2532 	struct mlx5_priv *priv = dev->data->dev_private;
2533 	struct mlx5_rxq_ctrl *rxq_ctrl = NULL;
2534 
2535 	if ((*priv->rxqs)[idx]) {
2536 		rxq_ctrl = container_of((*priv->rxqs)[idx],
2537 					struct mlx5_rxq_ctrl,
2538 					rxq);
2539 		mlx5_rxq_obj_get(dev, idx);
2540 		rte_atomic32_inc(&rxq_ctrl->refcnt);
2541 	}
2542 	return rxq_ctrl;
2543 }
2544 
2545 /**
2546  * Release a Rx queue.
2547  *
2548  * @param dev
2549  *   Pointer to Ethernet device.
2550  * @param idx
2551  *   RX queue index.
2552  *
2553  * @return
2554  *   1 while a reference on it exists, 0 when freed.
2555  */
2556 int
2557 mlx5_rxq_release(struct rte_eth_dev *dev, uint16_t idx)
2558 {
2559 	struct mlx5_priv *priv = dev->data->dev_private;
2560 	struct mlx5_rxq_ctrl *rxq_ctrl;
2561 
2562 	if (!(*priv->rxqs)[idx])
2563 		return 0;
2564 	rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
2565 	MLX5_ASSERT(rxq_ctrl->priv);
2566 	if (rxq_ctrl->obj && !mlx5_rxq_obj_release(rxq_ctrl->obj))
2567 		rxq_ctrl->obj = NULL;
2568 	if (rte_atomic32_dec_and_test(&rxq_ctrl->refcnt)) {
2569 		if (rxq_ctrl->rq_dbr_umem_id_valid)
2570 			claim_zero(mlx5_release_dbr(&priv->dbrpgs,
2571 						    rxq_ctrl->rq_dbr_umem_id,
2572 						    rxq_ctrl->rq_dbr_offset));
2573 		if (rxq_ctrl->cq_dbr_umem_id_valid)
2574 			claim_zero(mlx5_release_dbr(&priv->dbrpgs,
2575 						    rxq_ctrl->cq_dbr_umem_id,
2576 						    rxq_ctrl->cq_dbr_offset));
2577 		if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD)
2578 			mlx5_mr_btree_free(&rxq_ctrl->rxq.mr_ctrl.cache_bh);
2579 		LIST_REMOVE(rxq_ctrl, next);
2580 		mlx5_free(rxq_ctrl);
2581 		(*priv->rxqs)[idx] = NULL;
2582 		return 0;
2583 	}
2584 	return 1;
2585 }
2586 
2587 /**
2588  * Verify the Rx Queue list is empty
2589  *
2590  * @param dev
2591  *   Pointer to Ethernet device.
2592  *
2593  * @return
2594  *   The number of objects not released.
2595  */
2596 int
2597 mlx5_rxq_verify(struct rte_eth_dev *dev)
2598 {
2599 	struct mlx5_priv *priv = dev->data->dev_private;
2600 	struct mlx5_rxq_ctrl *rxq_ctrl;
2601 	int ret = 0;
2602 
2603 	LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
2604 		DRV_LOG(DEBUG, "port %u Rx Queue %u still referenced",
2605 			dev->data->port_id, rxq_ctrl->rxq.idx);
2606 		++ret;
2607 	}
2608 	return ret;
2609 }
2610 
2611 /**
2612  * Get a Rx queue type.
2613  *
2614  * @param dev
2615  *   Pointer to Ethernet device.
2616  * @param idx
2617  *   Rx queue index.
2618  *
2619  * @return
2620  *   The Rx queue type.
2621  */
2622 enum mlx5_rxq_type
2623 mlx5_rxq_get_type(struct rte_eth_dev *dev, uint16_t idx)
2624 {
2625 	struct mlx5_priv *priv = dev->data->dev_private;
2626 	struct mlx5_rxq_ctrl *rxq_ctrl = NULL;
2627 
2628 	if (idx < priv->rxqs_n && (*priv->rxqs)[idx]) {
2629 		rxq_ctrl = container_of((*priv->rxqs)[idx],
2630 					struct mlx5_rxq_ctrl,
2631 					rxq);
2632 		return rxq_ctrl->type;
2633 	}
2634 	return MLX5_RXQ_TYPE_UNDEFINED;
2635 }
2636 
2637 /**
2638  * Create an indirection table.
2639  *
2640  * @param dev
2641  *   Pointer to Ethernet device.
2642  * @param queues
2643  *   Queues entering in the indirection table.
2644  * @param queues_n
2645  *   Number of queues in the array.
2646  *
2647  * @return
2648  *   The Verbs/DevX object initialised, NULL otherwise and rte_errno is set.
2649  */
2650 static struct mlx5_ind_table_obj *
2651 mlx5_ind_table_obj_new(struct rte_eth_dev *dev, const uint16_t *queues,
2652 		       uint32_t queues_n, enum mlx5_ind_tbl_type type)
2653 {
2654 	struct mlx5_priv *priv = dev->data->dev_private;
2655 	struct mlx5_ind_table_obj *ind_tbl;
2656 	unsigned int i = 0, j = 0, k = 0;
2657 
2658 	ind_tbl = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*ind_tbl) +
2659 			      queues_n * sizeof(uint16_t), 0, SOCKET_ID_ANY);
2660 	if (!ind_tbl) {
2661 		rte_errno = ENOMEM;
2662 		return NULL;
2663 	}
2664 	ind_tbl->type = type;
2665 	if (ind_tbl->type == MLX5_IND_TBL_TYPE_IBV) {
2666 		const unsigned int wq_n = rte_is_power_of_2(queues_n) ?
2667 			log2above(queues_n) :
2668 			log2above(priv->config.ind_table_max_size);
2669 		struct ibv_wq *wq[1 << wq_n];
2670 
2671 		for (i = 0; i != queues_n; ++i) {
2672 			struct mlx5_rxq_ctrl *rxq = mlx5_rxq_get(dev,
2673 								 queues[i]);
2674 			if (!rxq)
2675 				goto error;
2676 			wq[i] = rxq->obj->wq;
2677 			ind_tbl->queues[i] = queues[i];
2678 		}
2679 		ind_tbl->queues_n = queues_n;
2680 		/* Finalise indirection table. */
2681 		k = i; /* Retain value of i for use in error case. */
2682 		for (j = 0; k != (unsigned int)(1 << wq_n); ++k, ++j)
2683 			wq[k] = wq[j];
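		/*
		 * The remaining entries wrap around the configured queues so
		 * the table is a full power of two, e.g. 3 queues in a
		 * 4-entry table become {q0, q1, q2, q0}.
		 */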
2684 		ind_tbl->ind_table = mlx5_glue->create_rwq_ind_table
2685 			(priv->sh->ctx,
2686 			 &(struct ibv_rwq_ind_table_init_attr){
2687 				.log_ind_tbl_size = wq_n,
2688 				.ind_tbl = wq,
2689 				.comp_mask = 0,
2690 			});
2691 		if (!ind_tbl->ind_table) {
2692 			rte_errno = errno;
2693 			goto error;
2694 		}
2695 	} else { /* ind_tbl->type == MLX5_IND_TBL_TYPE_DEVX */
2696 		struct mlx5_devx_rqt_attr *rqt_attr = NULL;
2697 		const unsigned int rqt_n =
2698 			1 << (rte_is_power_of_2(queues_n) ?
2699 			      log2above(queues_n) :
2700 			      log2above(priv->config.ind_table_max_size));
2701 
2702 		rqt_attr = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*rqt_attr) +
2703 				      rqt_n * sizeof(uint32_t), 0,
2704 				      SOCKET_ID_ANY);
2705 		if (!rqt_attr) {
2706 			DRV_LOG(ERR, "port %u cannot allocate RQT resources",
2707 				dev->data->port_id);
2708 			rte_errno = ENOMEM;
2709 			goto error;
2710 		}
2711 		rqt_attr->rqt_max_size = priv->config.ind_table_max_size;
2712 		rqt_attr->rqt_actual_size = rqt_n;
2713 		for (i = 0; i != queues_n; ++i) {
2714 			struct mlx5_rxq_ctrl *rxq = mlx5_rxq_get(dev,
2715 								 queues[i]);
2716 			if (!rxq)
2717 				goto error;
2718 			rqt_attr->rq_list[i] = rxq->obj->rq->id;
2719 			ind_tbl->queues[i] = queues[i];
2720 		}
2721 		k = i; /* Retain value of i for use in error case. */
2722 		for (j = 0; k != rqt_n; ++k, ++j)
2723 			rqt_attr->rq_list[k] = rqt_attr->rq_list[j];
2724 		ind_tbl->rqt = mlx5_devx_cmd_create_rqt(priv->sh->ctx,
2725 							rqt_attr);
2726 		mlx5_free(rqt_attr);
2727 		if (!ind_tbl->rqt) {
2728 			DRV_LOG(ERR, "port %u cannot create DevX RQT",
2729 				dev->data->port_id);
2730 			rte_errno = errno;
2731 			goto error;
2732 		}
2733 		ind_tbl->queues_n = queues_n;
2734 	}
2735 	rte_atomic32_inc(&ind_tbl->refcnt);
2736 	LIST_INSERT_HEAD(&priv->ind_tbls, ind_tbl, next);
2737 	return ind_tbl;
2738 error:
2739 	for (j = 0; j < i; j++)
2740 		mlx5_rxq_release(dev, ind_tbl->queues[j]);
2741 	mlx5_free(ind_tbl);
2742 	DEBUG("port %u cannot create indirection table", dev->data->port_id);
2743 	return NULL;
2744 }
2745 
2746 /**
2747  * Get an indirection table.
2748  *
2749  * @param dev
2750  *   Pointer to Ethernet device.
2751  * @param queues
2752  *   Queues entering in the indirection table.
2753  * @param queues_n
2754  *   Number of queues in the array.
2755  *
2756  * @return
2757  *   An indirection table if found.
2758  */
2759 static struct mlx5_ind_table_obj *
2760 mlx5_ind_table_obj_get(struct rte_eth_dev *dev, const uint16_t *queues,
2761 		       uint32_t queues_n)
2762 {
2763 	struct mlx5_priv *priv = dev->data->dev_private;
2764 	struct mlx5_ind_table_obj *ind_tbl;
2765 
2766 	LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
2767 		if ((ind_tbl->queues_n == queues_n) &&
2768 		    (memcmp(ind_tbl->queues, queues,
2769 			    ind_tbl->queues_n * sizeof(ind_tbl->queues[0]))
2770 		     == 0))
2771 			break;
2772 	}
2773 	if (ind_tbl) {
2774 		unsigned int i;
2775 
2776 		rte_atomic32_inc(&ind_tbl->refcnt);
2777 		for (i = 0; i != ind_tbl->queues_n; ++i)
2778 			mlx5_rxq_get(dev, ind_tbl->queues[i]);
2779 	}
2780 	return ind_tbl;
2781 }
2782 
2783 /**
2784  * Release an indirection table.
2785  *
2786  * @param dev
2787  *   Pointer to Ethernet device.
2788  * @param ind_table
2789  *   Indirection table to release.
2790  *
2791  * @return
2792  *   1 while a reference on it exists, 0 when freed.
2793  */
2794 static int
2795 mlx5_ind_table_obj_release(struct rte_eth_dev *dev,
2796 			   struct mlx5_ind_table_obj *ind_tbl)
2797 {
2798 	unsigned int i;
2799 
2800 	if (rte_atomic32_dec_and_test(&ind_tbl->refcnt)) {
2801 		if (ind_tbl->type == MLX5_IND_TBL_TYPE_IBV)
2802 			claim_zero(mlx5_glue->destroy_rwq_ind_table
2803 							(ind_tbl->ind_table));
2804 		else if (ind_tbl->type == MLX5_IND_TBL_TYPE_DEVX)
2805 			claim_zero(mlx5_devx_cmd_destroy(ind_tbl->rqt));
2806 	}
2807 	for (i = 0; i != ind_tbl->queues_n; ++i)
2808 		claim_nonzero(mlx5_rxq_release(dev, ind_tbl->queues[i]));
2809 	if (!rte_atomic32_read(&ind_tbl->refcnt)) {
2810 		LIST_REMOVE(ind_tbl, next);
2811 		mlx5_free(ind_tbl);
2812 		return 0;
2813 	}
2814 	return 1;
2815 }
2816 
2817 /**
2818  * Verify the indirection table list is empty
2819  *
2820  * @param dev
2821  *   Pointer to Ethernet device.
2822  *
2823  * @return
2824  *   The number of objects not released.
2825  */
2826 int
2827 mlx5_ind_table_obj_verify(struct rte_eth_dev *dev)
2828 {
2829 	struct mlx5_priv *priv = dev->data->dev_private;
2830 	struct mlx5_ind_table_obj *ind_tbl;
2831 	int ret = 0;
2832 
2833 	LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
2834 		DRV_LOG(DEBUG,
2835 			"port %u indirection table obj %p still referenced",
2836 			dev->data->port_id, (void *)ind_tbl);
2837 		++ret;
2838 	}
2839 	return ret;
2840 }
2841 
2842 /**
2843  * Create an Rx Hash queue.
2844  *
2845  * @param dev
2846  *   Pointer to Ethernet device.
2847  * @param rss_key
2848  *   RSS key for the Rx hash queue.
2849  * @param rss_key_len
2850  *   RSS key length.
2851  * @param hash_fields
2852  *   Verbs protocol hash field to make the RSS on.
2853  * @param queues
2854  *   Queues entering the hash queue. In case of empty hash_fields only the
2855  *   first queue index will be taken for the indirection table.
2856  * @param queues_n
2857  *   Number of queues.
2858  * @param tunnel
2859  *   Tunnel type.
2860  *
2861  * @return
2862  *   Index of the initialised Verbs/DevX object, 0 otherwise and rte_errno is set.
2863  */
2864 uint32_t
2865 mlx5_hrxq_new(struct rte_eth_dev *dev,
2866 	      const uint8_t *rss_key, uint32_t rss_key_len,
2867 	      uint64_t hash_fields,
2868 	      const uint16_t *queues, uint32_t queues_n,
2869 	      int tunnel __rte_unused)
2870 {
2871 	struct mlx5_priv *priv = dev->data->dev_private;
2872 	struct mlx5_hrxq *hrxq;
2873 	uint32_t hrxq_idx = 0;
2874 	struct ibv_qp *qp = NULL;
2875 	struct mlx5_ind_table_obj *ind_tbl;
2876 	int err;
2877 	struct mlx5_devx_obj *tir = NULL;
2878 	struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[queues[0]];
2879 	struct mlx5_rxq_ctrl *rxq_ctrl =
2880 		container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
2881 
2882 	queues_n = hash_fields ? queues_n : 1;
2883 	ind_tbl = mlx5_ind_table_obj_get(dev, queues, queues_n);
2884 	if (!ind_tbl) {
2885 		enum mlx5_ind_tbl_type type;
2886 
2887 		type = rxq_ctrl->obj->type == MLX5_RXQ_OBJ_TYPE_IBV ?
2888 				MLX5_IND_TBL_TYPE_IBV : MLX5_IND_TBL_TYPE_DEVX;
2889 		ind_tbl = mlx5_ind_table_obj_new(dev, queues, queues_n, type);
2890 	}
2891 	if (!ind_tbl) {
2892 		rte_errno = ENOMEM;
2893 		return 0;
2894 	}
2895 	if (ind_tbl->type == MLX5_IND_TBL_TYPE_IBV) {
2896 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
2897 		struct mlx5dv_qp_init_attr qp_init_attr;
2898 
2899 		memset(&qp_init_attr, 0, sizeof(qp_init_attr));
2900 		if (tunnel) {
2901 			qp_init_attr.comp_mask =
2902 				MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;
2903 			qp_init_attr.create_flags =
2904 				MLX5DV_QP_CREATE_TUNNEL_OFFLOADS;
2905 		}
2906 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
2907 		if (dev->data->dev_conf.lpbk_mode) {
2908 			/*
2909 			 * Allow packets sent from the NIC to loop
2910 			 * back without source MAC check.
2911 			 */
2912 			qp_init_attr.comp_mask |=
2913 				MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;
2914 			qp_init_attr.create_flags |=
2915 				MLX5DV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_UC;
2916 		}
2917 #endif
2918 		qp = mlx5_glue->dv_create_qp
2919 			(priv->sh->ctx,
2920 			 &(struct ibv_qp_init_attr_ex){
2921 				.qp_type = IBV_QPT_RAW_PACKET,
2922 				.comp_mask =
2923 					IBV_QP_INIT_ATTR_PD |
2924 					IBV_QP_INIT_ATTR_IND_TABLE |
2925 					IBV_QP_INIT_ATTR_RX_HASH,
2926 				.rx_hash_conf = (struct ibv_rx_hash_conf){
2927 					.rx_hash_function =
2928 						IBV_RX_HASH_FUNC_TOEPLITZ,
2929 					.rx_hash_key_len = rss_key_len,
2930 					.rx_hash_key =
2931 						(void *)(uintptr_t)rss_key,
2932 					.rx_hash_fields_mask = hash_fields,
2933 				},
2934 				.rwq_ind_tbl = ind_tbl->ind_table,
2935 				.pd = priv->sh->pd,
2936 			  },
2937 			  &qp_init_attr);
2938 #else
2939 		qp = mlx5_glue->create_qp_ex
2940 			(priv->sh->ctx,
2941 			 &(struct ibv_qp_init_attr_ex){
2942 				.qp_type = IBV_QPT_RAW_PACKET,
2943 				.comp_mask =
2944 					IBV_QP_INIT_ATTR_PD |
2945 					IBV_QP_INIT_ATTR_IND_TABLE |
2946 					IBV_QP_INIT_ATTR_RX_HASH,
2947 				.rx_hash_conf = (struct ibv_rx_hash_conf){
2948 					.rx_hash_function =
2949 						IBV_RX_HASH_FUNC_TOEPLITZ,
2950 					.rx_hash_key_len = rss_key_len,
2951 					.rx_hash_key =
2952 						(void *)(uintptr_t)rss_key,
2953 					.rx_hash_fields_mask = hash_fields,
2954 				},
2955 				.rwq_ind_tbl = ind_tbl->ind_table,
2956 				.pd = priv->sh->pd,
2957 			 });
2958 #endif
2959 		if (!qp) {
2960 			rte_errno = errno;
2961 			goto error;
2962 		}
2963 	} else { /* ind_tbl->type == MLX5_IND_TBL_TYPE_DEVX */
2964 		struct mlx5_devx_tir_attr tir_attr;
2965 		uint32_t i;
2966 		uint32_t lro = 1;
2967 
2968 		/* Enable TIR LRO only if all the queues were configured for it. */
2969 		for (i = 0; i < queues_n; ++i) {
2970 			if (!(*priv->rxqs)[queues[i]]->lro) {
2971 				lro = 0;
2972 				break;
2973 			}
2974 		}
2975 		memset(&tir_attr, 0, sizeof(tir_attr));
2976 		tir_attr.disp_type = MLX5_TIRC_DISP_TYPE_INDIRECT;
2977 		tir_attr.rx_hash_fn = MLX5_RX_HASH_FN_TOEPLITZ;
2978 		tir_attr.tunneled_offload_en = !!tunnel;
2979 		/* If needed, translate hash_fields bitmap to PRM format. */
2980 		if (hash_fields) {
2981 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
2982 			struct mlx5_rx_hash_field_select *rx_hash_field_select =
2983 					hash_fields & IBV_RX_HASH_INNER ?
2984 					&tir_attr.rx_hash_field_selector_inner :
2985 					&tir_attr.rx_hash_field_selector_outer;
2986 #else
2987 			struct mlx5_rx_hash_field_select *rx_hash_field_select =
2988 					&tir_attr.rx_hash_field_selector_outer;
2989 #endif
2990 
2991 			/* 1 bit: 0: IPv4, 1: IPv6. */
2992 			rx_hash_field_select->l3_prot_type =
2993 				!!(hash_fields & MLX5_IPV6_IBV_RX_HASH);
2994 			/* 1 bit: 0: TCP, 1: UDP. */
2995 			rx_hash_field_select->l4_prot_type =
2996 				!!(hash_fields & MLX5_UDP_IBV_RX_HASH);
2997 			/* Bitmask which sets which fields to use in RX Hash. */
2998 			rx_hash_field_select->selected_fields =
2999 			((!!(hash_fields & MLX5_L3_SRC_IBV_RX_HASH)) <<
3000 			 MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_SRC_IP) |
3001 			(!!(hash_fields & MLX5_L3_DST_IBV_RX_HASH)) <<
3002 			 MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_DST_IP |
3003 			(!!(hash_fields & MLX5_L4_SRC_IBV_RX_HASH)) <<
3004 			 MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_L4_SPORT |
3005 			(!!(hash_fields & MLX5_L4_DST_IBV_RX_HASH)) <<
3006 			 MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_L4_DPORT;
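			/*
			 * The resulting bitmask mirrors hash_fields bit by
			 * bit: one selector each for source/destination IP
			 * and L4 source/destination port, at the PRM bit
			 * positions expected by the TIR context.
			 */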
3007 		}
3008 		if (rxq_ctrl->obj->type == MLX5_RXQ_OBJ_TYPE_DEVX_HAIRPIN)
3009 			tir_attr.transport_domain = priv->sh->td->id;
3010 		else
3011 			tir_attr.transport_domain = priv->sh->tdn;
3012 		memcpy(tir_attr.rx_hash_toeplitz_key, rss_key,
3013 		       MLX5_RSS_HASH_KEY_LEN);
3014 		tir_attr.indirect_table = ind_tbl->rqt->id;
3015 		if (dev->data->dev_conf.lpbk_mode)
3016 			tir_attr.self_lb_block =
3017 					MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
3018 		if (lro) {
3019 			tir_attr.lro_timeout_period_usecs =
3020 					priv->config.lro.timeout;
3021 			tir_attr.lro_max_msg_sz = priv->max_lro_msg_size;
3022 			tir_attr.lro_enable_mask =
3023 					MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
3024 					MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO;
3025 		}
3026 		tir = mlx5_devx_cmd_create_tir(priv->sh->ctx, &tir_attr);
3027 		if (!tir) {
3028 			DRV_LOG(ERR, "port %u cannot create DevX TIR",
3029 				dev->data->port_id);
3030 			rte_errno = errno;
3031 			goto error;
3032 		}
3033 	}
3034 	hrxq = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_HRXQ], &hrxq_idx);
3035 	if (!hrxq)
3036 		goto error;
3037 	hrxq->ind_table = ind_tbl;
3038 	if (ind_tbl->type == MLX5_IND_TBL_TYPE_IBV) {
3039 		hrxq->qp = qp;
3040 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
3041 		hrxq->action =
3042 			mlx5_glue->dv_create_flow_action_dest_ibv_qp(hrxq->qp);
3043 		if (!hrxq->action) {
3044 			rte_errno = errno;
3045 			goto error;
3046 		}
3047 #endif
3048 	} else { /* ind_tbl->type == MLX5_IND_TBL_TYPE_DEVX */
3049 		hrxq->tir = tir;
3050 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
3051 		hrxq->action = mlx5_glue->dv_create_flow_action_dest_devx_tir
3052 							(hrxq->tir->obj);
3053 		if (!hrxq->action) {
3054 			rte_errno = errno;
3055 			goto error;
3056 		}
3057 #endif
3058 	}
3059 	hrxq->rss_key_len = rss_key_len;
3060 	hrxq->hash_fields = hash_fields;
3061 	memcpy(hrxq->rss_key, rss_key, rss_key_len);
3062 	rte_atomic32_inc(&hrxq->refcnt);
3063 	ILIST_INSERT(priv->sh->ipool[MLX5_IPOOL_HRXQ], &priv->hrxqs, hrxq_idx,
3064 		     hrxq, next);
3065 	return hrxq_idx;
3066 error:
3067 	err = rte_errno; /* Save rte_errno before cleanup. */
3068 	mlx5_ind_table_obj_release(dev, ind_tbl);
3069 	if (qp)
3070 		claim_zero(mlx5_glue->destroy_qp(qp));
3071 	else if (tir)
3072 		claim_zero(mlx5_devx_cmd_destroy(tir));
3073 	rte_errno = err; /* Restore rte_errno. */
3074 	return 0;
3075 }
3076 
3077 /**
3078  * Get an Rx Hash queue.
3079  *
3080  * @param dev
3081  *   Pointer to Ethernet device.
3082  * @param rss_conf
3083  *   RSS configuration for the Rx hash queue.
3084  * @param queues
3085  *   Queues entering the hash queue. In case of empty hash_fields only the
3086  *   first queue index will be taken for the indirection table.
3087  * @param queues_n
3088  *   Number of queues.
3089  *
3090  * @return
3091  *   A hash Rx queue index on success, 0 if not found.
3092  */
3093 uint32_t
3094 mlx5_hrxq_get(struct rte_eth_dev *dev,
3095 	      const uint8_t *rss_key, uint32_t rss_key_len,
3096 	      uint64_t hash_fields,
3097 	      const uint16_t *queues, uint32_t queues_n)
3098 {
3099 	struct mlx5_priv *priv = dev->data->dev_private;
3100 	struct mlx5_hrxq *hrxq;
3101 	uint32_t idx;
3102 
3103 	queues_n = hash_fields ? queues_n : 1;
3104 	ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_HRXQ], priv->hrxqs, idx,
3105 		      hrxq, next) {
3106 		struct mlx5_ind_table_obj *ind_tbl;
3107 
3108 		if (hrxq->rss_key_len != rss_key_len)
3109 			continue;
3110 		if (memcmp(hrxq->rss_key, rss_key, rss_key_len))
3111 			continue;
3112 		if (hrxq->hash_fields != hash_fields)
3113 			continue;
3114 		ind_tbl = mlx5_ind_table_obj_get(dev, queues, queues_n);
3115 		if (!ind_tbl)
3116 			continue;
3117 		if (ind_tbl != hrxq->ind_table) {
3118 			mlx5_ind_table_obj_release(dev, ind_tbl);
3119 			continue;
3120 		}
3121 		rte_atomic32_inc(&hrxq->refcnt);
3122 		return idx;
3123 	}
3124 	return 0;
3125 }
3126 
3127 /**
3128  * Release the hash Rx queue.
3129  *
3130  * @param dev
3131  *   Pointer to Ethernet device.
3132  * @param hrxq
3133  *   Index to Hash Rx queue to release.
3134  *
3135  * @return
3136  *   1 while a reference on it exists, 0 when freed.
3137  */
3138 int
3139 mlx5_hrxq_release(struct rte_eth_dev *dev, uint32_t hrxq_idx)
3140 {
3141 	struct mlx5_priv *priv = dev->data->dev_private;
3142 	struct mlx5_hrxq *hrxq;
3143 
3144 	hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ], hrxq_idx);
3145 	if (!hrxq)
3146 		return 0;
3147 	if (rte_atomic32_dec_and_test(&hrxq->refcnt)) {
3148 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
3149 		mlx5_glue->destroy_flow_action(hrxq->action);
3150 #endif
3151 		if (hrxq->ind_table->type == MLX5_IND_TBL_TYPE_IBV)
3152 			claim_zero(mlx5_glue->destroy_qp(hrxq->qp));
3153 		else /* hrxq->ind_table->type == MLX5_IND_TBL_TYPE_DEVX */
3154 			claim_zero(mlx5_devx_cmd_destroy(hrxq->tir));
3155 		mlx5_ind_table_obj_release(dev, hrxq->ind_table);
3156 		ILIST_REMOVE(priv->sh->ipool[MLX5_IPOOL_HRXQ], &priv->hrxqs,
3157 			     hrxq_idx, hrxq, next);
3158 		mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_HRXQ], hrxq_idx);
3159 		return 0;
3160 	}
3161 	claim_nonzero(mlx5_ind_table_obj_release(dev, hrxq->ind_table));
3162 	return 1;
3163 }
3164 
3165 /**
3166  * Verify the hash Rx queue list is empty
3167  *
3168  * @param dev
3169  *   Pointer to Ethernet device.
3170  *
3171  * @return
3172  *   The number of objects not released.
3173  */
3174 int
3175 mlx5_hrxq_verify(struct rte_eth_dev *dev)
3176 {
3177 	struct mlx5_priv *priv = dev->data->dev_private;
3178 	struct mlx5_hrxq *hrxq;
3179 	uint32_t idx;
3180 	int ret = 0;
3181 
3182 	ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_HRXQ], priv->hrxqs, idx,
3183 		      hrxq, next) {
3184 		DRV_LOG(DEBUG,
3185 			"port %u hash Rx queue %p still referenced",
3186 			dev->data->port_id, (void *)hrxq);
3187 		++ret;
3188 	}
3189 	return ret;
3190 }
3191 
3192 /**
3193  * Create a drop Rx queue Verbs/DevX object.
3194  *
3195  * @param dev
3196  *   Pointer to Ethernet device.
3197  *
3198  * @return
3199  *   The Verbs/DevX object initialised, NULL otherwise and rte_errno is set.
3200  */
3201 static struct mlx5_rxq_obj *
3202 mlx5_rxq_obj_drop_new(struct rte_eth_dev *dev)
3203 {
3204 	struct mlx5_priv *priv = dev->data->dev_private;
3205 	struct ibv_context *ctx = priv->sh->ctx;
3206 	struct ibv_cq *cq;
3207 	struct ibv_wq *wq = NULL;
3208 	struct mlx5_rxq_obj *rxq;
3209 
3210 	if (priv->drop_queue.rxq)
3211 		return priv->drop_queue.rxq;
3212 	cq = mlx5_glue->create_cq(ctx, 1, NULL, NULL, 0);
3213 	if (!cq) {
3214 		DEBUG("port %u cannot allocate CQ for drop queue",
3215 		      dev->data->port_id);
3216 		rte_errno = errno;
3217 		goto error;
3218 	}
3219 	wq = mlx5_glue->create_wq(ctx,
3220 		 &(struct ibv_wq_init_attr){
3221 			.wq_type = IBV_WQT_RQ,
3222 			.max_wr = 1,
3223 			.max_sge = 1,
3224 			.pd = priv->sh->pd,
3225 			.cq = cq,
3226 		 });
3227 	if (!wq) {
3228 		DEBUG("port %u cannot allocate WQ for drop queue",
3229 		      dev->data->port_id);
3230 		rte_errno = errno;
3231 		goto error;
3232 	}
3233 	rxq = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*rxq), 0, SOCKET_ID_ANY);
3234 	if (!rxq) {
3235 		DEBUG("port %u cannot allocate drop Rx queue memory",
3236 		      dev->data->port_id);
3237 		rte_errno = ENOMEM;
3238 		goto error;
3239 	}
3240 	rxq->ibv_cq = cq;
3241 	rxq->wq = wq;
3242 	priv->drop_queue.rxq = rxq;
3243 	return rxq;
3244 error:
3245 	if (wq)
3246 		claim_zero(mlx5_glue->destroy_wq(wq));
3247 	if (cq)
3248 		claim_zero(mlx5_glue->destroy_cq(cq));
3249 	return NULL;
3250 }
3251 
3252 /**
3253  * Release a drop Rx queue Verbs/DevX object.
3254  *
3255  * @param dev
3256  *   Pointer to Ethernet device.
3260  */
3261 static void
3262 mlx5_rxq_obj_drop_release(struct rte_eth_dev *dev)
3263 {
3264 	struct mlx5_priv *priv = dev->data->dev_private;
3265 	struct mlx5_rxq_obj *rxq = priv->drop_queue.rxq;
3266 
3267 	if (rxq->wq)
3268 		claim_zero(mlx5_glue->destroy_wq(rxq->wq));
3269 	if (rxq->ibv_cq)
3270 		claim_zero(mlx5_glue->destroy_cq(rxq->ibv_cq));
3271 	mlx5_free(rxq);
3272 	priv->drop_queue.rxq = NULL;
3273 }
3274 
3275 /**
3276  * Create a drop indirection table.
3277  *
3278  * @param dev
3279  *   Pointer to Ethernet device.
3280  *
3281  * @return
3282  *   The Verbs/DevX object initialised, NULL otherwise and rte_errno is set.
3283  */
3284 static struct mlx5_ind_table_obj *
3285 mlx5_ind_table_obj_drop_new(struct rte_eth_dev *dev)
3286 {
3287 	struct mlx5_priv *priv = dev->data->dev_private;
3288 	struct mlx5_ind_table_obj *ind_tbl;
3289 	struct mlx5_rxq_obj *rxq;
3290 	struct mlx5_ind_table_obj tmpl;
3291 
3292 	rxq = mlx5_rxq_obj_drop_new(dev);
3293 	if (!rxq)
3294 		return NULL;
3295 	tmpl.ind_table = mlx5_glue->create_rwq_ind_table
3296 		(priv->sh->ctx,
3297 		 &(struct ibv_rwq_ind_table_init_attr){
3298 			.log_ind_tbl_size = 0,
3299 			.ind_tbl = &rxq->wq,
3300 			.comp_mask = 0,
3301 		 });
3302 	if (!tmpl.ind_table) {
3303 		DEBUG("port %u cannot allocate indirection table for drop"
3304 		      " queue",
3305 		      dev->data->port_id);
3306 		rte_errno = errno;
3307 		goto error;
3308 	}
3309 	ind_tbl = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*ind_tbl), 0,
3310 			      SOCKET_ID_ANY);
3311 	if (!ind_tbl) {
3312 		rte_errno = ENOMEM;
3313 		goto error;
3314 	}
3315 	ind_tbl->ind_table = tmpl.ind_table;
3316 	return ind_tbl;
3317 error:
3318 	mlx5_rxq_obj_drop_release(dev);
3319 	return NULL;
3320 }
3321 
3322 /**
3323  * Release a drop indirection table.
3324  *
3325  * @param dev
3326  *   Pointer to Ethernet device.
3327  */
3328 static void
3329 mlx5_ind_table_obj_drop_release(struct rte_eth_dev *dev)
3330 {
3331 	struct mlx5_priv *priv = dev->data->dev_private;
3332 	struct mlx5_ind_table_obj *ind_tbl = priv->drop_queue.hrxq->ind_table;
3333 
3334 	claim_zero(mlx5_glue->destroy_rwq_ind_table(ind_tbl->ind_table));
3335 	mlx5_rxq_obj_drop_release(dev);
3336 	mlx5_free(ind_tbl);
3337 	priv->drop_queue.hrxq->ind_table = NULL;
3338 }
3339 
3340 /**
3341  * Create a drop Rx Hash queue.
3342  *
3343  * @param dev
3344  *   Pointer to Ethernet device.
3345  *
3346  * @return
3347  *   The Verbs/DevX object initialised, NULL otherwise and rte_errno is set.
3348  */
3349 struct mlx5_hrxq *
3350 mlx5_hrxq_drop_new(struct rte_eth_dev *dev)
3351 {
3352 	struct mlx5_priv *priv = dev->data->dev_private;
3353 	struct mlx5_ind_table_obj *ind_tbl = NULL;
3354 	struct ibv_qp *qp = NULL;
3355 	struct mlx5_hrxq *hrxq = NULL;
3356 
3357 	if (priv->drop_queue.hrxq) {
3358 		rte_atomic32_inc(&priv->drop_queue.hrxq->refcnt);
3359 		return priv->drop_queue.hrxq;
3360 	}
3361 	hrxq = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*hrxq), 0, SOCKET_ID_ANY);
3362 	if (!hrxq) {
3363 		DRV_LOG(WARNING,
3364 			"port %u cannot allocate memory for drop queue",
3365 			dev->data->port_id);
3366 		rte_errno = ENOMEM;
3367 		goto error;
3368 	}
3369 	priv->drop_queue.hrxq = hrxq;
3370 	ind_tbl = mlx5_ind_table_obj_drop_new(dev);
3371 	if (!ind_tbl)
3372 		goto error;
3373 	hrxq->ind_table = ind_tbl;
3374 	qp = mlx5_glue->create_qp_ex(priv->sh->ctx,
3375 		 &(struct ibv_qp_init_attr_ex){
3376 			.qp_type = IBV_QPT_RAW_PACKET,
3377 			.comp_mask =
3378 				IBV_QP_INIT_ATTR_PD |
3379 				IBV_QP_INIT_ATTR_IND_TABLE |
3380 				IBV_QP_INIT_ATTR_RX_HASH,
3381 			.rx_hash_conf = (struct ibv_rx_hash_conf){
3382 				.rx_hash_function =
3383 					IBV_RX_HASH_FUNC_TOEPLITZ,
3384 				.rx_hash_key_len = MLX5_RSS_HASH_KEY_LEN,
3385 				.rx_hash_key = rss_hash_default_key,
3386 				.rx_hash_fields_mask = 0,
3387 				},
3388 			.rwq_ind_tbl = ind_tbl->ind_table,
3389 			.pd = priv->sh->pd
3390 		 });
3391 	if (!qp) {
3392 		DEBUG("port %u cannot allocate QP for drop queue",
3393 		      dev->data->port_id);
3394 		rte_errno = errno;
3395 		goto error;
3396 	}
3397 	hrxq->qp = qp;
3398 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
3399 	hrxq->action = mlx5_glue->dv_create_flow_action_dest_ibv_qp(hrxq->qp);
3400 	if (!hrxq->action) {
3401 		rte_errno = errno;
3402 		goto error;
3403 	}
3404 #endif
3405 	rte_atomic32_set(&hrxq->refcnt, 1);
3406 	return hrxq;
3407 error:
3408 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
3409 	if (hrxq && hrxq->action)
3410 		mlx5_glue->destroy_flow_action(hrxq->action);
3411 #endif
3412 	if (qp)
3413 		claim_zero(mlx5_glue->destroy_qp(hrxq->qp));
3414 	if (ind_tbl)
3415 		mlx5_ind_table_obj_drop_release(dev);
3416 	if (hrxq) {
3417 		priv->drop_queue.hrxq = NULL;
3418 		mlx5_free(hrxq);
3419 	}
3420 	return NULL;
3421 }
3422 
3423 /**
3424  * Release a drop hash Rx queue.
3425  *
3426  * @param dev
3427  *   Pointer to Ethernet device.
3428  */
3429 void
3430 mlx5_hrxq_drop_release(struct rte_eth_dev *dev)
3431 {
3432 	struct mlx5_priv *priv = dev->data->dev_private;
3433 	struct mlx5_hrxq *hrxq = priv->drop_queue.hrxq;
3434 
3435 	if (rte_atomic32_dec_and_test(&hrxq->refcnt)) {
3436 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
3437 		mlx5_glue->destroy_flow_action(hrxq->action);
3438 #endif
3439 		claim_zero(mlx5_glue->destroy_qp(hrxq->qp));
3440 		mlx5_ind_table_obj_drop_release(dev);
3441 		mlx5_free(hrxq);
3442 		priv->drop_queue.hrxq = NULL;
3443 	}
3444 }
3445 
3446 
3447 /**
3448  * Set the Rx queue timestamp conversion parameters
3449  *
3450  * @param[in] dev
3451  *   Pointer to the Ethernet device structure.
3452  */
3453 void
3454 mlx5_rxq_timestamp_set(struct rte_eth_dev *dev)
3455 {
3456 	struct mlx5_priv *priv = dev->data->dev_private;
3457 	struct mlx5_dev_ctx_shared *sh = priv->sh;
3458 	struct mlx5_rxq_data *data;
3459 	unsigned int i;
3460 
3461 	for (i = 0; i != priv->rxqs_n; ++i) {
3462 		if (!(*priv->rxqs)[i])
3463 			continue;
3464 		data = (*priv->rxqs)[i];
3465 		data->sh = sh;
3466 		data->rt_timestamp = priv->config.rt_timestamp;
3467 	}
3468 }
3469