xref: /dpdk/drivers/net/mlx5/mlx5_rxq.c (revision 8809f78c7dd9f33a44a4f89c58fc91ded34296ed)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2015 6WIND S.A.
3  * Copyright 2015 Mellanox Technologies, Ltd
4  */
5 
6 #include <stddef.h>
7 #include <errno.h>
8 #include <string.h>
9 #include <stdint.h>
10 #include <fcntl.h>
11 #include <sys/queue.h>
12 
13 #include <rte_mbuf.h>
14 #include <rte_malloc.h>
15 #include <rte_ethdev_driver.h>
16 #include <rte_common.h>
17 #include <rte_interrupts.h>
18 #include <rte_debug.h>
19 #include <rte_io.h>
20 #include <rte_eal_paging.h>
21 
22 #include <mlx5_glue.h>
23 #include <mlx5_malloc.h>
24 
25 #include "mlx5_defs.h"
26 #include "mlx5.h"
27 #include "mlx5_rxtx.h"
28 #include "mlx5_utils.h"
29 #include "mlx5_autoconf.h"
30 
31 
32 /* Default RSS hash key also used for ConnectX-3. */
33 uint8_t rss_hash_default_key[] = {
34 	0x2c, 0xc6, 0x81, 0xd1,
35 	0x5b, 0xdb, 0xf4, 0xf7,
36 	0xfc, 0xa2, 0x83, 0x19,
37 	0xdb, 0x1a, 0x3e, 0x94,
38 	0x6b, 0x9e, 0x38, 0xd9,
39 	0x2c, 0x9c, 0x03, 0xd1,
40 	0xad, 0x99, 0x44, 0xa7,
41 	0xd9, 0x56, 0x3d, 0x59,
42 	0x06, 0x3c, 0x25, 0xf3,
43 	0xfc, 0x1f, 0xdc, 0x2a,
44 };
45 
46 /* Length of the default RSS hash key. */
47 static_assert(MLX5_RSS_HASH_KEY_LEN ==
48 	      (unsigned int)sizeof(rss_hash_default_key),
49 	      "wrong RSS default key size.");
50 
51 /**
52  * Check whether Multi-Packet RQ can be enabled for the device.
53  *
54  * @param dev
55  *   Pointer to Ethernet device.
56  *
57  * @return
58  *   1 if supported, negative errno value if not.
59  */
60 inline int
61 mlx5_check_mprq_support(struct rte_eth_dev *dev)
62 {
63 	struct mlx5_priv *priv = dev->data->dev_private;
64 
65 	if (priv->config.mprq.enabled &&
66 	    priv->rxqs_n >= priv->config.mprq.min_rxqs_num)
67 		return 1;
68 	return -ENOTSUP;
69 }
70 
71 /**
72  * Check whether Multi-Packet RQ is enabled for the Rx queue.
73  *
74  * @param rxq
75  *   Pointer to receive queue structure.
76  *
77  * @return
78  *   0 if disabled, otherwise enabled.
79  */
80 inline int
81 mlx5_rxq_mprq_enabled(struct mlx5_rxq_data *rxq)
82 {
83 	return rxq->strd_num_n > 0;
84 }
85 
86 /**
87  * Check whether Multi-Packet RQ is enabled for the device.
88  *
89  * @param dev
90  *   Pointer to Ethernet device.
91  *
92  * @return
93  *   0 if disabled, otherwise enabled.
94  */
95 inline int
96 mlx5_mprq_enabled(struct rte_eth_dev *dev)
97 {
98 	struct mlx5_priv *priv = dev->data->dev_private;
99 	uint32_t i;
100 	uint16_t n = 0;
101 	uint16_t n_ibv = 0;
102 
103 	if (mlx5_check_mprq_support(dev) < 0)
104 		return 0;
105 	/* All the configured queues should be enabled. */
106 	for (i = 0; i < priv->rxqs_n; ++i) {
107 		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
108 		struct mlx5_rxq_ctrl *rxq_ctrl = container_of
109 			(rxq, struct mlx5_rxq_ctrl, rxq);
110 
111 		if (rxq == NULL || rxq_ctrl->type != MLX5_RXQ_TYPE_STANDARD)
112 			continue;
113 		n_ibv++;
114 		if (mlx5_rxq_mprq_enabled(rxq))
115 			++n;
116 	}
117 	/* Multi-Packet RQ can't be partially configured. */
118 	MLX5_ASSERT(n == 0 || n == n_ibv);
119 	return n == n_ibv;
120 }
121 
122 /**
123  * Calculate the number of CQEs in CQ for the Rx queue.
124  *
125  * @param rxq_data
126  *   Pointer to receive queue structure.
127  *
128  * @return
129  *   Number of CQEs in CQ.
130  */
131 unsigned int
132 mlx5_rxq_cqe_num(struct mlx5_rxq_data *rxq_data)
133 {
134 	unsigned int cqe_n;
135 	unsigned int wqe_n = 1 << rxq_data->elts_n;
136 
137 	if (mlx5_rxq_mprq_enabled(rxq_data))
138 		cqe_n = wqe_n * (1 << rxq_data->strd_num_n) - 1;
139 	else
140 		cqe_n = wqe_n - 1;
141 	return cqe_n;
142 }
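
/*
 * Worked example (illustrative only): with elts_n = 8 the WQ holds
 * 1 << 8 = 256 WQEs.  Without MPRQ the CQ needs 256 - 1 = 255 CQEs;
 * with MPRQ and strd_num_n = 6 (64 strides per WQE) it needs
 * 256 * 64 - 1 = 16383 CQEs, since every stride may complete as a
 * separate packet.
 */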
143 
144 /**
145  * Allocate RX queue elements for Multi-Packet RQ.
146  *
147  * @param rxq_ctrl
148  *   Pointer to RX queue structure.
149  *
150  * @return
151  *   0 on success, a negative errno value otherwise and rte_errno is set.
152  */
153 static int
154 rxq_alloc_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
155 {
156 	struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
157 	unsigned int wqe_n = 1 << rxq->elts_n;
158 	unsigned int i;
159 	int err;
160 
161 	/* Iterate on segments. */
162 	for (i = 0; i <= wqe_n; ++i) {
163 		struct mlx5_mprq_buf *buf;
164 
165 		if (rte_mempool_get(rxq->mprq_mp, (void **)&buf) < 0) {
166 			DRV_LOG(ERR, "port %u empty mbuf pool", rxq->port_id);
167 			rte_errno = ENOMEM;
168 			goto error;
169 		}
170 		if (i < wqe_n)
171 			(*rxq->mprq_bufs)[i] = buf;
172 		else
173 			rxq->mprq_repl = buf;
174 	}
175 	DRV_LOG(DEBUG,
176 		"port %u Rx queue %u allocated and configured %u segments",
177 		rxq->port_id, rxq->idx, wqe_n);
178 	return 0;
179 error:
180 	err = rte_errno; /* Save rte_errno before cleanup. */
181 	wqe_n = i;
182 	for (i = 0; (i != wqe_n); ++i) {
183 		if ((*rxq->mprq_bufs)[i] != NULL)
184 			rte_mempool_put(rxq->mprq_mp,
185 					(*rxq->mprq_bufs)[i]);
186 		(*rxq->mprq_bufs)[i] = NULL;
187 	}
188 	DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything",
189 		rxq->port_id, rxq->idx);
190 	rte_errno = err; /* Restore rte_errno. */
191 	return -rte_errno;
192 }
193 
194 /**
195  * Allocate RX queue elements for Single-Packet RQ.
196  *
197  * @param rxq_ctrl
198  *   Pointer to RX queue structure.
199  *
200  * @return
201  *   0 on success, errno value on failure.
202  */
203 static int
204 rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
205 {
206 	const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n;
207 	unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n;
208 	unsigned int i;
209 	int err;
210 
211 	/* Iterate on segments. */
212 	for (i = 0; (i != elts_n); ++i) {
213 		struct rte_mbuf *buf;
214 
215 		buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp);
216 		if (buf == NULL) {
217 			DRV_LOG(ERR, "port %u empty mbuf pool",
218 				PORT_ID(rxq_ctrl->priv));
219 			rte_errno = ENOMEM;
220 			goto error;
221 		}
222 		/* Headroom is reserved by rte_pktmbuf_alloc(). */
223 		MLX5_ASSERT(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
224 		/* Buffer is supposed to be empty. */
225 		MLX5_ASSERT(rte_pktmbuf_data_len(buf) == 0);
226 		MLX5_ASSERT(rte_pktmbuf_pkt_len(buf) == 0);
227 		MLX5_ASSERT(!buf->next);
228 		/* Only the first segment keeps headroom. */
229 		if (i % sges_n)
230 			SET_DATA_OFF(buf, 0);
231 		PORT(buf) = rxq_ctrl->rxq.port_id;
232 		DATA_LEN(buf) = rte_pktmbuf_tailroom(buf);
233 		PKT_LEN(buf) = DATA_LEN(buf);
234 		NB_SEGS(buf) = 1;
235 		(*rxq_ctrl->rxq.elts)[i] = buf;
236 	}
237 	/* If Rx vector is activated. */
238 	if (mlx5_rxq_check_vec_support(&rxq_ctrl->rxq) > 0) {
239 		struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
240 		struct rte_mbuf *mbuf_init = &rxq->fake_mbuf;
241 		struct rte_pktmbuf_pool_private *priv =
242 			(struct rte_pktmbuf_pool_private *)
243 				rte_mempool_get_priv(rxq_ctrl->rxq.mp);
244 		int j;
245 
246 		/* Initialize default rearm_data for vPMD. */
247 		mbuf_init->data_off = RTE_PKTMBUF_HEADROOM;
248 		rte_mbuf_refcnt_set(mbuf_init, 1);
249 		mbuf_init->nb_segs = 1;
250 		mbuf_init->port = rxq->port_id;
251 		if (priv->flags & RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF)
252 			mbuf_init->ol_flags = EXT_ATTACHED_MBUF;
253 		/*
254 		 * prevent compiler reordering:
255 		 * rearm_data covers previous fields.
256 		 */
257 		rte_compiler_barrier();
258 		rxq->mbuf_initializer =
259 			*(rte_xmm_t *)&mbuf_init->rearm_data;
260 		/* Padding with a fake mbuf for vectorized Rx. */
261 		for (j = 0; j < MLX5_VPMD_DESCS_PER_LOOP; ++j)
262 			(*rxq->elts)[elts_n + j] = &rxq->fake_mbuf;
263 	}
264 	DRV_LOG(DEBUG,
265 		"port %u Rx queue %u allocated and configured %u segments"
266 		" (max %u packets)",
267 		PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx, elts_n,
268 		elts_n / (1 << rxq_ctrl->rxq.sges_n));
269 	return 0;
270 error:
271 	err = rte_errno; /* Save rte_errno before cleanup. */
272 	elts_n = i;
273 	for (i = 0; (i != elts_n); ++i) {
274 		if ((*rxq_ctrl->rxq.elts)[i] != NULL)
275 			rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]);
276 		(*rxq_ctrl->rxq.elts)[i] = NULL;
277 	}
278 	DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything",
279 		PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx);
280 	rte_errno = err; /* Restore rte_errno. */
281 	return -rte_errno;
282 }
283 
284 /**
285  * Allocate RX queue elements.
286  *
287  * @param rxq_ctrl
288  *   Pointer to RX queue structure.
289  *
290  * @return
291  *   0 on success, errno value on failure.
292  */
293 int
294 rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
295 {
296 	return mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ?
297 	       rxq_alloc_elts_mprq(rxq_ctrl) : rxq_alloc_elts_sprq(rxq_ctrl);
298 }
299 
300 /**
301  * Free RX queue elements for Multi-Packet RQ.
302  *
303  * @param rxq_ctrl
304  *   Pointer to RX queue structure.
305  */
306 static void
307 rxq_free_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
308 {
309 	struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
310 	uint16_t i;
311 
312 	DRV_LOG(DEBUG, "port %u Multi-Packet Rx queue %u freeing WRs",
313 		rxq->port_id, rxq->idx);
314 	if (rxq->mprq_bufs == NULL)
315 		return;
316 	MLX5_ASSERT(mlx5_rxq_check_vec_support(rxq) < 0);
317 	for (i = 0; (i != (1u << rxq->elts_n)); ++i) {
318 		if ((*rxq->mprq_bufs)[i] != NULL)
319 			mlx5_mprq_buf_free((*rxq->mprq_bufs)[i]);
320 		(*rxq->mprq_bufs)[i] = NULL;
321 	}
322 	if (rxq->mprq_repl != NULL) {
323 		mlx5_mprq_buf_free(rxq->mprq_repl);
324 		rxq->mprq_repl = NULL;
325 	}
326 }
327 
328 /**
329  * Free RX queue elements for Single-Packet RQ.
330  *
331  * @param rxq_ctrl
332  *   Pointer to RX queue structure.
333  */
334 static void
335 rxq_free_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
336 {
337 	struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
338 	const uint16_t q_n = (1 << rxq->elts_n);
339 	const uint16_t q_mask = q_n - 1;
340 	uint16_t used = q_n - (rxq->rq_ci - rxq->rq_pi);
341 	uint16_t i;
342 
343 	DRV_LOG(DEBUG, "port %u Rx queue %u freeing WRs",
344 		PORT_ID(rxq_ctrl->priv), rxq->idx);
345 	if (rxq->elts == NULL)
346 		return;
347 	/**
348 	 * Some mbufs in the ring still belong to the application; they
349 	 * cannot be freed here.
350 	 */
351 	if (mlx5_rxq_check_vec_support(rxq) > 0) {
352 		for (i = 0; i < used; ++i)
353 			(*rxq->elts)[(rxq->rq_ci + i) & q_mask] = NULL;
354 		rxq->rq_pi = rxq->rq_ci;
355 	}
356 	for (i = 0; (i != (1u << rxq->elts_n)); ++i) {
357 		if ((*rxq->elts)[i] != NULL)
358 			rte_pktmbuf_free_seg((*rxq->elts)[i]);
359 		(*rxq->elts)[i] = NULL;
360 	}
361 }
362 
363 /**
364  * Free RX queue elements.
365  *
366  * @param rxq_ctrl
367  *   Pointer to RX queue structure.
368  */
369 static void
370 rxq_free_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
371 {
372 	if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq))
373 		rxq_free_elts_mprq(rxq_ctrl);
374 	else
375 		rxq_free_elts_sprq(rxq_ctrl);
376 }
377 
378 /**
379  * Returns the per-queue supported offloads.
380  *
381  * @param dev
382  *   Pointer to Ethernet device.
383  *
384  * @return
385  *   Supported Rx offloads.
386  */
387 uint64_t
388 mlx5_get_rx_queue_offloads(struct rte_eth_dev *dev)
389 {
390 	struct mlx5_priv *priv = dev->data->dev_private;
391 	struct mlx5_dev_config *config = &priv->config;
392 	uint64_t offloads = (DEV_RX_OFFLOAD_SCATTER |
393 			     DEV_RX_OFFLOAD_TIMESTAMP |
394 			     DEV_RX_OFFLOAD_JUMBO_FRAME |
395 			     DEV_RX_OFFLOAD_RSS_HASH);
396 
397 	if (config->hw_fcs_strip)
398 		offloads |= DEV_RX_OFFLOAD_KEEP_CRC;
399 
400 	if (config->hw_csum)
401 		offloads |= (DEV_RX_OFFLOAD_IPV4_CKSUM |
402 			     DEV_RX_OFFLOAD_UDP_CKSUM |
403 			     DEV_RX_OFFLOAD_TCP_CKSUM);
404 	if (config->hw_vlan_strip)
405 		offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
406 	if (MLX5_LRO_SUPPORTED(dev))
407 		offloads |= DEV_RX_OFFLOAD_TCP_LRO;
408 	return offloads;
409 }
410 
411 
412 /**
413  * Returns the per-port supported offloads.
414  *
415  * @return
416  *   Supported Rx offloads.
417  */
418 uint64_t
419 mlx5_get_rx_port_offloads(void)
420 {
421 	uint64_t offloads = DEV_RX_OFFLOAD_VLAN_FILTER;
422 
423 	return offloads;
424 }
425 
426 /**
427  * Verify if the queue can be released.
428  *
429  * @param dev
430  *   Pointer to Ethernet device.
431  * @param idx
432  *   RX queue index.
433  *
434  * @return
435  *   1 if the queue can be released,
436  *   0 if the queue cannot be released because there are references to it,
437  *   a negative errno value (rte_errno is set) if the queue doesn't exist.
438  */
439 static int
440 mlx5_rxq_releasable(struct rte_eth_dev *dev, uint16_t idx)
441 {
442 	struct mlx5_priv *priv = dev->data->dev_private;
443 	struct mlx5_rxq_ctrl *rxq_ctrl;
444 
445 	if (!(*priv->rxqs)[idx]) {
446 		rte_errno = EINVAL;
447 		return -rte_errno;
448 	}
449 	rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
450 	return (rte_atomic32_read(&rxq_ctrl->refcnt) == 1);
451 }
452 
453 /* Fetches and drops all SW-owned and error CQEs to synchronize CQ. */
454 static void
455 rxq_sync_cq(struct mlx5_rxq_data *rxq)
456 {
457 	const uint16_t cqe_n = 1 << rxq->cqe_n;
458 	const uint16_t cqe_mask = cqe_n - 1;
459 	volatile struct mlx5_cqe *cqe;
460 	int ret, i;
461 
462 	i = cqe_n;
463 	do {
464 		cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask];
465 		ret = check_cqe(cqe, cqe_n, rxq->cq_ci);
466 		if (ret == MLX5_CQE_STATUS_HW_OWN)
467 			break;
468 		if (ret == MLX5_CQE_STATUS_ERR) {
469 			rxq->cq_ci++;
470 			continue;
471 		}
472 		MLX5_ASSERT(ret == MLX5_CQE_STATUS_SW_OWN);
473 		if (MLX5_CQE_FORMAT(cqe->op_own) != MLX5_COMPRESSED) {
474 			rxq->cq_ci++;
475 			continue;
476 		}
477 		/* Compute the next non compressed CQE. */
478 		rxq->cq_ci += rte_be_to_cpu_32(cqe->byte_cnt);
479 
480 	} while (--i);
481 	/* Move all CQEs to HW ownership, including possible MiniCQEs. */
482 	for (i = 0; i < cqe_n; i++) {
483 		cqe = &(*rxq->cqes)[i];
484 		cqe->op_own = MLX5_CQE_INVALIDATE;
485 	}
486 	/* Resync CQE and WQE (WQ in RESET state). */
487 	rte_io_wmb();
488 	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
489 	rte_io_wmb();
490 	*rxq->rq_db = rte_cpu_to_be_32(0);
491 	rte_io_wmb();
492 }
493 
494 /**
495  * Rx queue stop. Device queue goes to the RESET state,
496  * all involved mbufs are freed from WQ.
497  *
498  * @param dev
499  *   Pointer to Ethernet device structure.
500  * @param idx
501  *   RX queue index.
502  *
503  * @return
504  *   0 on success, a negative errno value otherwise and rte_errno is set.
505  */
506 int
507 mlx5_rx_queue_stop_primary(struct rte_eth_dev *dev, uint16_t idx)
508 {
509 	struct mlx5_priv *priv = dev->data->dev_private;
510 	struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
511 	struct mlx5_rxq_ctrl *rxq_ctrl =
512 			container_of(rxq, struct mlx5_rxq_ctrl, rxq);
513 	int ret;
514 
515 	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
516 	ret = priv->obj_ops.rxq_obj_modify(rxq_ctrl->obj, MLX5_RXQ_MOD_RDY2RST);
517 	if (ret) {
518 		DRV_LOG(ERR, "Cannot change Rx WQ state to RESET: %s",
519 			strerror(errno));
520 		rte_errno = errno;
521 		return ret;
522 	}
523 	/* Remove all processed CQEs. */
524 	rxq_sync_cq(rxq);
525 	/* Free all involved mbufs. */
526 	rxq_free_elts(rxq_ctrl);
527 	/* Set the actual queue state. */
528 	dev->data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STOPPED;
529 	return 0;
530 }
531 
532 /**
533  * Rx queue stop. Device queue goes to the RESET state,
534  * all involved mbufs are freed from WQ.
535  *
536  * @param dev
537  *   Pointer to Ethernet device structure.
538  * @param idx
539  *   RX queue index.
540  *
541  * @return
542  *   0 on success, a negative errno value otherwise and rte_errno is set.
543  */
544 int
545 mlx5_rx_queue_stop(struct rte_eth_dev *dev, uint16_t idx)
546 {
547 	eth_rx_burst_t pkt_burst = dev->rx_pkt_burst;
548 	int ret;
549 
550 	if (rte_eth_dev_is_rx_hairpin_queue(dev, idx)) {
551 		DRV_LOG(ERR, "Hairpin queue can't be stopped");
552 		rte_errno = EINVAL;
553 		return -EINVAL;
554 	}
555 	if (dev->data->rx_queue_state[idx] == RTE_ETH_QUEUE_STATE_STOPPED)
556 		return 0;
557 	/*
558 	 * Vectorized Rx burst requires the CQ and RQ indices to stay
559 	 * synchronized, which might be broken on RQ restart and cause
560 	 * Rx malfunction, so queue stopping is not supported when the
561 	 * vectorized Rx burst is engaged. The burst routine pointer
562 	 * depends on the process type, so the check is performed here,
563 	 * in the per-process entry point.
564 	 */
565 	if (pkt_burst == mlx5_rx_burst_vec) {
566 		DRV_LOG(ERR, "Rx queue stop is not supported "
567 			"for vectorized Rx");
568 		rte_errno = EINVAL;
569 		return -EINVAL;
570 	}
571 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
572 		ret = mlx5_mp_os_req_queue_control(dev, idx,
573 						   MLX5_MP_REQ_QUEUE_RX_STOP);
574 	} else {
575 		ret = mlx5_rx_queue_stop_primary(dev, idx);
576 	}
577 	return ret;
578 }
579 
580 /**
581  * Rx queue start. Device queue goes to the ready state,
582  * all required mbufs are allocated and WQ is replenished.
583  *
584  * @param dev
585  *   Pointer to Ethernet device structure.
586  * @param idx
587  *   RX queue index.
588  *
589  * @return
590  *   0 on success, a negative errno value otherwise and rte_errno is set.
591  */
592 int
593 mlx5_rx_queue_start_primary(struct rte_eth_dev *dev, uint16_t idx)
594 {
595 	struct mlx5_priv *priv = dev->data->dev_private;
596 	struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
597 	struct mlx5_rxq_ctrl *rxq_ctrl =
598 			container_of(rxq, struct mlx5_rxq_ctrl, rxq);
599 	int ret;
600 
601 	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
602 	/* Allocate needed buffers. */
603 	ret = rxq_alloc_elts(rxq_ctrl);
604 	if (ret) {
605 		DRV_LOG(ERR, "Cannot reallocate buffers for Rx WQ");
606 		rte_errno = errno;
607 		return ret;
608 	}
609 	rte_io_wmb();
610 	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
611 	rte_io_wmb();
612 	/* Reset RQ consumer before moving queue to READY state. */
613 	*rxq->rq_db = rte_cpu_to_be_32(0);
614 	rte_io_wmb();
615 	ret = priv->obj_ops.rxq_obj_modify(rxq_ctrl->obj, MLX5_RXQ_MOD_RST2RDY);
616 	if (ret) {
617 		DRV_LOG(ERR, "Cannot change Rx WQ state to READY: %s",
618 			strerror(errno));
619 		rte_errno = errno;
620 		return ret;
621 	}
622 	/* Reinitialize RQ - set WQEs. */
623 	mlx5_rxq_initialize(rxq);
624 	rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR;
625 	/* Set actual queue state. */
626 	dev->data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED;
627 	return 0;
628 }
629 
630 /**
631  * Rx queue start. Device queue goes to the ready state,
632  * all required mbufs are allocated and WQ is replenished.
633  *
634  * @param dev
635  *   Pointer to Ethernet device structure.
636  * @param idx
637  *   RX queue index.
638  *
639  * @return
640  *   0 on success, a negative errno value otherwise and rte_errno is set.
641  */
642 int
643 mlx5_rx_queue_start(struct rte_eth_dev *dev, uint16_t idx)
644 {
645 	int ret;
646 
647 	if (rte_eth_dev_is_rx_hairpin_queue(dev, idx)) {
648 		DRV_LOG(ERR, "Hairpin queue can't be started");
649 		rte_errno = EINVAL;
650 		return -EINVAL;
651 	}
652 	if (dev->data->rx_queue_state[idx] == RTE_ETH_QUEUE_STATE_STARTED)
653 		return 0;
654 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
655 		ret = mlx5_mp_os_req_queue_control(dev, idx,
656 						   MLX5_MP_REQ_QUEUE_RX_START);
657 	} else {
658 		ret = mlx5_rx_queue_start_primary(dev, idx);
659 	}
660 	return ret;
661 }
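
/*
 * Usage sketch (illustrative, not part of the driver): an application
 * reaches the stop/start callbacks above through the generic ethdev
 * API; port_id and queue_id below are hypothetical.
 *
 *	uint16_t port_id = 0, queue_id = 0;
 *
 *	if (rte_eth_dev_rx_queue_stop(port_id, queue_id) == 0) {
 *		// Queue is in RTE_ETH_QUEUE_STATE_STOPPED, mbufs are freed.
 *		rte_eth_dev_rx_queue_start(port_id, queue_id);
 *	}
 */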
662 
663 /**
664  * Rx queue presetup checks.
665  *
666  * @param dev
667  *   Pointer to Ethernet device structure.
668  * @param idx
669  *   RX queue index.
670  * @param desc
671  *   Number of descriptors to configure in queue.
672  *
673  * @return
674  *   0 on success, a negative errno value otherwise and rte_errno is set.
675  */
676 static int
677 mlx5_rx_queue_pre_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t *desc)
678 {
679 	struct mlx5_priv *priv = dev->data->dev_private;
680 
681 	if (!rte_is_power_of_2(*desc)) {
682 		*desc = 1 << log2above(*desc);
683 		DRV_LOG(WARNING,
684 			"port %u increased number of descriptors in Rx queue %u"
685 			" to the next power of two (%d)",
686 			dev->data->port_id, idx, *desc);
687 	}
688 	DRV_LOG(DEBUG, "port %u configuring Rx queue %u for %u descriptors",
689 		dev->data->port_id, idx, *desc);
690 	if (idx >= priv->rxqs_n) {
691 		DRV_LOG(ERR, "port %u Rx queue index out of range (%u >= %u)",
692 			dev->data->port_id, idx, priv->rxqs_n);
693 		rte_errno = EOVERFLOW;
694 		return -rte_errno;
695 	}
696 	if (!mlx5_rxq_releasable(dev, idx)) {
697 		DRV_LOG(ERR, "port %u unable to release queue index %u",
698 			dev->data->port_id, idx);
699 		rte_errno = EBUSY;
700 		return -rte_errno;
701 	}
702 	mlx5_rxq_release(dev, idx);
703 	return 0;
704 }
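
/*
 * Example (illustrative): a request for 1000 descriptors is rounded up
 * to 1 << log2above(1000) = 1024 before the queue is created, keeping
 * the ring size a power of two.
 */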
705 
706 /**
707  * DPDK callback to configure an Rx queue.
708  * @param dev
709  *   Pointer to Ethernet device structure.
710  * @param idx
711  *   RX queue index.
712  * @param desc
713  *   Number of descriptors to configure in queue.
714  * @param socket
715  *   NUMA socket on which memory must be allocated.
716  * @param[in] conf
717  *   Thresholds parameters.
718  * @param mp
719  *   Memory pool for buffer allocations.
720  *
721  * @return
722  *   0 on success, a negative errno value otherwise and rte_errno is set.
723  */
724 int
725 mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
726 		    unsigned int socket, const struct rte_eth_rxconf *conf,
727 		    struct rte_mempool *mp)
728 {
729 	struct mlx5_priv *priv = dev->data->dev_private;
730 	struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
731 	struct mlx5_rxq_ctrl *rxq_ctrl =
732 		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
733 	int res;
734 
735 	res = mlx5_rx_queue_pre_setup(dev, idx, &desc);
736 	if (res)
737 		return res;
738 	rxq_ctrl = mlx5_rxq_new(dev, idx, desc, socket, conf, mp);
739 	if (!rxq_ctrl) {
740 		DRV_LOG(ERR, "port %u unable to allocate queue index %u",
741 			dev->data->port_id, idx);
742 		rte_errno = ENOMEM;
743 		return -rte_errno;
744 	}
745 	DRV_LOG(DEBUG, "port %u adding Rx queue %u to list",
746 		dev->data->port_id, idx);
747 	(*priv->rxqs)[idx] = &rxq_ctrl->rxq;
748 	return 0;
749 }
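
/*
 * Usage sketch (illustrative only): this callback is reached through
 * rte_eth_rx_queue_setup(); the pool name, sizes and port_id below are
 * hypothetical application choices.
 *
 *	struct rte_mempool *mp = rte_pktmbuf_pool_create("rx_pool", 8192,
 *			256, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
 *	int rc = rte_eth_rx_queue_setup(port_id, 0, 512, rte_socket_id(),
 *					NULL, mp);
 *	if (rc < 0)
 *		rte_exit(EXIT_FAILURE, "Rx queue setup failed: %d\n", rc);
 */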
750 
751 /**
752  * DPDK callback to configure a hairpin Rx queue.
753  * @param dev
754  *   Pointer to Ethernet device structure.
755  * @param idx
756  *   RX queue index.
757  * @param desc
758  *   Number of descriptors to configure in queue.
759  * @param hairpin_conf
760  *   Hairpin configuration parameters.
761  *
762  * @return
763  *   0 on success, a negative errno value otherwise and rte_errno is set.
764  */
765 int
766 mlx5_rx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
767 			    uint16_t desc,
768 			    const struct rte_eth_hairpin_conf *hairpin_conf)
769 {
770 	struct mlx5_priv *priv = dev->data->dev_private;
771 	struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
772 	struct mlx5_rxq_ctrl *rxq_ctrl =
773 		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
774 	int res;
775 
776 	res = mlx5_rx_queue_pre_setup(dev, idx, &desc);
777 	if (res)
778 		return res;
779 	if (hairpin_conf->peer_count != 1 ||
780 	    hairpin_conf->peers[0].port != dev->data->port_id ||
781 	    hairpin_conf->peers[0].queue >= priv->txqs_n) {
782 		DRV_LOG(ERR, "port %u unable to setup hairpin queue index %u:"
783 			" invalid hairpin configuration", dev->data->port_id,
784 			idx);
785 		rte_errno = EINVAL;
786 		return -rte_errno;
787 	}
788 	rxq_ctrl = mlx5_rxq_hairpin_new(dev, idx, desc, hairpin_conf);
789 	if (!rxq_ctrl) {
790 		DRV_LOG(ERR, "port %u unable to allocate queue index %u",
791 			dev->data->port_id, idx);
792 		rte_errno = ENOMEM;
793 		return -rte_errno;
794 	}
795 	DRV_LOG(DEBUG, "port %u adding Rx queue %u to list",
796 		dev->data->port_id, idx);
797 	(*priv->rxqs)[idx] = &rxq_ctrl->rxq;
798 	return 0;
799 }
800 
801 /**
802  * DPDK callback to release a RX queue.
803  *
804  * @param dpdk_rxq
805  *   Generic RX queue pointer.
806  */
807 void
808 mlx5_rx_queue_release(void *dpdk_rxq)
809 {
810 	struct mlx5_rxq_data *rxq = (struct mlx5_rxq_data *)dpdk_rxq;
811 	struct mlx5_rxq_ctrl *rxq_ctrl;
812 	struct mlx5_priv *priv;
813 
814 	if (rxq == NULL)
815 		return;
816 	rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
817 	priv = rxq_ctrl->priv;
818 	if (!mlx5_rxq_releasable(ETH_DEV(priv), rxq_ctrl->rxq.idx))
819 		rte_panic("port %u Rx queue %u is still used by a flow and"
820 			  " cannot be removed\n",
821 			  PORT_ID(priv), rxq->idx);
822 	mlx5_rxq_release(ETH_DEV(priv), rxq_ctrl->rxq.idx);
823 }
824 
825 /**
826  * Allocate queue vector and fill epoll fd list for Rx interrupts.
827  *
828  * @param dev
829  *   Pointer to Ethernet device.
830  *
831  * @return
832  *   0 on success, a negative errno value otherwise and rte_errno is set.
833  */
834 int
835 mlx5_rx_intr_vec_enable(struct rte_eth_dev *dev)
836 {
837 	struct mlx5_priv *priv = dev->data->dev_private;
838 	unsigned int i;
839 	unsigned int rxqs_n = priv->rxqs_n;
840 	unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
841 	unsigned int count = 0;
842 	struct rte_intr_handle *intr_handle = dev->intr_handle;
843 
844 	if (!dev->data->dev_conf.intr_conf.rxq)
845 		return 0;
846 	mlx5_rx_intr_vec_disable(dev);
847 	intr_handle->intr_vec = mlx5_malloc(0,
848 				n * sizeof(intr_handle->intr_vec[0]),
849 				0, SOCKET_ID_ANY);
850 	if (intr_handle->intr_vec == NULL) {
851 		DRV_LOG(ERR,
852 			"port %u failed to allocate memory for interrupt"
853 			" vector, Rx interrupts will not be supported",
854 			dev->data->port_id);
855 		rte_errno = ENOMEM;
856 		return -rte_errno;
857 	}
858 	intr_handle->type = RTE_INTR_HANDLE_EXT;
859 	for (i = 0; i != n; ++i) {
860 		/* This rxq obj must not be released in this function. */
861 		struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_get(dev, i);
862 		struct mlx5_rxq_obj *rxq_obj = rxq_ctrl ? rxq_ctrl->obj : NULL;
863 		int rc;
864 
865 		/* Skip queues that cannot request interrupts. */
866 		if (!rxq_obj || (!rxq_obj->ibv_channel &&
867 				 !rxq_obj->devx_channel)) {
868 			/* Use invalid intr_vec[] index to disable entry. */
869 			intr_handle->intr_vec[i] =
870 				RTE_INTR_VEC_RXTX_OFFSET +
871 				RTE_MAX_RXTX_INTR_VEC_ID;
872 			/* Decrease the rxq_ctrl's refcnt */
873 			if (rxq_ctrl)
874 				mlx5_rxq_release(dev, i);
875 			continue;
876 		}
877 		if (count >= RTE_MAX_RXTX_INTR_VEC_ID) {
878 			DRV_LOG(ERR,
879 				"port %u too many Rx queues for interrupt"
880 				" vector size (%d), Rx interrupts cannot be"
881 				" enabled",
882 				dev->data->port_id, RTE_MAX_RXTX_INTR_VEC_ID);
883 			mlx5_rx_intr_vec_disable(dev);
884 			rte_errno = ENOMEM;
885 			return -rte_errno;
886 		}
887 		rc = mlx5_os_set_nonblock_channel_fd(rxq_obj->fd);
888 		if (rc < 0) {
889 			rte_errno = errno;
890 			DRV_LOG(ERR,
891 				"port %u failed to make Rx interrupt file"
892 				" descriptor %d non-blocking for queue index"
893 				" %d",
894 				dev->data->port_id, rxq_obj->fd, i);
895 			mlx5_rx_intr_vec_disable(dev);
896 			return -rte_errno;
897 		}
898 		intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + count;
899 		intr_handle->efds[count] = rxq_obj->fd;
900 		count++;
901 	}
902 	if (!count)
903 		mlx5_rx_intr_vec_disable(dev);
904 	else
905 		intr_handle->nb_efd = count;
906 	return 0;
907 }
908 
909 /**
910  * Clean up Rx interrupts handler.
911  *
912  * @param dev
913  *   Pointer to Ethernet device.
914  */
915 void
916 mlx5_rx_intr_vec_disable(struct rte_eth_dev *dev)
917 {
918 	struct mlx5_priv *priv = dev->data->dev_private;
919 	struct rte_intr_handle *intr_handle = dev->intr_handle;
920 	unsigned int i;
921 	unsigned int rxqs_n = priv->rxqs_n;
922 	unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
923 
924 	if (!dev->data->dev_conf.intr_conf.rxq)
925 		return;
926 	if (!intr_handle->intr_vec)
927 		goto free;
928 	for (i = 0; i != n; ++i) {
929 		if (intr_handle->intr_vec[i] == RTE_INTR_VEC_RXTX_OFFSET +
930 		    RTE_MAX_RXTX_INTR_VEC_ID)
931 			continue;
932 		/**
933 		 * Need to access directly the queue to release the reference
934 		 * kept in mlx5_rx_intr_vec_enable().
935 		 */
936 		mlx5_rxq_release(dev, i);
937 	}
938 free:
939 	rte_intr_free_epoll_fd(intr_handle);
940 	if (intr_handle->intr_vec)
941 		mlx5_free(intr_handle->intr_vec);
942 	intr_handle->nb_efd = 0;
943 	intr_handle->intr_vec = NULL;
944 }
945 
946 /**
947  * MLX5 CQ notification.
948  *
949  * @param rxq
950  *   Pointer to receive queue structure.
951  * @param sq_n_rxq
952  *   Sequence number per receive queue.
953  */
954 static inline void
955 mlx5_arm_cq(struct mlx5_rxq_data *rxq, int sq_n_rxq)
956 {
957 	int sq_n = 0;
958 	uint32_t doorbell_hi;
959 	uint64_t doorbell;
960 	void *cq_db_reg = (char *)rxq->cq_uar + MLX5_CQ_DOORBELL;
961 
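	/*
	 * Descriptive note: the 64-bit doorbell value composed below packs
	 * the arm sequence number and the CQ consumer index into the high
	 * word (also stored in cq_db[MLX5_CQ_ARM_DB]) and the CQ number
	 * into the low word, as expected by the device when re-arming the
	 * CQ for the next completion event.
	 */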
962 	sq_n = sq_n_rxq & MLX5_CQ_SQN_MASK;
963 	doorbell_hi = sq_n << MLX5_CQ_SQN_OFFSET | (rxq->cq_ci & MLX5_CI_MASK);
964 	doorbell = (uint64_t)doorbell_hi << 32;
965 	doorbell |= rxq->cqn;
966 	rxq->cq_db[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(doorbell_hi);
967 	mlx5_uar_write64(rte_cpu_to_be_64(doorbell),
968 			 cq_db_reg, rxq->uar_lock_cq);
969 }
970 
971 /**
972  * DPDK callback for Rx queue interrupt enable.
973  *
974  * @param dev
975  *   Pointer to Ethernet device structure.
976  * @param rx_queue_id
977  *   Rx queue number.
978  *
979  * @return
980  *   0 on success, a negative errno value otherwise and rte_errno is set.
981  */
982 int
983 mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
984 {
985 	struct mlx5_rxq_ctrl *rxq_ctrl;
986 
987 	rxq_ctrl = mlx5_rxq_get(dev, rx_queue_id);
988 	if (!rxq_ctrl)
989 		goto error;
990 	if (rxq_ctrl->irq) {
991 		if (!rxq_ctrl->obj) {
992 			mlx5_rxq_release(dev, rx_queue_id);
993 			goto error;
994 		}
995 		mlx5_arm_cq(&rxq_ctrl->rxq, rxq_ctrl->rxq.cq_arm_sn);
996 	}
997 	mlx5_rxq_release(dev, rx_queue_id);
998 	return 0;
999 error:
1000 	rte_errno = EINVAL;
1001 	return -rte_errno;
1002 }
1003 
1004 /**
1005  * DPDK callback for Rx queue interrupt disable.
1006  *
1007  * @param dev
1008  *   Pointer to Ethernet device structure.
1009  * @param rx_queue_id
1010  *   Rx queue number.
1011  *
1012  * @return
1013  *   0 on success, a negative errno value otherwise and rte_errno is set.
1014  */
1015 int
1016 mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
1017 {
1018 	struct mlx5_priv *priv = dev->data->dev_private;
1019 	struct mlx5_rxq_ctrl *rxq_ctrl;
1020 	int ret = 0;
1021 
1022 	rxq_ctrl = mlx5_rxq_get(dev, rx_queue_id);
1023 	if (!rxq_ctrl) {
1024 		rte_errno = EINVAL;
1025 		return -rte_errno;
1026 	}
1027 	if (!rxq_ctrl->obj)
1028 		goto error;
1029 	if (rxq_ctrl->irq) {
1030 		ret = priv->obj_ops.rxq_event_get(rxq_ctrl->obj);
1031 		if (ret < 0)
1032 			goto error;
1033 		rxq_ctrl->rxq.cq_arm_sn++;
1034 	}
1035 	mlx5_rxq_release(dev, rx_queue_id);
1036 	return 0;
1037 error:
1038 	/**
1039 	 * The ret variable may be EAGAIN, which means the rxq_event_get
1040 	 * callback was called before any event was received.
1041 	 */
1042 	if (ret < 0)
1043 		rte_errno = errno;
1044 	else
1045 		rte_errno = EINVAL;
1046 	ret = rte_errno; /* Save rte_errno before cleanup. */
1047 	mlx5_rxq_release(dev, rx_queue_id);
1048 	if (ret != EAGAIN)
1049 		DRV_LOG(WARNING, "port %u unable to disable interrupt on Rx queue %d",
1050 			dev->data->port_id, rx_queue_id);
1051 	rte_errno = ret; /* Restore rte_errno. */
1052 	return -rte_errno;
1053 }
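
/*
 * Usage sketch (illustrative, not part of the driver): an application
 * typically arms the interrupt, waits for the Rx event, then disables
 * it before polling again; port_id, queue_id and pkts are hypothetical.
 *
 *	rte_eth_dev_rx_intr_enable(port_id, queue_id);
 *	// ... block until the epoll fd registered for this queue fires ...
 *	rte_eth_dev_rx_intr_disable(port_id, queue_id);
 *	nb = rte_eth_rx_burst(port_id, queue_id, pkts, RTE_DIM(pkts));
 */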
1054 
1055 /**
1056  * Verify the Rx queue objects list is empty.
1057  *
1058  * @param dev
1059  *   Pointer to Ethernet device.
1060  *
1061  * @return
1062  *   The number of objects not released.
1063  */
1064 int
1065 mlx5_rxq_obj_verify(struct rte_eth_dev *dev)
1066 {
1067 	struct mlx5_priv *priv = dev->data->dev_private;
1068 	int ret = 0;
1069 	struct mlx5_rxq_obj *rxq_obj;
1070 
1071 	LIST_FOREACH(rxq_obj, &priv->rxqsobj, next) {
1072 		DRV_LOG(DEBUG, "port %u Rx queue %u still referenced",
1073 			dev->data->port_id, rxq_obj->rxq_ctrl->rxq.idx);
1074 		++ret;
1075 	}
1076 	return ret;
1077 }
1078 
1079 /**
1080  * Callback function to initialize mbufs for Multi-Packet RQ.
1081  */
1082 static inline void
1083 mlx5_mprq_buf_init(struct rte_mempool *mp, void *opaque_arg,
1084 		    void *_m, unsigned int i __rte_unused)
1085 {
1086 	struct mlx5_mprq_buf *buf = _m;
1087 	struct rte_mbuf_ext_shared_info *shinfo;
1088 	unsigned int strd_n = (unsigned int)(uintptr_t)opaque_arg;
1089 	unsigned int j;
1090 
1091 	memset(_m, 0, sizeof(*buf));
1092 	buf->mp = mp;
1093 	__atomic_store_n(&buf->refcnt, 1, __ATOMIC_RELAXED);
1094 	for (j = 0; j != strd_n; ++j) {
1095 		shinfo = &buf->shinfos[j];
1096 		shinfo->free_cb = mlx5_mprq_buf_free_cb;
1097 		shinfo->fcb_opaque = buf;
1098 	}
1099 }
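
/*
 * Descriptive note: each stride gets its own rte_mbuf_ext_shared_info so
 * that a stride can be attached to an mbuf as an external buffer; freeing
 * such an mbuf calls mlx5_mprq_buf_free_cb() and the mlx5_mprq_buf goes
 * back to the mempool only once its reference count drops.
 */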
1100 
1101 /**
1102  * Free mempool of Multi-Packet RQ.
1103  *
1104  * @param dev
1105  *   Pointer to Ethernet device.
1106  *
1107  * @return
1108  *   0 on success, negative errno value on failure.
1109  */
1110 int
1111 mlx5_mprq_free_mp(struct rte_eth_dev *dev)
1112 {
1113 	struct mlx5_priv *priv = dev->data->dev_private;
1114 	struct rte_mempool *mp = priv->mprq_mp;
1115 	unsigned int i;
1116 
1117 	if (mp == NULL)
1118 		return 0;
1119 	DRV_LOG(DEBUG, "port %u freeing mempool (%s) for Multi-Packet RQ",
1120 		dev->data->port_id, mp->name);
1121 	/*
1122 	 * If a buffer in the pool has been externally attached to an mbuf and is
1123 	 * still in use by the application, destroying the Rx queue can corrupt
1124 	 * the packet. It is unlikely, but it can happen if the application
1125 	 * dynamically creates and destroys queues while holding Rx packets.
1126 	 *
1127 	 * TODO: It is unavoidable for now because the mempool for Multi-Packet
1128 	 * RQ isn't provided by the application but managed by the PMD.
1129 	 */
1130 	if (!rte_mempool_full(mp)) {
1131 		DRV_LOG(ERR,
1132 			"port %u mempool for Multi-Packet RQ is still in use",
1133 			dev->data->port_id);
1134 		rte_errno = EBUSY;
1135 		return -rte_errno;
1136 	}
1137 	rte_mempool_free(mp);
1138 	/* Unset mempool for each Rx queue. */
1139 	for (i = 0; i != priv->rxqs_n; ++i) {
1140 		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
1141 
1142 		if (rxq == NULL)
1143 			continue;
1144 		rxq->mprq_mp = NULL;
1145 	}
1146 	priv->mprq_mp = NULL;
1147 	return 0;
1148 }
1149 
1150 /**
1151  * Allocate a mempool for Multi-Packet RQ. All configured Rx queues share the
1152  * mempool. If already allocated, reuse it if there are enough elements.
1153  * Otherwise, resize it.
1154  *
1155  * @param dev
1156  *   Pointer to Ethernet device.
1157  *
1158  * @return
1159  *   0 on success, negative errno value on failure.
1160  */
1161 int
1162 mlx5_mprq_alloc_mp(struct rte_eth_dev *dev)
1163 {
1164 	struct mlx5_priv *priv = dev->data->dev_private;
1165 	struct rte_mempool *mp = priv->mprq_mp;
1166 	char name[RTE_MEMPOOL_NAMESIZE];
1167 	unsigned int desc = 0;
1168 	unsigned int buf_len;
1169 	unsigned int obj_num;
1170 	unsigned int obj_size;
1171 	unsigned int strd_num_n = 0;
1172 	unsigned int strd_sz_n = 0;
1173 	unsigned int i;
1174 	unsigned int n_ibv = 0;
1175 
1176 	if (!mlx5_mprq_enabled(dev))
1177 		return 0;
1178 	/* Count the total number of descriptors configured. */
1179 	for (i = 0; i != priv->rxqs_n; ++i) {
1180 		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
1181 		struct mlx5_rxq_ctrl *rxq_ctrl = container_of
1182 			(rxq, struct mlx5_rxq_ctrl, rxq);
1183 
1184 		if (rxq == NULL || rxq_ctrl->type != MLX5_RXQ_TYPE_STANDARD)
1185 			continue;
1186 		n_ibv++;
1187 		desc += 1 << rxq->elts_n;
1188 		/* Get the max number of strides. */
1189 		if (strd_num_n < rxq->strd_num_n)
1190 			strd_num_n = rxq->strd_num_n;
1191 		/* Get the max size of a stride. */
1192 		if (strd_sz_n < rxq->strd_sz_n)
1193 			strd_sz_n = rxq->strd_sz_n;
1194 	}
1195 	MLX5_ASSERT(strd_num_n && strd_sz_n);
1196 	buf_len = (1 << strd_num_n) * (1 << strd_sz_n);
1197 	obj_size = sizeof(struct mlx5_mprq_buf) + buf_len + (1 << strd_num_n) *
1198 		sizeof(struct rte_mbuf_ext_shared_info) + RTE_PKTMBUF_HEADROOM;
1199 	/*
1200 	 * Received packets can be either memcpy'd or externally referenced.
1201 	 * When a packet is attached to an mbuf as an external buffer, it is
1202 	 * impossible to predict how the buffers will be queued by the
1203 	 * application, so there is no way to pre-allocate the exact number
1204 	 * of buffers in advance; enough buffers must be prepared speculatively.
1205 	 *
1206 	 * In the data path, if this mempool is depleted, the PMD will memcpy
1207 	 * received packets into buffers provided by the application (rxq->mp)
1208 	 * until this mempool becomes available again.
1209 	 */
1210 	desc *= 4;
1211 	obj_num = desc + MLX5_MPRQ_MP_CACHE_SZ * n_ibv;
1212 	/*
1213 	 * rte_mempool_create_empty() has a sanity check that refuses a cache
1214 	 * size that is too large compared to the number of elements.
1215 	 * CACHE_FLUSHTHRESH_MULTIPLIER is defined in a C file, so a constant
1216 	 * value of 2 is used instead.
1217 	 */
1218 	obj_num = RTE_MAX(obj_num, MLX5_MPRQ_MP_CACHE_SZ * 2);
1219 	/* Check whether a mempool is already allocated and if it can be reused. */
1220 	if (mp != NULL && mp->elt_size >= obj_size && mp->size >= obj_num) {
1221 		DRV_LOG(DEBUG, "port %u mempool %s is being reused",
1222 			dev->data->port_id, mp->name);
1223 		/* Reuse. */
1224 		goto exit;
1225 	} else if (mp != NULL) {
1226 		DRV_LOG(DEBUG, "port %u mempool %s should be resized, freeing it",
1227 			dev->data->port_id, mp->name);
1228 		/*
1229 		 * If freeing fails, the mempool may still be in use and there is
1230 		 * no choice but to keep using the existing one. On buffer
1231 		 * underrun, packets will be memcpy'd instead of being attached
1232 		 * as external buffers.
1233 		 */
1234 		if (mlx5_mprq_free_mp(dev)) {
1235 			if (mp->elt_size >= obj_size)
1236 				goto exit;
1237 			else
1238 				return -rte_errno;
1239 		}
1240 	}
1241 	snprintf(name, sizeof(name), "port-%u-mprq", dev->data->port_id);
1242 	mp = rte_mempool_create(name, obj_num, obj_size, MLX5_MPRQ_MP_CACHE_SZ,
1243 				0, NULL, NULL, mlx5_mprq_buf_init,
1244 				(void *)(uintptr_t)(1 << strd_num_n),
1245 				dev->device->numa_node, 0);
1246 	if (mp == NULL) {
1247 		DRV_LOG(ERR,
1248 			"port %u failed to allocate a mempool for"
1249 			" Multi-Packet RQ, count=%u, size=%u",
1250 			dev->data->port_id, obj_num, obj_size);
1251 		rte_errno = ENOMEM;
1252 		return -rte_errno;
1253 	}
1254 	priv->mprq_mp = mp;
1255 exit:
1256 	/* Set mempool for each Rx queue. */
1257 	for (i = 0; i != priv->rxqs_n; ++i) {
1258 		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
1259 		struct mlx5_rxq_ctrl *rxq_ctrl = container_of
1260 			(rxq, struct mlx5_rxq_ctrl, rxq);
1261 
1262 		if (rxq == NULL || rxq_ctrl->type != MLX5_RXQ_TYPE_STANDARD)
1263 			continue;
1264 		rxq->mprq_mp = mp;
1265 	}
1266 	DRV_LOG(INFO, "port %u Multi-Packet RQ is configured",
1267 		dev->data->port_id);
1268 	return 0;
1269 }
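
/*
 * Worked example (illustrative numbers): with two MPRQ Rx queues of
 * 512 descriptors each, strd_num_n = 6 (64 strides) and strd_sz_n = 11
 * (2048-byte strides), buf_len = 64 * 2048 = 128 KiB, the summed desc
 * is 1024, and obj_num = 4 * 1024 + MLX5_MPRQ_MP_CACHE_SZ * 2 objects
 * are requested from the shared mempool.
 */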
1270 
1271 #define MLX5_MAX_TCP_HDR_OFFSET ((unsigned int)(sizeof(struct rte_ether_hdr) + \
1272 					sizeof(struct rte_vlan_hdr) * 2 + \
1273 					sizeof(struct rte_ipv6_hdr)))
1274 #define MAX_TCP_OPTION_SIZE 40u
1275 #define MLX5_MAX_LRO_HEADER_FIX ((unsigned int)(MLX5_MAX_TCP_HDR_OFFSET + \
1276 				 sizeof(struct rte_tcp_hdr) + \
1277 				 MAX_TCP_OPTION_SIZE))
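/*
 * Numeric illustration: MLX5_MAX_TCP_HDR_OFFSET is 14 (Ethernet) +
 * 2 * 4 (two VLAN tags) + 40 (IPv6) = 62 bytes, so MLX5_MAX_LRO_HEADER_FIX
 * is 62 + 20 (TCP) + 40 (maximum TCP options) = 122 bytes of headers that
 * must fit into the first segment.
 */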
1278 
1279 /**
1280  * Adjust the maximum LRO message size.
1281  *
1282  * @param dev
1283  *   Pointer to Ethernet device.
1284  * @param idx
1285  *   RX queue index.
1286  * @param max_lro_size
1287  *   The maximum size for LRO packet.
1288  */
1289 static void
1290 mlx5_max_lro_msg_size_adjust(struct rte_eth_dev *dev, uint16_t idx,
1291 			     uint32_t max_lro_size)
1292 {
1293 	struct mlx5_priv *priv = dev->data->dev_private;
1294 
1295 	if (priv->config.hca_attr.lro_max_msg_sz_mode ==
1296 	    MLX5_LRO_MAX_MSG_SIZE_START_FROM_L4 && max_lro_size >
1297 	    MLX5_MAX_TCP_HDR_OFFSET)
1298 		max_lro_size -= MLX5_MAX_TCP_HDR_OFFSET;
1299 	max_lro_size = RTE_MIN(max_lro_size, MLX5_MAX_LRO_SIZE);
1300 	MLX5_ASSERT(max_lro_size >= MLX5_LRO_SEG_CHUNK_SIZE);
1301 	max_lro_size /= MLX5_LRO_SEG_CHUNK_SIZE;
1302 	if (priv->max_lro_msg_size)
1303 		priv->max_lro_msg_size =
1304 			RTE_MIN((uint32_t)priv->max_lro_msg_size, max_lro_size);
1305 	else
1306 		priv->max_lro_msg_size = max_lro_size;
1307 	DRV_LOG(DEBUG,
1308 		"port %u Rx Queue %u max LRO message size adjusted to %u bytes",
1309 		dev->data->port_id, idx,
1310 		priv->max_lro_msg_size * MLX5_LRO_SEG_CHUNK_SIZE);
1311 }
1312 
1313 /**
1314  * Create a DPDK Rx queue.
1315  *
1316  * @param dev
1317  *   Pointer to Ethernet device.
1318  * @param idx
1319  *   RX queue index.
1320  * @param desc
1321  *   Number of descriptors to configure in queue.
1322  * @param socket
1323  *   NUMA socket on which memory must be allocated.
1324  *
1325  * @return
1326  *   A DPDK queue object on success, NULL otherwise and rte_errno is set.
1327  */
1328 struct mlx5_rxq_ctrl *
1329 mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
1330 	     unsigned int socket, const struct rte_eth_rxconf *conf,
1331 	     struct rte_mempool *mp)
1332 {
1333 	struct mlx5_priv *priv = dev->data->dev_private;
1334 	struct mlx5_rxq_ctrl *tmpl;
1335 	unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
1336 	unsigned int mprq_stride_nums;
1337 	unsigned int mprq_stride_size;
1338 	unsigned int mprq_stride_cap;
1339 	struct mlx5_dev_config *config = &priv->config;
1340 	/*
1341 	 * Always allocate extra slots, even if eventually
1342 	 * the vector Rx will not be used.
1343 	 */
1344 	uint16_t desc_n =
1345 		desc + config->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
1346 	uint64_t offloads = conf->offloads |
1347 			   dev->data->dev_conf.rxmode.offloads;
1348 	unsigned int lro_on_queue = !!(offloads & DEV_RX_OFFLOAD_TCP_LRO);
1349 	const int mprq_en = mlx5_check_mprq_support(dev) > 0;
1350 	unsigned int max_rx_pkt_len = lro_on_queue ?
1351 			dev->data->dev_conf.rxmode.max_lro_pkt_size :
1352 			dev->data->dev_conf.rxmode.max_rx_pkt_len;
1353 	unsigned int non_scatter_min_mbuf_size = max_rx_pkt_len +
1354 							RTE_PKTMBUF_HEADROOM;
1355 	unsigned int max_lro_size = 0;
1356 	unsigned int first_mb_free_size = mb_len - RTE_PKTMBUF_HEADROOM;
1357 
1358 	if (non_scatter_min_mbuf_size > mb_len && !(offloads &
1359 						    DEV_RX_OFFLOAD_SCATTER)) {
1360 		DRV_LOG(ERR, "port %u Rx queue %u: Scatter offload is not"
1361 			" configured and there is not enough mbuf space (%u) to"
1362 			" contain the maximum Rx packet length (%u) with headroom (%u)",
1363 			dev->data->port_id, idx, mb_len, max_rx_pkt_len,
1364 			RTE_PKTMBUF_HEADROOM);
1365 		rte_errno = ENOSPC;
1366 		return NULL;
1367 	}
1368 	tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl) +
1369 			   desc_n * sizeof(struct rte_mbuf *), 0, socket);
1370 	if (!tmpl) {
1371 		rte_errno = ENOMEM;
1372 		return NULL;
1373 	}
1374 	tmpl->type = MLX5_RXQ_TYPE_STANDARD;
1375 	if (mlx5_mr_btree_init(&tmpl->rxq.mr_ctrl.cache_bh,
1376 			       MLX5_MR_BTREE_CACHE_N, socket)) {
1377 		/* rte_errno is already set. */
1378 		goto error;
1379 	}
1380 	tmpl->socket = socket;
1381 	if (dev->data->dev_conf.intr_conf.rxq)
1382 		tmpl->irq = 1;
1383 	mprq_stride_nums = config->mprq.stride_num_n ?
1384 		config->mprq.stride_num_n : MLX5_MPRQ_STRIDE_NUM_N;
1385 	mprq_stride_size = non_scatter_min_mbuf_size <=
1386 		(1U << config->mprq.max_stride_size_n) ?
1387 		log2above(non_scatter_min_mbuf_size) : MLX5_MPRQ_STRIDE_SIZE_N;
1388 	mprq_stride_cap = (config->mprq.stride_num_n ?
1389 		(1U << config->mprq.stride_num_n) : (1U << mprq_stride_nums)) *
1390 			(config->mprq.stride_size_n ?
1391 		(1U << config->mprq.stride_size_n) : (1U << mprq_stride_size));
1392 	/*
1393 	 * This Rx queue can be configured as a Multi-Packet RQ if all of the
1394 	 * following conditions are met:
1395 	 *  - MPRQ is enabled.
1396 	 *  - The number of descs is more than the number of strides.
1397 	 *  - max_rx_pkt_len plus overhead is less than the max size
1398 	 *    of a stride or mprq_stride_size is specified by a user.
1399 	 *    Need to make sure that there are enough strides to hold
1400 	 *    the maximum packet size in case mprq_stride_size is set.
1401 	 *  Otherwise, enable Rx scatter if necessary.
1402 	 */
1403 	if (mprq_en && desc > (1U << mprq_stride_nums) &&
1404 	    (non_scatter_min_mbuf_size <=
1405 	     (1U << config->mprq.max_stride_size_n) ||
1406 	     (config->mprq.stride_size_n &&
1407 	      non_scatter_min_mbuf_size <= mprq_stride_cap))) {
1408 		/* TODO: Rx scatter isn't supported yet. */
1409 		tmpl->rxq.sges_n = 0;
1410 		/* Trim the number of descs needed. */
1411 		desc >>= mprq_stride_nums;
1412 		tmpl->rxq.strd_num_n = config->mprq.stride_num_n ?
1413 			config->mprq.stride_num_n : mprq_stride_nums;
1414 		tmpl->rxq.strd_sz_n = config->mprq.stride_size_n ?
1415 			config->mprq.stride_size_n : mprq_stride_size;
1416 		tmpl->rxq.strd_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT;
1417 		tmpl->rxq.strd_scatter_en =
1418 				!!(offloads & DEV_RX_OFFLOAD_SCATTER);
1419 		tmpl->rxq.mprq_max_memcpy_len = RTE_MIN(first_mb_free_size,
1420 				config->mprq.max_memcpy_len);
1421 		max_lro_size = RTE_MIN(max_rx_pkt_len,
1422 				       (1u << tmpl->rxq.strd_num_n) *
1423 				       (1u << tmpl->rxq.strd_sz_n));
1424 		DRV_LOG(DEBUG,
1425 			"port %u Rx queue %u: Multi-Packet RQ is enabled"
1426 			" strd_num_n = %u, strd_sz_n = %u",
1427 			dev->data->port_id, idx,
1428 			tmpl->rxq.strd_num_n, tmpl->rxq.strd_sz_n);
1429 	} else if (max_rx_pkt_len <= first_mb_free_size) {
1430 		tmpl->rxq.sges_n = 0;
1431 		max_lro_size = max_rx_pkt_len;
1432 	} else if (offloads & DEV_RX_OFFLOAD_SCATTER) {
1433 		unsigned int size = non_scatter_min_mbuf_size;
1434 		unsigned int sges_n;
1435 
1436 		if (lro_on_queue && first_mb_free_size <
1437 		    MLX5_MAX_LRO_HEADER_FIX) {
1438 			DRV_LOG(ERR, "Not enough space in the first segment (%u)"
1439 				" to include the max header size (%u) for LRO",
1440 				first_mb_free_size, MLX5_MAX_LRO_HEADER_FIX);
1441 			rte_errno = ENOTSUP;
1442 			goto error;
1443 		}
1444 		/*
1445 		 * Determine the number of SGEs needed for a full packet
1446 		 * and round it to the next power of two.
1447 		 */
1448 		sges_n = log2above((size / mb_len) + !!(size % mb_len));
1449 		if (sges_n > MLX5_MAX_LOG_RQ_SEGS) {
1450 			DRV_LOG(ERR,
1451 				"port %u too many SGEs (%u) needed to handle"
1452 				" requested maximum packet size %u, the maximum"
1453 				" supported are %u", dev->data->port_id,
1454 				1 << sges_n, max_rx_pkt_len,
1455 				1u << MLX5_MAX_LOG_RQ_SEGS);
1456 			rte_errno = ENOTSUP;
1457 			goto error;
1458 		}
1459 		tmpl->rxq.sges_n = sges_n;
1460 		max_lro_size = max_rx_pkt_len;
1461 	}
1462 	if (config->mprq.enabled && !mlx5_rxq_mprq_enabled(&tmpl->rxq))
1463 		DRV_LOG(WARNING,
1464 			"port %u MPRQ is requested but cannot be enabled\n"
1465 			" (requested: pkt_sz = %u, desc_num = %u,"
1466 			" rxq_num = %u, stride_sz = %u, stride_num = %u\n"
1467 			"  supported: min_rxqs_num = %u,"
1468 			" min_stride_sz = %u, max_stride_sz = %u).",
1469 			dev->data->port_id, non_scatter_min_mbuf_size,
1470 			desc, priv->rxqs_n,
1471 			config->mprq.stride_size_n ?
1472 				(1U << config->mprq.stride_size_n) :
1473 				(1U << mprq_stride_size),
1474 			config->mprq.stride_num_n ?
1475 				(1U << config->mprq.stride_num_n) :
1476 				(1U << mprq_stride_nums),
1477 			config->mprq.min_rxqs_num,
1478 			(1U << config->mprq.min_stride_size_n),
1479 			(1U << config->mprq.max_stride_size_n));
1480 	DRV_LOG(DEBUG, "port %u maximum number of segments per packet: %u",
1481 		dev->data->port_id, 1 << tmpl->rxq.sges_n);
1482 	if (desc % (1 << tmpl->rxq.sges_n)) {
1483 		DRV_LOG(ERR,
1484 			"port %u number of Rx queue descriptors (%u) is not a"
1485 			" multiple of SGEs per packet (%u)",
1486 			dev->data->port_id,
1487 			desc,
1488 			1 << tmpl->rxq.sges_n);
1489 		rte_errno = EINVAL;
1490 		goto error;
1491 	}
1492 	mlx5_max_lro_msg_size_adjust(dev, idx, max_lro_size);
1493 	/* Toggle RX checksum offload if hardware supports it. */
1494 	tmpl->rxq.csum = !!(offloads & DEV_RX_OFFLOAD_CHECKSUM);
1495 	/* Configure Rx timestamp. */
1496 	tmpl->rxq.hw_timestamp = !!(offloads & DEV_RX_OFFLOAD_TIMESTAMP);
1497 	tmpl->rxq.timestamp_rx_flag = 0;
1498 	if (tmpl->rxq.hw_timestamp && rte_mbuf_dyn_rx_timestamp_register(
1499 			&tmpl->rxq.timestamp_offset,
1500 			&tmpl->rxq.timestamp_rx_flag) != 0) {
1501 		DRV_LOG(ERR, "Cannot register Rx timestamp field/flag");
1502 		goto error;
1503 	}
1504 	/* Configure VLAN stripping. */
1505 	tmpl->rxq.vlan_strip = !!(offloads & DEV_RX_OFFLOAD_VLAN_STRIP);
1506 	/* By default, FCS (CRC) is stripped by hardware. */
1507 	tmpl->rxq.crc_present = 0;
1508 	tmpl->rxq.lro = lro_on_queue;
1509 	if (offloads & DEV_RX_OFFLOAD_KEEP_CRC) {
1510 		if (config->hw_fcs_strip) {
1511 			/*
1512 			 * RQs used for LRO-enabled TIRs should not be
1513 			 * configured to scatter the FCS.
1514 			 */
1515 			if (lro_on_queue)
1516 				DRV_LOG(WARNING,
1517 					"port %u CRC stripping has been "
1518 					"disabled but will still be performed "
1519 					"by hardware, because LRO is enabled",
1520 					dev->data->port_id);
1521 			else
1522 				tmpl->rxq.crc_present = 1;
1523 		} else {
1524 			DRV_LOG(WARNING,
1525 				"port %u CRC stripping has been disabled but will"
1526 				" still be performed by hardware, make sure MLNX_OFED"
1527 				" and firmware are up to date",
1528 				dev->data->port_id);
1529 		}
1530 	}
1531 	DRV_LOG(DEBUG,
1532 		"port %u CRC stripping is %s, %u bytes will be subtracted from"
1533 		" incoming frames to hide it",
1534 		dev->data->port_id,
1535 		tmpl->rxq.crc_present ? "disabled" : "enabled",
1536 		tmpl->rxq.crc_present << 2);
1537 	/* Save port ID. */
1538 	tmpl->rxq.rss_hash = !!priv->rss_conf.rss_hf &&
1539 		(!!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS));
1540 	tmpl->rxq.port_id = dev->data->port_id;
1541 	tmpl->priv = priv;
1542 	tmpl->rxq.mp = mp;
1543 	tmpl->rxq.elts_n = log2above(desc);
1544 	tmpl->rxq.rq_repl_thresh =
1545 		MLX5_VPMD_RXQ_RPLNSH_THRESH(1 << tmpl->rxq.elts_n);
1546 	tmpl->rxq.elts =
1547 		(struct rte_mbuf *(*)[1 << tmpl->rxq.elts_n])(tmpl + 1);
1548 #ifndef RTE_ARCH_64
1549 	tmpl->rxq.uar_lock_cq = &priv->sh->uar_lock_cq;
1550 #endif
1551 	tmpl->rxq.idx = idx;
1552 	rte_atomic32_inc(&tmpl->refcnt);
1553 	LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
1554 	return tmpl;
1555 error:
1556 	mlx5_free(tmpl);
1557 	return NULL;
1558 }
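
/*
 * Sizing illustration (hypothetical values, assuming the default 128-byte
 * RTE_PKTMBUF_HEADROOM): for max_rx_pkt_len = 1500,
 * non_scatter_min_mbuf_size = 1500 + 128 = 1628, so the selected MPRQ
 * stride size is 1 << log2above(1628) = 2048 bytes when that fits under
 * mprq.max_stride_size_n, and a 4096-descriptor request is trimmed to
 * 4096 >> mprq_stride_nums WQEs, each WQE carrying many packets.
 */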
1559 
1560 /**
1561  * Create a DPDK Rx hairpin queue.
1562  *
1563  * @param dev
1564  *   Pointer to Ethernet device.
1565  * @param idx
1566  *   RX queue index.
1567  * @param desc
1568  *   Number of descriptors to configure in queue.
1569  * @param hairpin_conf
1570  *   The hairpin binding configuration.
1571  *
1572  * @return
1573  *   A DPDK queue object on success, NULL otherwise and rte_errno is set.
1574  */
1575 struct mlx5_rxq_ctrl *
1576 mlx5_rxq_hairpin_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
1577 		     const struct rte_eth_hairpin_conf *hairpin_conf)
1578 {
1579 	struct mlx5_priv *priv = dev->data->dev_private;
1580 	struct mlx5_rxq_ctrl *tmpl;
1581 
1582 	tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl), 0,
1583 			   SOCKET_ID_ANY);
1584 	if (!tmpl) {
1585 		rte_errno = ENOMEM;
1586 		return NULL;
1587 	}
1588 	tmpl->type = MLX5_RXQ_TYPE_HAIRPIN;
1589 	tmpl->socket = SOCKET_ID_ANY;
1590 	tmpl->rxq.rss_hash = 0;
1591 	tmpl->rxq.port_id = dev->data->port_id;
1592 	tmpl->priv = priv;
1593 	tmpl->rxq.mp = NULL;
1594 	tmpl->rxq.elts_n = log2above(desc);
1595 	tmpl->rxq.elts = NULL;
1596 	tmpl->rxq.mr_ctrl.cache_bh = (struct mlx5_mr_btree) { 0 };
1597 	tmpl->hairpin_conf = *hairpin_conf;
1598 	tmpl->rxq.idx = idx;
1599 	rte_atomic32_inc(&tmpl->refcnt);
1600 	LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
1601 	return tmpl;
1602 }
1603 
1604 /**
1605  * Get a Rx queue.
1606  *
1607  * @param dev
1608  *   Pointer to Ethernet device.
1609  * @param idx
1610  *   RX queue index.
1611  *
1612  * @return
1613  *   A pointer to the queue if it exists, NULL otherwise.
1614  */
1615 struct mlx5_rxq_ctrl *
1616 mlx5_rxq_get(struct rte_eth_dev *dev, uint16_t idx)
1617 {
1618 	struct mlx5_priv *priv = dev->data->dev_private;
1619 	struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
1620 	struct mlx5_rxq_ctrl *rxq_ctrl = NULL;
1621 
1622 	if (rxq_data) {
1623 		rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
1624 		rte_atomic32_inc(&rxq_ctrl->refcnt);
1625 	}
1626 	return rxq_ctrl;
1627 }
1628 
1629 /**
1630  * Release a Rx queue.
1631  *
1632  * @param dev
1633  *   Pointer to Ethernet device.
1634  * @param idx
1635  *   RX queue index.
1636  *
1637  * @return
1638  *   1 while a reference on it exists, 0 when freed.
1639  */
1640 int
1641 mlx5_rxq_release(struct rte_eth_dev *dev, uint16_t idx)
1642 {
1643 	struct mlx5_priv *priv = dev->data->dev_private;
1644 	struct mlx5_rxq_ctrl *rxq_ctrl;
1645 
1646 	if (!(*priv->rxqs)[idx])
1647 		return 0;
1648 	rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
1649 	if (!rte_atomic32_dec_and_test(&rxq_ctrl->refcnt))
1650 		return 1;
1651 	if (rxq_ctrl->obj) {
1652 		priv->obj_ops.rxq_obj_release(rxq_ctrl->obj);
1653 		LIST_REMOVE(rxq_ctrl->obj, next);
1654 		mlx5_free(rxq_ctrl->obj);
1655 		rxq_ctrl->obj = NULL;
1656 	}
1657 	if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
1658 		mlx5_mr_btree_free(&rxq_ctrl->rxq.mr_ctrl.cache_bh);
1659 		rxq_free_elts(rxq_ctrl);
1660 	}
1661 	LIST_REMOVE(rxq_ctrl, next);
1662 	mlx5_free(rxq_ctrl);
1663 	(*priv->rxqs)[idx] = NULL;
1664 	return 0;
1665 }
1666 
1667 /**
1668  * Verify the Rx Queue list is empty
1669  *
1670  * @param dev
1671  *   Pointer to Ethernet device.
1672  *
1673  * @return
1674  *   The number of objects not released.
1675  */
1676 int
1677 mlx5_rxq_verify(struct rte_eth_dev *dev)
1678 {
1679 	struct mlx5_priv *priv = dev->data->dev_private;
1680 	struct mlx5_rxq_ctrl *rxq_ctrl;
1681 	int ret = 0;
1682 
1683 	LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
1684 		DRV_LOG(DEBUG, "port %u Rx Queue %u still referenced",
1685 			dev->data->port_id, rxq_ctrl->rxq.idx);
1686 		++ret;
1687 	}
1688 	return ret;
1689 }
1690 
1691 /**
1692  * Get a Rx queue type.
1693  *
1694  * @param dev
1695  *   Pointer to Ethernet device.
1696  * @param idx
1697  *   Rx queue index.
1698  *
1699  * @return
1700  *   The Rx queue type.
1701  */
1702 enum mlx5_rxq_type
1703 mlx5_rxq_get_type(struct rte_eth_dev *dev, uint16_t idx)
1704 {
1705 	struct mlx5_priv *priv = dev->data->dev_private;
1706 	struct mlx5_rxq_ctrl *rxq_ctrl = NULL;
1707 
1708 	if (idx < priv->rxqs_n && (*priv->rxqs)[idx]) {
1709 		rxq_ctrl = container_of((*priv->rxqs)[idx],
1710 					struct mlx5_rxq_ctrl,
1711 					rxq);
1712 		return rxq_ctrl->type;
1713 	}
1714 	return MLX5_RXQ_TYPE_UNDEFINED;
1715 }
1716 
1717 /**
1718  * Get an indirection table.
1719  *
1720  * @param dev
1721  *   Pointer to Ethernet device.
1722  * @param queues
1723  *   Queues entering the indirection table.
1724  * @param queues_n
1725  *   Number of queues in the array.
1726  *
1727  * @return
1728  *   An indirection table if found.
1729  */
1730 struct mlx5_ind_table_obj *
1731 mlx5_ind_table_obj_get(struct rte_eth_dev *dev, const uint16_t *queues,
1732 		       uint32_t queues_n)
1733 {
1734 	struct mlx5_priv *priv = dev->data->dev_private;
1735 	struct mlx5_ind_table_obj *ind_tbl;
1736 
1737 	LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
1738 		if ((ind_tbl->queues_n == queues_n) &&
1739 		    (memcmp(ind_tbl->queues, queues,
1740 			    ind_tbl->queues_n * sizeof(ind_tbl->queues[0]))
1741 		     == 0))
1742 			break;
1743 	}
1744 	if (ind_tbl) {
1745 		unsigned int i;
1746 
1747 		rte_atomic32_inc(&ind_tbl->refcnt);
1748 		for (i = 0; i != ind_tbl->queues_n; ++i)
1749 			mlx5_rxq_get(dev, ind_tbl->queues[i]);
1750 	}
1751 	return ind_tbl;
1752 }
1753 
1754 /**
1755  * Release an indirection table.
1756  *
1757  * @param dev
1758  *   Pointer to Ethernet device.
1759  * @param ind_table
1760  *   Indirection table to release.
1761  *
1762  * @return
1763  *   1 while a reference on it exists, 0 when freed.
1764  */
1765 int
1766 mlx5_ind_table_obj_release(struct rte_eth_dev *dev,
1767 			   struct mlx5_ind_table_obj *ind_tbl)
1768 {
1769 	struct mlx5_priv *priv = dev->data->dev_private;
1770 	unsigned int i;
1771 
1772 	if (rte_atomic32_dec_and_test(&ind_tbl->refcnt))
1773 		priv->obj_ops.ind_table_destroy(ind_tbl);
1774 	for (i = 0; i != ind_tbl->queues_n; ++i)
1775 		claim_nonzero(mlx5_rxq_release(dev, ind_tbl->queues[i]));
1776 	if (!rte_atomic32_read(&ind_tbl->refcnt)) {
1777 		LIST_REMOVE(ind_tbl, next);
1778 		mlx5_free(ind_tbl);
1779 		return 0;
1780 	}
1781 	return 1;
1782 }
1783 
1784 /**
1785  * Verify the indirection table list is empty.
1786  *
1787  * @param dev
1788  *   Pointer to Ethernet device.
1789  *
1790  * @return
1791  *   The number of objects not released.
1792  */
1793 int
1794 mlx5_ind_table_obj_verify(struct rte_eth_dev *dev)
1795 {
1796 	struct mlx5_priv *priv = dev->data->dev_private;
1797 	struct mlx5_ind_table_obj *ind_tbl;
1798 	int ret = 0;
1799 
1800 	LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
1801 		DRV_LOG(DEBUG,
1802 			"port %u indirection table obj %p still referenced",
1803 			dev->data->port_id, (void *)ind_tbl);
1804 		++ret;
1805 	}
1806 	return ret;
1807 }
1808 
1809 /**
1810  * Create an indirection table.
1811  *
1812  * @param dev
1813  *   Pointer to Ethernet device.
1814  * @param queues
1815  *   Queues entering the indirection table.
1816  * @param queues_n
1817  *   Number of queues in the array.
1818  *
1819  * @return
1820  *   The Verbs/DevX object initialized, NULL otherwise and rte_errno is set.
1821  */
1822 static struct mlx5_ind_table_obj *
1823 mlx5_ind_table_obj_new(struct rte_eth_dev *dev, const uint16_t *queues,
1824 		       uint32_t queues_n)
1825 {
1826 	struct mlx5_priv *priv = dev->data->dev_private;
1827 	struct mlx5_ind_table_obj *ind_tbl;
1828 	const unsigned int n = rte_is_power_of_2(queues_n) ?
1829 			       log2above(queues_n) :
1830 			       log2above(priv->config.ind_table_max_size);
1831 	unsigned int i, j;
1832 	int ret;
1833 
1834 	ind_tbl = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*ind_tbl) +
1835 			      queues_n * sizeof(uint16_t), 0, SOCKET_ID_ANY);
1836 	if (!ind_tbl) {
1837 		rte_errno = ENOMEM;
1838 		return NULL;
1839 	}
1840 	ind_tbl->queues_n = queues_n;
1841 	for (i = 0; i != queues_n; ++i) {
1842 		struct mlx5_rxq_ctrl *rxq = mlx5_rxq_get(dev, queues[i]);
1843 		if (!rxq)
1844 			goto error;
1845 		ind_tbl->queues[i] = queues[i];
1846 	}
1847 	ret = priv->obj_ops.ind_table_new(dev, n, ind_tbl);
1848 	if (ret < 0)
1849 		goto error;
1850 	rte_atomic32_inc(&ind_tbl->refcnt);
1851 	LIST_INSERT_HEAD(&priv->ind_tbls, ind_tbl, next);
1852 	return ind_tbl;
1853 error:
1854 	ret = rte_errno;
1855 	for (j = 0; j < i; j++)
1856 		mlx5_rxq_release(dev, ind_tbl->queues[j]);
1857 	rte_errno = ret;
1858 	mlx5_free(ind_tbl);
1859 	DEBUG("Port %u cannot create indirection table.", dev->data->port_id);
1860 	return NULL;
1861 }
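
/*
 * Editorial note: the log2 size passed to ind_table_new() above sizes the
 * table as a power of two. For instance, queues_n = 4 gives n = log2above(4)
 * = 2, i.e. a 4-entry table, while queues_n = 6 (not a power of two) falls
 * back to n = log2above(ind_table_max_size), e.g. 9 for a device reporting
 * 512 entries; the backend is then expected to pad the extra entries by
 * repeating the queue list. The value 512 is only an illustration.
 */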
1862 
1863 /**
1864  * Get an Rx Hash queue.
1865  *
1866  * @param dev
1867  *   Pointer to Ethernet device.
1868  * @param rss_key
1869  *   RSS key of rss_key_len bytes, applied to the protocol fields in hash_fields.
1870  * @param queues
1871  *   Queues entering the hash queue. When hash_fields is empty, only the
1872  *   first queue index is used for the indirection table.
1873  * @param queues_n
1874  *   Number of queues.
1875  *
1876  * @return
1877  *   A hash Rx queue index on success, 0 if no matching queue is found.
1878  */
1879 uint32_t
1880 mlx5_hrxq_get(struct rte_eth_dev *dev,
1881 	      const uint8_t *rss_key, uint32_t rss_key_len,
1882 	      uint64_t hash_fields,
1883 	      const uint16_t *queues, uint32_t queues_n)
1884 {
1885 	struct mlx5_priv *priv = dev->data->dev_private;
1886 	struct mlx5_hrxq *hrxq;
1887 	uint32_t idx;
1888 
1889 	queues_n = hash_fields ? queues_n : 1;
1890 	ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_HRXQ], priv->hrxqs, idx,
1891 		      hrxq, next) {
1892 		struct mlx5_ind_table_obj *ind_tbl;
1893 
1894 		if (hrxq->rss_key_len != rss_key_len)
1895 			continue;
1896 		if (memcmp(hrxq->rss_key, rss_key, rss_key_len))
1897 			continue;
1898 		if (hrxq->hash_fields != hash_fields)
1899 			continue;
1900 		ind_tbl = mlx5_ind_table_obj_get(dev, queues, queues_n);
1901 		if (!ind_tbl)
1902 			continue;
1903 		if (ind_tbl != hrxq->ind_table) {
1904 			mlx5_ind_table_obj_release(dev, ind_tbl);
1905 			continue;
1906 		}
1907 		rte_atomic32_inc(&hrxq->refcnt);
1908 		return idx;
1909 	}
1910 	return 0;
1911 }
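
/*
 * Editorial sketch, not part of the driver: looking up an existing hash Rx
 * queue with the default RSS key defined at the top of this file. A non-zero
 * return is an index into the MLX5_IPOOL_HRXQ pool and carries a reference
 * that must later be dropped with mlx5_hrxq_release(). The guard macro and
 * function name are hypothetical.
 */
#ifdef MLX5_EXAMPLE_SNIPPETS /* Illustrative only. */
static uint32_t
mlx5_example_hrxq_lookup(struct rte_eth_dev *dev, uint64_t hash_fields,
			 const uint16_t *queues, uint32_t queues_n)
{
	return mlx5_hrxq_get(dev, rss_hash_default_key,
			     MLX5_RSS_HASH_KEY_LEN, hash_fields,
			     queues, queues_n);
}
#endif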
1912 
1913 /**
1914  * Release the hash Rx queue.
1915  *
1916  * @param dev
1917  *   Pointer to Ethernet device.
1918  * @param hrxq
1919  *   Index of the hash Rx queue to release.
1920  *
1921  * @return
1922  *   1 while a reference on it exists, 0 when freed.
1923  */
1924 int
1925 mlx5_hrxq_release(struct rte_eth_dev *dev, uint32_t hrxq_idx)
1926 {
1927 	struct mlx5_priv *priv = dev->data->dev_private;
1928 	struct mlx5_hrxq *hrxq;
1929 
1930 	hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ], hrxq_idx);
1931 	if (!hrxq)
1932 		return 0;
1933 	if (rte_atomic32_dec_and_test(&hrxq->refcnt)) {
1934 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
1935 		mlx5_glue->destroy_flow_action(hrxq->action);
1936 #endif
1937 		priv->obj_ops.hrxq_destroy(hrxq);
1938 		mlx5_ind_table_obj_release(dev, hrxq->ind_table);
1939 		ILIST_REMOVE(priv->sh->ipool[MLX5_IPOOL_HRXQ], &priv->hrxqs,
1940 			     hrxq_idx, hrxq, next);
1941 		mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_HRXQ], hrxq_idx);
1942 		return 0;
1943 	}
1944 	claim_nonzero(mlx5_ind_table_obj_release(dev, hrxq->ind_table));
1945 	return 1;
1946 }
1947 
1948 /**
1949  * Create an Rx Hash queue.
1950  *
1951  * @param dev
1952  *   Pointer to Ethernet device.
1953  * @param rss_key
1954  *   RSS key for the Rx hash queue.
1955  * @param rss_key_len
1956  *   RSS key length.
1957  * @param hash_fields
1958  *   Verbs protocol hash field to make the RSS on.
1959  * @param queues
1960  *   Queues entering the hash queue. When hash_fields is empty, only the
1961  *   first queue index is used for the indirection table.
1962  * @param queues_n
1963  *   Number of queues.
1964  * @param tunnel
1965  *   Tunnel type.
1966  *
1967  * @return
1968  *   The hash Rx queue index on success, 0 otherwise and rte_errno is set.
1969  */
1970 uint32_t
1971 mlx5_hrxq_new(struct rte_eth_dev *dev,
1972 	      const uint8_t *rss_key, uint32_t rss_key_len,
1973 	      uint64_t hash_fields,
1974 	      const uint16_t *queues, uint32_t queues_n,
1975 	      int tunnel __rte_unused)
1976 {
1977 	struct mlx5_priv *priv = dev->data->dev_private;
1978 	struct mlx5_hrxq *hrxq = NULL;
1979 	uint32_t hrxq_idx = 0;
1980 	struct mlx5_ind_table_obj *ind_tbl;
1981 	int ret;
1982 
1983 	queues_n = hash_fields ? queues_n : 1;
1984 	ind_tbl = mlx5_ind_table_obj_get(dev, queues, queues_n);
1985 	if (!ind_tbl)
1986 		ind_tbl = mlx5_ind_table_obj_new(dev, queues, queues_n);
1987 	if (!ind_tbl) {
1988 		rte_errno = ENOMEM;
1989 		return 0;
1990 	}
1991 	hrxq = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_HRXQ], &hrxq_idx);
1992 	if (!hrxq)
1993 		goto error;
1994 	hrxq->ind_table = ind_tbl;
1995 	hrxq->rss_key_len = rss_key_len;
1996 	hrxq->hash_fields = hash_fields;
1997 	memcpy(hrxq->rss_key, rss_key, rss_key_len);
1998 	ret = priv->obj_ops.hrxq_new(dev, hrxq, tunnel);
1999 	if (ret < 0) {
2000 		rte_errno = errno;
2001 		goto error;
2002 	}
2003 	rte_atomic32_inc(&hrxq->refcnt);
2004 	ILIST_INSERT(priv->sh->ipool[MLX5_IPOOL_HRXQ], &priv->hrxqs, hrxq_idx,
2005 		     hrxq, next);
2006 	return hrxq_idx;
2007 error:
2008 	ret = rte_errno; /* Save rte_errno before cleanup. */
2009 	mlx5_ind_table_obj_release(dev, ind_tbl);
2010 	if (hrxq)
2011 		mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_HRXQ], hrxq_idx);
2012 	rte_errno = ret; /* Restore rte_errno. */
2013 	return 0;
2014 }
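
/*
 * Editorial sketch, not part of the driver: the get-or-create pattern a
 * caller such as the flow engine would follow. An existing hash Rx queue
 * matching the key, hash fields and indirection table is reused; otherwise
 * a new one is created. A zero return means failure with rte_errno set.
 * The guard macro and function name are hypothetical.
 */
#ifdef MLX5_EXAMPLE_SNIPPETS /* Illustrative only. */
static uint32_t
mlx5_example_hrxq_prepare(struct rte_eth_dev *dev,
			  const uint8_t *rss_key, uint32_t rss_key_len,
			  uint64_t hash_fields,
			  const uint16_t *queues, uint32_t queues_n)
{
	uint32_t hrxq_idx;

	hrxq_idx = mlx5_hrxq_get(dev, rss_key, rss_key_len, hash_fields,
				 queues, queues_n);
	if (!hrxq_idx)
		hrxq_idx = mlx5_hrxq_new(dev, rss_key, rss_key_len,
					 hash_fields, queues, queues_n, 0);
	return hrxq_idx;
}
#endif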
2015 
2016 /**
2017  * Create a drop Rx Hash queue.
2018  *
2019  * @param dev
2020  *   Pointer to Ethernet device.
2021  *
2022  * @return
2023  *   The Verbs/DevX object initialized, NULL otherwise and rte_errno is set.
2024  */
2025 struct mlx5_hrxq *
2026 mlx5_drop_action_create(struct rte_eth_dev *dev)
2027 {
2028 	struct mlx5_priv *priv = dev->data->dev_private;
2029 	struct mlx5_hrxq *hrxq = NULL;
2030 	int ret;
2031 
2032 	if (priv->drop_queue.hrxq) {
2033 		rte_atomic32_inc(&priv->drop_queue.hrxq->refcnt);
2034 		return priv->drop_queue.hrxq;
2035 	}
2036 	hrxq = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*hrxq), 0, SOCKET_ID_ANY);
2037 	if (!hrxq) {
2038 		DRV_LOG(WARNING,
2039 			"Port %u cannot allocate memory for drop queue.",
2040 			dev->data->port_id);
2041 		rte_errno = ENOMEM;
2042 		goto error;
2043 	}
2044 	priv->drop_queue.hrxq = hrxq;
2045 	hrxq->ind_table = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*hrxq->ind_table),
2046 				      0, SOCKET_ID_ANY);
2047 	if (!hrxq->ind_table) {
2048 		rte_errno = ENOMEM;
2049 		goto error;
2050 	}
2051 	ret = priv->obj_ops.drop_action_create(dev);
2052 	if (ret < 0)
2053 		goto error;
2054 	rte_atomic32_set(&hrxq->refcnt, 1);
2055 	return hrxq;
2056 error:
2057 	if (hrxq) {
2058 		if (hrxq->ind_table)
2059 			mlx5_free(hrxq->ind_table);
2060 		priv->drop_queue.hrxq = NULL;
2061 		mlx5_free(hrxq);
2062 	}
2063 	return NULL;
2064 }
2065 
2066 /**
2067  * Release a drop hash Rx queue.
2068  *
2069  * @param dev
2070  *   Pointer to Ethernet device.
2071  */
2072 void
2073 mlx5_drop_action_destroy(struct rte_eth_dev *dev)
2074 {
2075 	struct mlx5_priv *priv = dev->data->dev_private;
2076 	struct mlx5_hrxq *hrxq = priv->drop_queue.hrxq;
2077 
2078 	if (rte_atomic32_dec_and_test(&hrxq->refcnt)) {
2079 		priv->obj_ops.drop_action_destroy(dev);
2080 		mlx5_free(priv->drop_queue.rxq);
2081 		mlx5_free(hrxq->ind_table);
2082 		mlx5_free(hrxq);
2083 		priv->drop_queue.rxq = NULL;
2084 		priv->drop_queue.hrxq = NULL;
2085 	}
2086 }
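
/*
 * Editorial sketch, not part of the driver: the drop queue is reference
 * counted, so every successful mlx5_drop_action_create() must be paired with
 * one mlx5_drop_action_destroy(); the underlying objects are only released
 * when the last user is gone. The guard macro and function name are
 * hypothetical.
 */
#ifdef MLX5_EXAMPLE_SNIPPETS /* Illustrative only. */
static int
mlx5_example_use_drop(struct rte_eth_dev *dev)
{
	struct mlx5_hrxq *drop = mlx5_drop_action_create(dev);

	if (drop == NULL)
		return -rte_errno;
	/* ... reference the drop queue from a flow rule ... */
	mlx5_drop_action_destroy(dev);
	return 0;
}
#endif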
2087 
2088 /**
2089  * Verify the hash Rx queue list is empty.
2090  *
2091  * @param dev
2092  *   Pointer to Ethernet device.
2093  *
2094  * @return
2095  *   The number of objects not released.
2096  */
2097 int
2098 mlx5_hrxq_verify(struct rte_eth_dev *dev)
2099 {
2100 	struct mlx5_priv *priv = dev->data->dev_private;
2101 	struct mlx5_hrxq *hrxq;
2102 	uint32_t idx;
2103 	int ret = 0;
2104 
2105 	ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_HRXQ], priv->hrxqs, idx,
2106 		      hrxq, next) {
2107 		DRV_LOG(DEBUG,
2108 			"port %u hash Rx queue %p still referenced",
2109 			dev->data->port_id, (void *)hrxq);
2110 		++ret;
2111 	}
2112 	return ret;
2113 }
2114 
2115 /**
2116  * Set the Rx queue timestamp conversion parameters.
2117  *
2118  * @param[in] dev
2119  *   Pointer to the Ethernet device structure.
2120  */
2121 void
2122 mlx5_rxq_timestamp_set(struct rte_eth_dev *dev)
2123 {
2124 	struct mlx5_priv *priv = dev->data->dev_private;
2125 	struct mlx5_dev_ctx_shared *sh = priv->sh;
2126 	struct mlx5_rxq_data *data;
2127 	unsigned int i;
2128 
2129 	for (i = 0; i != priv->rxqs_n; ++i) {
2130 		if (!(*priv->rxqs)[i])
2131 			continue;
2132 		data = (*priv->rxqs)[i];
2133 		data->sh = sh;
2134 		data->rt_timestamp = priv->config.rt_timestamp;
2135 	}
2136 }
2137