xref: /dpdk/drivers/net/mlx5/linux/mlx5_verbs.c (revision b53d106d34b5c638f5a2cbdfee0da5bd42d4383f)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2020 Mellanox Technologies, Ltd
3  */
4 
5 #include <stddef.h>
6 #include <errno.h>
7 #include <string.h>
8 #include <stdint.h>
9 #include <unistd.h>
10 #include <inttypes.h>
11 #include <sys/queue.h>
12 
13 #include "mlx5_autoconf.h"
14 
15 #include <rte_mbuf.h>
16 #include <rte_malloc.h>
17 #include <ethdev_driver.h>
18 #include <rte_common.h>
19 #include <rte_eal_paging.h>
20 
21 #include <mlx5_glue.h>
22 #include <mlx5_common.h>
23 #include <mlx5_common_mr.h>
24 #include <mlx5_verbs.h>
25 #include <mlx5_rx.h>
26 #include <mlx5_tx.h>
27 #include <mlx5_utils.h>
28 #include <mlx5_malloc.h>
29 
30 /**
31  * Modify Rx WQ vlan stripping offload
32  *
33  * @param rxq
34  *   Rx queue.
35  *
36  * @return 0 on success, non-0 otherwise
37  */
38 static int
39 mlx5_rxq_obj_modify_wq_vlan_strip(struct mlx5_rxq_priv *rxq, int on)
40 {
41 	uint16_t vlan_offloads =
42 		(on ? IBV_WQ_FLAGS_CVLAN_STRIPPING : 0) |
43 		0;
44 	struct ibv_wq_attr mod;
45 	mod = (struct ibv_wq_attr){
46 		.attr_mask = IBV_WQ_ATTR_FLAGS,
47 		.flags_mask = IBV_WQ_FLAGS_CVLAN_STRIPPING,
48 		.flags = vlan_offloads,
49 	};
50 
51 	return mlx5_glue->modify_wq(rxq->ctrl->obj->wq, &mod);
52 }
53 
54 /**
55  * Modifies the attributes for the specified WQ.
56  *
57  * @param rxq
58  *   Verbs Rx queue.
59  * @param type
60  *   Type of change queue state.
61  *
62  * @return
63  *   0 on success, a negative errno value otherwise and rte_errno is set.
64  */
65 static int
66 mlx5_ibv_modify_wq(struct mlx5_rxq_priv *rxq, uint8_t type)
67 {
68 	struct ibv_wq_attr mod = {
69 		.attr_mask = IBV_WQ_ATTR_STATE,
70 		.wq_state = (enum ibv_wq_state)type,
71 	};
72 
73 	return mlx5_glue->modify_wq(rxq->ctrl->obj->wq, &mod);
74 }
75 
76 /**
77  * Modify QP using Verbs API.
78  *
79  * @param txq_obj
80  *   Verbs Tx queue object.
81  * @param type
82  *   Type of change queue state.
83  * @param dev_port
84  *   IB device port number.
85  *
86  * @return
87  *   0 on success, a negative errno value otherwise and rte_errno is set.
88  */
89 static int
90 mlx5_ibv_modify_qp(struct mlx5_txq_obj *obj, enum mlx5_txq_modify_type type,
91 		   uint8_t dev_port)
92 {
93 	struct ibv_qp_attr mod = {
94 		.qp_state = IBV_QPS_RESET,
95 		.port_num = dev_port,
96 	};
97 	int attr_mask = (IBV_QP_STATE | IBV_QP_PORT);
98 	int ret;
99 
100 	if (type != MLX5_TXQ_MOD_RST2RDY) {
101 		ret = mlx5_glue->modify_qp(obj->qp, &mod, IBV_QP_STATE);
102 		if (ret) {
103 			DRV_LOG(ERR, "Cannot change Tx QP state to RESET %s",
104 				strerror(errno));
105 			rte_errno = errno;
106 			return ret;
107 		}
108 		if (type == MLX5_TXQ_MOD_RDY2RST)
109 			return 0;
110 	}
111 	if (type == MLX5_TXQ_MOD_ERR2RDY)
112 		attr_mask = IBV_QP_STATE;
113 	mod.qp_state = IBV_QPS_INIT;
114 	ret = mlx5_glue->modify_qp(obj->qp, &mod, attr_mask);
115 	if (ret) {
116 		DRV_LOG(ERR, "Cannot change Tx QP state to INIT %s",
117 			strerror(errno));
118 		rte_errno = errno;
119 		return ret;
120 	}
121 	mod.qp_state = IBV_QPS_RTR;
122 	ret = mlx5_glue->modify_qp(obj->qp, &mod, IBV_QP_STATE);
123 	if (ret) {
124 		DRV_LOG(ERR, "Cannot change Tx QP state to RTR %s",
125 			strerror(errno));
126 		rte_errno = errno;
127 		return ret;
128 	}
129 	mod.qp_state = IBV_QPS_RTS;
130 	ret = mlx5_glue->modify_qp(obj->qp, &mod, IBV_QP_STATE);
131 	if (ret) {
132 		DRV_LOG(ERR, "Cannot change Tx QP state to RTS %s",
133 			strerror(errno));
134 		rte_errno = errno;
135 		return ret;
136 	}
137 	return 0;
138 }
139 
140 /**
141  * Create a CQ Verbs object.
142  *
143  * @param rxq
144  *   Pointer to Rx queue.
145  *
146  * @return
147  *   The Verbs CQ object initialized, NULL otherwise and rte_errno is set.
148  */
149 static struct ibv_cq *
150 mlx5_rxq_ibv_cq_create(struct mlx5_rxq_priv *rxq)
151 {
152 	struct mlx5_priv *priv = rxq->priv;
153 	struct mlx5_rxq_ctrl *rxq_ctrl = rxq->ctrl;
154 	struct mlx5_rxq_data *rxq_data = &rxq_ctrl->rxq;
155 	struct mlx5_rxq_obj *rxq_obj = rxq_ctrl->obj;
156 	unsigned int cqe_n = mlx5_rxq_cqe_num(rxq_data);
157 	struct {
158 		struct ibv_cq_init_attr_ex ibv;
159 		struct mlx5dv_cq_init_attr mlx5;
160 	} cq_attr;
161 
162 	cq_attr.ibv = (struct ibv_cq_init_attr_ex){
163 		.cqe = cqe_n,
164 		.channel = rxq_obj->ibv_channel,
165 		.comp_mask = 0,
166 	};
167 	cq_attr.mlx5 = (struct mlx5dv_cq_init_attr){
168 		.comp_mask = 0,
169 	};
170 	if (priv->config.cqe_comp && !rxq_data->hw_timestamp) {
171 		cq_attr.mlx5.comp_mask |=
172 				MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;
173 		rxq_data->byte_mask = UINT32_MAX;
174 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
175 		if (mlx5_rxq_mprq_enabled(rxq_data)) {
176 			cq_attr.mlx5.cqe_comp_res_format =
177 					MLX5DV_CQE_RES_FORMAT_CSUM_STRIDX;
178 			rxq_data->mcqe_format =
179 					MLX5_CQE_RESP_FORMAT_CSUM_STRIDX;
180 		} else {
181 			cq_attr.mlx5.cqe_comp_res_format =
182 					MLX5DV_CQE_RES_FORMAT_HASH;
183 			rxq_data->mcqe_format =
184 					MLX5_CQE_RESP_FORMAT_HASH;
185 		}
186 #else
187 		cq_attr.mlx5.cqe_comp_res_format = MLX5DV_CQE_RES_FORMAT_HASH;
188 		rxq_data->mcqe_format = MLX5_CQE_RESP_FORMAT_HASH;
189 #endif
190 		/*
191 		 * For vectorized Rx, it must not be doubled in order to
192 		 * make cq_ci and rq_ci aligned.
193 		 */
194 		if (mlx5_rxq_check_vec_support(rxq_data) < 0)
195 			cq_attr.ibv.cqe *= 2;
196 	} else if (priv->config.cqe_comp && rxq_data->hw_timestamp) {
197 		DRV_LOG(DEBUG,
198 			"Port %u Rx CQE compression is disabled for HW"
199 			" timestamp.",
200 			priv->dev_data->port_id);
201 	}
202 #ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD
203 	if (RTE_CACHE_LINE_SIZE == 128) {
204 		cq_attr.mlx5.comp_mask |= MLX5DV_CQ_INIT_ATTR_MASK_FLAGS;
205 		cq_attr.mlx5.flags |= MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD;
206 	}
207 #endif
208 	return mlx5_glue->cq_ex_to_cq(mlx5_glue->dv_create_cq
209 							   (priv->sh->cdev->ctx,
210 							    &cq_attr.ibv,
211 							    &cq_attr.mlx5));
212 }
213 
214 /**
215  * Create a WQ Verbs object.
216  *
217  * @param rxq
218  *   Pointer to Rx queue.
219  *
220  * @return
221  *   The Verbs WQ object initialized, NULL otherwise and rte_errno is set.
222  */
223 static struct ibv_wq *
224 mlx5_rxq_ibv_wq_create(struct mlx5_rxq_priv *rxq)
225 {
226 	struct mlx5_priv *priv = rxq->priv;
227 	struct mlx5_rxq_ctrl *rxq_ctrl = rxq->ctrl;
228 	struct mlx5_rxq_data *rxq_data = &rxq_ctrl->rxq;
229 	struct mlx5_rxq_obj *rxq_obj = rxq_ctrl->obj;
230 	unsigned int wqe_n = 1 << rxq_data->elts_n;
231 	struct {
232 		struct ibv_wq_init_attr ibv;
233 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
234 		struct mlx5dv_wq_init_attr mlx5;
235 #endif
236 	} wq_attr;
237 
238 	wq_attr.ibv = (struct ibv_wq_init_attr){
239 		.wq_context = NULL, /* Could be useful in the future. */
240 		.wq_type = IBV_WQT_RQ,
241 		/* Max number of outstanding WRs. */
242 		.max_wr = wqe_n >> rxq_data->sges_n,
243 		/* Max number of scatter/gather elements in a WR. */
244 		.max_sge = 1 << rxq_data->sges_n,
245 		.pd = priv->sh->cdev->pd,
246 		.cq = rxq_obj->ibv_cq,
247 		.comp_mask = IBV_WQ_FLAGS_CVLAN_STRIPPING | 0,
248 		.create_flags = (rxq_data->vlan_strip ?
249 				 IBV_WQ_FLAGS_CVLAN_STRIPPING : 0),
250 	};
251 	/* By default, FCS (CRC) is stripped by hardware. */
252 	if (rxq_data->crc_present) {
253 		wq_attr.ibv.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
254 		wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
255 	}
256 	if (priv->config.hw_padding) {
257 #if defined(HAVE_IBV_WQ_FLAG_RX_END_PADDING)
258 		wq_attr.ibv.create_flags |= IBV_WQ_FLAG_RX_END_PADDING;
259 		wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
260 #elif defined(HAVE_IBV_WQ_FLAGS_PCI_WRITE_END_PADDING)
261 		wq_attr.ibv.create_flags |= IBV_WQ_FLAGS_PCI_WRITE_END_PADDING;
262 		wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
263 #endif
264 	}
265 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
266 	wq_attr.mlx5 = (struct mlx5dv_wq_init_attr){
267 		.comp_mask = 0,
268 	};
269 	if (mlx5_rxq_mprq_enabled(rxq_data)) {
270 		struct mlx5dv_striding_rq_init_attr *mprq_attr =
271 						&wq_attr.mlx5.striding_rq_attrs;
272 
273 		wq_attr.mlx5.comp_mask |= MLX5DV_WQ_INIT_ATTR_MASK_STRIDING_RQ;
274 		*mprq_attr = (struct mlx5dv_striding_rq_init_attr){
275 			.single_stride_log_num_of_bytes = rxq_data->strd_sz_n,
276 			.single_wqe_log_num_of_strides = rxq_data->strd_num_n,
277 			.two_byte_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT,
278 		};
279 	}
280 	rxq_obj->wq = mlx5_glue->dv_create_wq(priv->sh->cdev->ctx, &wq_attr.ibv,
281 					      &wq_attr.mlx5);
282 #else
283 	rxq_obj->wq = mlx5_glue->create_wq(priv->sh->cdev->ctx, &wq_attr.ibv);
284 #endif
285 	if (rxq_obj->wq) {
286 		/*
287 		 * Make sure number of WRs*SGEs match expectations since a queue
288 		 * cannot allocate more than "desc" buffers.
289 		 */
290 		if (wq_attr.ibv.max_wr != (wqe_n >> rxq_data->sges_n) ||
291 		    wq_attr.ibv.max_sge != (1u << rxq_data->sges_n)) {
292 			DRV_LOG(ERR,
293 				"Port %u Rx queue %u requested %u*%u but got"
294 				" %u*%u WRs*SGEs.",
295 				priv->dev_data->port_id, rxq->idx,
296 				wqe_n >> rxq_data->sges_n,
297 				(1 << rxq_data->sges_n),
298 				wq_attr.ibv.max_wr, wq_attr.ibv.max_sge);
299 			claim_zero(mlx5_glue->destroy_wq(rxq_obj->wq));
300 			rxq_obj->wq = NULL;
301 			rte_errno = EINVAL;
302 		}
303 	}
304 	return rxq_obj->wq;
305 }
306 
307 /**
308  * Create the Rx queue Verbs object.
309  *
310  * @param rxq
311  *   Pointer to Rx queue.
312  *
313  * @return
314  *   0 on success, a negative errno value otherwise and rte_errno is set.
315  */
316 static int
317 mlx5_rxq_ibv_obj_new(struct mlx5_rxq_priv *rxq)
318 {
319 	uint16_t idx = rxq->idx;
320 	struct mlx5_priv *priv = rxq->priv;
321 	uint16_t port_id = priv->dev_data->port_id;
322 	struct mlx5_rxq_ctrl *rxq_ctrl = rxq->ctrl;
323 	struct mlx5_rxq_data *rxq_data = &rxq_ctrl->rxq;
324 	struct mlx5_rxq_obj *tmpl = rxq_ctrl->obj;
325 	struct mlx5dv_cq cq_info;
326 	struct mlx5dv_rwq rwq;
327 	int ret = 0;
328 	struct mlx5dv_obj obj;
329 
330 	MLX5_ASSERT(rxq_data);
331 	MLX5_ASSERT(tmpl);
332 	tmpl->rxq_ctrl = rxq_ctrl;
333 	if (rxq_ctrl->irq) {
334 		tmpl->ibv_channel =
335 			mlx5_glue->create_comp_channel(priv->sh->cdev->ctx);
336 		if (!tmpl->ibv_channel) {
337 			DRV_LOG(ERR, "Port %u: comp channel creation failure.",
338 				port_id);
339 			rte_errno = ENOMEM;
340 			goto error;
341 		}
342 		tmpl->fd = ((struct ibv_comp_channel *)(tmpl->ibv_channel))->fd;
343 	}
344 	/* Create CQ using Verbs API. */
345 	tmpl->ibv_cq = mlx5_rxq_ibv_cq_create(rxq);
346 	if (!tmpl->ibv_cq) {
347 		DRV_LOG(ERR, "Port %u Rx queue %u CQ creation failure.",
348 			port_id, idx);
349 		rte_errno = ENOMEM;
350 		goto error;
351 	}
352 	obj.cq.in = tmpl->ibv_cq;
353 	obj.cq.out = &cq_info;
354 	ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ);
355 	if (ret) {
356 		rte_errno = ret;
357 		goto error;
358 	}
359 	if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
360 		DRV_LOG(ERR,
361 			"Port %u wrong MLX5_CQE_SIZE environment "
362 			"variable value: it should be set to %u.",
363 			port_id, RTE_CACHE_LINE_SIZE);
364 		rte_errno = EINVAL;
365 		goto error;
366 	}
367 	/* Fill the rings. */
368 	rxq_data->cqe_n = log2above(cq_info.cqe_cnt);
369 	rxq_data->cq_db = cq_info.dbrec;
370 	rxq_data->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf;
371 	rxq_data->uar_data.db = RTE_PTR_ADD(cq_info.cq_uar, MLX5_CQ_DOORBELL);
372 #ifndef RTE_ARCH_64
373 	rxq_data->uar_data.sl_p = &priv->sh->uar_lock_cq;
374 #endif
375 	rxq_data->cqn = cq_info.cqn;
376 	/* Create WQ (RQ) using Verbs API. */
377 	tmpl->wq = mlx5_rxq_ibv_wq_create(rxq);
378 	if (!tmpl->wq) {
379 		DRV_LOG(ERR, "Port %u Rx queue %u WQ creation failure.",
380 			port_id, idx);
381 		rte_errno = ENOMEM;
382 		goto error;
383 	}
384 	/* Change queue state to ready. */
385 	ret = mlx5_ibv_modify_wq(rxq, IBV_WQS_RDY);
386 	if (ret) {
387 		DRV_LOG(ERR,
388 			"Port %u Rx queue %u WQ state to IBV_WQS_RDY failed.",
389 			port_id, idx);
390 		rte_errno = ret;
391 		goto error;
392 	}
393 	obj.rwq.in = tmpl->wq;
394 	obj.rwq.out = &rwq;
395 	ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_RWQ);
396 	if (ret) {
397 		rte_errno = ret;
398 		goto error;
399 	}
400 	rxq_data->wqes = rwq.buf;
401 	rxq_data->rq_db = rwq.dbrec;
402 	rxq_data->cq_arm_sn = 0;
403 	mlx5_rxq_initialize(rxq_data);
404 	rxq_data->cq_ci = 0;
405 	priv->dev_data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED;
406 	rxq_ctrl->wqn = ((struct ibv_wq *)(tmpl->wq))->wq_num;
407 	return 0;
408 error:
409 	ret = rte_errno; /* Save rte_errno before cleanup. */
410 	if (tmpl->wq)
411 		claim_zero(mlx5_glue->destroy_wq(tmpl->wq));
412 	if (tmpl->ibv_cq)
413 		claim_zero(mlx5_glue->destroy_cq(tmpl->ibv_cq));
414 	if (tmpl->ibv_channel)
415 		claim_zero(mlx5_glue->destroy_comp_channel(tmpl->ibv_channel));
416 	rte_errno = ret; /* Restore rte_errno. */
417 	return -rte_errno;
418 }
419 
420 /**
421  * Release an Rx verbs queue object.
422  *
423  * @param rxq
424  *   Pointer to Rx queue.
425  */
426 static void
427 mlx5_rxq_ibv_obj_release(struct mlx5_rxq_priv *rxq)
428 {
429 	struct mlx5_rxq_obj *rxq_obj = rxq->ctrl->obj;
430 
431 	if (rxq_obj == NULL || rxq_obj->wq == NULL)
432 		return;
433 	claim_zero(mlx5_glue->destroy_wq(rxq_obj->wq));
434 	rxq_obj->wq = NULL;
435 	MLX5_ASSERT(rxq_obj->ibv_cq);
436 	claim_zero(mlx5_glue->destroy_cq(rxq_obj->ibv_cq));
437 	if (rxq_obj->ibv_channel)
438 		claim_zero(mlx5_glue->destroy_comp_channel
439 							(rxq_obj->ibv_channel));
440 	rxq->ctrl->started = false;
441 }
442 
443 /**
444  * Get event for an Rx verbs queue object.
445  *
446  * @param rxq_obj
447  *   Verbs Rx queue object.
448  *
449  * @return
450  *   0 on success, a negative errno value otherwise and rte_errno is set.
451  */
452 static int
453 mlx5_rx_ibv_get_event(struct mlx5_rxq_obj *rxq_obj)
454 {
455 	struct ibv_cq *ev_cq;
456 	void *ev_ctx;
457 	int ret = mlx5_glue->get_cq_event(rxq_obj->ibv_channel,
458 					  &ev_cq, &ev_ctx);
459 
460 	if (ret < 0 || ev_cq != rxq_obj->ibv_cq)
461 		goto exit;
462 	mlx5_glue->ack_cq_events(rxq_obj->ibv_cq, 1);
463 	return 0;
464 exit:
465 	if (ret < 0)
466 		rte_errno = errno;
467 	else
468 		rte_errno = EINVAL;
469 	return -rte_errno;
470 }
471 
472 /**
473  * Creates a receive work queue as a filed of indirection table.
474  *
475  * @param dev
476  *   Pointer to Ethernet device.
477  * @param log_n
478  *   Log of number of queues in the array.
479  * @param ind_tbl
480  *   Verbs indirection table object.
481  *
482  * @return
483  *   0 on success, a negative errno value otherwise and rte_errno is set.
484  */
485 static int
486 mlx5_ibv_ind_table_new(struct rte_eth_dev *dev, const unsigned int log_n,
487 		       struct mlx5_ind_table_obj *ind_tbl)
488 {
489 	struct mlx5_priv *priv = dev->data->dev_private;
490 	struct ibv_wq *wq[1 << log_n];
491 	unsigned int i, j;
492 
493 	MLX5_ASSERT(ind_tbl);
494 	for (i = 0; i != ind_tbl->queues_n; ++i) {
495 		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev,
496 							 ind_tbl->queues[i]);
497 
498 		wq[i] = rxq->ctrl->obj->wq;
499 	}
500 	MLX5_ASSERT(i > 0);
501 	/* Finalise indirection table. */
502 	for (j = 0; i != (unsigned int)(1 << log_n); ++j, ++i)
503 		wq[i] = wq[j];
504 	ind_tbl->ind_table = mlx5_glue->create_rwq_ind_table
505 					(priv->sh->cdev->ctx,
506 					 &(struct ibv_rwq_ind_table_init_attr){
507 						 .log_ind_tbl_size = log_n,
508 						 .ind_tbl = wq,
509 						 .comp_mask = 0,
510 					 });
511 	if (!ind_tbl->ind_table) {
512 		rte_errno = errno;
513 		return -rte_errno;
514 	}
515 	return 0;
516 }
517 
518 /**
519  * Destroys the specified Indirection Table.
520  *
521  * @param ind_table
522  *   Indirection table to release.
523  */
524 static void
525 mlx5_ibv_ind_table_destroy(struct mlx5_ind_table_obj *ind_tbl)
526 {
527 	claim_zero(mlx5_glue->destroy_rwq_ind_table(ind_tbl->ind_table));
528 }
529 
530 /**
531  * Create an Rx Hash queue.
532  *
533  * @param dev
534  *   Pointer to Ethernet device.
535  * @param hrxq
536  *   Pointer to Rx Hash queue.
537  * @param tunnel
538  *   Tunnel type.
539  *
540  * @return
541  *   0 on success, a negative errno value otherwise and rte_errno is set.
542  */
543 static int
544 mlx5_ibv_hrxq_new(struct rte_eth_dev *dev, struct mlx5_hrxq *hrxq,
545 		  int tunnel __rte_unused)
546 {
547 	struct mlx5_priv *priv = dev->data->dev_private;
548 	struct ibv_qp *qp = NULL;
549 	struct mlx5_ind_table_obj *ind_tbl = hrxq->ind_table;
550 	const uint8_t *rss_key = hrxq->rss_key;
551 	uint64_t hash_fields = hrxq->hash_fields;
552 	int err;
553 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
554 	struct mlx5dv_qp_init_attr qp_init_attr;
555 
556 	memset(&qp_init_attr, 0, sizeof(qp_init_attr));
557 	if (tunnel) {
558 		qp_init_attr.comp_mask =
559 				       MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;
560 		qp_init_attr.create_flags = MLX5DV_QP_CREATE_TUNNEL_OFFLOADS;
561 	}
562 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
563 	if (dev->data->dev_conf.lpbk_mode) {
564 		/* Allow packet sent from NIC loop back w/o source MAC check. */
565 		qp_init_attr.comp_mask |=
566 				MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;
567 		qp_init_attr.create_flags |=
568 				MLX5DV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_UC;
569 	}
570 #endif
571 	qp = mlx5_glue->dv_create_qp
572 			(priv->sh->cdev->ctx,
573 			 &(struct ibv_qp_init_attr_ex){
574 				.qp_type = IBV_QPT_RAW_PACKET,
575 				.comp_mask =
576 					IBV_QP_INIT_ATTR_PD |
577 					IBV_QP_INIT_ATTR_IND_TABLE |
578 					IBV_QP_INIT_ATTR_RX_HASH,
579 				.rx_hash_conf = (struct ibv_rx_hash_conf){
580 					.rx_hash_function =
581 						IBV_RX_HASH_FUNC_TOEPLITZ,
582 					.rx_hash_key_len = hrxq->rss_key_len,
583 					.rx_hash_key =
584 						(void *)(uintptr_t)rss_key,
585 					.rx_hash_fields_mask = hash_fields,
586 				},
587 				.rwq_ind_tbl = ind_tbl->ind_table,
588 				.pd = priv->sh->cdev->pd,
589 			  },
590 			  &qp_init_attr);
591 #else
592 	qp = mlx5_glue->create_qp_ex
593 			(priv->sh->cdev->ctx,
594 			 &(struct ibv_qp_init_attr_ex){
595 				.qp_type = IBV_QPT_RAW_PACKET,
596 				.comp_mask =
597 					IBV_QP_INIT_ATTR_PD |
598 					IBV_QP_INIT_ATTR_IND_TABLE |
599 					IBV_QP_INIT_ATTR_RX_HASH,
600 				.rx_hash_conf = (struct ibv_rx_hash_conf){
601 					.rx_hash_function =
602 						IBV_RX_HASH_FUNC_TOEPLITZ,
603 					.rx_hash_key_len = hrxq->rss_key_len,
604 					.rx_hash_key =
605 						(void *)(uintptr_t)rss_key,
606 					.rx_hash_fields_mask = hash_fields,
607 				},
608 				.rwq_ind_tbl = ind_tbl->ind_table,
609 				.pd = priv->sh->cdev->pd,
610 			 });
611 #endif
612 	if (!qp) {
613 		rte_errno = errno;
614 		goto error;
615 	}
616 	hrxq->qp = qp;
617 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
618 	hrxq->action = mlx5_glue->dv_create_flow_action_dest_ibv_qp(hrxq->qp);
619 	if (!hrxq->action) {
620 		rte_errno = errno;
621 		goto error;
622 	}
623 #endif
624 	return 0;
625 error:
626 	err = rte_errno; /* Save rte_errno before cleanup. */
627 	if (qp)
628 		claim_zero(mlx5_glue->destroy_qp(qp));
629 	rte_errno = err; /* Restore rte_errno. */
630 	return -rte_errno;
631 }
632 
633 /**
634  * Destroy a Verbs queue pair.
635  *
636  * @param hrxq
637  *   Hash Rx queue to release its qp.
638  */
639 static void
640 mlx5_ibv_qp_destroy(struct mlx5_hrxq *hrxq)
641 {
642 	claim_zero(mlx5_glue->destroy_qp(hrxq->qp));
643 }
644 
645 /**
646  * Release a drop Rx queue Verbs object.
647  *
648  * @param dev
649  *   Pointer to Ethernet device.
650  */
651 static void
652 mlx5_rxq_ibv_obj_drop_release(struct rte_eth_dev *dev)
653 {
654 	struct mlx5_priv *priv = dev->data->dev_private;
655 	struct mlx5_rxq_priv *rxq = priv->drop_queue.rxq;
656 	struct mlx5_rxq_obj *rxq_obj;
657 
658 	if (rxq == NULL)
659 		return;
660 	if (rxq->ctrl == NULL)
661 		goto free_priv;
662 	rxq_obj = rxq->ctrl->obj;
663 	if (rxq_obj == NULL)
664 		goto free_ctrl;
665 	if (rxq_obj->wq)
666 		claim_zero(mlx5_glue->destroy_wq(rxq_obj->wq));
667 	if (rxq_obj->ibv_cq)
668 		claim_zero(mlx5_glue->destroy_cq(rxq_obj->ibv_cq));
669 	mlx5_free(rxq_obj);
670 free_ctrl:
671 	mlx5_free(rxq->ctrl);
672 free_priv:
673 	mlx5_free(rxq);
674 	priv->drop_queue.rxq = NULL;
675 }
676 
677 /**
678  * Create a drop Rx queue Verbs object.
679  *
680  * @param dev
681  *   Pointer to Ethernet device.
682  *
683  * @return
684  *   0 on success, a negative errno value otherwise and rte_errno is set.
685  */
686 static int
687 mlx5_rxq_ibv_obj_drop_create(struct rte_eth_dev *dev)
688 {
689 	struct mlx5_priv *priv = dev->data->dev_private;
690 	struct ibv_context *ctx = priv->sh->cdev->ctx;
691 	struct mlx5_rxq_priv *rxq = priv->drop_queue.rxq;
692 	struct mlx5_rxq_ctrl *rxq_ctrl = NULL;
693 	struct mlx5_rxq_obj *rxq_obj = NULL;
694 
695 	if (rxq != NULL)
696 		return 0;
697 	rxq = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*rxq), 0, SOCKET_ID_ANY);
698 	if (rxq == NULL) {
699 		DRV_LOG(DEBUG, "Port %u cannot allocate drop Rx queue memory.",
700 		      dev->data->port_id);
701 		rte_errno = ENOMEM;
702 		return -rte_errno;
703 	}
704 	priv->drop_queue.rxq = rxq;
705 	rxq_ctrl = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*rxq_ctrl), 0,
706 			       SOCKET_ID_ANY);
707 	if (rxq_ctrl == NULL) {
708 		DRV_LOG(DEBUG, "Port %u cannot allocate drop Rx queue control memory.",
709 		      dev->data->port_id);
710 		rte_errno = ENOMEM;
711 		goto error;
712 	}
713 	rxq->ctrl = rxq_ctrl;
714 	rxq_obj = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*rxq_obj), 0,
715 			      SOCKET_ID_ANY);
716 	if (rxq_obj == NULL) {
717 		DRV_LOG(DEBUG, "Port %u cannot allocate drop Rx queue memory.",
718 		      dev->data->port_id);
719 		rte_errno = ENOMEM;
720 		goto error;
721 	}
722 	rxq_ctrl->obj = rxq_obj;
723 	rxq_obj->ibv_cq = mlx5_glue->create_cq(ctx, 1, NULL, NULL, 0);
724 	if (!rxq_obj->ibv_cq) {
725 		DRV_LOG(DEBUG, "Port %u cannot allocate CQ for drop queue.",
726 		      dev->data->port_id);
727 		rte_errno = errno;
728 		goto error;
729 	}
730 	rxq_obj->wq = mlx5_glue->create_wq(ctx, &(struct ibv_wq_init_attr){
731 						    .wq_type = IBV_WQT_RQ,
732 						    .max_wr = 1,
733 						    .max_sge = 1,
734 						    .pd = priv->sh->cdev->pd,
735 						    .cq = rxq_obj->ibv_cq,
736 					      });
737 	if (!rxq_obj->wq) {
738 		DRV_LOG(DEBUG, "Port %u cannot allocate WQ for drop queue.",
739 		      dev->data->port_id);
740 		rte_errno = errno;
741 		goto error;
742 	}
743 	return 0;
744 error:
745 	mlx5_rxq_ibv_obj_drop_release(dev);
746 	return -rte_errno;
747 }
748 
749 /**
750  * Create a Verbs drop action for Rx Hash queue.
751  *
752  * @param dev
753  *   Pointer to Ethernet device.
754  *
755  * @return
756  *   0 on success, a negative errno value otherwise and rte_errno is set.
757  */
758 static int
759 mlx5_ibv_drop_action_create(struct rte_eth_dev *dev)
760 {
761 	struct mlx5_priv *priv = dev->data->dev_private;
762 	struct mlx5_hrxq *hrxq = priv->drop_queue.hrxq;
763 	struct ibv_rwq_ind_table *ind_tbl = NULL;
764 	struct mlx5_rxq_obj *rxq;
765 	int ret;
766 
767 	MLX5_ASSERT(hrxq && hrxq->ind_table);
768 	ret = mlx5_rxq_ibv_obj_drop_create(dev);
769 	if (ret < 0)
770 		goto error;
771 	rxq = priv->drop_queue.rxq->ctrl->obj;
772 	ind_tbl = mlx5_glue->create_rwq_ind_table
773 				(priv->sh->cdev->ctx,
774 				 &(struct ibv_rwq_ind_table_init_attr){
775 					.log_ind_tbl_size = 0,
776 					.ind_tbl = (struct ibv_wq **)&rxq->wq,
777 					.comp_mask = 0,
778 				 });
779 	if (!ind_tbl) {
780 		DRV_LOG(DEBUG, "Port %u"
781 			" cannot allocate indirection table for drop queue.",
782 			dev->data->port_id);
783 		rte_errno = errno;
784 		goto error;
785 	}
786 	hrxq->qp = mlx5_glue->create_qp_ex(priv->sh->cdev->ctx,
787 		 &(struct ibv_qp_init_attr_ex){
788 			.qp_type = IBV_QPT_RAW_PACKET,
789 			.comp_mask = IBV_QP_INIT_ATTR_PD |
790 				     IBV_QP_INIT_ATTR_IND_TABLE |
791 				     IBV_QP_INIT_ATTR_RX_HASH,
792 			.rx_hash_conf = (struct ibv_rx_hash_conf){
793 				.rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
794 				.rx_hash_key_len = MLX5_RSS_HASH_KEY_LEN,
795 				.rx_hash_key = rss_hash_default_key,
796 				.rx_hash_fields_mask = 0,
797 				},
798 			.rwq_ind_tbl = ind_tbl,
799 			.pd = priv->sh->cdev->pd
800 		 });
801 	if (!hrxq->qp) {
802 		DRV_LOG(DEBUG, "Port %u cannot allocate QP for drop queue.",
803 		      dev->data->port_id);
804 		rte_errno = errno;
805 		goto error;
806 	}
807 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
808 	hrxq->action = mlx5_glue->dv_create_flow_action_dest_ibv_qp(hrxq->qp);
809 	if (!hrxq->action) {
810 		rte_errno = errno;
811 		goto error;
812 	}
813 #endif
814 	hrxq->ind_table->ind_table = ind_tbl;
815 	return 0;
816 error:
817 	if (hrxq->qp)
818 		claim_zero(mlx5_glue->destroy_qp(hrxq->qp));
819 	if (ind_tbl)
820 		claim_zero(mlx5_glue->destroy_rwq_ind_table(ind_tbl));
821 	if (priv->drop_queue.rxq)
822 		mlx5_rxq_ibv_obj_drop_release(dev);
823 	return -rte_errno;
824 }
825 
826 /**
827  * Release a drop hash Rx queue.
828  *
829  * @param dev
830  *   Pointer to Ethernet device.
831  */
832 static void
833 mlx5_ibv_drop_action_destroy(struct rte_eth_dev *dev)
834 {
835 	struct mlx5_priv *priv = dev->data->dev_private;
836 	struct mlx5_hrxq *hrxq = priv->drop_queue.hrxq;
837 	struct ibv_rwq_ind_table *ind_tbl = hrxq->ind_table->ind_table;
838 
839 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
840 	claim_zero(mlx5_glue->destroy_flow_action(hrxq->action));
841 #endif
842 	claim_zero(mlx5_glue->destroy_qp(hrxq->qp));
843 	claim_zero(mlx5_glue->destroy_rwq_ind_table(ind_tbl));
844 	mlx5_rxq_ibv_obj_drop_release(dev);
845 }
846 
847 /**
848  * Create a QP Verbs object.
849  *
850  * @param dev
851  *   Pointer to Ethernet device.
852  * @param idx
853  *   Queue index in DPDK Tx queue array.
854  *
855  * @return
856  *   The QP Verbs object, NULL otherwise and rte_errno is set.
857  */
858 static struct ibv_qp *
859 mlx5_txq_ibv_qp_create(struct rte_eth_dev *dev, uint16_t idx)
860 {
861 	struct mlx5_priv *priv = dev->data->dev_private;
862 	struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
863 	struct mlx5_txq_ctrl *txq_ctrl =
864 			container_of(txq_data, struct mlx5_txq_ctrl, txq);
865 	struct ibv_qp *qp_obj = NULL;
866 	struct ibv_qp_init_attr_ex qp_attr = { 0 };
867 	const int desc = 1 << txq_data->elts_n;
868 
869 	MLX5_ASSERT(txq_ctrl->obj->cq);
870 	/* CQ to be associated with the send queue. */
871 	qp_attr.send_cq = txq_ctrl->obj->cq;
872 	/* CQ to be associated with the receive queue. */
873 	qp_attr.recv_cq = txq_ctrl->obj->cq;
874 	/* Max number of outstanding WRs. */
875 	qp_attr.cap.max_send_wr = ((priv->sh->device_attr.max_qp_wr < desc) ?
876 				   priv->sh->device_attr.max_qp_wr : desc);
877 	/*
878 	 * Max number of scatter/gather elements in a WR, must be 1 to prevent
879 	 * libmlx5 from trying to affect must be 1 to prevent libmlx5 from
880 	 * trying to affect too much memory. TX gather is not impacted by the
881 	 * device_attr.max_sge limit and will still work properly.
882 	 */
883 	qp_attr.cap.max_send_sge = 1;
884 	qp_attr.qp_type = IBV_QPT_RAW_PACKET,
885 	/* Do *NOT* enable this, completions events are managed per Tx burst. */
886 	qp_attr.sq_sig_all = 0;
887 	qp_attr.pd = priv->sh->cdev->pd;
888 	qp_attr.comp_mask = IBV_QP_INIT_ATTR_PD;
889 	if (txq_data->inlen_send)
890 		qp_attr.cap.max_inline_data = txq_ctrl->max_inline_data;
891 	if (txq_data->tso_en) {
892 		qp_attr.max_tso_header = txq_ctrl->max_tso_header;
893 		qp_attr.comp_mask |= IBV_QP_INIT_ATTR_MAX_TSO_HEADER;
894 	}
895 	qp_obj = mlx5_glue->create_qp_ex(priv->sh->cdev->ctx, &qp_attr);
896 	if (qp_obj == NULL) {
897 		DRV_LOG(ERR, "Port %u Tx queue %u QP creation failure.",
898 			dev->data->port_id, idx);
899 		rte_errno = errno;
900 	}
901 	return qp_obj;
902 }
903 
904 /**
905  * Initialize Tx UAR registers for primary process.
906  *
907  * @param txq_ctrl
908  *   Pointer to Tx queue control structure.
909  * @param bf_reg
910  *   BlueFlame register from Verbs UAR.
911  */
912 static void
913 mlx5_txq_ibv_uar_init(struct mlx5_txq_ctrl *txq_ctrl, void *bf_reg)
914 {
915 	struct mlx5_priv *priv = txq_ctrl->priv;
916 	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
917 	const size_t page_size = rte_mem_page_size();
918 	struct mlx5_txq_data *txq = &txq_ctrl->txq;
919 	off_t uar_mmap_offset = txq_ctrl->uar_mmap_offset;
920 #ifndef RTE_ARCH_64
921 	unsigned int lock_idx;
922 #endif
923 
924 	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
925 	MLX5_ASSERT(ppriv);
926 	if (page_size == (size_t)-1) {
927 		DRV_LOG(ERR, "Failed to get mem page size");
928 		rte_errno = ENOMEM;
929 	}
930 	txq->db_heu = priv->sh->cdev->config.dbnc == MLX5_TXDB_HEURISTIC;
931 	txq->db_nc = mlx5_db_map_type_get(uar_mmap_offset, page_size);
932 	ppriv->uar_table[txq->idx].db = bf_reg;
933 #ifndef RTE_ARCH_64
934 	/* Assign an UAR lock according to UAR page number. */
935 	lock_idx = (uar_mmap_offset / page_size) & MLX5_UAR_PAGE_NUM_MASK;
936 	ppriv->uar_table[txq->idx].sl_p = &priv->sh->uar_lock[lock_idx];
937 #endif
938 }
939 
940 /**
941  * Create the Tx queue Verbs object.
942  *
943  * @param dev
944  *   Pointer to Ethernet device.
945  * @param idx
946  *   Queue index in DPDK Tx queue array.
947  *
948  * @return
949  *   0 on success, a negative errno value otherwise and rte_errno is set.
950  */
951 int
952 mlx5_txq_ibv_obj_new(struct rte_eth_dev *dev, uint16_t idx)
953 {
954 	struct mlx5_priv *priv = dev->data->dev_private;
955 	struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
956 	struct mlx5_txq_ctrl *txq_ctrl =
957 		container_of(txq_data, struct mlx5_txq_ctrl, txq);
958 	struct mlx5_txq_obj *txq_obj = txq_ctrl->obj;
959 	unsigned int cqe_n;
960 	struct mlx5dv_qp qp;
961 	struct mlx5dv_cq cq_info;
962 	struct mlx5dv_obj obj;
963 	const int desc = 1 << txq_data->elts_n;
964 	int ret = 0;
965 
966 	MLX5_ASSERT(txq_data);
967 	MLX5_ASSERT(txq_obj);
968 	txq_obj->txq_ctrl = txq_ctrl;
969 	if (mlx5_getenv_int("MLX5_ENABLE_CQE_COMPRESSION")) {
970 		DRV_LOG(ERR, "Port %u MLX5_ENABLE_CQE_COMPRESSION "
971 			"must never be set.", dev->data->port_id);
972 		rte_errno = EINVAL;
973 		return -rte_errno;
974 	}
975 	cqe_n = desc / MLX5_TX_COMP_THRESH +
976 		1 + MLX5_TX_COMP_THRESH_INLINE_DIV;
977 	txq_obj->cq = mlx5_glue->create_cq(priv->sh->cdev->ctx, cqe_n,
978 					   NULL, NULL, 0);
979 	if (txq_obj->cq == NULL) {
980 		DRV_LOG(ERR, "Port %u Tx queue %u CQ creation failure.",
981 			dev->data->port_id, idx);
982 		rte_errno = errno;
983 		goto error;
984 	}
985 	txq_obj->qp = mlx5_txq_ibv_qp_create(dev, idx);
986 	if (txq_obj->qp == NULL) {
987 		rte_errno = errno;
988 		goto error;
989 	}
990 	ret = mlx5_ibv_modify_qp(txq_obj, MLX5_TXQ_MOD_RST2RDY,
991 				 (uint8_t)priv->dev_port);
992 	if (ret) {
993 		DRV_LOG(ERR, "Port %u Tx queue %u QP state modifying failed.",
994 			dev->data->port_id, idx);
995 		rte_errno = errno;
996 		goto error;
997 	}
998 	qp.comp_mask = MLX5DV_QP_MASK_UAR_MMAP_OFFSET;
999 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
1000 	/* If using DevX, need additional mask to read tisn value. */
1001 	if (priv->sh->devx && !priv->sh->tdn)
1002 		qp.comp_mask |= MLX5DV_QP_MASK_RAW_QP_HANDLES;
1003 #endif
1004 	obj.cq.in = txq_obj->cq;
1005 	obj.cq.out = &cq_info;
1006 	obj.qp.in = txq_obj->qp;
1007 	obj.qp.out = &qp;
1008 	ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_QP);
1009 	if (ret != 0) {
1010 		rte_errno = errno;
1011 		goto error;
1012 	}
1013 	if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
1014 		DRV_LOG(ERR,
1015 			"Port %u wrong MLX5_CQE_SIZE environment variable"
1016 			" value: it should be set to %u.",
1017 			dev->data->port_id, RTE_CACHE_LINE_SIZE);
1018 		rte_errno = EINVAL;
1019 		goto error;
1020 	}
1021 	txq_data->cqe_n = log2above(cq_info.cqe_cnt);
1022 	txq_data->cqe_s = 1 << txq_data->cqe_n;
1023 	txq_data->cqe_m = txq_data->cqe_s - 1;
1024 	txq_data->qp_num_8s = ((struct ibv_qp *)txq_obj->qp)->qp_num << 8;
1025 	txq_data->wqes = qp.sq.buf;
1026 	txq_data->wqe_n = log2above(qp.sq.wqe_cnt);
1027 	txq_data->wqe_s = 1 << txq_data->wqe_n;
1028 	txq_data->wqe_m = txq_data->wqe_s - 1;
1029 	txq_data->wqes_end = txq_data->wqes + txq_data->wqe_s;
1030 	txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR];
1031 	txq_data->cq_db = cq_info.dbrec;
1032 	txq_data->cqes = (volatile struct mlx5_cqe *)cq_info.buf;
1033 	txq_data->cq_ci = 0;
1034 	txq_data->cq_pi = 0;
1035 	txq_data->wqe_ci = 0;
1036 	txq_data->wqe_pi = 0;
1037 	txq_data->wqe_comp = 0;
1038 	txq_data->wqe_thres = txq_data->wqe_s / MLX5_TX_COMP_THRESH_INLINE_DIV;
1039 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
1040 	/*
1041 	 * If using DevX need to query and store TIS transport domain value.
1042 	 * This is done once per port.
1043 	 * Will use this value on Rx, when creating matching TIR.
1044 	 */
1045 	if (priv->sh->devx && !priv->sh->tdn) {
1046 		ret = mlx5_devx_cmd_qp_query_tis_td(txq_obj->qp, qp.tisn,
1047 						    &priv->sh->tdn);
1048 		if (ret) {
1049 			DRV_LOG(ERR, "Fail to query port %u Tx queue %u QP TIS "
1050 				"transport domain.", dev->data->port_id, idx);
1051 			rte_errno = EINVAL;
1052 			goto error;
1053 		} else {
1054 			DRV_LOG(DEBUG, "Port %u Tx queue %u TIS number %d "
1055 				"transport domain %d.", dev->data->port_id,
1056 				idx, qp.tisn, priv->sh->tdn);
1057 		}
1058 	}
1059 #endif
1060 	if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) {
1061 		txq_ctrl->uar_mmap_offset = qp.uar_mmap_offset;
1062 		DRV_LOG(DEBUG, "Port %u: uar_mmap_offset 0x%" PRIx64 ".",
1063 			dev->data->port_id, txq_ctrl->uar_mmap_offset);
1064 	} else {
1065 		DRV_LOG(ERR,
1066 			"Port %u failed to retrieve UAR info, invalid libmlx5.so",
1067 			dev->data->port_id);
1068 		rte_errno = EINVAL;
1069 		goto error;
1070 	}
1071 	mlx5_txq_ibv_uar_init(txq_ctrl, qp.bf.reg);
1072 	dev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED;
1073 	return 0;
1074 error:
1075 	ret = rte_errno; /* Save rte_errno before cleanup. */
1076 	if (txq_obj->cq)
1077 		claim_zero(mlx5_glue->destroy_cq(txq_obj->cq));
1078 	if (txq_obj->qp)
1079 		claim_zero(mlx5_glue->destroy_qp(txq_obj->qp));
1080 	rte_errno = ret; /* Restore rte_errno. */
1081 	return -rte_errno;
1082 }
1083 
/**
 * Create the dummy QP with minimal resources for loopback.
 *
 * Nothing is created unless loopback mode is set in the device
 * configuration. The CQ/QP pair is stored in the shared device context and
 * is reference counted: only the caller that moves the counter from zero
 * allocates the objects, any other caller just takes a reference.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_rxq_ibv_obj_dummy_lb_create(struct rte_eth_dev *dev)
{
#if defined(HAVE_IBV_DEVICE_TUNNEL_SUPPORT) && defined(HAVE_IBV_FLOW_DV_SUPPORT)
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	struct ibv_context *ctx = sh->cdev->ctx;
	struct mlx5dv_qp_init_attr qp_init_attr = {0};
	struct {
		struct ibv_cq_init_attr_ex ibv;
		struct mlx5dv_cq_init_attr mlx5;
	} cq_attr = {{0}};

	if (dev->data->dev_conf.lpbk_mode) {
		/* Allow packet sent from NIC loop back w/o source MAC check. */
		qp_init_attr.comp_mask |=
				MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;
		qp_init_attr.create_flags |=
				MLX5DV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_UC;
	} else {
		return 0;
	}
	/* Only need to check refcnt, 0 after "sh" is allocated. */
	if (!!(__atomic_fetch_add(&sh->self_lb.refcnt, 1, __ATOMIC_RELAXED))) {
		/* The shared objects already exist, just mark the port. */
		MLX5_ASSERT(sh->self_lb.ibv_cq && sh->self_lb.qp);
		priv->lb_used = 1;
		return 0;
	}
	cq_attr.ibv = (struct ibv_cq_init_attr_ex){
		.cqe = 1,
		.channel = NULL,
		.comp_mask = 0,
	};
	cq_attr.mlx5 = (struct mlx5dv_cq_init_attr){
		.comp_mask = 0,
	};
	/* Only CQ is needed, no WQ(RQ) is required in this case. */
	sh->self_lb.ibv_cq = mlx5_glue->cq_ex_to_cq(mlx5_glue->dv_create_cq(ctx,
							&cq_attr.ibv,
							&cq_attr.mlx5));
	if (!sh->self_lb.ibv_cq) {
		/* Latch errno first: logging is allowed to overwrite it. */
		rte_errno = errno;
		DRV_LOG(ERR, "Port %u cannot allocate CQ for loopback.",
			dev->data->port_id);
		goto error;
	}
	sh->self_lb.qp = mlx5_glue->dv_create_qp(ctx,
				&(struct ibv_qp_init_attr_ex){
					.qp_type = IBV_QPT_RAW_PACKET,
					.comp_mask = IBV_QP_INIT_ATTR_PD,
					.pd = sh->cdev->pd,
					.send_cq = sh->self_lb.ibv_cq,
					.recv_cq = sh->self_lb.ibv_cq,
					.cap.max_recv_wr = 1,
				},
				&qp_init_attr);
	if (!sh->self_lb.qp) {
		/* Latch errno first: logging is allowed to overwrite it. */
		rte_errno = errno;
		DRV_LOG(DEBUG, "Port %u cannot allocate QP for loopback.",
			dev->data->port_id);
		goto error;
	}
	priv->lb_used = 1;
	return 0;
error:
	/* Only the CQ can exist here: the QP is the last object created. */
	if (sh->self_lb.ibv_cq) {
		claim_zero(mlx5_glue->destroy_cq(sh->self_lb.ibv_cq));
		sh->self_lb.ibv_cq = NULL;
	}
	/* Give back the reference taken above. */
	(void)__atomic_sub_fetch(&sh->self_lb.refcnt, 1, __ATOMIC_RELAXED);
	return -rte_errno;
#else
	RTE_SET_USED(dev);
	return 0;
#endif
}
1169 
/**
 * Release the dummy queue resources for loopback.
 *
 * Drops the port reference on the shared loopback CQ/QP pair and destroys
 * both objects once the reference counter reaches zero. A port whose
 * lb_used flag is clear is left untouched.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_rxq_ibv_obj_dummy_lb_release(struct rte_eth_dev *dev)
{
#if defined(HAVE_IBV_DEVICE_TUNNEL_SUPPORT) && defined(HAVE_IBV_FLOW_DV_SUPPORT)
	struct mlx5_priv *port_priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *shared = port_priv->sh;

	/* This port holds no reference, nothing to give back. */
	if (port_priv->lb_used == 0)
		return;
	MLX5_ASSERT(__atomic_load_n(&shared->self_lb.refcnt,
				    __ATOMIC_RELAXED) != 0);
	if (__atomic_sub_fetch(&shared->self_lb.refcnt, 1,
			       __ATOMIC_RELAXED) == 0) {
		/* Last reference is gone: QP goes first, then its CQ. */
		if (shared->self_lb.qp != NULL) {
			claim_zero(mlx5_glue->destroy_qp(shared->self_lb.qp));
			shared->self_lb.qp = NULL;
		}
		if (shared->self_lb.ibv_cq != NULL) {
			claim_zero(mlx5_glue->destroy_cq
						(shared->self_lb.ibv_cq));
			shared->self_lb.ibv_cq = NULL;
		}
	}
	port_priv->lb_used = 0;
#else
	RTE_SET_USED(dev);
#endif
}
1202 
1203 /**
1204  * Release an Tx verbs queue object.
1205  *
1206  * @param txq_obj
1207  *   Verbs Tx queue object..
1208  */
1209 void
1210 mlx5_txq_ibv_obj_release(struct mlx5_txq_obj *txq_obj)
1211 {
1212 	MLX5_ASSERT(txq_obj);
1213 	claim_zero(mlx5_glue->destroy_qp(txq_obj->qp));
1214 	claim_zero(mlx5_glue->destroy_cq(txq_obj->cq));
1215 }
1216 
1217 struct mlx5_obj_ops ibv_obj_ops = {
1218 	.rxq_obj_modify_vlan_strip = mlx5_rxq_obj_modify_wq_vlan_strip,
1219 	.rxq_obj_new = mlx5_rxq_ibv_obj_new,
1220 	.rxq_event_get = mlx5_rx_ibv_get_event,
1221 	.rxq_obj_modify = mlx5_ibv_modify_wq,
1222 	.rxq_obj_release = mlx5_rxq_ibv_obj_release,
1223 	.ind_table_new = mlx5_ibv_ind_table_new,
1224 	.ind_table_destroy = mlx5_ibv_ind_table_destroy,
1225 	.hrxq_new = mlx5_ibv_hrxq_new,
1226 	.hrxq_destroy = mlx5_ibv_qp_destroy,
1227 	.drop_action_create = mlx5_ibv_drop_action_create,
1228 	.drop_action_destroy = mlx5_ibv_drop_action_destroy,
1229 	.txq_obj_new = mlx5_txq_ibv_obj_new,
1230 	.txq_obj_modify = mlx5_ibv_modify_qp,
1231 	.txq_obj_release = mlx5_txq_ibv_obj_release,
1232 	.lb_dummy_queue_create = NULL,
1233 	.lb_dummy_queue_release = NULL,
1234 };
1235