xref: /dpdk/drivers/net/mlx5/linux/mlx5_verbs.c (revision 4c6d80f1c58e82e976ea91eae431f060e56c3365)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2020 Mellanox Technologies, Ltd
3  */
4 
5 #include <stddef.h>
6 #include <errno.h>
7 #include <string.h>
8 #include <stdint.h>
9 #include <unistd.h>
10 #include <inttypes.h>
11 #include <sys/queue.h>
12 
13 #include "mlx5_autoconf.h"
14 
15 #include <rte_mbuf.h>
16 #include <rte_malloc.h>
17 #include <rte_ethdev_driver.h>
18 #include <rte_common.h>
19 
20 #include <mlx5_glue.h>
21 #include <mlx5_common.h>
22 #include <mlx5_common_mr.h>
23 #include <mlx5_rxtx.h>
24 #include <mlx5_verbs.h>
25 #include <mlx5_utils.h>
26 #include <mlx5_malloc.h>
27 
/**
 * Register a memory region through the common Verbs helper.
 *
 * Thin adapter: the arguments are forwarded unchanged to
 * mlx5_common_verbs_reg_mr() so that the function can be installed as the
 * reg_mr callback of mlx5_verbs_ops.
 *
 * @param[in] pd
 *   Pointer to protection domain context.
 * @param[in] addr
 *   Pointer to memory start address.
 * @param[in] length
 *   Length of the memory to register.
 * @param[out] pmd_mr
 *   PMD MR descriptor handed to the common helper (expected to receive the
 *   lkey, address, length and the pointer to the MR object).
 *
 * @return
 *   The value returned by mlx5_common_verbs_reg_mr(): expected to be 0 on
 *   successful registration, -1 otherwise.
 */
static int
mlx5_reg_mr(void *pd, void *addr, size_t length,
		 struct mlx5_pmd_mr *pmd_mr)
{
	int ret = mlx5_common_verbs_reg_mr(pd, addr, length, pmd_mr);

	return ret;
}
50 
/**
 * Deregister a memory region through the common Verbs helper.
 *
 * Thin adapter around mlx5_common_verbs_dereg_mr(), installed as the
 * dereg_mr callback of mlx5_verbs_ops.
 *
 * @param[in] pmd_mr
 *   PMD MR descriptor of the memory region to deregister.
 */
static void
mlx5_dereg_mr(struct mlx5_pmd_mr *pmd_mr)
{
	/* All the work is delegated to the common code. */
	mlx5_common_verbs_dereg_mr(pmd_mr);
	return;
}
63 
64 /* verbs operations. */
65 const struct mlx5_verbs_ops mlx5_verbs_ops = {
66 	.reg_mr = mlx5_reg_mr,
67 	.dereg_mr = mlx5_dereg_mr,
68 };
69 
70 /**
71  * Modify Rx WQ vlan stripping offload
72  *
73  * @param rxq_obj
74  *   Rx queue object.
75  *
76  * @return 0 on success, non-0 otherwise
77  */
78 static int
79 mlx5_rxq_obj_modify_wq_vlan_strip(struct mlx5_rxq_obj *rxq_obj, int on)
80 {
81 	uint16_t vlan_offloads =
82 		(on ? IBV_WQ_FLAGS_CVLAN_STRIPPING : 0) |
83 		0;
84 	struct ibv_wq_attr mod;
85 	mod = (struct ibv_wq_attr){
86 		.attr_mask = IBV_WQ_ATTR_FLAGS,
87 		.flags_mask = IBV_WQ_FLAGS_CVLAN_STRIPPING,
88 		.flags = vlan_offloads,
89 	};
90 
91 	return mlx5_glue->modify_wq(rxq_obj->wq, &mod);
92 }
93 
94 /**
95  * Modifies the attributes for the specified WQ.
96  *
97  * @param rxq_obj
98  *   Verbs Rx queue object.
99  * @param type
100  *   Type of change queue state.
101  *
102  * @return
103  *   0 on success, a negative errno value otherwise and rte_errno is set.
104  */
105 static int
106 mlx5_ibv_modify_wq(struct mlx5_rxq_obj *rxq_obj, uint8_t type)
107 {
108 	struct ibv_wq_attr mod = {
109 		.attr_mask = IBV_WQ_ATTR_STATE,
110 		.wq_state = (enum ibv_wq_state)type,
111 	};
112 
113 	return mlx5_glue->modify_wq(rxq_obj->wq, &mod);
114 }
115 
116 /**
117  * Modify QP using Verbs API.
118  *
119  * @param txq_obj
120  *   Verbs Tx queue object.
121  * @param type
122  *   Type of change queue state.
123  * @param dev_port
124  *   IB device port number.
125  *
126  * @return
127  *   0 on success, a negative errno value otherwise and rte_errno is set.
128  */
129 static int
130 mlx5_ibv_modify_qp(struct mlx5_txq_obj *obj, enum mlx5_txq_modify_type type,
131 		   uint8_t dev_port)
132 {
133 	struct ibv_qp_attr mod = {
134 		.qp_state = IBV_QPS_RESET,
135 		.port_num = dev_port,
136 	};
137 	int attr_mask = (IBV_QP_STATE | IBV_QP_PORT);
138 	int ret;
139 
140 	if (type != MLX5_TXQ_MOD_RST2RDY) {
141 		ret = mlx5_glue->modify_qp(obj->qp, &mod, IBV_QP_STATE);
142 		if (ret) {
143 			DRV_LOG(ERR, "Cannot change Tx QP state to RESET %s",
144 				strerror(errno));
145 			rte_errno = errno;
146 			return ret;
147 		}
148 		if (type == MLX5_TXQ_MOD_RDY2RST)
149 			return 0;
150 	}
151 	if (type == MLX5_TXQ_MOD_ERR2RDY)
152 		attr_mask = IBV_QP_STATE;
153 	mod.qp_state = IBV_QPS_INIT;
154 	ret = mlx5_glue->modify_qp(obj->qp, &mod, attr_mask);
155 	if (ret) {
156 		DRV_LOG(ERR, "Cannot change Tx QP state to INIT %s",
157 			strerror(errno));
158 		rte_errno = errno;
159 		return ret;
160 	}
161 	mod.qp_state = IBV_QPS_RTR;
162 	ret = mlx5_glue->modify_qp(obj->qp, &mod, IBV_QP_STATE);
163 	if (ret) {
164 		DRV_LOG(ERR, "Cannot change Tx QP state to RTR %s",
165 			strerror(errno));
166 		rte_errno = errno;
167 		return ret;
168 	}
169 	mod.qp_state = IBV_QPS_RTS;
170 	ret = mlx5_glue->modify_qp(obj->qp, &mod, IBV_QP_STATE);
171 	if (ret) {
172 		DRV_LOG(ERR, "Cannot change Tx QP state to RTS %s",
173 			strerror(errno));
174 		rte_errno = errno;
175 		return ret;
176 	}
177 	return 0;
178 }
179 
180 /**
181  * Create a CQ Verbs object.
182  *
183  * @param dev
184  *   Pointer to Ethernet device.
185  * @param idx
186  *   Queue index in DPDK Rx queue array.
187  *
188  * @return
189  *   The Verbs CQ object initialized, NULL otherwise and rte_errno is set.
190  */
191 static struct ibv_cq *
192 mlx5_rxq_ibv_cq_create(struct rte_eth_dev *dev, uint16_t idx)
193 {
194 	struct mlx5_priv *priv = dev->data->dev_private;
195 	struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
196 	struct mlx5_rxq_ctrl *rxq_ctrl =
197 		container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
198 	struct mlx5_rxq_obj *rxq_obj = rxq_ctrl->obj;
199 	unsigned int cqe_n = mlx5_rxq_cqe_num(rxq_data);
200 	struct {
201 		struct ibv_cq_init_attr_ex ibv;
202 		struct mlx5dv_cq_init_attr mlx5;
203 	} cq_attr;
204 
205 	cq_attr.ibv = (struct ibv_cq_init_attr_ex){
206 		.cqe = cqe_n,
207 		.channel = rxq_obj->ibv_channel,
208 		.comp_mask = 0,
209 	};
210 	cq_attr.mlx5 = (struct mlx5dv_cq_init_attr){
211 		.comp_mask = 0,
212 	};
213 	if (priv->config.cqe_comp && !rxq_data->hw_timestamp) {
214 		cq_attr.mlx5.comp_mask |=
215 				MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;
216 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
217 		cq_attr.mlx5.cqe_comp_res_format =
218 				mlx5_rxq_mprq_enabled(rxq_data) ?
219 				MLX5DV_CQE_RES_FORMAT_CSUM_STRIDX :
220 				MLX5DV_CQE_RES_FORMAT_HASH;
221 #else
222 		cq_attr.mlx5.cqe_comp_res_format = MLX5DV_CQE_RES_FORMAT_HASH;
223 #endif
224 		/*
225 		 * For vectorized Rx, it must not be doubled in order to
226 		 * make cq_ci and rq_ci aligned.
227 		 */
228 		if (mlx5_rxq_check_vec_support(rxq_data) < 0)
229 			cq_attr.ibv.cqe *= 2;
230 	} else if (priv->config.cqe_comp && rxq_data->hw_timestamp) {
231 		DRV_LOG(DEBUG,
232 			"Port %u Rx CQE compression is disabled for HW"
233 			" timestamp.",
234 			dev->data->port_id);
235 	}
236 #ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD
237 	if (priv->config.cqe_pad) {
238 		cq_attr.mlx5.comp_mask |= MLX5DV_CQ_INIT_ATTR_MASK_FLAGS;
239 		cq_attr.mlx5.flags |= MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD;
240 	}
241 #endif
242 	return mlx5_glue->cq_ex_to_cq(mlx5_glue->dv_create_cq(priv->sh->ctx,
243 							      &cq_attr.ibv,
244 							      &cq_attr.mlx5));
245 }
246 
247 /**
248  * Create a WQ Verbs object.
249  *
250  * @param dev
251  *   Pointer to Ethernet device.
252  * @param idx
253  *   Queue index in DPDK Rx queue array.
254  *
255  * @return
256  *   The Verbs WQ object initialized, NULL otherwise and rte_errno is set.
257  */
258 static struct ibv_wq *
259 mlx5_rxq_ibv_wq_create(struct rte_eth_dev *dev, uint16_t idx)
260 {
261 	struct mlx5_priv *priv = dev->data->dev_private;
262 	struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
263 	struct mlx5_rxq_ctrl *rxq_ctrl =
264 		container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
265 	struct mlx5_rxq_obj *rxq_obj = rxq_ctrl->obj;
266 	unsigned int wqe_n = 1 << rxq_data->elts_n;
267 	struct {
268 		struct ibv_wq_init_attr ibv;
269 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
270 		struct mlx5dv_wq_init_attr mlx5;
271 #endif
272 	} wq_attr;
273 
274 	wq_attr.ibv = (struct ibv_wq_init_attr){
275 		.wq_context = NULL, /* Could be useful in the future. */
276 		.wq_type = IBV_WQT_RQ,
277 		/* Max number of outstanding WRs. */
278 		.max_wr = wqe_n >> rxq_data->sges_n,
279 		/* Max number of scatter/gather elements in a WR. */
280 		.max_sge = 1 << rxq_data->sges_n,
281 		.pd = priv->sh->pd,
282 		.cq = rxq_obj->ibv_cq,
283 		.comp_mask = IBV_WQ_FLAGS_CVLAN_STRIPPING | 0,
284 		.create_flags = (rxq_data->vlan_strip ?
285 				 IBV_WQ_FLAGS_CVLAN_STRIPPING : 0),
286 	};
287 	/* By default, FCS (CRC) is stripped by hardware. */
288 	if (rxq_data->crc_present) {
289 		wq_attr.ibv.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
290 		wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
291 	}
292 	if (priv->config.hw_padding) {
293 #if defined(HAVE_IBV_WQ_FLAG_RX_END_PADDING)
294 		wq_attr.ibv.create_flags |= IBV_WQ_FLAG_RX_END_PADDING;
295 		wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
296 #elif defined(HAVE_IBV_WQ_FLAGS_PCI_WRITE_END_PADDING)
297 		wq_attr.ibv.create_flags |= IBV_WQ_FLAGS_PCI_WRITE_END_PADDING;
298 		wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
299 #endif
300 	}
301 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
302 	wq_attr.mlx5 = (struct mlx5dv_wq_init_attr){
303 		.comp_mask = 0,
304 	};
305 	if (mlx5_rxq_mprq_enabled(rxq_data)) {
306 		struct mlx5dv_striding_rq_init_attr *mprq_attr =
307 						&wq_attr.mlx5.striding_rq_attrs;
308 
309 		wq_attr.mlx5.comp_mask |= MLX5DV_WQ_INIT_ATTR_MASK_STRIDING_RQ;
310 		*mprq_attr = (struct mlx5dv_striding_rq_init_attr){
311 			.single_stride_log_num_of_bytes = rxq_data->strd_sz_n,
312 			.single_wqe_log_num_of_strides = rxq_data->strd_num_n,
313 			.two_byte_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT,
314 		};
315 	}
316 	rxq_obj->wq = mlx5_glue->dv_create_wq(priv->sh->ctx, &wq_attr.ibv,
317 					      &wq_attr.mlx5);
318 #else
319 	rxq_obj->wq = mlx5_glue->create_wq(priv->sh->ctx, &wq_attr.ibv);
320 #endif
321 	if (rxq_obj->wq) {
322 		/*
323 		 * Make sure number of WRs*SGEs match expectations since a queue
324 		 * cannot allocate more than "desc" buffers.
325 		 */
326 		if (wq_attr.ibv.max_wr != (wqe_n >> rxq_data->sges_n) ||
327 		    wq_attr.ibv.max_sge != (1u << rxq_data->sges_n)) {
328 			DRV_LOG(ERR,
329 				"Port %u Rx queue %u requested %u*%u but got"
330 				" %u*%u WRs*SGEs.",
331 				dev->data->port_id, idx,
332 				wqe_n >> rxq_data->sges_n,
333 				(1 << rxq_data->sges_n),
334 				wq_attr.ibv.max_wr, wq_attr.ibv.max_sge);
335 			claim_zero(mlx5_glue->destroy_wq(rxq_obj->wq));
336 			rxq_obj->wq = NULL;
337 			rte_errno = EINVAL;
338 		}
339 	}
340 	return rxq_obj->wq;
341 }
342 
343 /**
344  * Create the Rx queue Verbs object.
345  *
346  * @param dev
347  *   Pointer to Ethernet device.
348  * @param idx
349  *   Queue index in DPDK Rx queue array.
350  *
351  * @return
352  *   0 on success, a negative errno value otherwise and rte_errno is set.
353  */
354 static int
355 mlx5_rxq_ibv_obj_new(struct rte_eth_dev *dev, uint16_t idx)
356 {
357 	struct mlx5_priv *priv = dev->data->dev_private;
358 	struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
359 	struct mlx5_rxq_ctrl *rxq_ctrl =
360 		container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
361 	struct mlx5_rxq_obj *tmpl = rxq_ctrl->obj;
362 	struct mlx5dv_cq cq_info;
363 	struct mlx5dv_rwq rwq;
364 	int ret = 0;
365 	struct mlx5dv_obj obj;
366 
367 	MLX5_ASSERT(rxq_data);
368 	MLX5_ASSERT(tmpl);
369 	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_RX_QUEUE;
370 	priv->verbs_alloc_ctx.obj = rxq_ctrl;
371 	tmpl->type = MLX5_RXQ_OBJ_TYPE_IBV;
372 	tmpl->rxq_ctrl = rxq_ctrl;
373 	if (rxq_ctrl->irq) {
374 		tmpl->ibv_channel =
375 				mlx5_glue->create_comp_channel(priv->sh->ctx);
376 		if (!tmpl->ibv_channel) {
377 			DRV_LOG(ERR, "Port %u: comp channel creation failure.",
378 				dev->data->port_id);
379 			rte_errno = ENOMEM;
380 			goto error;
381 		}
382 		tmpl->fd = ((struct ibv_comp_channel *)(tmpl->ibv_channel))->fd;
383 	}
384 	/* Create CQ using Verbs API. */
385 	tmpl->ibv_cq = mlx5_rxq_ibv_cq_create(dev, idx);
386 	if (!tmpl->ibv_cq) {
387 		DRV_LOG(ERR, "Port %u Rx queue %u CQ creation failure.",
388 			dev->data->port_id, idx);
389 		rte_errno = ENOMEM;
390 		goto error;
391 	}
392 	obj.cq.in = tmpl->ibv_cq;
393 	obj.cq.out = &cq_info;
394 	ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ);
395 	if (ret) {
396 		rte_errno = ret;
397 		goto error;
398 	}
399 	if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
400 		DRV_LOG(ERR,
401 			"Port %u wrong MLX5_CQE_SIZE environment "
402 			"variable value: it should be set to %u.",
403 			dev->data->port_id, RTE_CACHE_LINE_SIZE);
404 		rte_errno = EINVAL;
405 		goto error;
406 	}
407 	/* Fill the rings. */
408 	rxq_data->cqe_n = log2above(cq_info.cqe_cnt);
409 	rxq_data->cq_db = cq_info.dbrec;
410 	rxq_data->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf;
411 	rxq_data->cq_uar = cq_info.cq_uar;
412 	rxq_data->cqn = cq_info.cqn;
413 	/* Create WQ (RQ) using Verbs API. */
414 	tmpl->wq = mlx5_rxq_ibv_wq_create(dev, idx);
415 	if (!tmpl->wq) {
416 		DRV_LOG(ERR, "Port %u Rx queue %u WQ creation failure.",
417 			dev->data->port_id, idx);
418 		rte_errno = ENOMEM;
419 		goto error;
420 	}
421 	/* Change queue state to ready. */
422 	ret = mlx5_ibv_modify_wq(tmpl, IBV_WQS_RDY);
423 	if (ret) {
424 		DRV_LOG(ERR,
425 			"Port %u Rx queue %u WQ state to IBV_WQS_RDY failed.",
426 			dev->data->port_id, idx);
427 		rte_errno = ret;
428 		goto error;
429 	}
430 	obj.rwq.in = tmpl->wq;
431 	obj.rwq.out = &rwq;
432 	ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_RWQ);
433 	if (ret) {
434 		rte_errno = ret;
435 		goto error;
436 	}
437 	rxq_data->wqes = rwq.buf;
438 	rxq_data->rq_db = rwq.dbrec;
439 	rxq_data->cq_arm_sn = 0;
440 	mlx5_rxq_initialize(rxq_data);
441 	rxq_data->cq_ci = 0;
442 	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
443 	dev->data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED;
444 	rxq_ctrl->wqn = ((struct ibv_wq *)(tmpl->wq))->wq_num;
445 	return 0;
446 error:
447 	ret = rte_errno; /* Save rte_errno before cleanup. */
448 	if (tmpl->wq)
449 		claim_zero(mlx5_glue->destroy_wq(tmpl->wq));
450 	if (tmpl->ibv_cq)
451 		claim_zero(mlx5_glue->destroy_cq(tmpl->ibv_cq));
452 	if (tmpl->ibv_channel)
453 		claim_zero(mlx5_glue->destroy_comp_channel(tmpl->ibv_channel));
454 	rte_errno = ret; /* Restore rte_errno. */
455 	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
456 	return -rte_errno;
457 }
458 
459 /**
460  * Release an Rx verbs queue object.
461  *
462  * @param rxq_obj
463  *   Verbs Rx queue object.
464  */
465 static void
466 mlx5_rxq_ibv_obj_release(struct mlx5_rxq_obj *rxq_obj)
467 {
468 	MLX5_ASSERT(rxq_obj);
469 	MLX5_ASSERT(rxq_obj->wq);
470 	MLX5_ASSERT(rxq_obj->ibv_cq);
471 	claim_zero(mlx5_glue->destroy_wq(rxq_obj->wq));
472 	claim_zero(mlx5_glue->destroy_cq(rxq_obj->ibv_cq));
473 	if (rxq_obj->ibv_channel)
474 		claim_zero(mlx5_glue->destroy_comp_channel
475 							(rxq_obj->ibv_channel));
476 }
477 
478 /**
479  * Get event for an Rx verbs queue object.
480  *
481  * @param rxq_obj
482  *   Verbs Rx queue object.
483  *
484  * @return
485  *   0 on success, a negative errno value otherwise and rte_errno is set.
486  */
487 static int
488 mlx5_rx_ibv_get_event(struct mlx5_rxq_obj *rxq_obj)
489 {
490 	struct ibv_cq *ev_cq;
491 	void *ev_ctx;
492 	int ret = mlx5_glue->get_cq_event(rxq_obj->ibv_channel,
493 					  &ev_cq, &ev_ctx);
494 
495 	if (ret < 0 || ev_cq != rxq_obj->ibv_cq)
496 		goto exit;
497 	mlx5_glue->ack_cq_events(rxq_obj->ibv_cq, 1);
498 	return 0;
499 exit:
500 	if (ret < 0)
501 		rte_errno = errno;
502 	else
503 		rte_errno = EINVAL;
504 	return -rte_errno;
505 }
506 
507 /**
508  * Creates a receive work queue as a filed of indirection table.
509  *
510  * @param dev
511  *   Pointer to Ethernet device.
512  * @param log_n
513  *   Log of number of queues in the array.
514  * @param ind_tbl
515  *   Verbs indirection table object.
516  *
517  * @return
518  *   0 on success, a negative errno value otherwise and rte_errno is set.
519  */
520 static int
521 mlx5_ibv_ind_table_new(struct rte_eth_dev *dev, const unsigned int log_n,
522 		       struct mlx5_ind_table_obj *ind_tbl)
523 {
524 	struct mlx5_priv *priv = dev->data->dev_private;
525 	struct ibv_wq *wq[1 << log_n];
526 	unsigned int i, j;
527 
528 	MLX5_ASSERT(ind_tbl);
529 	for (i = 0; i != ind_tbl->queues_n; ++i) {
530 		struct mlx5_rxq_data *rxq = (*priv->rxqs)[ind_tbl->queues[i]];
531 		struct mlx5_rxq_ctrl *rxq_ctrl =
532 				container_of(rxq, struct mlx5_rxq_ctrl, rxq);
533 
534 		wq[i] = rxq_ctrl->obj->wq;
535 	}
536 	MLX5_ASSERT(i > 0);
537 	/* Finalise indirection table. */
538 	for (j = 0; i != (unsigned int)(1 << log_n); ++j, ++i)
539 		wq[i] = wq[j];
540 	ind_tbl->ind_table = mlx5_glue->create_rwq_ind_table(priv->sh->ctx,
541 					&(struct ibv_rwq_ind_table_init_attr){
542 						.log_ind_tbl_size = log_n,
543 						.ind_tbl = wq,
544 						.comp_mask = 0,
545 					});
546 	if (!ind_tbl->ind_table) {
547 		rte_errno = errno;
548 		return -rte_errno;
549 	}
550 	return 0;
551 }
552 
553 /**
554  * Destroys the specified Indirection Table.
555  *
556  * @param ind_table
557  *   Indirection table to release.
558  */
559 static void
560 mlx5_ibv_ind_table_destroy(struct mlx5_ind_table_obj *ind_tbl)
561 {
562 	claim_zero(mlx5_glue->destroy_rwq_ind_table(ind_tbl->ind_table));
563 }
564 
565 /**
566  * Create an Rx Hash queue.
567  *
568  * @param dev
569  *   Pointer to Ethernet device.
570  * @param hrxq
571  *   Pointer to Rx Hash queue.
572  * @param tunnel
573  *   Tunnel type.
574  *
575  * @return
576  *   0 on success, a negative errno value otherwise and rte_errno is set.
577  */
578 static int
579 mlx5_ibv_hrxq_new(struct rte_eth_dev *dev, struct mlx5_hrxq *hrxq,
580 		  int tunnel __rte_unused)
581 {
582 	struct mlx5_priv *priv = dev->data->dev_private;
583 	struct ibv_qp *qp = NULL;
584 	struct mlx5_ind_table_obj *ind_tbl = hrxq->ind_table;
585 	const uint8_t *rss_key = hrxq->rss_key;
586 	uint64_t hash_fields = hrxq->hash_fields;
587 	int err;
588 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
589 	struct mlx5dv_qp_init_attr qp_init_attr;
590 
591 	memset(&qp_init_attr, 0, sizeof(qp_init_attr));
592 	if (tunnel) {
593 		qp_init_attr.comp_mask =
594 				       MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;
595 		qp_init_attr.create_flags = MLX5DV_QP_CREATE_TUNNEL_OFFLOADS;
596 	}
597 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
598 	if (dev->data->dev_conf.lpbk_mode) {
599 		/* Allow packet sent from NIC loop back w/o source MAC check. */
600 		qp_init_attr.comp_mask |=
601 				MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;
602 		qp_init_attr.create_flags |=
603 				MLX5DV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_UC;
604 	}
605 #endif
606 	qp = mlx5_glue->dv_create_qp
607 			(priv->sh->ctx,
608 			 &(struct ibv_qp_init_attr_ex){
609 				.qp_type = IBV_QPT_RAW_PACKET,
610 				.comp_mask =
611 					IBV_QP_INIT_ATTR_PD |
612 					IBV_QP_INIT_ATTR_IND_TABLE |
613 					IBV_QP_INIT_ATTR_RX_HASH,
614 				.rx_hash_conf = (struct ibv_rx_hash_conf){
615 					.rx_hash_function =
616 						IBV_RX_HASH_FUNC_TOEPLITZ,
617 					.rx_hash_key_len = hrxq->rss_key_len,
618 					.rx_hash_key =
619 						(void *)(uintptr_t)rss_key,
620 					.rx_hash_fields_mask = hash_fields,
621 				},
622 				.rwq_ind_tbl = ind_tbl->ind_table,
623 				.pd = priv->sh->pd,
624 			  },
625 			  &qp_init_attr);
626 #else
627 	qp = mlx5_glue->create_qp_ex
628 			(priv->sh->ctx,
629 			 &(struct ibv_qp_init_attr_ex){
630 				.qp_type = IBV_QPT_RAW_PACKET,
631 				.comp_mask =
632 					IBV_QP_INIT_ATTR_PD |
633 					IBV_QP_INIT_ATTR_IND_TABLE |
634 					IBV_QP_INIT_ATTR_RX_HASH,
635 				.rx_hash_conf = (struct ibv_rx_hash_conf){
636 					.rx_hash_function =
637 						IBV_RX_HASH_FUNC_TOEPLITZ,
638 					.rx_hash_key_len = hrxq->rss_key_len,
639 					.rx_hash_key =
640 						(void *)(uintptr_t)rss_key,
641 					.rx_hash_fields_mask = hash_fields,
642 				},
643 				.rwq_ind_tbl = ind_tbl->ind_table,
644 				.pd = priv->sh->pd,
645 			 });
646 #endif
647 	if (!qp) {
648 		rte_errno = errno;
649 		goto error;
650 	}
651 	hrxq->qp = qp;
652 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
653 	hrxq->action = mlx5_glue->dv_create_flow_action_dest_ibv_qp(hrxq->qp);
654 	if (!hrxq->action) {
655 		rte_errno = errno;
656 		goto error;
657 	}
658 #endif
659 	return 0;
660 error:
661 	err = rte_errno; /* Save rte_errno before cleanup. */
662 	if (qp)
663 		claim_zero(mlx5_glue->destroy_qp(qp));
664 	rte_errno = err; /* Restore rte_errno. */
665 	return -rte_errno;
666 }
667 
668 /**
669  * Destroy a Verbs queue pair.
670  *
671  * @param hrxq
672  *   Hash Rx queue to release its qp.
673  */
674 static void
675 mlx5_ibv_qp_destroy(struct mlx5_hrxq *hrxq)
676 {
677 	claim_zero(mlx5_glue->destroy_qp(hrxq->qp));
678 }
679 
680 /**
681  * Release a drop Rx queue Verbs object.
682  *
683  * @param dev
684  *   Pointer to Ethernet device.
685  */
686 static void
687 mlx5_rxq_ibv_obj_drop_release(struct rte_eth_dev *dev)
688 {
689 	struct mlx5_priv *priv = dev->data->dev_private;
690 	struct mlx5_rxq_obj *rxq = priv->drop_queue.rxq;
691 
692 	if (rxq->wq)
693 		claim_zero(mlx5_glue->destroy_wq(rxq->wq));
694 	if (rxq->ibv_cq)
695 		claim_zero(mlx5_glue->destroy_cq(rxq->ibv_cq));
696 	mlx5_free(rxq);
697 	priv->drop_queue.rxq = NULL;
698 }
699 
700 /**
701  * Create a drop Rx queue Verbs object.
702  *
703  * @param dev
704  *   Pointer to Ethernet device.
705  *
706  * @return
707  *   0 on success, a negative errno value otherwise and rte_errno is set.
708  */
709 static int
710 mlx5_rxq_ibv_obj_drop_create(struct rte_eth_dev *dev)
711 {
712 	struct mlx5_priv *priv = dev->data->dev_private;
713 	struct ibv_context *ctx = priv->sh->ctx;
714 	struct mlx5_rxq_obj *rxq = priv->drop_queue.rxq;
715 
716 	if (rxq)
717 		return 0;
718 	rxq = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*rxq), 0, SOCKET_ID_ANY);
719 	if (!rxq) {
720 		DEBUG("Port %u cannot allocate drop Rx queue memory.",
721 		      dev->data->port_id);
722 		rte_errno = ENOMEM;
723 		return -rte_errno;
724 	}
725 	priv->drop_queue.rxq = rxq;
726 	rxq->ibv_cq = mlx5_glue->create_cq(ctx, 1, NULL, NULL, 0);
727 	if (!rxq->ibv_cq) {
728 		DEBUG("Port %u cannot allocate CQ for drop queue.",
729 		      dev->data->port_id);
730 		rte_errno = errno;
731 		goto error;
732 	}
733 	rxq->wq = mlx5_glue->create_wq(ctx, &(struct ibv_wq_init_attr){
734 						    .wq_type = IBV_WQT_RQ,
735 						    .max_wr = 1,
736 						    .max_sge = 1,
737 						    .pd = priv->sh->pd,
738 						    .cq = rxq->ibv_cq,
739 					      });
740 	if (!rxq->wq) {
741 		DEBUG("Port %u cannot allocate WQ for drop queue.",
742 		      dev->data->port_id);
743 		rte_errno = errno;
744 		goto error;
745 	}
746 	priv->drop_queue.rxq = rxq;
747 	return 0;
748 error:
749 	mlx5_rxq_ibv_obj_drop_release(dev);
750 	return -rte_errno;
751 }
752 
753 /**
754  * Create a Verbs drop action for Rx Hash queue.
755  *
756  * @param dev
757  *   Pointer to Ethernet device.
758  *
759  * @return
760  *   0 on success, a negative errno value otherwise and rte_errno is set.
761  */
762 static int
763 mlx5_ibv_drop_action_create(struct rte_eth_dev *dev)
764 {
765 	struct mlx5_priv *priv = dev->data->dev_private;
766 	struct mlx5_hrxq *hrxq = priv->drop_queue.hrxq;
767 	struct ibv_rwq_ind_table *ind_tbl = NULL;
768 	struct mlx5_rxq_obj *rxq;
769 	int ret;
770 
771 	MLX5_ASSERT(hrxq && hrxq->ind_table);
772 	ret = mlx5_rxq_ibv_obj_drop_create(dev);
773 	if (ret < 0)
774 		goto error;
775 	rxq = priv->drop_queue.rxq;
776 	ind_tbl = mlx5_glue->create_rwq_ind_table
777 				(priv->sh->ctx,
778 				 &(struct ibv_rwq_ind_table_init_attr){
779 					.log_ind_tbl_size = 0,
780 					.ind_tbl = (struct ibv_wq **)&rxq->wq,
781 					.comp_mask = 0,
782 				 });
783 	if (!ind_tbl) {
784 		DEBUG("Port %u cannot allocate indirection table for drop"
785 		      " queue.", dev->data->port_id);
786 		rte_errno = errno;
787 		goto error;
788 	}
789 	hrxq->qp = mlx5_glue->create_qp_ex(priv->sh->ctx,
790 		 &(struct ibv_qp_init_attr_ex){
791 			.qp_type = IBV_QPT_RAW_PACKET,
792 			.comp_mask = IBV_QP_INIT_ATTR_PD |
793 				     IBV_QP_INIT_ATTR_IND_TABLE |
794 				     IBV_QP_INIT_ATTR_RX_HASH,
795 			.rx_hash_conf = (struct ibv_rx_hash_conf){
796 				.rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
797 				.rx_hash_key_len = MLX5_RSS_HASH_KEY_LEN,
798 				.rx_hash_key = rss_hash_default_key,
799 				.rx_hash_fields_mask = 0,
800 				},
801 			.rwq_ind_tbl = ind_tbl,
802 			.pd = priv->sh->pd
803 		 });
804 	if (!hrxq->qp) {
805 		DEBUG("Port %u cannot allocate QP for drop queue.",
806 		      dev->data->port_id);
807 		rte_errno = errno;
808 		goto error;
809 	}
810 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
811 	hrxq->action = mlx5_glue->dv_create_flow_action_dest_ibv_qp(hrxq->qp);
812 	if (!hrxq->action) {
813 		rte_errno = errno;
814 		goto error;
815 	}
816 #endif
817 	hrxq->ind_table->ind_table = ind_tbl;
818 	return 0;
819 error:
820 	if (hrxq->qp)
821 		claim_zero(mlx5_glue->destroy_qp(hrxq->qp));
822 	if (ind_tbl)
823 		claim_zero(mlx5_glue->destroy_rwq_ind_table(ind_tbl));
824 	if (priv->drop_queue.rxq)
825 		mlx5_rxq_ibv_obj_drop_release(dev);
826 	return -rte_errno;
827 }
828 
829 /**
830  * Release a drop hash Rx queue.
831  *
832  * @param dev
833  *   Pointer to Ethernet device.
834  */
835 static void
836 mlx5_ibv_drop_action_destroy(struct rte_eth_dev *dev)
837 {
838 	struct mlx5_priv *priv = dev->data->dev_private;
839 	struct mlx5_hrxq *hrxq = priv->drop_queue.hrxq;
840 	struct ibv_rwq_ind_table *ind_tbl = hrxq->ind_table->ind_table;
841 
842 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
843 	claim_zero(mlx5_glue->destroy_flow_action(hrxq->action));
844 #endif
845 	claim_zero(mlx5_glue->destroy_qp(hrxq->qp));
846 	claim_zero(mlx5_glue->destroy_rwq_ind_table(ind_tbl));
847 	mlx5_rxq_ibv_obj_drop_release(dev);
848 }
849 
850 /**
851  * Create a QP Verbs object.
852  *
853  * @param dev
854  *   Pointer to Ethernet device.
855  * @param idx
856  *   Queue index in DPDK Tx queue array.
857  *
858  * @return
859  *   The QP Verbs object, NULL otherwise and rte_errno is set.
860  */
861 static struct ibv_qp *
862 mlx5_txq_ibv_qp_create(struct rte_eth_dev *dev, uint16_t idx)
863 {
864 	struct mlx5_priv *priv = dev->data->dev_private;
865 	struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
866 	struct mlx5_txq_ctrl *txq_ctrl =
867 			container_of(txq_data, struct mlx5_txq_ctrl, txq);
868 	struct ibv_qp *qp_obj = NULL;
869 	struct ibv_qp_init_attr_ex qp_attr = { 0 };
870 	const int desc = 1 << txq_data->elts_n;
871 
872 	MLX5_ASSERT(txq_ctrl->obj->cq);
873 	/* CQ to be associated with the send queue. */
874 	qp_attr.send_cq = txq_ctrl->obj->cq;
875 	/* CQ to be associated with the receive queue. */
876 	qp_attr.recv_cq = txq_ctrl->obj->cq;
877 	/* Max number of outstanding WRs. */
878 	qp_attr.cap.max_send_wr = ((priv->sh->device_attr.max_qp_wr < desc) ?
879 				   priv->sh->device_attr.max_qp_wr : desc);
880 	/*
881 	 * Max number of scatter/gather elements in a WR, must be 1 to prevent
882 	 * libmlx5 from trying to affect must be 1 to prevent libmlx5 from
883 	 * trying to affect too much memory. TX gather is not impacted by the
884 	 * device_attr.max_sge limit and will still work properly.
885 	 */
886 	qp_attr.cap.max_send_sge = 1;
887 	qp_attr.qp_type = IBV_QPT_RAW_PACKET,
888 	/* Do *NOT* enable this, completions events are managed per Tx burst. */
889 	qp_attr.sq_sig_all = 0;
890 	qp_attr.pd = priv->sh->pd;
891 	qp_attr.comp_mask = IBV_QP_INIT_ATTR_PD;
892 	if (txq_data->inlen_send)
893 		qp_attr.cap.max_inline_data = txq_ctrl->max_inline_data;
894 	if (txq_data->tso_en) {
895 		qp_attr.max_tso_header = txq_ctrl->max_tso_header;
896 		qp_attr.comp_mask |= IBV_QP_INIT_ATTR_MAX_TSO_HEADER;
897 	}
898 	qp_obj = mlx5_glue->create_qp_ex(priv->sh->ctx, &qp_attr);
899 	if (qp_obj == NULL) {
900 		DRV_LOG(ERR, "Port %u Tx queue %u QP creation failure.",
901 			dev->data->port_id, idx);
902 		rte_errno = errno;
903 	}
904 	return qp_obj;
905 }
906 
907 /**
908  * Create the Tx queue Verbs object.
909  *
910  * @param dev
911  *   Pointer to Ethernet device.
912  * @param idx
913  *   Queue index in DPDK Tx queue array.
914  *
915  * @return
916  *   0 on success, a negative errno value otherwise and rte_errno is set.
917  */
918 int
919 mlx5_txq_ibv_obj_new(struct rte_eth_dev *dev, uint16_t idx)
920 {
921 	struct mlx5_priv *priv = dev->data->dev_private;
922 	struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
923 	struct mlx5_txq_ctrl *txq_ctrl =
924 		container_of(txq_data, struct mlx5_txq_ctrl, txq);
925 	struct mlx5_txq_obj *txq_obj = txq_ctrl->obj;
926 	unsigned int cqe_n;
927 	struct mlx5dv_qp qp;
928 	struct mlx5dv_cq cq_info;
929 	struct mlx5dv_obj obj;
930 	const int desc = 1 << txq_data->elts_n;
931 	int ret = 0;
932 
933 	MLX5_ASSERT(txq_data);
934 	MLX5_ASSERT(txq_obj);
935 	txq_obj->txq_ctrl = txq_ctrl;
936 	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_TX_QUEUE;
937 	priv->verbs_alloc_ctx.obj = txq_ctrl;
938 	if (mlx5_getenv_int("MLX5_ENABLE_CQE_COMPRESSION")) {
939 		DRV_LOG(ERR, "Port %u MLX5_ENABLE_CQE_COMPRESSION "
940 			"must never be set.", dev->data->port_id);
941 		rte_errno = EINVAL;
942 		return -rte_errno;
943 	}
944 	cqe_n = desc / MLX5_TX_COMP_THRESH +
945 		1 + MLX5_TX_COMP_THRESH_INLINE_DIV;
946 	txq_obj->cq = mlx5_glue->create_cq(priv->sh->ctx, cqe_n, NULL, NULL, 0);
947 	if (txq_obj->cq == NULL) {
948 		DRV_LOG(ERR, "Port %u Tx queue %u CQ creation failure.",
949 			dev->data->port_id, idx);
950 		rte_errno = errno;
951 		goto error;
952 	}
953 	txq_obj->qp = mlx5_txq_ibv_qp_create(dev, idx);
954 	if (txq_obj->qp == NULL) {
955 		rte_errno = errno;
956 		goto error;
957 	}
958 	ret = mlx5_ibv_modify_qp(txq_obj, MLX5_TXQ_MOD_RST2RDY,
959 				 (uint8_t)priv->dev_port);
960 	if (ret) {
961 		DRV_LOG(ERR, "Port %u Tx queue %u QP state modifying failed.",
962 			dev->data->port_id, idx);
963 		rte_errno = errno;
964 		goto error;
965 	}
966 	qp.comp_mask = MLX5DV_QP_MASK_UAR_MMAP_OFFSET;
967 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
968 	/* If using DevX, need additional mask to read tisn value. */
969 	if (priv->sh->devx && !priv->sh->tdn)
970 		qp.comp_mask |= MLX5DV_QP_MASK_RAW_QP_HANDLES;
971 #endif
972 	obj.cq.in = txq_obj->cq;
973 	obj.cq.out = &cq_info;
974 	obj.qp.in = txq_obj->qp;
975 	obj.qp.out = &qp;
976 	ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_QP);
977 	if (ret != 0) {
978 		rte_errno = errno;
979 		goto error;
980 	}
981 	if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
982 		DRV_LOG(ERR,
983 			"Port %u wrong MLX5_CQE_SIZE environment variable"
984 			" value: it should be set to %u.",
985 			dev->data->port_id, RTE_CACHE_LINE_SIZE);
986 		rte_errno = EINVAL;
987 		goto error;
988 	}
989 	txq_data->cqe_n = log2above(cq_info.cqe_cnt);
990 	txq_data->cqe_s = 1 << txq_data->cqe_n;
991 	txq_data->cqe_m = txq_data->cqe_s - 1;
992 	txq_data->qp_num_8s = ((struct ibv_qp *)txq_obj->qp)->qp_num << 8;
993 	txq_data->wqes = qp.sq.buf;
994 	txq_data->wqe_n = log2above(qp.sq.wqe_cnt);
995 	txq_data->wqe_s = 1 << txq_data->wqe_n;
996 	txq_data->wqe_m = txq_data->wqe_s - 1;
997 	txq_data->wqes_end = txq_data->wqes + txq_data->wqe_s;
998 	txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR];
999 	txq_data->cq_db = cq_info.dbrec;
1000 	txq_data->cqes = (volatile struct mlx5_cqe *)cq_info.buf;
1001 	txq_data->cq_ci = 0;
1002 	txq_data->cq_pi = 0;
1003 	txq_data->wqe_ci = 0;
1004 	txq_data->wqe_pi = 0;
1005 	txq_data->wqe_comp = 0;
1006 	txq_data->wqe_thres = txq_data->wqe_s / MLX5_TX_COMP_THRESH_INLINE_DIV;
1007 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
1008 	/*
1009 	 * If using DevX need to query and store TIS transport domain value.
1010 	 * This is done once per port.
1011 	 * Will use this value on Rx, when creating matching TIR.
1012 	 */
1013 	if (priv->sh->devx && !priv->sh->tdn) {
1014 		ret = mlx5_devx_cmd_qp_query_tis_td(txq_obj->qp, qp.tisn,
1015 						    &priv->sh->tdn);
1016 		if (ret) {
1017 			DRV_LOG(ERR, "Fail to query port %u Tx queue %u QP TIS "
1018 				"transport domain.", dev->data->port_id, idx);
1019 			rte_errno = EINVAL;
1020 			goto error;
1021 		} else {
1022 			DRV_LOG(DEBUG, "Port %u Tx queue %u TIS number %d "
1023 				"transport domain %d.", dev->data->port_id,
1024 				idx, qp.tisn, priv->sh->tdn);
1025 		}
1026 	}
1027 #endif
1028 	txq_ctrl->bf_reg = qp.bf.reg;
1029 	if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) {
1030 		txq_ctrl->uar_mmap_offset = qp.uar_mmap_offset;
1031 		DRV_LOG(DEBUG, "Port %u: uar_mmap_offset 0x%" PRIx64 ".",
1032 			dev->data->port_id, txq_ctrl->uar_mmap_offset);
1033 	} else {
1034 		DRV_LOG(ERR,
1035 			"Port %u failed to retrieve UAR info, invalid"
1036 			" libmlx5.so",
1037 			dev->data->port_id);
1038 		rte_errno = EINVAL;
1039 		goto error;
1040 	}
1041 	txq_uar_init(txq_ctrl);
1042 	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
1043 	return 0;
1044 error:
1045 	ret = rte_errno; /* Save rte_errno before cleanup. */
1046 	if (txq_obj->cq)
1047 		claim_zero(mlx5_glue->destroy_cq(txq_obj->cq));
1048 	if (txq_obj->qp)
1049 		claim_zero(mlx5_glue->destroy_qp(txq_obj->qp));
1050 	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
1051 	rte_errno = ret; /* Restore rte_errno. */
1052 	return -rte_errno;
1053 }
1054 
1055 /**
1056  * Release an Tx verbs queue object.
1057  *
1058  * @param txq_obj
1059  *   Verbs Tx queue object..
1060  */
1061 void
1062 mlx5_txq_ibv_obj_release(struct mlx5_txq_obj *txq_obj)
1063 {
1064 	MLX5_ASSERT(txq_obj);
1065 	claim_zero(mlx5_glue->destroy_qp(txq_obj->qp));
1066 	claim_zero(mlx5_glue->destroy_cq(txq_obj->cq));
1067 }
1068 
1069 struct mlx5_obj_ops ibv_obj_ops = {
1070 	.rxq_obj_modify_vlan_strip = mlx5_rxq_obj_modify_wq_vlan_strip,
1071 	.rxq_obj_new = mlx5_rxq_ibv_obj_new,
1072 	.rxq_event_get = mlx5_rx_ibv_get_event,
1073 	.rxq_obj_modify = mlx5_ibv_modify_wq,
1074 	.rxq_obj_release = mlx5_rxq_ibv_obj_release,
1075 	.ind_table_new = mlx5_ibv_ind_table_new,
1076 	.ind_table_destroy = mlx5_ibv_ind_table_destroy,
1077 	.hrxq_new = mlx5_ibv_hrxq_new,
1078 	.hrxq_destroy = mlx5_ibv_qp_destroy,
1079 	.drop_action_create = mlx5_ibv_drop_action_create,
1080 	.drop_action_destroy = mlx5_ibv_drop_action_destroy,
1081 	.txq_obj_new = mlx5_txq_ibv_obj_new,
1082 	.txq_obj_modify = mlx5_ibv_modify_qp,
1083 	.txq_obj_release = mlx5_txq_ibv_obj_release,
1084 };
1085