/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2020 Mellanox Technologies, Ltd
 */

#include <stddef.h>
#include <errno.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include <inttypes.h>
#include <sys/queue.h>

#include "mlx5_autoconf.h"

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_common.h>

#include <mlx5_glue.h>
#include <mlx5_common.h>
#include <mlx5_common_mr.h>
#include <mlx5_rxtx.h>
#include <mlx5_verbs.h>
#include <mlx5_utils.h>
#include <mlx5_malloc.h>

/**
 * Register a memory region. Given a protection domain pointer, a memory
 * start address and a length, register the memory region.
 *
 * @param[in] pd
 *   Pointer to protection domain context.
 * @param[in] addr
 *   Pointer to memory start address.
 * @param[in] length
 *   Length of the memory to register.
 * @param[out] pmd_mr
 *   pmd_mr struct set with lkey, address, length and pointer to mr object.
 *
 * @return
 *   0 on successful registration, -1 otherwise.
 */
static int
mlx5_reg_mr(void *pd, void *addr, size_t length,
	    struct mlx5_pmd_mr *pmd_mr)
{
	return mlx5_common_verbs_reg_mr(pd, addr, length, pmd_mr);
}

/**
 * Deregister a memory region. Given the mlx5 PMD MR, deregister the
 * underlying memory region.
 *
 * @param[in] pmd_mr
 *   pmd_mr struct set with lkey, address, length and pointer to mr object.
 */
static void
mlx5_dereg_mr(struct mlx5_pmd_mr *pmd_mr)
{
	mlx5_common_verbs_dereg_mr(pmd_mr);
}

/* Verbs operations. */
const struct mlx5_verbs_ops mlx5_verbs_ops = {
	.reg_mr = mlx5_reg_mr,
	.dereg_mr = mlx5_dereg_mr,
};

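/*
 * Usage sketch, illustrative only and not part of the driver: the common
 * MR layer is expected to call the table above roughly as follows,
 * assuming a valid protection domain `pd` and a buffer `buf` of `len`
 * bytes:
 *
 *     struct mlx5_pmd_mr mr;
 *
 *     if (mlx5_verbs_ops.reg_mr(pd, buf, len, &mr) < 0)
 *             return -1; // registration failed
 *     // ... use mr.lkey in data-path descriptors ...
 *     mlx5_verbs_ops.dereg_mr(&mr);
 */
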
/**
 * Modify Rx WQ VLAN stripping offload.
 *
 * @param rxq_obj
 *   Rx queue object.
 * @param on
 *   Enable/disable VLAN stripping.
 *
 * @return
 *   0 on success, non-0 otherwise.
 */
static int
mlx5_rxq_obj_modify_wq_vlan_strip(struct mlx5_rxq_obj *rxq_obj, int on)
{
	uint16_t vlan_offloads =
		on ? IBV_WQ_FLAGS_CVLAN_STRIPPING : 0;
	struct ibv_wq_attr mod;
	mod = (struct ibv_wq_attr){
		.attr_mask = IBV_WQ_ATTR_FLAGS,
		.flags_mask = IBV_WQ_FLAGS_CVLAN_STRIPPING,
		.flags = vlan_offloads,
	};
	return mlx5_glue->modify_wq(rxq_obj->wq, &mod);
}

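/*
 * Note, illustrative only: in ibv_wq_attr, attr_mask selects which
 * attribute is being modified (here the flags), flags_mask selects which
 * flag bits may change and flags carries their new values, so the call
 * above touches only the CVLAN-stripping bit and leaves any other WQ
 * flags untouched.
 */
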
/**
 * Create a CQ Verbs object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param priv
 *   Pointer to device private data.
 * @param rxq_data
 *   Pointer to Rx queue data.
 * @param cqe_n
 *   Number of CQEs in CQ.
 * @param rxq_obj
 *   Pointer to Rx queue object data.
 *
 * @return
 *   The Verbs object initialized, NULL otherwise and rte_errno is set.
 */
static struct ibv_cq *
mlx5_ibv_cq_new(struct rte_eth_dev *dev, struct mlx5_priv *priv,
		struct mlx5_rxq_data *rxq_data,
		unsigned int cqe_n, struct mlx5_rxq_obj *rxq_obj)
{
	struct {
		struct ibv_cq_init_attr_ex ibv;
		struct mlx5dv_cq_init_attr mlx5;
	} cq_attr;

	cq_attr.ibv = (struct ibv_cq_init_attr_ex){
		.cqe = cqe_n,
		.channel = rxq_obj->ibv_channel,
		.comp_mask = 0,
	};
	cq_attr.mlx5 = (struct mlx5dv_cq_init_attr){
		.comp_mask = 0,
	};
	if (priv->config.cqe_comp && !rxq_data->hw_timestamp) {
		cq_attr.mlx5.comp_mask |=
				MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;
#ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
		cq_attr.mlx5.cqe_comp_res_format =
				mlx5_rxq_mprq_enabled(rxq_data) ?
				MLX5DV_CQE_RES_FORMAT_CSUM_STRIDX :
				MLX5DV_CQE_RES_FORMAT_HASH;
#else
		cq_attr.mlx5.cqe_comp_res_format = MLX5DV_CQE_RES_FORMAT_HASH;
#endif
		/*
		 * For vectorized Rx, it must not be doubled in order to
		 * make cq_ci and rq_ci aligned.
		 */
		if (mlx5_rxq_check_vec_support(rxq_data) < 0)
			cq_attr.ibv.cqe *= 2;
	} else if (priv->config.cqe_comp && rxq_data->hw_timestamp) {
		DRV_LOG(DEBUG,
			"Port %u Rx CQE compression is disabled for HW"
			" timestamp.",
			dev->data->port_id);
	}
#ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD
	if (priv->config.cqe_pad) {
		cq_attr.mlx5.comp_mask |= MLX5DV_CQ_INIT_ATTR_MASK_FLAGS;
		cq_attr.mlx5.flags |= MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD;
	}
#endif
	return mlx5_glue->cq_ex_to_cq(mlx5_glue->dv_create_cq(priv->sh->ctx,
							      &cq_attr.ibv,
							      &cq_attr.mlx5));
}

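/*
 * Sizing note, illustrative only: when compression is enabled on the
 * non-vectorized datapath the requested CQE count is doubled above, e.g.
 * a caller passing cqe_n = 511 ends up requesting 1022 CQEs. The count
 * actually allocated by the device is read back by the caller via the
 * dv_init_obj() glue call and stored as log2above(cqe_cnt).
 */
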
/**
 * Create a WQ Verbs object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param priv
 *   Pointer to device private data.
 * @param rxq_data
 *   Pointer to Rx queue data.
 * @param idx
 *   Queue index in DPDK Rx queue array.
 * @param wqe_n
 *   Number of WQEs in WQ.
 * @param rxq_obj
 *   Pointer to Rx queue object data.
 *
 * @return
 *   The Verbs object initialized, NULL otherwise and rte_errno is set.
 */
static struct ibv_wq *
mlx5_ibv_wq_new(struct rte_eth_dev *dev, struct mlx5_priv *priv,
		struct mlx5_rxq_data *rxq_data, uint16_t idx,
		unsigned int wqe_n, struct mlx5_rxq_obj *rxq_obj)
{
	struct {
		struct ibv_wq_init_attr ibv;
#ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
		struct mlx5dv_wq_init_attr mlx5;
#endif
	} wq_attr;

	wq_attr.ibv = (struct ibv_wq_init_attr){
		.wq_context = NULL, /* Could be useful in the future. */
		.wq_type = IBV_WQT_RQ,
		/* Max number of outstanding WRs. */
		.max_wr = wqe_n >> rxq_data->sges_n,
		/* Max number of scatter/gather elements in a WR. */
		.max_sge = 1 << rxq_data->sges_n,
		.pd = priv->sh->pd,
		.cq = rxq_obj->ibv_cq,
		/* Mark create_flags as valid. */
		.comp_mask = IBV_WQ_INIT_ATTR_FLAGS,
		.create_flags = (rxq_data->vlan_strip ?
				 IBV_WQ_FLAGS_CVLAN_STRIPPING : 0),
	};
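	/*
	 * Worked example, illustrative only: for a queue of 512 descriptors
	 * (wqe_n = 512) configured with sges_n = 2, the RQ is created with
	 * 512 >> 2 = 128 WRs of 1 << 2 = 4 scatter/gather entries each, so
	 * the WR * SGE product always matches the descriptor count.
	 */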
	/* By default, FCS (CRC) is stripped by hardware. */
	if (rxq_data->crc_present) {
		wq_attr.ibv.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
		wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
	}
	if (priv->config.hw_padding) {
#if defined(HAVE_IBV_WQ_FLAG_RX_END_PADDING)
		wq_attr.ibv.create_flags |= IBV_WQ_FLAG_RX_END_PADDING;
		wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
#elif defined(HAVE_IBV_WQ_FLAGS_PCI_WRITE_END_PADDING)
		wq_attr.ibv.create_flags |= IBV_WQ_FLAGS_PCI_WRITE_END_PADDING;
		wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
#endif
	}
#ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
	wq_attr.mlx5 = (struct mlx5dv_wq_init_attr){
		.comp_mask = 0,
	};
	if (mlx5_rxq_mprq_enabled(rxq_data)) {
		struct mlx5dv_striding_rq_init_attr *mprq_attr =
						&wq_attr.mlx5.striding_rq_attrs;

		wq_attr.mlx5.comp_mask |= MLX5DV_WQ_INIT_ATTR_MASK_STRIDING_RQ;
		*mprq_attr = (struct mlx5dv_striding_rq_init_attr){
			.single_stride_log_num_of_bytes = rxq_data->strd_sz_n,
			.single_wqe_log_num_of_strides = rxq_data->strd_num_n,
			.two_byte_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT,
		};
	}
	rxq_obj->wq = mlx5_glue->dv_create_wq(priv->sh->ctx, &wq_attr.ibv,
					      &wq_attr.mlx5);
#else
	rxq_obj->wq = mlx5_glue->create_wq(priv->sh->ctx, &wq_attr.ibv);
#endif
	if (rxq_obj->wq) {
		/*
		 * Make sure the number of WRs*SGEs matches expectations
		 * since a queue cannot allocate more than "desc" buffers.
		 */
		if (wq_attr.ibv.max_wr != (wqe_n >> rxq_data->sges_n) ||
		    wq_attr.ibv.max_sge != (1u << rxq_data->sges_n)) {
			DRV_LOG(ERR,
				"Port %u Rx queue %u requested %u*%u but got"
				" %u*%u WRs*SGEs.",
				dev->data->port_id, idx,
				wqe_n >> rxq_data->sges_n,
				(1 << rxq_data->sges_n),
				wq_attr.ibv.max_wr, wq_attr.ibv.max_sge);
			claim_zero(mlx5_glue->destroy_wq(rxq_obj->wq));
			rxq_obj->wq = NULL;
			rte_errno = EINVAL;
		}
	}
	return rxq_obj->wq;
}

/**
 * Create the Rx queue Verbs object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   Queue index in DPDK Rx queue array.
 *
 * @return
 *   The Verbs object initialized, NULL otherwise and rte_errno is set.
 */
static struct mlx5_rxq_obj *
mlx5_rxq_ibv_obj_new(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
	struct mlx5_rxq_ctrl *rxq_ctrl =
		container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
	struct ibv_wq_attr mod;
	unsigned int cqe_n;
	unsigned int wqe_n = 1 << rxq_data->elts_n;
	struct mlx5_rxq_obj *tmpl = NULL;
	struct mlx5dv_cq cq_info;
	struct mlx5dv_rwq rwq;
	int ret = 0;
	struct mlx5dv_obj obj;

	MLX5_ASSERT(rxq_data);
	MLX5_ASSERT(!rxq_ctrl->obj);
	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_RX_QUEUE;
	priv->verbs_alloc_ctx.obj = rxq_ctrl;
	tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl), 0,
			   rxq_ctrl->socket);
	if (!tmpl) {
		DRV_LOG(ERR, "port %u Rx queue %u cannot allocate resources",
			dev->data->port_id, rxq_data->idx);
		rte_errno = ENOMEM;
		goto error;
	}
	tmpl->type = MLX5_RXQ_OBJ_TYPE_IBV;
	tmpl->rxq_ctrl = rxq_ctrl;
	if (rxq_ctrl->irq) {
		tmpl->ibv_channel =
				mlx5_glue->create_comp_channel(priv->sh->ctx);
		if (!tmpl->ibv_channel) {
			DRV_LOG(ERR, "Port %u: comp channel creation failure.",
				dev->data->port_id);
			rte_errno = ENOMEM;
			goto error;
		}
		tmpl->fd = ((struct ibv_comp_channel *)(tmpl->ibv_channel))->fd;
	}
	if (mlx5_rxq_mprq_enabled(rxq_data))
		cqe_n = wqe_n * (1 << rxq_data->strd_num_n) - 1;
	else
		cqe_n = wqe_n - 1;
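	/*
	 * Worked example, illustrative only: with 512 WQEs (elts_n = 9) and
	 * MPRQ using 64 strides per WQE (strd_num_n = 6), cqe_n is
	 * 512 * 64 - 1 = 32767; without MPRQ it is 512 - 1 = 511.
	 */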
	DRV_LOG(DEBUG, "port %u device_attr.max_qp_wr is %d",
		dev->data->port_id, priv->sh->device_attr.max_qp_wr);
	DRV_LOG(DEBUG, "port %u device_attr.max_sge is %d",
		dev->data->port_id, priv->sh->device_attr.max_sge);
	/* Create CQ using Verbs API. */
	tmpl->ibv_cq = mlx5_ibv_cq_new(dev, priv, rxq_data, cqe_n, tmpl);
	if (!tmpl->ibv_cq) {
		DRV_LOG(ERR, "Port %u Rx queue %u CQ creation failure.",
			dev->data->port_id, idx);
		rte_errno = ENOMEM;
		goto error;
	}
	obj.cq.in = tmpl->ibv_cq;
	obj.cq.out = &cq_info;
	ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ);
	if (ret) {
		rte_errno = ret;
		goto error;
	}
	if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
		DRV_LOG(ERR,
			"Port %u wrong MLX5_CQE_SIZE environment "
			"variable value: it should be set to %u.",
			dev->data->port_id, RTE_CACHE_LINE_SIZE);
		rte_errno = EINVAL;
		goto error;
	}
	/* Fill the rings. */
	rxq_data->cqe_n = log2above(cq_info.cqe_cnt);
	rxq_data->cq_db = cq_info.dbrec;
	rxq_data->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf;
	rxq_data->cq_uar = cq_info.cq_uar;
	rxq_data->cqn = cq_info.cqn;
	/* Create WQ (RQ) using Verbs API. */
	tmpl->wq = mlx5_ibv_wq_new(dev, priv, rxq_data, idx, wqe_n, tmpl);
	if (!tmpl->wq) {
		DRV_LOG(ERR, "Port %u Rx queue %u WQ creation failure.",
			dev->data->port_id, idx);
		rte_errno = ENOMEM;
		goto error;
	}
	/* Change queue state to ready. */
	mod = (struct ibv_wq_attr){
		.attr_mask = IBV_WQ_ATTR_STATE,
		.wq_state = IBV_WQS_RDY,
	};
	ret = mlx5_glue->modify_wq(tmpl->wq, &mod);
	if (ret) {
		DRV_LOG(ERR,
			"Port %u Rx queue %u WQ state to IBV_WQS_RDY failed.",
			dev->data->port_id, idx);
		rte_errno = ret;
		goto error;
	}
	obj.rwq.in = tmpl->wq;
	obj.rwq.out = &rwq;
	ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_RWQ);
	if (ret) {
		rte_errno = ret;
		goto error;
	}
	rxq_data->wqes = rwq.buf;
	rxq_data->rq_db = rwq.dbrec;
	rxq_data->cq_arm_sn = 0;
	mlx5_rxq_initialize(rxq_data);
	rxq_data->cq_ci = 0;
	DRV_LOG(DEBUG, "port %u rxq %u updated with %p", dev->data->port_id,
		idx, (void *)tmpl);
	LIST_INSERT_HEAD(&priv->rxqsobj, tmpl, next);
	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
	dev->data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED;
	rxq_ctrl->wqn = ((struct ibv_wq *)(tmpl->wq))->wq_num;
	return tmpl;
error:
	if (tmpl) {
		ret = rte_errno; /* Save rte_errno before cleanup. */
		if (tmpl->wq)
			claim_zero(mlx5_glue->destroy_wq(tmpl->wq));
		if (tmpl->ibv_cq)
			claim_zero(mlx5_glue->destroy_cq(tmpl->ibv_cq));
		if (tmpl->ibv_channel)
			claim_zero(mlx5_glue->destroy_comp_channel
							(tmpl->ibv_channel));
		mlx5_free(tmpl);
		rte_errno = ret; /* Restore rte_errno. */
	}
	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
	return NULL;
}

/**
 * Release an Rx Verbs queue object.
 *
 * @param rxq_obj
 *   Verbs Rx queue object.
 */
static void
mlx5_rxq_ibv_obj_release(struct mlx5_rxq_obj *rxq_obj)
{
	MLX5_ASSERT(rxq_obj);
	MLX5_ASSERT(rxq_obj->wq);
	MLX5_ASSERT(rxq_obj->ibv_cq);
	rxq_free_elts(rxq_obj->rxq_ctrl);
	claim_zero(mlx5_glue->destroy_wq(rxq_obj->wq));
	claim_zero(mlx5_glue->destroy_cq(rxq_obj->ibv_cq));
	if (rxq_obj->ibv_channel)
		claim_zero(mlx5_glue->destroy_comp_channel
							(rxq_obj->ibv_channel));
	LIST_REMOVE(rxq_obj, next);
	mlx5_free(rxq_obj);
}

struct mlx5_obj_ops ibv_obj_ops = {
	.rxq_obj_modify_vlan_strip = mlx5_rxq_obj_modify_wq_vlan_strip,
	.rxq_obj_new = mlx5_rxq_ibv_obj_new,
	.rxq_obj_release = mlx5_rxq_ibv_obj_release,
};
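
/*
 * Usage sketch, illustrative only and not part of this file: the driver
 * is expected to plug this table into the per-port private data so that
 * generic Rx queue code stays transport agnostic, along the lines of:
 *
 *     priv->obj_ops = ibv_obj_ops;
 *     rxq_ctrl->obj = priv->obj_ops.rxq_obj_new(dev, idx);
 *     ...
 *     priv->obj_ops.rxq_obj_release(rxq_ctrl->obj);
 */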
435