xref: /dpdk/drivers/net/mlx5/linux/mlx5_verbs.c (revision c279f187ee20b10a5101c8302c6636ec497dd8a2)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2020 Mellanox Technologies, Ltd
3  */
4 
5 #include <stddef.h>
6 #include <errno.h>
7 #include <stdbool.h>
8 #include <string.h>
9 #include <stdint.h>
10 #include <unistd.h>
11 #include <inttypes.h>
12 #include <sys/queue.h>
13 
14 #include "mlx5_autoconf.h"
15 
16 #include <rte_mbuf.h>
17 #include <rte_malloc.h>
18 #include <rte_ethdev_driver.h>
19 #include <rte_common.h>
20 
21 #include <mlx5_glue.h>
22 #include <mlx5_common.h>
23 #include <mlx5_common_mr.h>
24 #include <mlx5_rxtx.h>
25 #include <mlx5_verbs.h>
26 #include <mlx5_utils.h>
27 #include <mlx5_malloc.h>
28 
/**
 * Register a memory region through the common Verbs helper.
 *
 * Thin adapter: every argument is forwarded unchanged to
 * mlx5_common_verbs_reg_mr().
 *
 * @param[in] pd
 *   Pointer to protection domain context.
 * @param[in] addr
 *   Start address of the memory to register.
 * @param[in] length
 *   Length of the memory to register.
 * @param[out] pmd_mr
 *   PMD MR descriptor handed to the common helper to be filled in.
 *
 * @return
 *   The value returned by mlx5_common_verbs_reg_mr(), passed through as is.
 */
static int
mlx5_reg_mr(void *pd, void *addr, size_t length,
	    struct mlx5_pmd_mr *pmd_mr)
{
	int status = mlx5_common_verbs_reg_mr(pd, addr, length, pmd_mr);

	return status;
}
51 
/**
 * Deregister a memory region through the common Verbs helper.
 *
 * Thin adapter: the descriptor is forwarded unchanged to
 * mlx5_common_verbs_dereg_mr().
 *
 * @param[in] pmd_mr
 *   PMD MR descriptor of the memory region to deregister.
 */
static void
mlx5_dereg_mr(struct mlx5_pmd_mr *pmd_mr)
{
	/* All the work is done by the common layer. */
	mlx5_common_verbs_dereg_mr(pmd_mr);
}
64 
65 /* verbs operations. */
66 const struct mlx5_verbs_ops mlx5_verbs_ops = {
67 	.reg_mr = mlx5_reg_mr,
68 	.dereg_mr = mlx5_dereg_mr,
69 };
70 
71 /**
72  * Modify Rx WQ vlan stripping offload
73  *
74  * @param rxq_obj
75  *   Rx queue object.
76  *
77  * @return 0 on success, non-0 otherwise
78  */
79 static int
80 mlx5_rxq_obj_modify_wq_vlan_strip(struct mlx5_rxq_obj *rxq_obj, int on)
81 {
82 	uint16_t vlan_offloads =
83 		(on ? IBV_WQ_FLAGS_CVLAN_STRIPPING : 0) |
84 		0;
85 	struct ibv_wq_attr mod;
86 	mod = (struct ibv_wq_attr){
87 		.attr_mask = IBV_WQ_ATTR_FLAGS,
88 		.flags_mask = IBV_WQ_FLAGS_CVLAN_STRIPPING,
89 		.flags = vlan_offloads,
90 	};
91 	return mlx5_glue->modify_wq(rxq_obj->wq, &mod);
92 }
93 
94 /**
95  * Create a CQ Verbs object.
96  *
97  * @param dev
98  *   Pointer to Ethernet device.
99  * @param idx
100  *   Queue index in DPDK Rx queue array.
101  *
102  * @return
103  *   The Verbs CQ object initialized, NULL otherwise and rte_errno is set.
104  */
105 static struct ibv_cq *
106 mlx5_rxq_ibv_cq_create(struct rte_eth_dev *dev, uint16_t idx)
107 {
108 	struct mlx5_priv *priv = dev->data->dev_private;
109 	struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
110 	struct mlx5_rxq_ctrl *rxq_ctrl =
111 		container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
112 	struct mlx5_rxq_obj *rxq_obj = rxq_ctrl->obj;
113 	unsigned int cqe_n = mlx5_rxq_cqe_num(rxq_data);
114 	struct {
115 		struct ibv_cq_init_attr_ex ibv;
116 		struct mlx5dv_cq_init_attr mlx5;
117 	} cq_attr;
118 
119 	cq_attr.ibv = (struct ibv_cq_init_attr_ex){
120 		.cqe = cqe_n,
121 		.channel = rxq_obj->ibv_channel,
122 		.comp_mask = 0,
123 	};
124 	cq_attr.mlx5 = (struct mlx5dv_cq_init_attr){
125 		.comp_mask = 0,
126 	};
127 	if (priv->config.cqe_comp && !rxq_data->hw_timestamp) {
128 		cq_attr.mlx5.comp_mask |=
129 				MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;
130 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
131 		cq_attr.mlx5.cqe_comp_res_format =
132 				mlx5_rxq_mprq_enabled(rxq_data) ?
133 				MLX5DV_CQE_RES_FORMAT_CSUM_STRIDX :
134 				MLX5DV_CQE_RES_FORMAT_HASH;
135 #else
136 		cq_attr.mlx5.cqe_comp_res_format = MLX5DV_CQE_RES_FORMAT_HASH;
137 #endif
138 		/*
139 		 * For vectorized Rx, it must not be doubled in order to
140 		 * make cq_ci and rq_ci aligned.
141 		 */
142 		if (mlx5_rxq_check_vec_support(rxq_data) < 0)
143 			cq_attr.ibv.cqe *= 2;
144 	} else if (priv->config.cqe_comp && rxq_data->hw_timestamp) {
145 		DRV_LOG(DEBUG,
146 			"Port %u Rx CQE compression is disabled for HW"
147 			" timestamp.",
148 			dev->data->port_id);
149 	}
150 #ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD
151 	if (priv->config.cqe_pad) {
152 		cq_attr.mlx5.comp_mask |= MLX5DV_CQ_INIT_ATTR_MASK_FLAGS;
153 		cq_attr.mlx5.flags |= MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD;
154 	}
155 #endif
156 	return mlx5_glue->cq_ex_to_cq(mlx5_glue->dv_create_cq(priv->sh->ctx,
157 							      &cq_attr.ibv,
158 							      &cq_attr.mlx5));
159 }
160 
161 /**
162  * Create a WQ Verbs object.
163  *
164  * @param dev
165  *   Pointer to Ethernet device.
166  * @param idx
167  *   Queue index in DPDK Rx queue array.
168  *
169  * @return
170  *   The Verbs WQ object initialized, NULL otherwise and rte_errno is set.
171  */
172 static struct ibv_wq *
173 mlx5_rxq_ibv_wq_create(struct rte_eth_dev *dev, uint16_t idx)
174 {
175 	struct mlx5_priv *priv = dev->data->dev_private;
176 	struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
177 	struct mlx5_rxq_ctrl *rxq_ctrl =
178 		container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
179 	struct mlx5_rxq_obj *rxq_obj = rxq_ctrl->obj;
180 	unsigned int wqe_n = 1 << rxq_data->elts_n;
181 	struct {
182 		struct ibv_wq_init_attr ibv;
183 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
184 		struct mlx5dv_wq_init_attr mlx5;
185 #endif
186 	} wq_attr;
187 
188 	wq_attr.ibv = (struct ibv_wq_init_attr){
189 		.wq_context = NULL, /* Could be useful in the future. */
190 		.wq_type = IBV_WQT_RQ,
191 		/* Max number of outstanding WRs. */
192 		.max_wr = wqe_n >> rxq_data->sges_n,
193 		/* Max number of scatter/gather elements in a WR. */
194 		.max_sge = 1 << rxq_data->sges_n,
195 		.pd = priv->sh->pd,
196 		.cq = rxq_obj->ibv_cq,
197 		.comp_mask = IBV_WQ_FLAGS_CVLAN_STRIPPING | 0,
198 		.create_flags = (rxq_data->vlan_strip ?
199 				 IBV_WQ_FLAGS_CVLAN_STRIPPING : 0),
200 	};
201 	/* By default, FCS (CRC) is stripped by hardware. */
202 	if (rxq_data->crc_present) {
203 		wq_attr.ibv.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
204 		wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
205 	}
206 	if (priv->config.hw_padding) {
207 #if defined(HAVE_IBV_WQ_FLAG_RX_END_PADDING)
208 		wq_attr.ibv.create_flags |= IBV_WQ_FLAG_RX_END_PADDING;
209 		wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
210 #elif defined(HAVE_IBV_WQ_FLAGS_PCI_WRITE_END_PADDING)
211 		wq_attr.ibv.create_flags |= IBV_WQ_FLAGS_PCI_WRITE_END_PADDING;
212 		wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
213 #endif
214 	}
215 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
216 	wq_attr.mlx5 = (struct mlx5dv_wq_init_attr){
217 		.comp_mask = 0,
218 	};
219 	if (mlx5_rxq_mprq_enabled(rxq_data)) {
220 		struct mlx5dv_striding_rq_init_attr *mprq_attr =
221 						&wq_attr.mlx5.striding_rq_attrs;
222 
223 		wq_attr.mlx5.comp_mask |= MLX5DV_WQ_INIT_ATTR_MASK_STRIDING_RQ;
224 		*mprq_attr = (struct mlx5dv_striding_rq_init_attr){
225 			.single_stride_log_num_of_bytes = rxq_data->strd_sz_n,
226 			.single_wqe_log_num_of_strides = rxq_data->strd_num_n,
227 			.two_byte_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT,
228 		};
229 	}
230 	rxq_obj->wq = mlx5_glue->dv_create_wq(priv->sh->ctx, &wq_attr.ibv,
231 					      &wq_attr.mlx5);
232 #else
233 	rxq_obj->wq = mlx5_glue->create_wq(priv->sh->ctx, &wq_attr.ibv);
234 #endif
235 	if (rxq_obj->wq) {
236 		/*
237 		 * Make sure number of WRs*SGEs match expectations since a queue
238 		 * cannot allocate more than "desc" buffers.
239 		 */
240 		if (wq_attr.ibv.max_wr != (wqe_n >> rxq_data->sges_n) ||
241 		    wq_attr.ibv.max_sge != (1u << rxq_data->sges_n)) {
242 			DRV_LOG(ERR,
243 				"Port %u Rx queue %u requested %u*%u but got"
244 				" %u*%u WRs*SGEs.",
245 				dev->data->port_id, idx,
246 				wqe_n >> rxq_data->sges_n,
247 				(1 << rxq_data->sges_n),
248 				wq_attr.ibv.max_wr, wq_attr.ibv.max_sge);
249 			claim_zero(mlx5_glue->destroy_wq(rxq_obj->wq));
250 			rxq_obj->wq = NULL;
251 			rte_errno = EINVAL;
252 		}
253 	}
254 	return rxq_obj->wq;
255 }
256 
257 /**
258  * Create the Rx queue Verbs object.
259  *
260  * @param dev
261  *   Pointer to Ethernet device.
262  * @param idx
263  *   Queue index in DPDK Rx queue array.
264  *
265  * @return
266  *   0 on success, a negative errno value otherwise and rte_errno is set.
267  */
268 static int
269 mlx5_rxq_ibv_obj_new(struct rte_eth_dev *dev, uint16_t idx)
270 {
271 	struct mlx5_priv *priv = dev->data->dev_private;
272 	struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
273 	struct mlx5_rxq_ctrl *rxq_ctrl =
274 		container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
275 	struct ibv_wq_attr mod;
276 	struct mlx5_rxq_obj *tmpl = rxq_ctrl->obj;
277 	struct mlx5dv_cq cq_info;
278 	struct mlx5dv_rwq rwq;
279 	int ret = 0;
280 	struct mlx5dv_obj obj;
281 
282 	MLX5_ASSERT(rxq_data);
283 	MLX5_ASSERT(tmpl);
284 	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_RX_QUEUE;
285 	priv->verbs_alloc_ctx.obj = rxq_ctrl;
286 	tmpl->type = MLX5_RXQ_OBJ_TYPE_IBV;
287 	tmpl->rxq_ctrl = rxq_ctrl;
288 	if (rxq_ctrl->irq) {
289 		tmpl->ibv_channel =
290 				mlx5_glue->create_comp_channel(priv->sh->ctx);
291 		if (!tmpl->ibv_channel) {
292 			DRV_LOG(ERR, "Port %u: comp channel creation failure.",
293 				dev->data->port_id);
294 			rte_errno = ENOMEM;
295 			goto error;
296 		}
297 		tmpl->fd = ((struct ibv_comp_channel *)(tmpl->ibv_channel))->fd;
298 	}
299 	/* Create CQ using Verbs API. */
300 	tmpl->ibv_cq = mlx5_rxq_ibv_cq_create(dev, idx);
301 	if (!tmpl->ibv_cq) {
302 		DRV_LOG(ERR, "Port %u Rx queue %u CQ creation failure.",
303 			dev->data->port_id, idx);
304 		rte_errno = ENOMEM;
305 		goto error;
306 	}
307 	obj.cq.in = tmpl->ibv_cq;
308 	obj.cq.out = &cq_info;
309 	ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ);
310 	if (ret) {
311 		rte_errno = ret;
312 		goto error;
313 	}
314 	if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
315 		DRV_LOG(ERR,
316 			"Port %u wrong MLX5_CQE_SIZE environment "
317 			"variable value: it should be set to %u.",
318 			dev->data->port_id, RTE_CACHE_LINE_SIZE);
319 		rte_errno = EINVAL;
320 		goto error;
321 	}
322 	/* Fill the rings. */
323 	rxq_data->cqe_n = log2above(cq_info.cqe_cnt);
324 	rxq_data->cq_db = cq_info.dbrec;
325 	rxq_data->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf;
326 	rxq_data->cq_uar = cq_info.cq_uar;
327 	rxq_data->cqn = cq_info.cqn;
328 	/* Create WQ (RQ) using Verbs API. */
329 	tmpl->wq = mlx5_rxq_ibv_wq_create(dev, idx);
330 	if (!tmpl->wq) {
331 		DRV_LOG(ERR, "Port %u Rx queue %u WQ creation failure.",
332 			dev->data->port_id, idx);
333 		rte_errno = ENOMEM;
334 		goto error;
335 	}
336 	/* Change queue state to ready. */
337 	mod = (struct ibv_wq_attr){
338 		.attr_mask = IBV_WQ_ATTR_STATE,
339 		.wq_state = IBV_WQS_RDY,
340 	};
341 	ret = mlx5_glue->modify_wq(tmpl->wq, &mod);
342 	if (ret) {
343 		DRV_LOG(ERR,
344 			"Port %u Rx queue %u WQ state to IBV_WQS_RDY failed.",
345 			dev->data->port_id, idx);
346 		rte_errno = ret;
347 		goto error;
348 	}
349 	obj.rwq.in = tmpl->wq;
350 	obj.rwq.out = &rwq;
351 	ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_RWQ);
352 	if (ret) {
353 		rte_errno = ret;
354 		goto error;
355 	}
356 	rxq_data->wqes = rwq.buf;
357 	rxq_data->rq_db = rwq.dbrec;
358 	rxq_data->cq_arm_sn = 0;
359 	mlx5_rxq_initialize(rxq_data);
360 	rxq_data->cq_ci = 0;
361 	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
362 	dev->data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED;
363 	rxq_ctrl->wqn = ((struct ibv_wq *)(tmpl->wq))->wq_num;
364 	return 0;
365 error:
366 	ret = rte_errno; /* Save rte_errno before cleanup. */
367 	if (tmpl->wq)
368 		claim_zero(mlx5_glue->destroy_wq(tmpl->wq));
369 	if (tmpl->ibv_cq)
370 		claim_zero(mlx5_glue->destroy_cq(tmpl->ibv_cq));
371 	if (tmpl->ibv_channel)
372 		claim_zero(mlx5_glue->destroy_comp_channel(tmpl->ibv_channel));
373 	rte_errno = ret; /* Restore rte_errno. */
374 	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
375 	return -rte_errno;
376 }
377 
378 /**
379  * Release an Rx verbs queue object.
380  *
381  * @param rxq_obj
382  *   Verbs Rx queue object.
383  */
384 static void
385 mlx5_rxq_ibv_obj_release(struct mlx5_rxq_obj *rxq_obj)
386 {
387 	MLX5_ASSERT(rxq_obj);
388 	MLX5_ASSERT(rxq_obj->wq);
389 	MLX5_ASSERT(rxq_obj->ibv_cq);
390 	claim_zero(mlx5_glue->destroy_wq(rxq_obj->wq));
391 	claim_zero(mlx5_glue->destroy_cq(rxq_obj->ibv_cq));
392 	if (rxq_obj->ibv_channel)
393 		claim_zero(mlx5_glue->destroy_comp_channel
394 							(rxq_obj->ibv_channel));
395 }
396 
397 /**
398  * Get event for an Rx verbs queue object.
399  *
400  * @param rxq_obj
401  *   Verbs Rx queue object.
402  *
403  * @return
404  *   0 on success, a negative errno value otherwise and rte_errno is set.
405  */
406 static int
407 mlx5_rx_ibv_get_event(struct mlx5_rxq_obj *rxq_obj)
408 {
409 	struct ibv_cq *ev_cq;
410 	void *ev_ctx;
411 	int ret = mlx5_glue->get_cq_event(rxq_obj->ibv_channel,
412 					  &ev_cq, &ev_ctx);
413 
414 	if (ret < 0 || ev_cq != rxq_obj->ibv_cq)
415 		goto exit;
416 	mlx5_glue->ack_cq_events(rxq_obj->ibv_cq, 1);
417 	return 0;
418 exit:
419 	if (ret < 0)
420 		rte_errno = errno;
421 	else
422 		rte_errno = EINVAL;
423 	return -rte_errno;
424 }
425 
426 /**
427  * Modifies the attributes for the specified WQ.
428  *
429  * @param rxq_obj
430  *   Verbs Rx queue object.
431  *
432  * @return
433  *   0 on success, a negative errno value otherwise and rte_errno is set.
434  */
435 static int
436 mlx5_ibv_modify_wq(struct mlx5_rxq_obj *rxq_obj, bool is_start)
437 {
438 	struct ibv_wq_attr mod = {
439 		.attr_mask = IBV_WQ_ATTR_STATE,
440 		.wq_state = is_start ? IBV_WQS_RDY : IBV_WQS_RESET,
441 	};
442 
443 	return mlx5_glue->modify_wq(rxq_obj->wq, &mod);
444 }
445 
446 struct mlx5_obj_ops ibv_obj_ops = {
447 	.rxq_obj_modify_vlan_strip = mlx5_rxq_obj_modify_wq_vlan_strip,
448 	.rxq_obj_new = mlx5_rxq_ibv_obj_new,
449 	.rxq_event_get = mlx5_rx_ibv_get_event,
450 	.rxq_obj_modify = mlx5_ibv_modify_wq,
451 	.rxq_obj_release = mlx5_rxq_ibv_obj_release,
452 };
453