xref: /dpdk/drivers/net/mlx5/mlx5_txq.c (revision 8205e241b2b01c05f2cffe5158c053d614d1f68c)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright 2015 6WIND S.A.
5  *   Copyright 2015 Mellanox.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of 6WIND S.A. nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include <stddef.h>
35 #include <assert.h>
36 #include <errno.h>
37 #include <string.h>
38 #include <stdint.h>
39 
40 /* Verbs header. */
41 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
42 #ifdef PEDANTIC
43 #pragma GCC diagnostic ignored "-pedantic"
44 #endif
45 #include <infiniband/verbs.h>
46 #ifdef PEDANTIC
47 #pragma GCC diagnostic error "-pedantic"
48 #endif
49 
50 /* DPDK headers don't like -pedantic. */
51 #ifdef PEDANTIC
52 #pragma GCC diagnostic ignored "-pedantic"
53 #endif
54 #include <rte_mbuf.h>
55 #include <rte_malloc.h>
56 #include <rte_ethdev.h>
57 #include <rte_common.h>
58 #ifdef PEDANTIC
59 #pragma GCC diagnostic error "-pedantic"
60 #endif
61 
62 #include "mlx5_utils.h"
63 #include "mlx5.h"
64 #include "mlx5_rxtx.h"
65 #include "mlx5_autoconf.h"
66 #include "mlx5_defs.h"
67 
68 /**
69  * Allocate TX queue elements.
70  *
71  * @param txq
72  *   Pointer to TX queue structure.
73  * @param elts_n
74  *   Number of elements to allocate.
75  *
76  * @return
77  *   0 on success, errno value on failure.
78  */
79 static int
80 txq_alloc_elts(struct txq *txq, unsigned int elts_n)
81 {
82 	unsigned int i;
83 	struct txq_elt (*elts)[elts_n] =
84 		rte_calloc_socket("TXQ", 1, sizeof(*elts), 0, txq->socket);
85 	linear_t (*elts_linear)[elts_n] =
86 		rte_calloc_socket("TXQ", 1, sizeof(*elts_linear), 0,
87 				  txq->socket);
88 	struct ibv_mr *mr_linear = NULL;
89 	int ret = 0;
90 
91 	if ((elts == NULL) || (elts_linear == NULL)) {
92 		ERROR("%p: can't allocate packets array", (void *)txq);
93 		ret = ENOMEM;
94 		goto error;
95 	}
96 	mr_linear =
97 		ibv_reg_mr(txq->priv->pd, elts_linear, sizeof(*elts_linear),
98 			   (IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE));
99 	if (mr_linear == NULL) {
100 		ERROR("%p: unable to configure MR, ibv_reg_mr() failed",
101 		      (void *)txq);
102 		ret = EINVAL;
103 		goto error;
104 	}
105 	for (i = 0; (i != elts_n); ++i) {
106 		struct txq_elt *elt = &(*elts)[i];
107 
108 		elt->buf = NULL;
109 	}
110 	DEBUG("%p: allocated and configured %u WRs", (void *)txq, elts_n);
111 	txq->elts_n = elts_n;
112 	txq->elts = elts;
113 	txq->elts_head = 0;
114 	txq->elts_tail = 0;
115 	txq->elts_comp = 0;
116 	/* Request send completion every MLX5_PMD_TX_PER_COMP_REQ packets or
117 	 * at least 4 times per ring. */
118 	txq->elts_comp_cd_init =
119 		((MLX5_PMD_TX_PER_COMP_REQ < (elts_n / 4)) ?
120 		 MLX5_PMD_TX_PER_COMP_REQ : (elts_n / 4));
121 	txq->elts_comp_cd = txq->elts_comp_cd_init;
122 	txq->elts_linear = elts_linear;
123 	txq->mr_linear = mr_linear;
124 	assert(ret == 0);
125 	return 0;
126 error:
127 	if (mr_linear != NULL)
128 		claim_zero(ibv_dereg_mr(mr_linear));
129 
130 	rte_free(elts_linear);
131 	rte_free(elts);
132 
133 	DEBUG("%p: failed, freed everything", (void *)txq);
134 	assert(ret > 0);
135 	return ret;
136 }
137 
138 /**
139  * Free TX queue elements.
140  *
141  * @param txq
142  *   Pointer to TX queue structure.
143  */
144 static void
145 txq_free_elts(struct txq *txq)
146 {
147 	unsigned int i;
148 	unsigned int elts_n = txq->elts_n;
149 	struct txq_elt (*elts)[elts_n] = txq->elts;
150 	linear_t (*elts_linear)[elts_n] = txq->elts_linear;
151 	struct ibv_mr *mr_linear = txq->mr_linear;
152 
153 	DEBUG("%p: freeing WRs", (void *)txq);
154 	txq->elts_n = 0;
155 	txq->elts = NULL;
156 	txq->elts_linear = NULL;
157 	txq->mr_linear = NULL;
158 	if (mr_linear != NULL)
159 		claim_zero(ibv_dereg_mr(mr_linear));
160 
161 	rte_free(elts_linear);
162 	if (elts == NULL)
163 		return;
164 	for (i = 0; (i != RTE_DIM(*elts)); ++i) {
165 		struct txq_elt *elt = &(*elts)[i];
166 
167 		if (elt->buf == NULL)
168 			continue;
169 		rte_pktmbuf_free(elt->buf);
170 	}
171 	rte_free(elts);
172 }
173 
174 /**
175  * Clean up a TX queue.
176  *
177  * Destroy objects, free allocated memory and reset the structure for reuse.
178  *
179  * @param txq
180  *   Pointer to TX queue structure.
181  */
182 void
183 txq_cleanup(struct txq *txq)
184 {
185 	struct ibv_exp_release_intf_params params;
186 	size_t i;
187 
188 	DEBUG("cleaning up %p", (void *)txq);
189 	txq_free_elts(txq);
190 	if (txq->if_qp != NULL) {
191 		assert(txq->priv != NULL);
192 		assert(txq->priv->ctx != NULL);
193 		assert(txq->qp != NULL);
194 		params = (struct ibv_exp_release_intf_params){
195 			.comp_mask = 0,
196 		};
197 		claim_zero(ibv_exp_release_intf(txq->priv->ctx,
198 						txq->if_qp,
199 						&params));
200 	}
201 	if (txq->if_cq != NULL) {
202 		assert(txq->priv != NULL);
203 		assert(txq->priv->ctx != NULL);
204 		assert(txq->cq != NULL);
205 		params = (struct ibv_exp_release_intf_params){
206 			.comp_mask = 0,
207 		};
208 		claim_zero(ibv_exp_release_intf(txq->priv->ctx,
209 						txq->if_cq,
210 						&params));
211 	}
212 	if (txq->qp != NULL)
213 		claim_zero(ibv_destroy_qp(txq->qp));
214 	if (txq->cq != NULL)
215 		claim_zero(ibv_destroy_cq(txq->cq));
216 	if (txq->rd != NULL) {
217 		struct ibv_exp_destroy_res_domain_attr attr = {
218 			.comp_mask = 0,
219 		};
220 
221 		assert(txq->priv != NULL);
222 		assert(txq->priv->ctx != NULL);
223 		claim_zero(ibv_exp_destroy_res_domain(txq->priv->ctx,
224 						      txq->rd,
225 						      &attr));
226 	}
227 	for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) {
228 		if (txq->mp2mr[i].mp == NULL)
229 			break;
230 		assert(txq->mp2mr[i].mr != NULL);
231 		claim_zero(ibv_dereg_mr(txq->mp2mr[i].mr));
232 	}
233 	memset(txq, 0, sizeof(*txq));
234 }
235 
236 /**
237  * Configure a TX queue.
238  *
239  * @param dev
240  *   Pointer to Ethernet device structure.
241  * @param txq
242  *   Pointer to TX queue structure.
243  * @param desc
244  *   Number of descriptors to configure in queue.
245  * @param socket
246  *   NUMA socket on which memory must be allocated.
247  * @param[in] conf
248  *   Thresholds parameters.
249  *
250  * @return
251  *   0 on success, errno value on failure.
252  */
253 static int
254 txq_setup(struct rte_eth_dev *dev, struct txq *txq, uint16_t desc,
255 	  unsigned int socket, const struct rte_eth_txconf *conf)
256 {
257 	struct priv *priv = dev->data->dev_private;
258 	struct txq tmpl = {
259 		.priv = priv,
260 		.socket = socket
261 	};
262 	union {
263 		struct ibv_exp_query_intf_params params;
264 		struct ibv_exp_qp_init_attr init;
265 		struct ibv_exp_res_domain_init_attr rd;
266 		struct ibv_exp_cq_init_attr cq;
267 		struct ibv_exp_qp_attr mod;
268 	} attr;
269 	enum ibv_exp_query_intf_status status;
270 	int ret = 0;
271 
272 	(void)conf; /* Thresholds configuration (ignored). */
273 	if ((desc == 0) || (desc % MLX5_PMD_SGE_WR_N)) {
274 		ERROR("%p: invalid number of TX descriptors (must be a"
275 		      " multiple of %d)", (void *)dev, MLX5_PMD_SGE_WR_N);
276 		return EINVAL;
277 	}
278 	desc /= MLX5_PMD_SGE_WR_N;
279 	/* MRs will be registered in mp2mr[] later. */
280 	attr.rd = (struct ibv_exp_res_domain_init_attr){
281 		.comp_mask = (IBV_EXP_RES_DOMAIN_THREAD_MODEL |
282 			      IBV_EXP_RES_DOMAIN_MSG_MODEL),
283 		.thread_model = IBV_EXP_THREAD_SINGLE,
284 		.msg_model = IBV_EXP_MSG_HIGH_BW,
285 	};
286 	tmpl.rd = ibv_exp_create_res_domain(priv->ctx, &attr.rd);
287 	if (tmpl.rd == NULL) {
288 		ret = ENOMEM;
289 		ERROR("%p: RD creation failure: %s",
290 		      (void *)dev, strerror(ret));
291 		goto error;
292 	}
293 	attr.cq = (struct ibv_exp_cq_init_attr){
294 		.comp_mask = IBV_EXP_CQ_INIT_ATTR_RES_DOMAIN,
295 		.res_domain = tmpl.rd,
296 	};
297 	tmpl.cq = ibv_exp_create_cq(priv->ctx, desc, NULL, NULL, 0, &attr.cq);
298 	if (tmpl.cq == NULL) {
299 		ret = ENOMEM;
300 		ERROR("%p: CQ creation failure: %s",
301 		      (void *)dev, strerror(ret));
302 		goto error;
303 	}
304 	DEBUG("priv->device_attr.max_qp_wr is %d",
305 	      priv->device_attr.max_qp_wr);
306 	DEBUG("priv->device_attr.max_sge is %d",
307 	      priv->device_attr.max_sge);
308 	attr.init = (struct ibv_exp_qp_init_attr){
309 		/* CQ to be associated with the send queue. */
310 		.send_cq = tmpl.cq,
311 		/* CQ to be associated with the receive queue. */
312 		.recv_cq = tmpl.cq,
313 		.cap = {
314 			/* Max number of outstanding WRs. */
315 			.max_send_wr = ((priv->device_attr.max_qp_wr < desc) ?
316 					priv->device_attr.max_qp_wr :
317 					desc),
318 			/* Max number of scatter/gather elements in a WR. */
319 			.max_send_sge = ((priv->device_attr.max_sge <
320 					  MLX5_PMD_SGE_WR_N) ?
321 					 priv->device_attr.max_sge :
322 					 MLX5_PMD_SGE_WR_N),
323 #if MLX5_PMD_MAX_INLINE > 0
324 			.max_inline_data = MLX5_PMD_MAX_INLINE,
325 #endif
326 		},
327 		.qp_type = IBV_QPT_RAW_PACKET,
328 		/* Do *NOT* enable this, completions events are managed per
329 		 * TX burst. */
330 		.sq_sig_all = 0,
331 		.pd = priv->pd,
332 		.res_domain = tmpl.rd,
333 		.comp_mask = (IBV_EXP_QP_INIT_ATTR_PD |
334 			      IBV_EXP_QP_INIT_ATTR_RES_DOMAIN),
335 	};
336 	tmpl.qp = ibv_exp_create_qp(priv->ctx, &attr.init);
337 	if (tmpl.qp == NULL) {
338 		ret = (errno ? errno : EINVAL);
339 		ERROR("%p: QP creation failure: %s",
340 		      (void *)dev, strerror(ret));
341 		goto error;
342 	}
343 #if MLX5_PMD_MAX_INLINE > 0
344 	/* ibv_create_qp() updates this value. */
345 	tmpl.max_inline = attr.init.cap.max_inline_data;
346 #endif
347 	attr.mod = (struct ibv_exp_qp_attr){
348 		/* Move the QP to this state. */
349 		.qp_state = IBV_QPS_INIT,
350 		/* Primary port number. */
351 		.port_num = priv->port
352 	};
353 	ret = ibv_exp_modify_qp(tmpl.qp, &attr.mod,
354 				(IBV_EXP_QP_STATE | IBV_EXP_QP_PORT));
355 	if (ret) {
356 		ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
357 		      (void *)dev, strerror(ret));
358 		goto error;
359 	}
360 	ret = txq_alloc_elts(&tmpl, desc);
361 	if (ret) {
362 		ERROR("%p: TXQ allocation failed: %s",
363 		      (void *)dev, strerror(ret));
364 		goto error;
365 	}
366 	attr.mod = (struct ibv_exp_qp_attr){
367 		.qp_state = IBV_QPS_RTR
368 	};
369 	ret = ibv_exp_modify_qp(tmpl.qp, &attr.mod, IBV_EXP_QP_STATE);
370 	if (ret) {
371 		ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
372 		      (void *)dev, strerror(ret));
373 		goto error;
374 	}
375 	attr.mod.qp_state = IBV_QPS_RTS;
376 	ret = ibv_exp_modify_qp(tmpl.qp, &attr.mod, IBV_EXP_QP_STATE);
377 	if (ret) {
378 		ERROR("%p: QP state to IBV_QPS_RTS failed: %s",
379 		      (void *)dev, strerror(ret));
380 		goto error;
381 	}
382 	attr.params = (struct ibv_exp_query_intf_params){
383 		.intf_scope = IBV_EXP_INTF_GLOBAL,
384 		.intf = IBV_EXP_INTF_CQ,
385 		.obj = tmpl.cq,
386 	};
387 	tmpl.if_cq = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
388 	if (tmpl.if_cq == NULL) {
389 		ret = EINVAL;
390 		ERROR("%p: CQ interface family query failed with status %d",
391 		      (void *)dev, status);
392 		goto error;
393 	}
394 	attr.params = (struct ibv_exp_query_intf_params){
395 		.intf_scope = IBV_EXP_INTF_GLOBAL,
396 		.intf = IBV_EXP_INTF_QP_BURST,
397 		.obj = tmpl.qp,
398 	};
399 	tmpl.if_qp = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
400 	if (tmpl.if_qp == NULL) {
401 		ret = EINVAL;
402 		ERROR("%p: QP interface family query failed with status %d",
403 		      (void *)dev, status);
404 		goto error;
405 	}
406 	/* Clean up txq in case we're reinitializing it. */
407 	DEBUG("%p: cleaning-up old txq just in case", (void *)txq);
408 	txq_cleanup(txq);
409 	*txq = tmpl;
410 	DEBUG("%p: txq updated with %p", (void *)txq, (void *)&tmpl);
411 	assert(ret == 0);
412 	return 0;
413 error:
414 	txq_cleanup(&tmpl);
415 	assert(ret > 0);
416 	return ret;
417 }
418 
419 /**
420  * DPDK callback to configure a TX queue.
421  *
422  * @param dev
423  *   Pointer to Ethernet device structure.
424  * @param idx
425  *   TX queue index.
426  * @param desc
427  *   Number of descriptors to configure in queue.
428  * @param socket
429  *   NUMA socket on which memory must be allocated.
430  * @param[in] conf
431  *   Thresholds parameters.
432  *
433  * @return
434  *   0 on success, negative errno value on failure.
435  */
436 int
437 mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
438 		    unsigned int socket, const struct rte_eth_txconf *conf)
439 {
440 	struct priv *priv = dev->data->dev_private;
441 	struct txq *txq = (*priv->txqs)[idx];
442 	int ret;
443 
444 	priv_lock(priv);
445 	DEBUG("%p: configuring queue %u for %u descriptors",
446 	      (void *)dev, idx, desc);
447 	if (idx >= priv->txqs_n) {
448 		ERROR("%p: queue index out of range (%u >= %u)",
449 		      (void *)dev, idx, priv->txqs_n);
450 		priv_unlock(priv);
451 		return -EOVERFLOW;
452 	}
453 	if (txq != NULL) {
454 		DEBUG("%p: reusing already allocated queue index %u (%p)",
455 		      (void *)dev, idx, (void *)txq);
456 		if (priv->started) {
457 			priv_unlock(priv);
458 			return -EEXIST;
459 		}
460 		(*priv->txqs)[idx] = NULL;
461 		txq_cleanup(txq);
462 	} else {
463 		txq = rte_calloc_socket("TXQ", 1, sizeof(*txq), 0, socket);
464 		if (txq == NULL) {
465 			ERROR("%p: unable to allocate queue index %u",
466 			      (void *)dev, idx);
467 			priv_unlock(priv);
468 			return -ENOMEM;
469 		}
470 	}
471 	ret = txq_setup(dev, txq, desc, socket, conf);
472 	if (ret)
473 		rte_free(txq);
474 	else {
475 		txq->stats.idx = idx;
476 		DEBUG("%p: adding TX queue %p to list",
477 		      (void *)dev, (void *)txq);
478 		(*priv->txqs)[idx] = txq;
479 		/* Update send callback. */
480 		dev->tx_pkt_burst = mlx5_tx_burst;
481 	}
482 	priv_unlock(priv);
483 	return -ret;
484 }
485 
486 /**
487  * DPDK callback to release a TX queue.
488  *
489  * @param dpdk_txq
490  *   Generic TX queue pointer.
491  */
492 void
493 mlx5_tx_queue_release(void *dpdk_txq)
494 {
495 	struct txq *txq = (struct txq *)dpdk_txq;
496 	struct priv *priv;
497 	unsigned int i;
498 
499 	if (txq == NULL)
500 		return;
501 	priv = txq->priv;
502 	priv_lock(priv);
503 	for (i = 0; (i != priv->txqs_n); ++i)
504 		if ((*priv->txqs)[i] == txq) {
505 			DEBUG("%p: removing TX queue %p from list",
506 			      (void *)priv->dev, (void *)txq);
507 			(*priv->txqs)[i] = NULL;
508 			break;
509 		}
510 	txq_cleanup(txq);
511 	rte_free(txq);
512 	priv_unlock(priv);
513 }
514