1 /*
2 * Copyright (c) 2012 Mellanox Technologies, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33 #include <config.h>
34
35 #include <stdlib.h>
36 #include <stdio.h>
37 #include <string.h>
38 #include <pthread.h>
39 #include <errno.h>
40 #include <limits.h>
41 #include <sys/types.h>
42 #include <sys/stat.h>
43 #include <fcntl.h>
44 #include <unistd.h>
45 #include <sys/mman.h>
46
47 #include "mlx5.h"
48 #include "mlx5-abi.h"
49 #include "wqe.h"
50
51 int mlx5_single_threaded = 0;
52
53 static inline int is_xrc_tgt(int type)
54 {
55 return type == IBV_QPT_XRC_RECV;
56 }
57
58 int mlx5_query_device(struct ibv_context *context, struct ibv_device_attr *attr)
59 {
60 struct ibv_query_device cmd;
61 uint64_t raw_fw_ver;
62 unsigned major, minor, sub_minor;
63 int ret;
64
65 ret = ibv_cmd_query_device(context, attr, &raw_fw_ver, &cmd, sizeof cmd);
66 if (ret)
67 return ret;
68
69 major = (raw_fw_ver >> 32) & 0xffff;
70 minor = (raw_fw_ver >> 16) & 0xffff;
71 sub_minor = raw_fw_ver & 0xffff;
72
73 snprintf(attr->fw_ver, sizeof attr->fw_ver,
74 "%d.%d.%04d", major, minor, sub_minor);
75
76 return 0;
77 }
78
79 #define READL(ptr) (*((uint32_t *)(ptr)))
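/*
 * Read the free-running HCA core clock from the mapped clock page.  The
 * 64-bit value is read as high/low/high words; if the high word changed
 * between the two reads, the low word wrapped and the read is retried.
 */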
80 static int mlx5_read_clock(struct ibv_context *context, uint64_t *cycles)
81 {
82 unsigned int clockhi, clocklo, clockhi1;
83 int i;
84 struct mlx5_context *ctx = to_mctx(context);
85
86 if (!ctx->hca_core_clock)
87 return -EOPNOTSUPP;
88
89 /* Handle wraparound */
90 for (i = 0; i < 2; i++) {
91 clockhi = be32toh(READL(ctx->hca_core_clock));
92 clocklo = be32toh(READL(ctx->hca_core_clock + 4));
93 clockhi1 = be32toh(READL(ctx->hca_core_clock));
94 if (clockhi == clockhi1)
95 break;
96 }
97
98 *cycles = (uint64_t)clockhi << 32 | (uint64_t)clocklo;
99
100 return 0;
101 }
102
103 int mlx5_query_rt_values(struct ibv_context *context,
104 struct ibv_values_ex *values)
105 {
106 uint32_t comp_mask = 0;
107 int err = 0;
108
109 if (values->comp_mask & IBV_VALUES_MASK_RAW_CLOCK) {
110 uint64_t cycles;
111
112 err = mlx5_read_clock(context, &cycles);
113 if (!err) {
114 values->raw_clock.tv_sec = 0;
115 values->raw_clock.tv_nsec = cycles;
116 comp_mask |= IBV_VALUES_MASK_RAW_CLOCK;
117 }
118 }
119
120 values->comp_mask = comp_mask;
121
122 return err;
123 }
124
125 int mlx5_query_port(struct ibv_context *context, uint8_t port,
126 struct ibv_port_attr *attr)
127 {
128 struct ibv_query_port cmd;
129
130 return ibv_cmd_query_port(context, port, attr, &cmd, sizeof cmd);
131 }
132
133 struct ibv_pd *mlx5_alloc_pd(struct ibv_context *context)
134 {
135 struct ibv_alloc_pd cmd;
136 struct mlx5_alloc_pd_resp resp;
137 struct mlx5_pd *pd;
138
139 pd = calloc(1, sizeof *pd);
140 if (!pd)
141 return NULL;
142
143 if (ibv_cmd_alloc_pd(context, &pd->ibv_pd, &cmd, sizeof cmd,
144 &resp.ibv_resp, sizeof resp)) {
145 free(pd);
146 return NULL;
147 }
148
149 pd->pdn = resp.pdn;
150
151 return &pd->ibv_pd;
152 }
153
154 int mlx5_free_pd(struct ibv_pd *pd)
155 {
156 int ret;
157
158 ret = ibv_cmd_dealloc_pd(pd);
159 if (ret)
160 return ret;
161
162 free(to_mpd(pd));
163 return 0;
164 }
165
166 struct ibv_mr *mlx5_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
167 int acc)
168 {
169 struct mlx5_mr *mr;
170 struct ibv_reg_mr cmd;
171 int ret;
172 enum ibv_access_flags access = (enum ibv_access_flags)acc;
173 struct ibv_reg_mr_resp resp;
174
175 mr = calloc(1, sizeof(*mr));
176 if (!mr)
177 return NULL;
178
179 ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t)addr, access,
180 &(mr->ibv_mr), &cmd, sizeof(cmd), &resp,
181 sizeof resp);
182 if (ret) {
183 mlx5_free_buf(&(mr->buf));
184 free(mr);
185 return NULL;
186 }
187 mr->alloc_flags = acc;
188
189 return &mr->ibv_mr;
190 }
191
192 int mlx5_rereg_mr(struct ibv_mr *ibmr, int flags, struct ibv_pd *pd, void *addr,
193 size_t length, int access)
194 {
195 struct ibv_rereg_mr cmd;
196 struct ibv_rereg_mr_resp resp;
197
198 if (flags & IBV_REREG_MR_KEEP_VALID)
199 return ENOTSUP;
200
201 return ibv_cmd_rereg_mr(ibmr, flags, addr, length, (uintptr_t)addr,
202 access, pd, &cmd, sizeof(cmd), &resp,
203 sizeof(resp));
204 }
205
206 int mlx5_dereg_mr(struct ibv_mr *ibmr)
207 {
208 int ret;
209 struct mlx5_mr *mr = to_mmr(ibmr);
210
211 ret = ibv_cmd_dereg_mr(ibmr);
212 if (ret)
213 return ret;
214
215 free(mr);
216 return 0;
217 }
218
219 struct ibv_mw *mlx5_alloc_mw(struct ibv_pd *pd, enum ibv_mw_type type)
220 {
221 struct ibv_mw *mw;
222 struct ibv_alloc_mw cmd;
223 struct ibv_alloc_mw_resp resp;
224 int ret;
225
226 mw = malloc(sizeof(*mw));
227 if (!mw)
228 return NULL;
229
230 memset(mw, 0, sizeof(*mw));
231
232 ret = ibv_cmd_alloc_mw(pd, type, mw, &cmd, sizeof(cmd), &resp,
233 sizeof(resp));
234 if (ret) {
235 free(mw);
236 return NULL;
237 }
238
239 return mw;
240 }
241
242 int mlx5_dealloc_mw(struct ibv_mw *mw)
243 {
244 int ret;
245 struct ibv_dealloc_mw cmd;
246
247 ret = ibv_cmd_dealloc_mw(mw, &cmd, sizeof(cmd));
248 if (ret)
249 return ret;
250
251 free(mw);
252 return 0;
253 }
254
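/*
 * Round sz up to the next power of two; returns -ENOMEM if the result
 * would not fit in an int.
 */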
255 int mlx5_round_up_power_of_two(long long sz)
256 {
257 long long ret;
258
259 for (ret = 1; ret < sz; ret <<= 1)
260 ; /* nothing */
261
262 if (ret > INT_MAX) {
263 fprintf(stderr, "%s: roundup overflow\n", __func__);
264 return -ENOMEM;
265 }
266
267 return (int)ret;
268 }
269
270 static int align_queue_size(long long req)
271 {
272 return mlx5_round_up_power_of_two(req);
273 }
274
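/*
 * CQE size defaults to 64 bytes; MLX5_CQE_SIZE in the environment may
 * select 128 instead.  Any other value is rejected with -EINVAL.
 */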
275 static int get_cqe_size(void)
276 {
277 char *env;
278 int size = 64;
279
280 env = getenv("MLX5_CQE_SIZE");
281 if (env)
282 size = atoi(env);
283
284 switch (size) {
285 case 64:
286 case 128:
287 return size;
288
289 default:
290 return -EINVAL;
291 }
292 }
293
294 static int use_scatter_to_cqe(void)
295 {
296 char *env;
297
298 env = getenv("MLX5_SCATTER_TO_CQE");
299 if (env && !strcmp(env, "0"))
300 return 0;
301
302 return 1;
303 }
304
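/*
 * When MLX5_SRQ_SIGNATURE (or MLX5_QP_SIGNATURE, below) is set in the
 * environment, receive WQEs are laid out with an extra mlx5_rwqe_sig
 * segment and the corresponding SIGNATURE flag is passed to the kernel
 * on create.
 */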
305 static int srq_sig_enabled(void)
306 {
307 char *env;
308
309 env = getenv("MLX5_SRQ_SIGNATURE");
310 if (env)
311 return 1;
312
313 return 0;
314 }
315
316 static int qp_sig_enabled(void)
317 {
318 char *env;
319
320 env = getenv("MLX5_QP_SIGNATURE");
321 if (env)
322 return 1;
323
324 return 0;
325 }
326
327 enum {
328 CREATE_CQ_SUPPORTED_WC_FLAGS = IBV_WC_STANDARD_FLAGS |
329 IBV_WC_EX_WITH_COMPLETION_TIMESTAMP |
330 IBV_WC_EX_WITH_CVLAN |
331 IBV_WC_EX_WITH_FLOW_TAG
332 };
333
334 enum {
335 CREATE_CQ_SUPPORTED_COMP_MASK = IBV_CQ_INIT_ATTR_MASK_FLAGS
336 };
337
338 enum {
339 CREATE_CQ_SUPPORTED_FLAGS = IBV_CREATE_CQ_ATTR_SINGLE_THREADED
340 };
341
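/*
 * Common CQ creation path shared by mlx5_create_cq(), mlx5_create_cq_ex()
 * and mlx5dv_create_cq(): validate the requested attributes, allocate the
 * CQE buffer and doorbell record, then issue the create command to the
 * kernel.
 */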
342 static struct ibv_cq_ex *create_cq(struct ibv_context *context,
343 const struct ibv_cq_init_attr_ex *cq_attr,
344 int cq_alloc_flags,
345 struct mlx5dv_cq_init_attr *mlx5cq_attr)
346 {
347 struct mlx5_create_cq cmd;
348 struct mlx5_create_cq_resp resp;
349 struct mlx5_cq *cq;
350 int cqe_sz;
351 int ret;
352 int ncqe;
353 struct mlx5_context *mctx = to_mctx(context);
354 FILE *fp = to_mctx(context)->dbg_fp;
355
356 if (!cq_attr->cqe) {
357 mlx5_dbg(fp, MLX5_DBG_CQ, "CQE invalid\n");
358 errno = EINVAL;
359 return NULL;
360 }
361
362 if (cq_attr->comp_mask & ~CREATE_CQ_SUPPORTED_COMP_MASK) {
363 mlx5_dbg(fp, MLX5_DBG_CQ,
364 "Unsupported comp_mask for create_cq\n");
365 errno = EINVAL;
366 return NULL;
367 }
368
369 if (cq_attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_FLAGS &&
370 cq_attr->flags & ~CREATE_CQ_SUPPORTED_FLAGS) {
371 mlx5_dbg(fp, MLX5_DBG_CQ,
372 "Unsupported creation flags requested for create_cq\n");
373 errno = EINVAL;
374 return NULL;
375 }
376
377 if (cq_attr->wc_flags & ~CREATE_CQ_SUPPORTED_WC_FLAGS) {
378 mlx5_dbg(fp, MLX5_DBG_CQ, "\n");
379 errno = ENOTSUP;
380 return NULL;
381 }
382
383 cq = calloc(1, sizeof *cq);
384 if (!cq) {
385 mlx5_dbg(fp, MLX5_DBG_CQ, "\n");
386 return NULL;
387 }
388
389 memset(&cmd, 0, sizeof cmd);
390 cq->cons_index = 0;
391
392 if (mlx5_spinlock_init(&cq->lock))
393 goto err;
394
395 ncqe = align_queue_size(cq_attr->cqe + 1);
396 if ((ncqe > (1 << 24)) || (ncqe < (cq_attr->cqe + 1))) {
397 mlx5_dbg(fp, MLX5_DBG_CQ, "ncqe %d\n", ncqe);
398 errno = EINVAL;
399 goto err_spl;
400 }
401
402 cqe_sz = get_cqe_size();
403 if (cqe_sz < 0) {
404 mlx5_dbg(fp, MLX5_DBG_CQ, "\n");
405 errno = -cqe_sz;
406 goto err_spl;
407 }
408
409 if (mlx5_alloc_cq_buf(to_mctx(context), cq, &cq->buf_a, ncqe, cqe_sz)) {
410 mlx5_dbg(fp, MLX5_DBG_CQ, "\n");
411 goto err_spl;
412 }
413
414 cq->dbrec = mlx5_alloc_dbrec(to_mctx(context));
415 if (!cq->dbrec) {
416 mlx5_dbg(fp, MLX5_DBG_CQ, "\n");
417 goto err_buf;
418 }
419
420 cq->dbrec[MLX5_CQ_SET_CI] = 0;
421 cq->dbrec[MLX5_CQ_ARM_DB] = 0;
422 cq->arm_sn = 0;
423 cq->cqe_sz = cqe_sz;
424 cq->flags = cq_alloc_flags;
425
426 if (cq_attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_FLAGS &&
427 cq_attr->flags & IBV_CREATE_CQ_ATTR_SINGLE_THREADED)
428 cq->flags |= MLX5_CQ_FLAGS_SINGLE_THREADED;
429 cmd.buf_addr = (uintptr_t) cq->buf_a.buf;
430 cmd.db_addr = (uintptr_t) cq->dbrec;
431 cmd.cqe_size = cqe_sz;
432
433 if (mlx5cq_attr) {
434 if (mlx5cq_attr->comp_mask & ~(MLX5DV_CQ_INIT_ATTR_MASK_RESERVED - 1)) {
435 mlx5_dbg(fp, MLX5_DBG_CQ,
436 "Unsupported vendor comp_mask for create_cq\n");
437 errno = EINVAL;
438 goto err_db;
439 }
440
441 if (mlx5cq_attr->comp_mask & MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE) {
442 if (mctx->cqe_comp_caps.max_num &&
443 (mlx5cq_attr->cqe_comp_res_format &
444 mctx->cqe_comp_caps.supported_format)) {
445 cmd.cqe_comp_en = 1;
446 cmd.cqe_comp_res_format = mlx5cq_attr->cqe_comp_res_format;
447 } else {
448 mlx5_dbg(fp, MLX5_DBG_CQ, "CQE Compression is not supported\n");
449 errno = EINVAL;
450 goto err_db;
451 }
452 }
453 }
454
455 ret = ibv_cmd_create_cq(context, ncqe - 1, cq_attr->channel,
456 cq_attr->comp_vector,
457 ibv_cq_ex_to_cq(&cq->ibv_cq), &cmd.ibv_cmd,
458 sizeof(cmd), &resp.ibv_resp, sizeof(resp));
459 if (ret) {
460 mlx5_dbg(fp, MLX5_DBG_CQ, "ret %d\n", ret);
461 goto err_db;
462 }
463
464 cq->active_buf = &cq->buf_a;
465 cq->resize_buf = NULL;
466 cq->cqn = resp.cqn;
467 cq->stall_enable = to_mctx(context)->stall_enable;
468 cq->stall_adaptive_enable = to_mctx(context)->stall_adaptive_enable;
469 cq->stall_cycles = to_mctx(context)->stall_cycles;
470
471 if (cq_alloc_flags & MLX5_CQ_FLAGS_EXTENDED)
472 mlx5_cq_fill_pfns(cq, cq_attr);
473
474 return &cq->ibv_cq;
475
476 err_db:
477 mlx5_free_db(to_mctx(context), cq->dbrec);
478
479 err_buf:
480 mlx5_free_cq_buf(to_mctx(context), &cq->buf_a);
481
482 err_spl:
483 mlx5_spinlock_destroy(&cq->lock);
484
485 err:
486 free(cq);
487
488 return NULL;
489 }
490
491 struct ibv_cq *mlx5_create_cq(struct ibv_context *context, int cqe,
492 struct ibv_comp_channel *channel,
493 int comp_vector)
494 {
495 struct ibv_cq_ex *cq;
496 struct ibv_cq_init_attr_ex cq_attr = {.cqe = cqe, .channel = channel,
497 .comp_vector = comp_vector,
498 .wc_flags = IBV_WC_STANDARD_FLAGS};
499
500 if (cqe <= 0) {
501 errno = EINVAL;
502 return NULL;
503 }
504
505 cq = create_cq(context, &cq_attr, 0, NULL);
506 return cq ? ibv_cq_ex_to_cq(cq) : NULL;
507 }
508
509 struct ibv_cq_ex *mlx5_create_cq_ex(struct ibv_context *context,
510 struct ibv_cq_init_attr_ex *cq_attr)
511 {
512 return create_cq(context, cq_attr, MLX5_CQ_FLAGS_EXTENDED, NULL);
513 }
514
515 struct ibv_cq_ex *mlx5dv_create_cq(struct ibv_context *context,
516 struct ibv_cq_init_attr_ex *cq_attr,
517 struct mlx5dv_cq_init_attr *mlx5_cq_attr)
518 {
519 struct ibv_cq_ex *cq;
520 int err = 0;
521
522 cq = create_cq(context, cq_attr, MLX5_CQ_FLAGS_EXTENDED, mlx5_cq_attr);
523 if (!cq)
524 return NULL;
525
526 err = verbs_init_cq(ibv_cq_ex_to_cq(cq), context,
527 cq_attr->channel, cq_attr->cq_context);
528 if (err)
529 goto err;
530
531 return cq;
532
533 err:
534 context->ops.destroy_cq(ibv_cq_ex_to_cq(cq));
535
536 return NULL;
537 }
538
539 int mlx5_resize_cq(struct ibv_cq *ibcq, int cqe)
540 {
541 struct mlx5_cq *cq = to_mcq(ibcq);
542 struct mlx5_resize_cq_resp resp;
543 struct mlx5_resize_cq cmd;
544 struct mlx5_context *mctx = to_mctx(ibcq->context);
545 int err;
546
547 if (cqe < 0) {
548 errno = EINVAL;
549 return errno;
550 }
551
552 memset(&cmd, 0, sizeof(cmd));
553 memset(&resp, 0, sizeof(resp));
554
555 if (((long long)cqe * 64) > INT_MAX)
556 return EINVAL;
557
558 mlx5_spin_lock(&cq->lock);
559 cq->active_cqes = cq->ibv_cq.cqe;
560 if (cq->active_buf == &cq->buf_a)
561 cq->resize_buf = &cq->buf_b;
562 else
563 cq->resize_buf = &cq->buf_a;
564
565 cqe = align_queue_size(cqe + 1);
566 if (cqe == ibcq->cqe + 1) {
567 cq->resize_buf = NULL;
568 err = 0;
569 goto out;
570 }
571
572 /* currently we don't change cqe size */
573 cq->resize_cqe_sz = cq->cqe_sz;
574 cq->resize_cqes = cqe;
575 err = mlx5_alloc_cq_buf(mctx, cq, cq->resize_buf, cq->resize_cqes, cq->resize_cqe_sz);
576 if (err) {
577 cq->resize_buf = NULL;
578 errno = ENOMEM;
579 goto out;
580 }
581
582 cmd.buf_addr = (uintptr_t)cq->resize_buf->buf;
583 cmd.cqe_size = cq->resize_cqe_sz;
584
585 err = ibv_cmd_resize_cq(ibcq, cqe - 1, &cmd.ibv_cmd, sizeof(cmd),
586 &resp.ibv_resp, sizeof(resp));
587 if (err)
588 goto out_buf;
589
590 mlx5_cq_resize_copy_cqes(cq);
591 mlx5_free_cq_buf(mctx, cq->active_buf);
592 cq->active_buf = cq->resize_buf;
593 cq->ibv_cq.cqe = cqe - 1;
594 mlx5_spin_unlock(&cq->lock);
595 cq->resize_buf = NULL;
596 return 0;
597
598 out_buf:
599 mlx5_free_cq_buf(mctx, cq->resize_buf);
600 cq->resize_buf = NULL;
601
602 out:
603 mlx5_spin_unlock(&cq->lock);
604 return err;
605 }
606
607 int mlx5_destroy_cq(struct ibv_cq *cq)
608 {
609 int ret;
610 struct mlx5_cq *mcq = to_mcq(cq);
611
612 ret = ibv_cmd_destroy_cq(cq);
613 if (ret)
614 return ret;
615
616 verbs_cleanup_cq(cq);
617 mlx5_free_db(to_mctx(cq->context), to_mcq(cq)->dbrec);
618 mlx5_free_cq_buf(to_mctx(cq->context), to_mcq(cq)->active_buf);
619 mlx5_spinlock_destroy(&mcq->lock);
620 free(to_mcq(cq));
621
622 return 0;
623 }
624
625 struct ibv_srq *mlx5_create_srq(struct ibv_pd *pd,
626 struct ibv_srq_init_attr *attr)
627 {
628 struct mlx5_create_srq cmd;
629 struct mlx5_create_srq_resp resp;
630 struct mlx5_srq *srq;
631 int ret;
632 struct mlx5_context *ctx;
633 int max_sge;
634 struct ibv_srq *ibsrq;
635
636 ctx = to_mctx(pd->context);
637 srq = calloc(1, sizeof *srq);
638 if (!srq) {
639 fprintf(stderr, "%s-%d:\n", __func__, __LINE__);
640 return NULL;
641 }
642 ibsrq = &srq->vsrq.srq;
643
644 memset(&cmd, 0, sizeof cmd);
645 if (mlx5_spinlock_init(&srq->lock)) {
646 fprintf(stderr, "%s-%d:\n", __func__, __LINE__);
647 goto err;
648 }
649
650 if (attr->attr.max_wr > ctx->max_srq_recv_wr) {
651 fprintf(stderr, "%s-%d:max_wr %d, max_srq_recv_wr %d\n", __func__, __LINE__,
652 attr->attr.max_wr, ctx->max_srq_recv_wr);
653 errno = EINVAL;
654 goto err_spl;
655 }
656
657 /*
658 * This calculation does not consider required control segments. The
659 * final calculation is done again later; this is done to avoid
660 * overflow of intermediate variables.
661 */
662 max_sge = ctx->max_rq_desc_sz / sizeof(struct mlx5_wqe_data_seg);
663 if (attr->attr.max_sge > max_sge) {
664 fprintf(stderr, "%s-%d:max_sge %d, max supported %d\n", __func__, __LINE__,
665 attr->attr.max_sge, max_sge);
666 errno = EINVAL;
667 goto err_spl;
668 }
669
670 srq->max = align_queue_size(attr->attr.max_wr + 1);
671 srq->max_gs = attr->attr.max_sge;
672 srq->counter = 0;
673
674 if (mlx5_alloc_srq_buf(pd->context, srq)) {
675 fprintf(stderr, "%s-%d:\n", __func__, __LINE__);
676 goto err_spl;
677 }
678
679 srq->db = mlx5_alloc_dbrec(to_mctx(pd->context));
680 if (!srq->db) {
681 fprintf(stderr, "%s-%d:\n", __func__, __LINE__);
682 goto err_free;
683 }
684
685 *srq->db = 0;
686
687 cmd.buf_addr = (uintptr_t) srq->buf.buf;
688 cmd.db_addr = (uintptr_t) srq->db;
689 srq->wq_sig = srq_sig_enabled();
690 if (srq->wq_sig)
691 cmd.flags = MLX5_SRQ_FLAG_SIGNATURE;
692
693 attr->attr.max_sge = srq->max_gs;
694 pthread_mutex_lock(&ctx->srq_table_mutex);
695 ret = ibv_cmd_create_srq(pd, ibsrq, attr, &cmd.ibv_cmd, sizeof(cmd),
696 &resp.ibv_resp, sizeof(resp));
697 if (ret)
698 goto err_db;
699
700 ret = mlx5_store_srq(ctx, resp.srqn, srq);
701 if (ret)
702 goto err_destroy;
703
704 pthread_mutex_unlock(&ctx->srq_table_mutex);
705
706 srq->srqn = resp.srqn;
707 srq->rsc.rsn = resp.srqn;
708 srq->rsc.type = MLX5_RSC_TYPE_SRQ;
709
710 return ibsrq;
711
712 err_destroy:
713 ibv_cmd_destroy_srq(ibsrq);
714
715 err_db:
716 pthread_mutex_unlock(&ctx->srq_table_mutex);
717 mlx5_free_db(to_mctx(pd->context), srq->db);
718
719 err_free:
720 free(srq->wrid);
721 mlx5_free_buf(&srq->buf);
722
723 err_spl:
724 mlx5_spinlock_destroy(&srq->lock);
725
726 err:
727 free(srq);
728
729 return NULL;
730 }
731
732 int mlx5_modify_srq(struct ibv_srq *srq,
733 struct ibv_srq_attr *attr,
734 int attr_mask)
735 {
736 struct ibv_modify_srq cmd;
737
738 return ibv_cmd_modify_srq(srq, attr, attr_mask, &cmd, sizeof cmd);
739 }
740
741 int mlx5_query_srq(struct ibv_srq *srq,
742 struct ibv_srq_attr *attr)
743 {
744 struct ibv_query_srq cmd;
745
746 return ibv_cmd_query_srq(srq, attr, &cmd, sizeof cmd);
747 }
748
749 int mlx5_destroy_srq(struct ibv_srq *srq)
750 {
751 int ret;
752 struct mlx5_srq *msrq = to_msrq(srq);
753 struct mlx5_context *ctx = to_mctx(srq->context);
754
755 ret = ibv_cmd_destroy_srq(srq);
756 if (ret)
757 return ret;
758
759 if (ctx->cqe_version && msrq->rsc.type == MLX5_RSC_TYPE_XSRQ)
760 mlx5_clear_uidx(ctx, msrq->rsc.rsn);
761 else
762 mlx5_clear_srq(ctx, msrq->srqn);
763
764 mlx5_free_db(ctx, msrq->db);
765 mlx5_free_buf(&msrq->buf);
766 free(msrq->wrid);
767 mlx5_spinlock_destroy(&msrq->lock);
768 free(msrq);
769
770 return 0;
771 }
772
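/*
 * Bytes of fixed (non-data) segments at the start of a send WQE for the
 * given transport type (control, remote address, atomic, datagram,
 * Ethernet or memory-window bind segments).  Returns -EINVAL for
 * unsupported QP types.
 */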
773 static int sq_overhead(enum ibv_qp_type qp_type)
774 {
775 size_t size = 0;
776 size_t mw_bind_size =
777 sizeof(struct mlx5_wqe_umr_ctrl_seg) +
778 sizeof(struct mlx5_wqe_mkey_context_seg) +
779 max_t(size_t, sizeof(struct mlx5_wqe_umr_klm_seg), 64);
780
781 switch (qp_type) {
782 case IBV_QPT_RC:
783 size += sizeof(struct mlx5_wqe_ctrl_seg) +
784 max(sizeof(struct mlx5_wqe_atomic_seg) +
785 sizeof(struct mlx5_wqe_raddr_seg),
786 mw_bind_size);
787 break;
788
789 case IBV_QPT_UC:
790 size = sizeof(struct mlx5_wqe_ctrl_seg) +
791 max(sizeof(struct mlx5_wqe_raddr_seg),
792 mw_bind_size);
793 break;
794
795 case IBV_QPT_UD:
796 size = sizeof(struct mlx5_wqe_ctrl_seg) +
797 sizeof(struct mlx5_wqe_datagram_seg);
798 break;
799
800 case IBV_QPT_XRC_SEND:
801 size = sizeof(struct mlx5_wqe_ctrl_seg) + mw_bind_size;
802 SWITCH_FALLTHROUGH;
803
804 case IBV_QPT_XRC_RECV:
805 size = max(size, sizeof(struct mlx5_wqe_ctrl_seg) +
806 sizeof(struct mlx5_wqe_xrc_seg) +
807 sizeof(struct mlx5_wqe_raddr_seg));
808 break;
809
810 case IBV_QPT_RAW_PACKET:
811 size = sizeof(struct mlx5_wqe_ctrl_seg) +
812 sizeof(struct mlx5_wqe_eth_seg);
813 break;
814
815 default:
816 return -EINVAL;
817 }
818
819 return size;
820 }
821
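/*
 * Size of a single send WQE: transport overhead plus the larger of the
 * gather list and the inline data segment (and any TSO header), rounded
 * up to a multiple of MLX5_SEND_WQE_BB.  Returns -EINVAL if it cannot
 * fit in one send descriptor.
 */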
822 static int mlx5_calc_send_wqe(struct mlx5_context *ctx,
823 struct ibv_qp_init_attr_ex *attr,
824 struct mlx5_qp *qp)
825 {
826 int size;
827 int inl_size = 0;
828 int max_gather;
829 int tot_size;
830
831 size = sq_overhead(attr->qp_type);
832 if (size < 0)
833 return size;
834
835 if (attr->cap.max_inline_data) {
836 inl_size = size + align(sizeof(struct mlx5_wqe_inl_data_seg) +
837 attr->cap.max_inline_data, 16);
838 }
839
840 if (attr->comp_mask & IBV_QP_INIT_ATTR_MAX_TSO_HEADER) {
841 size += align(attr->max_tso_header, 16);
842 qp->max_tso_header = attr->max_tso_header;
843 }
844
845 max_gather = (ctx->max_sq_desc_sz - size) /
846 sizeof(struct mlx5_wqe_data_seg);
847 if (attr->cap.max_send_sge > max_gather)
848 return -EINVAL;
849
850 size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg);
851 tot_size = max_int(size, inl_size);
852
853 if (tot_size > ctx->max_sq_desc_sz)
854 return -EINVAL;
855
856 return align(tot_size, MLX5_SEND_WQE_BB);
857 }
858
859 static int mlx5_calc_rcv_wqe(struct mlx5_context *ctx,
860 struct ibv_qp_init_attr_ex *attr,
861 struct mlx5_qp *qp)
862 {
863 uint32_t size;
864 int num_scatter;
865
866 if (attr->srq)
867 return 0;
868
869 num_scatter = max_t(uint32_t, attr->cap.max_recv_sge, 1);
870 size = sizeof(struct mlx5_wqe_data_seg) * num_scatter;
871 if (qp->wq_sig)
872 size += sizeof(struct mlx5_rwqe_sig);
873
874 if (size > ctx->max_rq_desc_sz)
875 return -EINVAL;
876
877 size = mlx5_round_up_power_of_two(size);
878
879 return size;
880 }
881
882 static int mlx5_calc_sq_size(struct mlx5_context *ctx,
883 struct ibv_qp_init_attr_ex *attr,
884 struct mlx5_qp *qp)
885 {
886 int wqe_size;
887 int wq_size;
888 FILE *fp = ctx->dbg_fp;
889
890 if (!attr->cap.max_send_wr)
891 return 0;
892
893 wqe_size = mlx5_calc_send_wqe(ctx, attr, qp);
894 if (wqe_size < 0) {
895 mlx5_dbg(fp, MLX5_DBG_QP, "\n");
896 return wqe_size;
897 }
898
899 if (wqe_size > ctx->max_sq_desc_sz) {
900 mlx5_dbg(fp, MLX5_DBG_QP, "\n");
901 return -EINVAL;
902 }
903
904 qp->max_inline_data = wqe_size - sq_overhead(attr->qp_type) -
905 sizeof(struct mlx5_wqe_inl_data_seg);
906 attr->cap.max_inline_data = qp->max_inline_data;
907
908 /*
909 * to avoid overflow, we limit max_send_wr so
910 * that the multiplication will fit in int
911 */
912 if (attr->cap.max_send_wr > 0x7fffffff / ctx->max_sq_desc_sz) {
913 mlx5_dbg(fp, MLX5_DBG_QP, "\n");
914 return -EINVAL;
915 }
916
917 wq_size = mlx5_round_up_power_of_two(attr->cap.max_send_wr * wqe_size);
918 qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB;
919 if (qp->sq.wqe_cnt > ctx->max_send_wqebb) {
920 mlx5_dbg(fp, MLX5_DBG_QP, "\n");
921 return -EINVAL;
922 }
923
924 qp->sq.wqe_shift = mlx5_ilog2(MLX5_SEND_WQE_BB);
925 qp->sq.max_gs = attr->cap.max_send_sge;
926 qp->sq.max_post = wq_size / wqe_size;
927
928 return wq_size;
929 }
930
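/*
 * Work out the receive WQE and queue sizes for an ibv_wq: one data
 * segment per SGE plus an optional signature segment, with the WQE size
 * and the total queue size rounded up to powers of two.
 */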
931 static int mlx5_calc_rwq_size(struct mlx5_context *ctx,
932 struct mlx5_rwq *rwq,
933 struct ibv_wq_init_attr *attr)
934 {
935 size_t wqe_size;
936 int wq_size;
937 uint32_t num_scatter;
938 int scat_spc;
939
940 if (!attr->max_wr)
941 return -EINVAL;
942
943 /* TBD: check caps for RQ */
944 num_scatter = max_t(uint32_t, attr->max_sge, 1);
945 wqe_size = sizeof(struct mlx5_wqe_data_seg) * num_scatter;
946
947 if (rwq->wq_sig)
948 wqe_size += sizeof(struct mlx5_rwqe_sig);
949
950 if (wqe_size <= 0 || wqe_size > ctx->max_rq_desc_sz)
951 return -EINVAL;
952
953 wqe_size = mlx5_round_up_power_of_two(wqe_size);
954 wq_size = mlx5_round_up_power_of_two(attr->max_wr) * wqe_size;
955 wq_size = max(wq_size, MLX5_SEND_WQE_BB);
956 rwq->rq.wqe_cnt = wq_size / wqe_size;
957 rwq->rq.wqe_shift = mlx5_ilog2(wqe_size);
958 rwq->rq.max_post = 1 << mlx5_ilog2(wq_size / wqe_size);
959 scat_spc = wqe_size -
960 ((rwq->wq_sig) ? sizeof(struct mlx5_rwqe_sig) : 0);
961 rwq->rq.max_gs = scat_spc / sizeof(struct mlx5_wqe_data_seg);
962 return wq_size;
963 }
964
965 static int mlx5_calc_rq_size(struct mlx5_context *ctx,
966 struct ibv_qp_init_attr_ex *attr,
967 struct mlx5_qp *qp)
968 {
969 int wqe_size;
970 int wq_size;
971 int scat_spc;
972 FILE *fp = ctx->dbg_fp;
973
974 if (!attr->cap.max_recv_wr)
975 return 0;
976
977 if (attr->cap.max_recv_wr > ctx->max_recv_wr) {
978 mlx5_dbg(fp, MLX5_DBG_QP, "\n");
979 return -EINVAL;
980 }
981
982 wqe_size = mlx5_calc_rcv_wqe(ctx, attr, qp);
983 if (wqe_size < 0 || wqe_size > ctx->max_rq_desc_sz) {
984 mlx5_dbg(fp, MLX5_DBG_QP, "\n");
985 return -EINVAL;
986 }
987
988 wq_size = mlx5_round_up_power_of_two(attr->cap.max_recv_wr) * wqe_size;
989 if (wqe_size) {
990 wq_size = max(wq_size, MLX5_SEND_WQE_BB);
991 qp->rq.wqe_cnt = wq_size / wqe_size;
992 qp->rq.wqe_shift = mlx5_ilog2(wqe_size);
993 qp->rq.max_post = 1 << mlx5_ilog2(wq_size / wqe_size);
994 scat_spc = wqe_size -
995 (qp->wq_sig ? sizeof(struct mlx5_rwqe_sig) : 0);
996 qp->rq.max_gs = scat_spc / sizeof(struct mlx5_wqe_data_seg);
997 } else {
998 qp->rq.wqe_cnt = 0;
999 qp->rq.wqe_shift = 0;
1000 qp->rq.max_post = 0;
1001 qp->rq.max_gs = 0;
1002 }
1003 return wq_size;
1004 }
1005
1006 static int mlx5_calc_wq_size(struct mlx5_context *ctx,
1007 struct ibv_qp_init_attr_ex *attr,
1008 struct mlx5_qp *qp)
1009 {
1010 int ret;
1011 int result;
1012
1013 ret = mlx5_calc_sq_size(ctx, attr, qp);
1014 if (ret < 0)
1015 return ret;
1016
1017 result = ret;
1018 ret = mlx5_calc_rq_size(ctx, attr, qp);
1019 if (ret < 0)
1020 return ret;
1021
1022 result += ret;
1023
1024 qp->sq.offset = ret;
1025 qp->rq.offset = 0;
1026
1027 return result;
1028 }
1029
1030 static void map_uuar(struct ibv_context *context, struct mlx5_qp *qp,
1031 int uuar_index)
1032 {
1033 struct mlx5_context *ctx = to_mctx(context);
1034
1035 qp->bf = &ctx->bfs[uuar_index];
1036 }
1037
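/*
 * Environment key consulted (via mlx5_use_huge()) to decide whether this
 * QP type's buffers should be backed by huge pages.
 */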
1038 static const char *qptype2key(enum ibv_qp_type type)
1039 {
1040 switch (type) {
1041 case IBV_QPT_RC: return "HUGE_RC";
1042 case IBV_QPT_UC: return "HUGE_UC";
1043 case IBV_QPT_UD: return "HUGE_UD";
1044 case IBV_QPT_RAW_PACKET: return "HUGE_RAW_ETH";
1045 default: return "HUGE_NA";
1046 }
1047 }
1048
1049 static int mlx5_alloc_qp_buf(struct ibv_context *context,
1050 struct ibv_qp_init_attr_ex *attr,
1051 struct mlx5_qp *qp,
1052 int size)
1053 {
1054 int err;
1055 enum mlx5_alloc_type alloc_type;
1056 enum mlx5_alloc_type default_alloc_type = MLX5_ALLOC_TYPE_ANON;
1057 const char *qp_huge_key;
1058
1059 if (qp->sq.wqe_cnt) {
1060 qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wrid));
1061 if (!qp->sq.wrid) {
1062 errno = ENOMEM;
1063 err = -1;
1064 return err;
1065 }
1066
1067 qp->sq.wr_data = malloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wr_data));
1068 if (!qp->sq.wr_data) {
1069 errno = ENOMEM;
1070 err = -1;
1071 goto ex_wrid;
1072 }
1073 }
1074
1075 qp->sq.wqe_head = malloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wqe_head));
1076 if (!qp->sq.wqe_head) {
1077 errno = ENOMEM;
1078 err = -1;
1079 goto ex_wrid;
1080 }
1081
1082 if (qp->rq.wqe_cnt) {
1083 qp->rq.wrid = malloc(qp->rq.wqe_cnt * sizeof(uint64_t));
1084 if (!qp->rq.wrid) {
1085 errno = ENOMEM;
1086 err = -1;
1087 goto ex_wrid;
1088 }
1089 }
1090
1091 /* compatibility support */
1092 qp_huge_key = qptype2key(qp->ibv_qp->qp_type);
1093 if (mlx5_use_huge(qp_huge_key))
1094 default_alloc_type = MLX5_ALLOC_TYPE_HUGE;
1095
1096 mlx5_get_alloc_type(MLX5_QP_PREFIX, &alloc_type,
1097 default_alloc_type);
1098
1099 err = mlx5_alloc_prefered_buf(to_mctx(context), &qp->buf,
1100 align(qp->buf_size, to_mdev
1101 (context->device)->page_size),
1102 to_mdev(context->device)->page_size,
1103 alloc_type,
1104 MLX5_QP_PREFIX);
1105
1106 if (err) {
1107 err = -ENOMEM;
1108 goto ex_wrid;
1109 }
1110
1111 memset(qp->buf.buf, 0, qp->buf_size);
1112
1113 if (attr->qp_type == IBV_QPT_RAW_PACKET) {
1114 size_t aligned_sq_buf_size = align(qp->sq_buf_size,
1115 to_mdev(context->device)->page_size);
1116 /* For Raw Packet QP, allocate a separate buffer for the SQ */
1117 err = mlx5_alloc_prefered_buf(to_mctx(context), &qp->sq_buf,
1118 aligned_sq_buf_size,
1119 to_mdev(context->device)->page_size,
1120 alloc_type,
1121 MLX5_QP_PREFIX);
1122 if (err) {
1123 err = -ENOMEM;
1124 goto rq_buf;
1125 }
1126
1127 memset(qp->sq_buf.buf, 0, aligned_sq_buf_size);
1128 }
1129
1130 return 0;
1131 rq_buf:
1132 mlx5_free_actual_buf(to_mctx(qp->verbs_qp.qp.context), &qp->buf);
1133 ex_wrid:
1134 if (qp->rq.wrid)
1135 free(qp->rq.wrid);
1136
1137 if (qp->sq.wqe_head)
1138 free(qp->sq.wqe_head);
1139
1140 if (qp->sq.wr_data)
1141 free(qp->sq.wr_data);
1142 if (qp->sq.wrid)
1143 free(qp->sq.wrid);
1144
1145 return err;
1146 }
1147
1148 static void mlx5_free_qp_buf(struct mlx5_qp *qp)
1149 {
1150 struct mlx5_context *ctx = to_mctx(qp->ibv_qp->context);
1151
1152 mlx5_free_actual_buf(ctx, &qp->buf);
1153
1154 if (qp->sq_buf.buf)
1155 mlx5_free_actual_buf(ctx, &qp->sq_buf);
1156
1157 if (qp->rq.wrid)
1158 free(qp->rq.wrid);
1159
1160 if (qp->sq.wqe_head)
1161 free(qp->sq.wqe_head);
1162
1163 if (qp->sq.wrid)
1164 free(qp->sq.wrid);
1165
1166 if (qp->sq.wr_data)
1167 free(qp->sq.wr_data);
1168 }
1169
1170 static int mlx5_cmd_create_rss_qp(struct ibv_context *context,
1171 struct ibv_qp_init_attr_ex *attr,
1172 struct mlx5_qp *qp)
1173 {
1174 struct mlx5_create_qp_ex_rss cmd_ex_rss = {};
1175 struct mlx5_create_qp_resp_ex resp = {};
1176 int ret;
1177
1178 if (attr->rx_hash_conf.rx_hash_key_len > sizeof(cmd_ex_rss.rx_hash_key)) {
1179 errno = EINVAL;
1180 return errno;
1181 }
1182
1183 cmd_ex_rss.rx_hash_fields_mask = attr->rx_hash_conf.rx_hash_fields_mask;
1184 cmd_ex_rss.rx_hash_function = attr->rx_hash_conf.rx_hash_function;
1185 cmd_ex_rss.rx_key_len = attr->rx_hash_conf.rx_hash_key_len;
1186 memcpy(cmd_ex_rss.rx_hash_key, attr->rx_hash_conf.rx_hash_key,
1187 attr->rx_hash_conf.rx_hash_key_len);
1188
1189 ret = ibv_cmd_create_qp_ex2(context, &qp->verbs_qp,
1190 sizeof(qp->verbs_qp), attr,
1191 &cmd_ex_rss.ibv_cmd, sizeof(cmd_ex_rss.ibv_cmd),
1192 sizeof(cmd_ex_rss), &resp.ibv_resp,
1193 sizeof(resp.ibv_resp), sizeof(resp));
1194 if (ret)
1195 return ret;
1196
1197 qp->rss_qp = 1;
1198 return 0;
1199 }
1200
1201 static int mlx5_cmd_create_qp_ex(struct ibv_context *context,
1202 struct ibv_qp_init_attr_ex *attr,
1203 struct mlx5_create_qp *cmd,
1204 struct mlx5_qp *qp,
1205 struct mlx5_create_qp_resp_ex *resp)
1206 {
1207 struct mlx5_create_qp_ex cmd_ex;
1208 int ret;
1209
1210 memset(&cmd_ex, 0, sizeof(cmd_ex));
1211 memcpy(&cmd_ex.ibv_cmd.base, &cmd->ibv_cmd.user_handle,
1212 offsetof(typeof(cmd->ibv_cmd), is_srq) +
1213 sizeof(cmd->ibv_cmd.is_srq) -
1214 offsetof(typeof(cmd->ibv_cmd), user_handle));
1215
1216 memcpy(&cmd_ex.drv_ex, &cmd->buf_addr,
1217 offsetof(typeof(*cmd), sq_buf_addr) +
1218 sizeof(cmd->sq_buf_addr) - sizeof(cmd->ibv_cmd));
1219
1220 ret = ibv_cmd_create_qp_ex2(context, &qp->verbs_qp,
1221 sizeof(qp->verbs_qp), attr,
1222 &cmd_ex.ibv_cmd, sizeof(cmd_ex.ibv_cmd),
1223 sizeof(cmd_ex), &resp->ibv_resp,
1224 sizeof(resp->ibv_resp), sizeof(*resp));
1225
1226 return ret;
1227 }
1228
1229 enum {
1230 MLX5_CREATE_QP_SUP_COMP_MASK = (IBV_QP_INIT_ATTR_PD |
1231 IBV_QP_INIT_ATTR_XRCD |
1232 IBV_QP_INIT_ATTR_CREATE_FLAGS |
1233 IBV_QP_INIT_ATTR_MAX_TSO_HEADER |
1234 IBV_QP_INIT_ATTR_IND_TABLE |
1235 IBV_QP_INIT_ATTR_RX_HASH),
1236 };
1237
1238 enum {
1239 MLX5_CREATE_QP_EX2_COMP_MASK = (IBV_QP_INIT_ATTR_CREATE_FLAGS |
1240 IBV_QP_INIT_ATTR_MAX_TSO_HEADER |
1241 IBV_QP_INIT_ATTR_IND_TABLE |
1242 IBV_QP_INIT_ATTR_RX_HASH),
1243 };
1244
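/*
 * Common QP creation path.  RSS QPs go through a dedicated command;
 * otherwise the SQ/RQ sizes are computed, buffers and the doorbell record
 * are allocated, and the QP is tracked either by user index (when the
 * kernel reports a non-zero cqe_version) or in the QP table (cqe_version 0).
 */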
1245 static struct ibv_qp *create_qp(struct ibv_context *context,
1246 struct ibv_qp_init_attr_ex *attr)
1247 {
1248 struct mlx5_create_qp cmd;
1249 struct mlx5_create_qp_resp resp;
1250 struct mlx5_create_qp_resp_ex resp_ex;
1251 struct mlx5_qp *qp;
1252 int ret;
1253 struct mlx5_context *ctx = to_mctx(context);
1254 struct ibv_qp *ibqp;
1255 int32_t usr_idx = 0;
1256 uint32_t uuar_index;
1257 FILE *fp = ctx->dbg_fp;
1258
1259 if (attr->comp_mask & ~MLX5_CREATE_QP_SUP_COMP_MASK)
1260 return NULL;
1261
1262 if ((attr->comp_mask & IBV_QP_INIT_ATTR_MAX_TSO_HEADER) &&
1263 (attr->qp_type != IBV_QPT_RAW_PACKET))
1264 return NULL;
1265
1266 qp = calloc(1, sizeof(*qp));
1267 if (!qp) {
1268 mlx5_dbg(fp, MLX5_DBG_QP, "\n");
1269 return NULL;
1270 }
1271 ibqp = (struct ibv_qp *)&qp->verbs_qp;
1272 qp->ibv_qp = ibqp;
1273
1274 memset(&cmd, 0, sizeof(cmd));
1275 memset(&resp, 0, sizeof(resp));
1276 memset(&resp_ex, 0, sizeof(resp_ex));
1277
1278 if (attr->comp_mask & IBV_QP_INIT_ATTR_RX_HASH) {
1279 ret = mlx5_cmd_create_rss_qp(context, attr, qp);
1280 if (ret)
1281 goto err;
1282
1283 return ibqp;
1284 }
1285
1286 qp->wq_sig = qp_sig_enabled();
1287 if (qp->wq_sig)
1288 cmd.flags |= MLX5_QP_FLAG_SIGNATURE;
1289
1290 if (use_scatter_to_cqe())
1291 cmd.flags |= MLX5_QP_FLAG_SCATTER_CQE;
1292
1293 ret = mlx5_calc_wq_size(ctx, attr, qp);
1294 if (ret < 0) {
1295 errno = -ret;
1296 goto err;
1297 }
1298
1299 if (attr->qp_type == IBV_QPT_RAW_PACKET) {
1300 qp->buf_size = qp->sq.offset;
1301 qp->sq_buf_size = ret - qp->buf_size;
1302 qp->sq.offset = 0;
1303 } else {
1304 qp->buf_size = ret;
1305 qp->sq_buf_size = 0;
1306 }
1307
1308 if (mlx5_alloc_qp_buf(context, attr, qp, ret)) {
1309 mlx5_dbg(fp, MLX5_DBG_QP, "\n");
1310 goto err;
1311 }
1312
1313 if (attr->qp_type == IBV_QPT_RAW_PACKET) {
1314 qp->sq_start = qp->sq_buf.buf;
1315 qp->sq.qend = qp->sq_buf.buf +
1316 (qp->sq.wqe_cnt << qp->sq.wqe_shift);
1317 } else {
1318 qp->sq_start = qp->buf.buf + qp->sq.offset;
1319 qp->sq.qend = qp->buf.buf + qp->sq.offset +
1320 (qp->sq.wqe_cnt << qp->sq.wqe_shift);
1321 }
1322
1323 mlx5_init_qp_indices(qp);
1324
1325 if (mlx5_spinlock_init(&qp->sq.lock))
1326 goto err_free_qp_buf;
1327
1328 if (mlx5_spinlock_init(&qp->rq.lock))
1329 goto err_sq_spl;
1330
1331 qp->db = mlx5_alloc_dbrec(ctx);
1332 if (!qp->db) {
1333 mlx5_dbg(fp, MLX5_DBG_QP, "\n");
1334 goto err_rq_spl;
1335 }
1336
1337 qp->db[MLX5_RCV_DBR] = 0;
1338 qp->db[MLX5_SND_DBR] = 0;
1339
1340 cmd.buf_addr = (uintptr_t) qp->buf.buf;
1341 cmd.sq_buf_addr = (attr->qp_type == IBV_QPT_RAW_PACKET) ?
1342 (uintptr_t) qp->sq_buf.buf : 0;
1343 cmd.db_addr = (uintptr_t) qp->db;
1344 cmd.sq_wqe_count = qp->sq.wqe_cnt;
1345 cmd.rq_wqe_count = qp->rq.wqe_cnt;
1346 cmd.rq_wqe_shift = qp->rq.wqe_shift;
1347
1348 if (ctx->atomic_cap == IBV_ATOMIC_HCA)
1349 qp->atomics_enabled = 1;
1350
1351 if (!ctx->cqe_version) {
1352 cmd.uidx = 0xffffff;
1353 pthread_mutex_lock(&ctx->qp_table_mutex);
1354 } else if (!is_xrc_tgt(attr->qp_type)) {
1355 usr_idx = mlx5_store_uidx(ctx, qp);
1356 if (usr_idx < 0) {
1357 mlx5_dbg(fp, MLX5_DBG_QP, "Couldn't find free user index\n");
1358 goto err_rq_db;
1359 }
1360
1361 cmd.uidx = usr_idx;
1362 }
1363
1364 if (attr->comp_mask & MLX5_CREATE_QP_EX2_COMP_MASK)
1365 ret = mlx5_cmd_create_qp_ex(context, attr, &cmd, qp, &resp_ex);
1366 else
1367 ret = ibv_cmd_create_qp_ex(context, &qp->verbs_qp, sizeof(qp->verbs_qp),
1368 attr, &cmd.ibv_cmd, sizeof(cmd),
1369 &resp.ibv_resp, sizeof(resp));
1370 if (ret) {
1371 mlx5_dbg(fp, MLX5_DBG_QP, "ret %d\n", ret);
1372 goto err_free_uidx;
1373 }
1374
1375 uuar_index = (attr->comp_mask & MLX5_CREATE_QP_EX2_COMP_MASK) ?
1376 resp_ex.uuar_index : resp.uuar_index;
1377 if (!ctx->cqe_version) {
1378 if (qp->sq.wqe_cnt || qp->rq.wqe_cnt) {
1379 ret = mlx5_store_qp(ctx, ibqp->qp_num, qp);
1380 if (ret) {
1381 mlx5_dbg(fp, MLX5_DBG_QP, "ret %d\n", ret);
1382 goto err_destroy;
1383 }
1384 }
1385
1386 pthread_mutex_unlock(&ctx->qp_table_mutex);
1387 }
1388
1389 map_uuar(context, qp, uuar_index);
1390
1391 qp->rq.max_post = qp->rq.wqe_cnt;
1392 if (attr->sq_sig_all)
1393 qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
1394 else
1395 qp->sq_signal_bits = 0;
1396
1397 attr->cap.max_send_wr = qp->sq.max_post;
1398 attr->cap.max_recv_wr = qp->rq.max_post;
1399 attr->cap.max_recv_sge = qp->rq.max_gs;
1400
1401 qp->rsc.type = MLX5_RSC_TYPE_QP;
1402 qp->rsc.rsn = (ctx->cqe_version && !is_xrc_tgt(attr->qp_type)) ?
1403 usr_idx : ibqp->qp_num;
1404
1405 return ibqp;
1406
1407 err_destroy:
1408 ibv_cmd_destroy_qp(ibqp);
1409
1410 err_free_uidx:
1411 if (!ctx->cqe_version)
1412 pthread_mutex_unlock(&to_mctx(context)->qp_table_mutex);
1413 else if (!is_xrc_tgt(attr->qp_type))
1414 mlx5_clear_uidx(ctx, usr_idx);
1415
1416 err_rq_db:
1417 mlx5_free_db(to_mctx(context), qp->db);
1418
1419 err_rq_spl:
1420 mlx5_spinlock_destroy(&qp->rq.lock);
1421
1422 err_sq_spl:
1423 mlx5_spinlock_destroy(&qp->sq.lock);
1424
1425 err_free_qp_buf:
1426 mlx5_free_qp_buf(qp);
1427
1428 err:
1429 free(qp);
1430
1431 return NULL;
1432 }
1433
1434 struct ibv_qp *mlx5_create_qp(struct ibv_pd *pd,
1435 struct ibv_qp_init_attr *attr)
1436 {
1437 struct ibv_qp *qp;
1438 struct ibv_qp_init_attr_ex attrx;
1439
1440 memset(&attrx, 0, sizeof(attrx));
1441 memcpy(&attrx, attr, sizeof(*attr));
1442 attrx.comp_mask = IBV_QP_INIT_ATTR_PD;
1443 attrx.pd = pd;
1444 qp = create_qp(pd->context, &attrx);
1445 if (qp)
1446 memcpy(attr, &attrx, sizeof(*attr));
1447
1448 return qp;
1449 }
1450
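/*
 * Take the send and receive CQ locks in a fixed order (lower CQN first)
 * so that two threads locking the same pair of CQs cannot deadlock;
 * mlx5_unlock_cqs() releases them in the reverse order.
 */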
1451 static void mlx5_lock_cqs(struct ibv_qp *qp)
1452 {
1453 struct mlx5_cq *send_cq = to_mcq(qp->send_cq);
1454 struct mlx5_cq *recv_cq = to_mcq(qp->recv_cq);
1455
1456 if (send_cq && recv_cq) {
1457 if (send_cq == recv_cq) {
1458 mlx5_spin_lock(&send_cq->lock);
1459 } else if (send_cq->cqn < recv_cq->cqn) {
1460 mlx5_spin_lock(&send_cq->lock);
1461 mlx5_spin_lock(&recv_cq->lock);
1462 } else {
1463 mlx5_spin_lock(&recv_cq->lock);
1464 mlx5_spin_lock(&send_cq->lock);
1465 }
1466 } else if (send_cq) {
1467 mlx5_spin_lock(&send_cq->lock);
1468 } else if (recv_cq) {
1469 mlx5_spin_lock(&recv_cq->lock);
1470 }
1471 }
1472
1473 static void mlx5_unlock_cqs(struct ibv_qp *qp)
1474 {
1475 struct mlx5_cq *send_cq = to_mcq(qp->send_cq);
1476 struct mlx5_cq *recv_cq = to_mcq(qp->recv_cq);
1477
1478 if (send_cq && recv_cq) {
1479 if (send_cq == recv_cq) {
1480 mlx5_spin_unlock(&send_cq->lock);
1481 } else if (send_cq->cqn < recv_cq->cqn) {
1482 mlx5_spin_unlock(&recv_cq->lock);
1483 mlx5_spin_unlock(&send_cq->lock);
1484 } else {
1485 mlx5_spin_unlock(&send_cq->lock);
1486 mlx5_spin_unlock(&recv_cq->lock);
1487 }
1488 } else if (send_cq) {
1489 mlx5_spin_unlock(&send_cq->lock);
1490 } else if (recv_cq) {
1491 mlx5_spin_unlock(&recv_cq->lock);
1492 }
1493 }
1494
1495 int mlx5_destroy_qp(struct ibv_qp *ibqp)
1496 {
1497 struct mlx5_qp *qp = to_mqp(ibqp);
1498 struct mlx5_context *ctx = to_mctx(ibqp->context);
1499 int ret;
1500
1501 if (qp->rss_qp) {
1502 ret = ibv_cmd_destroy_qp(ibqp);
1503 if (ret)
1504 return ret;
1505 goto free;
1506 }
1507
1508 if (!ctx->cqe_version)
1509 pthread_mutex_lock(&ctx->qp_table_mutex);
1510
1511 ret = ibv_cmd_destroy_qp(ibqp);
1512 if (ret) {
1513 if (!ctx->cqe_version)
1514 pthread_mutex_unlock(&ctx->qp_table_mutex);
1515 return ret;
1516 }
1517
1518 mlx5_lock_cqs(ibqp);
1519
1520 __mlx5_cq_clean(to_mcq(ibqp->recv_cq), qp->rsc.rsn,
1521 ibqp->srq ? to_msrq(ibqp->srq) : NULL);
1522 if (ibqp->send_cq != ibqp->recv_cq)
1523 __mlx5_cq_clean(to_mcq(ibqp->send_cq), qp->rsc.rsn, NULL);
1524
1525 if (!ctx->cqe_version) {
1526 if (qp->sq.wqe_cnt || qp->rq.wqe_cnt)
1527 mlx5_clear_qp(ctx, ibqp->qp_num);
1528 }
1529
1530 mlx5_unlock_cqs(ibqp);
1531 if (!ctx->cqe_version)
1532 pthread_mutex_unlock(&ctx->qp_table_mutex);
1533 else if (!is_xrc_tgt(ibqp->qp_type))
1534 mlx5_clear_uidx(ctx, qp->rsc.rsn);
1535
1536 mlx5_free_db(ctx, qp->db);
1537 mlx5_spinlock_destroy(&qp->rq.lock);
1538 mlx5_spinlock_destroy(&qp->sq.lock);
1539 mlx5_free_qp_buf(qp);
1540 free:
1541 free(qp);
1542
1543 return 0;
1544 }
1545
1546 int mlx5_query_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr,
1547 int attr_mask, struct ibv_qp_init_attr *init_attr)
1548 {
1549 struct ibv_query_qp cmd;
1550 struct mlx5_qp *qp = to_mqp(ibqp);
1551 int ret;
1552
1553 if (qp->rss_qp)
1554 return ENOSYS;
1555
1556 ret = ibv_cmd_query_qp(ibqp, attr, attr_mask, init_attr, &cmd, sizeof(cmd));
1557 if (ret)
1558 return ret;
1559
1560 init_attr->cap.max_send_wr = qp->sq.max_post;
1561 init_attr->cap.max_send_sge = qp->sq.max_gs;
1562 init_attr->cap.max_inline_data = qp->max_inline_data;
1563
1564 attr->cap = init_attr->cap;
1565
1566 return 0;
1567 }
1568
1569 enum {
1570 MLX5_MODIFY_QP_EX_ATTR_MASK = IBV_QP_RATE_LIMIT,
1571 };
1572
1573 int mlx5_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
1574 int attr_mask)
1575 {
1576 struct ibv_modify_qp cmd = {};
1577 struct ibv_modify_qp_ex cmd_ex = {};
1578 struct ibv_modify_qp_resp_ex resp = {};
1579 struct mlx5_qp *mqp = to_mqp(qp);
1580 struct mlx5_context *context = to_mctx(qp->context);
1581 int ret;
1582 uint32_t *db;
1583
1584 if (mqp->rss_qp)
1585 return ENOSYS;
1586
1587 if (attr_mask & IBV_QP_PORT) {
1588 switch (qp->qp_type) {
1589 case IBV_QPT_RAW_PACKET:
1590 if (context->cached_link_layer[attr->port_num - 1] ==
1591 IBV_LINK_LAYER_ETHERNET) {
1592 if (context->cached_device_cap_flags &
1593 IBV_DEVICE_RAW_IP_CSUM)
1594 mqp->qp_cap_cache |=
1595 MLX5_CSUM_SUPPORT_RAW_OVER_ETH |
1596 MLX5_RX_CSUM_VALID;
1597
1598 if (ibv_is_qpt_supported(
1599 context->cached_tso_caps.supported_qpts,
1600 IBV_QPT_RAW_PACKET))
1601 mqp->max_tso =
1602 context->cached_tso_caps.max_tso;
1603 }
1604 break;
1605 default:
1606 break;
1607 }
1608 }
1609
1610 if (attr_mask & MLX5_MODIFY_QP_EX_ATTR_MASK)
1611 ret = ibv_cmd_modify_qp_ex(qp, attr, attr_mask,
1612 &cmd_ex,
1613 sizeof(cmd_ex), sizeof(cmd_ex),
1614 &resp,
1615 sizeof(resp), sizeof(resp));
1616 else
1617 ret = ibv_cmd_modify_qp(qp, attr, attr_mask,
1618 &cmd, sizeof(cmd));
1619
1620 if (!ret &&
1621 (attr_mask & IBV_QP_STATE) &&
1622 attr->qp_state == IBV_QPS_RESET) {
1623 if (qp->recv_cq) {
1624 mlx5_cq_clean(to_mcq(qp->recv_cq), mqp->rsc.rsn,
1625 qp->srq ? to_msrq(qp->srq) : NULL);
1626 }
1627 if (qp->send_cq != qp->recv_cq && qp->send_cq)
1628 mlx5_cq_clean(to_mcq(qp->send_cq),
1629 to_mqp(qp)->rsc.rsn, NULL);
1630
1631 mlx5_init_qp_indices(mqp);
1632 db = mqp->db;
1633 db[MLX5_RCV_DBR] = 0;
1634 db[MLX5_SND_DBR] = 0;
1635 }
1636
1637 /*
1638 * When the Raw Packet QP is in INIT state, its RQ
1639 * underneath is already in RDY, which means it can
1640 * receive packets. According to the IB spec, a QP can't
1641 * receive packets until moved to RTR state. To achieve this,
1642 * for Raw Packet QPs, we update the doorbell record
1643 * once the QP is moved to RTR.
1644 */
1645 if (!ret &&
1646 (attr_mask & IBV_QP_STATE) &&
1647 attr->qp_state == IBV_QPS_RTR &&
1648 qp->qp_type == IBV_QPT_RAW_PACKET) {
1649 mlx5_spin_lock(&mqp->rq.lock);
1650 mqp->db[MLX5_RCV_DBR] = htobe32(mqp->rq.head & 0xffff);
1651 mlx5_spin_unlock(&mqp->rq.lock);
1652 }
1653
1654 return ret;
1655 }
1656
1657 #define RROCE_UDP_SPORT_MIN 0xC000
1658 #define RROCE_UDP_SPORT_MAX 0xFFFF
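/*
 * For RoCE v2 destinations the UDP source port is picked at random from
 * the range reserved above and carried in the rlid field of the address
 * vector.
 */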
1659 struct ibv_ah *mlx5_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr)
1660 {
1661 struct mlx5_context *ctx = to_mctx(pd->context);
1662 struct ibv_port_attr port_attr;
1663 struct mlx5_ah *ah;
1664 uint32_t gid_type;
1665 uint32_t tmp;
1666 uint8_t grh;
1667 int is_eth;
1668
1669 if (attr->port_num < 1 || attr->port_num > ctx->num_ports)
1670 return NULL;
1671
1672 if (ctx->cached_link_layer[attr->port_num - 1]) {
1673 is_eth = ctx->cached_link_layer[attr->port_num - 1] ==
1674 IBV_LINK_LAYER_ETHERNET;
1675 } else {
1676 if (ibv_query_port(pd->context, attr->port_num, &port_attr))
1677 return NULL;
1678
1679 is_eth = (port_attr.link_layer == IBV_LINK_LAYER_ETHERNET);
1680 }
1681
1682 if (unlikely((!attr->is_global) && is_eth)) {
1683 errno = EINVAL;
1684 return NULL;
1685 }
1686
1687 ah = calloc(1, sizeof *ah);
1688 if (!ah)
1689 return NULL;
1690
1691 if (is_eth) {
1692 if (ibv_query_gid_type(pd->context, attr->port_num,
1693 attr->grh.sgid_index, &gid_type))
1694 goto err;
1695
1696 if (gid_type == IBV_GID_TYPE_ROCE_V2)
1697 ah->av.rlid = htobe16(rand() % (RROCE_UDP_SPORT_MAX + 1
1698 - RROCE_UDP_SPORT_MIN)
1699 + RROCE_UDP_SPORT_MIN);
1700 /* Since RoCE packets must contain GRH, this bit is reserved
1701 * for RoCE and shouldn't be set.
1702 */
1703 grh = 0;
1704 } else {
1705 ah->av.fl_mlid = attr->src_path_bits & 0x7f;
1706 ah->av.rlid = htobe16(attr->dlid);
1707 grh = 1;
1708 }
1709 ah->av.stat_rate_sl = (attr->static_rate << 4) | attr->sl;
1710 if (attr->is_global) {
1711 ah->av.tclass = attr->grh.traffic_class;
1712 ah->av.hop_limit = attr->grh.hop_limit;
1713 tmp = htobe32((grh << 30) |
1714 ((attr->grh.sgid_index & 0xff) << 20) |
1715 (attr->grh.flow_label & 0xfffff));
1716 ah->av.grh_gid_fl = tmp;
1717 memcpy(ah->av.rgid, attr->grh.dgid.raw, 16);
1718 }
1719
1720 if (is_eth) {
1721 if (ctx->cmds_supp_uhw & MLX5_USER_CMDS_SUPP_UHW_CREATE_AH) {
1722 struct mlx5_create_ah_resp resp = {};
1723
1724 if (ibv_cmd_create_ah(pd, &ah->ibv_ah, attr, &resp.ibv_resp, sizeof(resp)))
1725 goto err;
1726
1727 ah->kern_ah = true;
1728 memcpy(ah->av.rmac, resp.dmac, ETHERNET_LL_SIZE);
1729 } else {
1730 uint16_t vid;
1731
1732 if (ibv_resolve_eth_l2_from_gid(pd->context, attr,
1733 ah->av.rmac, &vid))
1734 goto err;
1735 }
1736 }
1737
1738 return &ah->ibv_ah;
1739 err:
1740 free(ah);
1741 return NULL;
1742 }
1743
1744 int mlx5_destroy_ah(struct ibv_ah *ah)
1745 {
1746 struct mlx5_ah *mah = to_mah(ah);
1747 int err;
1748
1749 if (mah->kern_ah) {
1750 err = ibv_cmd_destroy_ah(ah);
1751 if (err)
1752 return err;
1753 }
1754
1755 free(mah);
1756 return 0;
1757 }
1758
1759 int mlx5_attach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid)
1760 {
1761 return ibv_cmd_attach_mcast(qp, gid, lid);
1762 }
1763
1764 int mlx5_detach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid)
1765 {
1766 return ibv_cmd_detach_mcast(qp, gid, lid);
1767 }
1768
1769 struct ibv_qp *mlx5_create_qp_ex(struct ibv_context *context,
1770 struct ibv_qp_init_attr_ex *attr)
1771 {
1772 return create_qp(context, attr);
1773 }
1774
1775 int mlx5_get_srq_num(struct ibv_srq *srq, uint32_t *srq_num)
1776 {
1777 struct mlx5_srq *msrq = to_msrq(srq);
1778
1779 *srq_num = msrq->srqn;
1780
1781 return 0;
1782 }
1783
1784 struct ibv_xrcd *
1785 mlx5_open_xrcd(struct ibv_context *context,
1786 struct ibv_xrcd_init_attr *xrcd_init_attr)
1787 {
1788 int err;
1789 struct verbs_xrcd *xrcd;
1790 struct ibv_open_xrcd cmd = {};
1791 struct ibv_open_xrcd_resp resp = {};
1792
1793 xrcd = calloc(1, sizeof(*xrcd));
1794 if (!xrcd)
1795 return NULL;
1796
1797 err = ibv_cmd_open_xrcd(context, xrcd, sizeof(*xrcd), xrcd_init_attr,
1798 &cmd, sizeof(cmd), &resp, sizeof(resp));
1799 if (err) {
1800 free(xrcd);
1801 return NULL;
1802 }
1803
1804 return &xrcd->xrcd;
1805 }
1806
1807 int mlx5_close_xrcd(struct ibv_xrcd *ib_xrcd)
1808 {
1809 struct verbs_xrcd *xrcd = container_of(ib_xrcd, struct verbs_xrcd, xrcd);
1810 int ret;
1811
1812 ret = ibv_cmd_close_xrcd(xrcd);
1813 if (!ret)
1814 free(xrcd);
1815
1816 return ret;
1817 }
1818
1819 static struct ibv_srq *
1820 mlx5_create_xrc_srq(struct ibv_context *context,
1821 struct ibv_srq_init_attr_ex *attr)
1822 {
1823 int err;
1824 struct mlx5_create_srq_ex cmd;
1825 struct mlx5_create_srq_resp resp;
1826 struct mlx5_srq *msrq;
1827 struct mlx5_context *ctx = to_mctx(context);
1828 int max_sge;
1829 struct ibv_srq *ibsrq;
1830 int uidx;
1831 FILE *fp = ctx->dbg_fp;
1832
1833 msrq = calloc(1, sizeof(*msrq));
1834 if (!msrq)
1835 return NULL;
1836
1837 ibsrq = (struct ibv_srq *)&msrq->vsrq;
1838
1839 memset(&cmd, 0, sizeof(cmd));
1840 memset(&resp, 0, sizeof(resp));
1841
1842 if (mlx5_spinlock_init(&msrq->lock)) {
1843 fprintf(stderr, "%s-%d:\n", __func__, __LINE__);
1844 goto err;
1845 }
1846
1847 if (attr->attr.max_wr > ctx->max_srq_recv_wr) {
1848 fprintf(stderr, "%s-%d:max_wr %d, max_srq_recv_wr %d\n",
1849 __func__, __LINE__, attr->attr.max_wr,
1850 ctx->max_srq_recv_wr);
1851 errno = EINVAL;
1852 goto err_spl;
1853 }
1854
1855 /*
1856 * This calculation does not consider required control segments. The
1857 * final calculation is done again later; this is done to avoid
1858 * overflow of intermediate variables.
1859 */
1860 max_sge = ctx->max_rq_desc_sz / sizeof(struct mlx5_wqe_data_seg);
1861 if (attr->attr.max_sge > max_sge) {
1862 fprintf(stderr, "%s-%d:max_sge %d, max supported %d\n",
1863 __func__, __LINE__, attr->attr.max_sge,
1864 max_sge);
1865 errno = EINVAL;
1866 goto err_spl;
1867 }
1868
1869 msrq->max = align_queue_size(attr->attr.max_wr + 1);
1870 msrq->max_gs = attr->attr.max_sge;
1871 msrq->counter = 0;
1872
1873 if (mlx5_alloc_srq_buf(context, msrq)) {
1874 fprintf(stderr, "%s-%d:\n", __func__, __LINE__);
1875 goto err_spl;
1876 }
1877
1878 msrq->db = mlx5_alloc_dbrec(ctx);
1879 if (!msrq->db) {
1880 fprintf(stderr, "%s-%d:\n", __func__, __LINE__);
1881 goto err_free;
1882 }
1883
1884 *msrq->db = 0;
1885
1886 cmd.buf_addr = (uintptr_t)msrq->buf.buf;
1887 cmd.db_addr = (uintptr_t)msrq->db;
1888 msrq->wq_sig = srq_sig_enabled();
1889 if (msrq->wq_sig)
1890 cmd.flags = MLX5_SRQ_FLAG_SIGNATURE;
1891
1892 attr->attr.max_sge = msrq->max_gs;
1893 if (ctx->cqe_version) {
1894 uidx = mlx5_store_uidx(ctx, msrq);
1895 if (uidx < 0) {
1896 mlx5_dbg(fp, MLX5_DBG_QP, "Couldn't find free user index\n");
1897 goto err_free_db;
1898 }
1899 cmd.uidx = uidx;
1900 } else {
1901 cmd.uidx = 0xffffff;
1902 pthread_mutex_lock(&ctx->srq_table_mutex);
1903 }
1904
1905 err = ibv_cmd_create_srq_ex(context, &msrq->vsrq, sizeof(msrq->vsrq),
1906 attr, &cmd.ibv_cmd, sizeof(cmd),
1907 &resp.ibv_resp, sizeof(resp));
1908 if (err)
1909 goto err_free_uidx;
1910
1911 if (!ctx->cqe_version) {
1912 err = mlx5_store_srq(to_mctx(context), resp.srqn, msrq);
1913 if (err)
1914 goto err_destroy;
1915
1916 pthread_mutex_unlock(&ctx->srq_table_mutex);
1917 }
1918
1919 msrq->srqn = resp.srqn;
1920 msrq->rsc.type = MLX5_RSC_TYPE_XSRQ;
1921 msrq->rsc.rsn = ctx->cqe_version ? cmd.uidx : resp.srqn;
1922
1923 return ibsrq;
1924
1925 err_destroy:
1926 ibv_cmd_destroy_srq(ibsrq);
1927
1928 err_free_uidx:
1929 if (ctx->cqe_version)
1930 mlx5_clear_uidx(ctx, cmd.uidx);
1931 else
1932 pthread_mutex_unlock(&ctx->srq_table_mutex);
1933
1934 err_free_db:
1935 mlx5_free_db(ctx, msrq->db);
1936
1937 err_free:
1938 free(msrq->wrid);
1939 mlx5_free_buf(&msrq->buf);
1940
1941 err_spl:
1942 mlx5_spinlock_destroy(&msrq->lock);
1943
1944 err:
1945 free(msrq);
1946
1947 return NULL;
1948 }
1949
1950 struct ibv_srq *mlx5_create_srq_ex(struct ibv_context *context,
1951 struct ibv_srq_init_attr_ex *attr)
1952 {
1953 if (!(attr->comp_mask & IBV_SRQ_INIT_ATTR_TYPE) ||
1954 (attr->srq_type == IBV_SRQT_BASIC))
1955 return mlx5_create_srq(attr->pd,
1956 (struct ibv_srq_init_attr *)attr);
1957 else if (attr->srq_type == IBV_SRQT_XRC)
1958 return mlx5_create_xrc_srq(context, attr);
1959
1960 return NULL;
1961 }
1962
1963 int mlx5_query_device_ex(struct ibv_context *context,
1964 const struct ibv_query_device_ex_input *input,
1965 struct ibv_device_attr_ex *attr,
1966 size_t attr_size)
1967 {
1968 struct mlx5_context *mctx = to_mctx(context);
1969 struct mlx5_query_device_ex_resp resp;
1970 struct mlx5_query_device_ex cmd;
1971 struct ibv_device_attr *a;
1972 uint64_t raw_fw_ver;
1973 unsigned sub_minor;
1974 unsigned major;
1975 unsigned minor;
1976 int err;
1977 int cmd_supp_uhw = mctx->cmds_supp_uhw &
1978 MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE;
1979
1980 memset(&cmd, 0, sizeof(cmd));
1981 memset(&resp, 0, sizeof(resp));
1982 err = ibv_cmd_query_device_ex(context, input, attr, attr_size,
1983 &raw_fw_ver,
1984 &cmd.ibv_cmd, sizeof(cmd.ibv_cmd), sizeof(cmd),
1985 &resp.ibv_resp, sizeof(resp.ibv_resp),
1986 cmd_supp_uhw ? sizeof(resp) : sizeof(resp.ibv_resp));
1987 if (err)
1988 return err;
1989
1990 attr->tso_caps = resp.tso_caps;
1991 attr->rss_caps.rx_hash_fields_mask = resp.rss_caps.rx_hash_fields_mask;
1992 attr->rss_caps.rx_hash_function = resp.rss_caps.rx_hash_function;
1993 attr->packet_pacing_caps = resp.packet_pacing_caps.caps;
1994
1995 if (resp.support_multi_pkt_send_wqe)
1996 mctx->vendor_cap_flags |= MLX5_VENDOR_CAP_FLAGS_MPW;
1997
1998 mctx->cqe_comp_caps = resp.cqe_comp_caps;
1999
2000 major = (raw_fw_ver >> 32) & 0xffff;
2001 minor = (raw_fw_ver >> 16) & 0xffff;
2002 sub_minor = raw_fw_ver & 0xffff;
2003 a = &attr->orig_attr;
2004 snprintf(a->fw_ver, sizeof(a->fw_ver), "%d.%d.%04d",
2005 major, minor, sub_minor);
2006
2007 return 0;
2008 }
2009
2010 static int rwq_sig_enabled(struct ibv_context *context)
2011 {
2012 char *env;
2013
2014 env = getenv("MLX5_RWQ_SIGNATURE");
2015 if (env)
2016 return 1;
2017
2018 return 0;
2019 }
2020
2021 static void mlx5_free_rwq_buf(struct mlx5_rwq *rwq, struct ibv_context *context)
2022 {
2023 struct mlx5_context *ctx = to_mctx(context);
2024
2025 mlx5_free_actual_buf(ctx, &rwq->buf);
2026 free(rwq->rq.wrid);
2027 }
2028
2029 static int mlx5_alloc_rwq_buf(struct ibv_context *context,
2030 struct mlx5_rwq *rwq,
2031 int size)
2032 {
2033 int err;
2034 enum mlx5_alloc_type default_alloc_type = MLX5_ALLOC_TYPE_PREFER_CONTIG;
2035
2036 rwq->rq.wrid = malloc(rwq->rq.wqe_cnt * sizeof(uint64_t));
2037 if (!rwq->rq.wrid) {
2038 errno = ENOMEM;
2039 return -1;
2040 }
2041
2042 err = mlx5_alloc_prefered_buf(to_mctx(context), &rwq->buf,
2043 align(rwq->buf_size, to_mdev
2044 (context->device)->page_size),
2045 to_mdev(context->device)->page_size,
2046 default_alloc_type,
2047 MLX5_RWQ_PREFIX);
2048
2049 if (err) {
2050 free(rwq->rq.wrid);
2051 errno = ENOMEM;
2052 return -1;
2053 }
2054
2055 return 0;
2056 }
2057
2058 struct ibv_wq *mlx5_create_wq(struct ibv_context *context,
2059 struct ibv_wq_init_attr *attr)
2060 {
2061 struct mlx5_create_wq cmd;
2062 struct mlx5_create_wq_resp resp;
2063 int err;
2064 struct mlx5_rwq *rwq;
2065 struct mlx5_context *ctx = to_mctx(context);
2066 int ret;
2067 int32_t usr_idx = 0;
2068 FILE *fp = ctx->dbg_fp;
2069
2070 if (attr->wq_type != IBV_WQT_RQ)
2071 return NULL;
2072
2073 memset(&cmd, 0, sizeof(cmd));
2074 memset(&resp, 0, sizeof(resp));
2075
2076 rwq = calloc(1, sizeof(*rwq));
2077 if (!rwq)
2078 return NULL;
2079
2080 ret = ibv_init_wq(&rwq->wq);
2081 if (ret < 0)
2082 goto err;
2083
2084 rwq->wq_sig = rwq_sig_enabled(context);
2085 if (rwq->wq_sig)
2086 cmd.drv.flags = MLX5_RWQ_FLAG_SIGNATURE;
2087
2088 ret = mlx5_calc_rwq_size(ctx, rwq, attr);
2089 if (ret < 0) {
2090 errno = -ret;
2091 goto err_cleanup_wq;
2092 }
2093
2094 rwq->buf_size = ret;
2095 if (mlx5_alloc_rwq_buf(context, rwq, ret))
2096 goto err_cleanup_wq;
2097
2098 mlx5_init_rwq_indices(rwq);
2099
2100 if (mlx5_spinlock_init(&rwq->rq.lock))
2101 goto err_free_rwq_buf;
2102
2103 rwq->db = mlx5_alloc_dbrec(ctx);
2104 if (!rwq->db)
2105 goto err_spl;
2106
2107 rwq->db[MLX5_RCV_DBR] = 0;
2108 rwq->db[MLX5_SND_DBR] = 0;
2109 rwq->pbuff = rwq->buf.buf + rwq->rq.offset;
2110 rwq->recv_db = &rwq->db[MLX5_RCV_DBR];
2111 cmd.drv.buf_addr = (uintptr_t)rwq->buf.buf;
2112 cmd.drv.db_addr = (uintptr_t)rwq->db;
2113 cmd.drv.rq_wqe_count = rwq->rq.wqe_cnt;
2114 cmd.drv.rq_wqe_shift = rwq->rq.wqe_shift;
2115 usr_idx = mlx5_store_uidx(ctx, rwq);
2116 if (usr_idx < 0) {
2117 mlx5_dbg(fp, MLX5_DBG_QP, "Couldn't find free user index\n");
2118 goto err_free_db_rec;
2119 }
2120
2121 cmd.drv.user_index = usr_idx;
2122 err = ibv_cmd_create_wq(context, attr, &rwq->wq, &cmd.ibv_cmd,
2123 sizeof(cmd.ibv_cmd),
2124 sizeof(cmd),
2125 &resp.ibv_resp, sizeof(resp.ibv_resp),
2126 sizeof(resp));
2127 if (err)
2128 goto err_create;
2129
2130 rwq->rsc.type = MLX5_RSC_TYPE_RWQ;
2131 rwq->rsc.rsn = cmd.drv.user_index;
2132
2133 rwq->wq.post_recv = mlx5_post_wq_recv;
2134 return &rwq->wq;
2135
2136 err_create:
2137 mlx5_clear_uidx(ctx, cmd.drv.user_index);
2138 err_free_db_rec:
2139 mlx5_free_db(to_mctx(context), rwq->db);
2140 err_spl:
2141 mlx5_spinlock_destroy(&rwq->rq.lock);
2142 err_free_rwq_buf:
2143 mlx5_free_rwq_buf(rwq, context);
2144 err_cleanup_wq:
2145 ibv_cleanup_wq(&rwq->wq);
2146 err:
2147 free(rwq);
2148 return NULL;
2149 }
2150
2151 int mlx5_modify_wq(struct ibv_wq *wq, struct ibv_wq_attr *attr)
2152 {
2153 struct mlx5_modify_wq cmd = {};
2154 struct mlx5_rwq *rwq = to_mrwq(wq);
2155
2156 if ((attr->attr_mask & IBV_WQ_ATTR_STATE) &&
2157 attr->wq_state == IBV_WQS_RDY) {
2158 if ((attr->attr_mask & IBV_WQ_ATTR_CURR_STATE) &&
2159 attr->curr_wq_state != wq->state)
2160 return -EINVAL;
2161
2162 if (wq->state == IBV_WQS_RESET) {
2163 mlx5_spin_lock(&to_mcq(wq->cq)->lock);
2164 __mlx5_cq_clean(to_mcq(wq->cq),
2165 rwq->rsc.rsn, NULL);
2166 mlx5_spin_unlock(&to_mcq(wq->cq)->lock);
2167 mlx5_init_rwq_indices(rwq);
2168 rwq->db[MLX5_RCV_DBR] = 0;
2169 rwq->db[MLX5_SND_DBR] = 0;
2170 }
2171 }
2172
2173 return ibv_cmd_modify_wq(wq, attr, &cmd.ibv_cmd, sizeof(cmd.ibv_cmd), sizeof(cmd));
2174 }
2175
2176 int mlx5_destroy_wq(struct ibv_wq *wq)
2177 {
2178 struct mlx5_rwq *rwq = to_mrwq(wq);
2179 int ret;
2180
2181 ret = ibv_cmd_destroy_wq(wq);
2182 if (ret)
2183 return ret;
2184
2185 mlx5_spin_lock(&to_mcq(wq->cq)->lock);
2186 __mlx5_cq_clean(to_mcq(wq->cq), rwq->rsc.rsn, NULL);
2187 mlx5_spin_unlock(&to_mcq(wq->cq)->lock);
2188 mlx5_clear_uidx(to_mctx(wq->context), rwq->rsc.rsn);
2189 mlx5_free_db(to_mctx(wq->context), rwq->db);
2190 mlx5_spinlock_destroy(&rwq->rq.lock);
2191 mlx5_free_rwq_buf(rwq, wq->context);
2192 ibv_cleanup_wq(&rwq->wq);
2193 free(rwq);
2194
2195 return 0;
2196 }
2197
2198 struct ibv_rwq_ind_table *mlx5_create_rwq_ind_table(struct ibv_context *context,
2199 struct ibv_rwq_ind_table_init_attr *init_attr)
2200 {
2201 struct ibv_create_rwq_ind_table *cmd;
2202 struct mlx5_create_rwq_ind_table_resp resp;
2203 struct ibv_rwq_ind_table *ind_table;
2204 uint32_t required_tbl_size;
2205 int num_tbl_entries;
2206 int cmd_size;
2207 int err;
2208
2209 num_tbl_entries = 1 << init_attr->log_ind_tbl_size;
2210 /* Data must be u64 aligned */
2211 required_tbl_size = (num_tbl_entries * sizeof(uint32_t)) < sizeof(uint64_t) ?
2212 sizeof(uint64_t) : (num_tbl_entries * sizeof(uint32_t));
2213
2214 cmd_size = required_tbl_size + sizeof(*cmd);
2215 cmd = calloc(1, cmd_size);
2216 if (!cmd)
2217 return NULL;
2218
2219 memset(&resp, 0, sizeof(resp));
2220 ind_table = calloc(1, sizeof(*ind_table));
2221 if (!ind_table)
2222 goto free_cmd;
2223
2224 err = ibv_cmd_create_rwq_ind_table(context, init_attr, ind_table, cmd,
2225 cmd_size, cmd_size, &resp.ibv_resp, sizeof(resp.ibv_resp),
2226 sizeof(resp));
2227 if (err)
2228 goto err;
2229
2230 free(cmd);
2231 return ind_table;
2232
2233 err:
2234 free(ind_table);
2235 free_cmd:
2236 free(cmd);
2237 return NULL;
2238 }
2239
2240 int mlx5_destroy_rwq_ind_table(struct ibv_rwq_ind_table *rwq_ind_table)
2241 {
2242 int ret;
2243
2244 ret = ibv_cmd_destroy_rwq_ind_table(rwq_ind_table);
2245
2246 if (ret)
2247 return ret;
2248
2249 free(rwq_ind_table);
2250 return 0;
2251 }
2252