/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#include <infiniband/mlx5dv.h>

#include "mlx5_priv.h"
#include "mlx5_ifc.h"
#include "spdk/log.h"
#include "spdk/util.h"

#include "spdk_internal/assert.h"
#include "spdk_internal/rdma_utils.h"

/* INIT and RTR state params */
#define MLX5_QP_RQ_PSN			0x4242
#define MLX5_QP_MAX_DEST_RD_ATOMIC	16
#define MLX5_QP_RNR_TIMER		12
#define MLX5_QP_HOP_LIMIT		64

/* RTS state params */
#define MLX5_QP_TIMEOUT			14
#define MLX5_QP_RETRY_COUNT		7
#define MLX5_QP_RNR_RETRY		7
#define MLX5_QP_MAX_RD_ATOMIC		16
#define MLX5_QP_SQ_PSN			0x4242

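/* Device and port capabilities that determine whether and how the QP can be connected in loopback mode */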
struct mlx5_qp_conn_caps {
	bool resources_on_nvme_emulation_manager;
	bool roce_enabled;
	bool fl_when_roce_disabled;
	bool fl_when_roce_enabled;
	bool port_ib_enabled;
	uint8_t roce_version;
	uint8_t port;
	uint16_t pkey_idx;
	enum ibv_mtu mtu;
};

static int mlx5_qp_connect(struct spdk_mlx5_qp *qp);

static void
mlx5_cq_deinit(struct spdk_mlx5_cq *cq)
{
	if (cq->verbs_cq) {
		ibv_destroy_cq(cq->verbs_cq);
	}
}

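/* Create a CQ via mlx5dv and expose its buffer, consumer index and CQE geometry in cq->hw
 * so that CQEs can later be polled directly.
 */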
static int
mlx5_cq_init(struct ibv_pd *pd, const struct spdk_mlx5_cq_attr *attr, struct spdk_mlx5_cq *cq)
{
	struct ibv_cq_init_attr_ex cq_attr = {
		.cqe = attr->cqe_cnt,
		.cq_context = attr->cq_context,
		.channel = attr->comp_channel,
		.comp_vector = attr->comp_vector,
		.wc_flags = IBV_WC_STANDARD_FLAGS,
		.comp_mask = IBV_CQ_INIT_ATTR_MASK_FLAGS,
		.flags = IBV_CREATE_CQ_ATTR_IGNORE_OVERRUN
	};
	struct mlx5dv_cq_init_attr cq_ex_attr = {
		.comp_mask = MLX5DV_CQ_INIT_ATTR_MASK_CQE_SIZE,
		.cqe_size = attr->cqe_size
	};
	struct mlx5dv_obj dv_obj;
	struct mlx5dv_cq mlx5_cq;
	struct ibv_cq_ex *cq_ex;
	int rc;

	cq_ex = mlx5dv_create_cq(pd->context, &cq_attr, &cq_ex_attr);
	if (!cq_ex) {
		rc = -errno;
		SPDK_ERRLOG("mlx5dv_create_cq failed, errno %d\n", rc);
		return rc;
	}

	cq->verbs_cq = ibv_cq_ex_to_cq(cq_ex);
	assert(cq->verbs_cq);

	dv_obj.cq.in = cq->verbs_cq;
	dv_obj.cq.out = &mlx5_cq;

	/* Init CQ - CQ is marked as owned by DV for all consumer index related actions */
	rc = mlx5dv_init_obj(&dv_obj, MLX5DV_OBJ_CQ);
	if (rc) {
		SPDK_ERRLOG("Failed to init DV CQ, rc %d\n", rc);
		ibv_destroy_cq(cq->verbs_cq);
		/* cq is allocated by the caller which frees it on error, don't free it here
		 * to avoid a double free */
		return rc;
	}

	cq->hw.cq_addr = (uintptr_t)mlx5_cq.buf;
	cq->hw.ci = 0;
	cq->hw.cqe_cnt = mlx5_cq.cqe_cnt;
	cq->hw.cqe_size = mlx5_cq.cqe_size;
	cq->hw.cq_num = mlx5_cq.cqn;

	return 0;
}

static void
mlx5_qp_destroy(struct spdk_mlx5_qp *qp)
{
	if (qp->verbs_qp) {
		ibv_destroy_qp(qp->verbs_qp);
	}
	if (qp->completions) {
		free(qp->completions);
	}
}

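/* Create an RC QP via mlx5dv, expose its SQ layout in qp->hw and connect the QP to itself in loopback */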
static int
mlx5_qp_init(struct ibv_pd *pd, const struct spdk_mlx5_qp_attr *attr, struct ibv_cq *cq,
	     struct spdk_mlx5_qp *qp)
{
	struct mlx5dv_qp dv_qp;
	struct mlx5dv_obj dv_obj;
	struct ibv_qp_init_attr_ex dv_qp_attr = {
		.cap = attr->cap,
		.qp_type = IBV_QPT_RC,
		.comp_mask = IBV_QP_INIT_ATTR_PD | IBV_QP_INIT_ATTR_SEND_OPS_FLAGS,
		.pd = pd,
		.send_ops_flags = IBV_QP_EX_WITH_RDMA_WRITE | IBV_QP_EX_WITH_SEND |
				  IBV_QP_EX_WITH_RDMA_READ | IBV_QP_EX_WITH_BIND_MW,
		.send_cq = cq,
		.recv_cq = cq,
		.sq_sig_all = attr->sigall,
	};
	/* Attrs required for MKEYs registration */
	struct mlx5dv_qp_init_attr mlx5_qp_attr = {
		.comp_mask = MLX5DV_QP_INIT_ATTR_MASK_SEND_OPS_FLAGS,
		.send_ops_flags = MLX5DV_QP_EX_WITH_MKEY_CONFIGURE
	};
	int rc;

	if (attr->sigall && attr->siglast) {
		SPDK_ERRLOG("Params sigall and siglast can't be enabled simultaneously\n");
		return -EINVAL;
	}

	qp->verbs_qp = mlx5dv_create_qp(pd->context, &dv_qp_attr, &mlx5_qp_attr);
	if (!qp->verbs_qp) {
		rc = -errno;
		SPDK_ERRLOG("Failed to create qp, rc %d\n", rc);
		return rc;
	}

	dv_obj.qp.in = qp->verbs_qp;
	dv_obj.qp.out = &dv_qp;

	rc = mlx5dv_init_obj(&dv_obj, MLX5DV_OBJ_QP);
	if (rc) {
		ibv_destroy_qp(qp->verbs_qp);
		SPDK_ERRLOG("Failed to init DV QP, rc %d\n", rc);
		return rc;
	}

	qp->hw.sq_addr = (uint64_t)dv_qp.sq.buf;
	qp->hw.dbr_addr = (uint64_t)dv_qp.dbrec;
	qp->hw.sq_bf_addr = (uint64_t)dv_qp.bf.reg;
	qp->hw.sq_wqe_cnt = dv_qp.sq.wqe_cnt;

	SPDK_NOTICELOG("mlx5 QP, sq size %u WQE_BB. %u send_wrs -> %u WQE_BB per send WR\n",
		       qp->hw.sq_wqe_cnt, attr->cap.max_send_wr, qp->hw.sq_wqe_cnt / attr->cap.max_send_wr);

	qp->hw.qp_num = qp->verbs_qp->qp_num;

	/* BlueFlame register size 0 means that doorbells are written to non-cached memory */
	qp->hw.sq_tx_db_nc = dv_qp.bf.size == 0;
	qp->tx_available = qp->hw.sq_wqe_cnt;
	qp->max_send_sge = attr->cap.max_send_sge;
	rc = posix_memalign((void **)&qp->completions, 4096,
			    qp->hw.sq_wqe_cnt * sizeof(*qp->completions));
	if (rc) {
		ibv_destroy_qp(qp->verbs_qp);
		SPDK_ERRLOG("Failed to alloc completions\n");
		/* posix_memalign() returns a positive errno, negate it per the negative errno convention */
		return -rc;
	}
	qp->sigmode = SPDK_MLX5_QP_SIG_NONE;
	if (attr->sigall) {
		qp->sigmode = SPDK_MLX5_QP_SIG_ALL;
	} else if (attr->siglast) {
		qp->sigmode = SPDK_MLX5_QP_SIG_LAST;
	}

	rc = mlx5_qp_connect(qp);
	if (rc) {
		ibv_destroy_qp(qp->verbs_qp);
		free(qp->completions);
		return rc;
	}

	return 0;
}

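/* Query the port number and pkey index that were assigned to the QP at creation time */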
static int
mlx5_qp_get_port_pkey_idx(struct spdk_mlx5_qp *qp, struct mlx5_qp_conn_caps *conn_caps)
{
	struct ibv_qp_attr attr = {};
	struct ibv_qp_init_attr init_attr = {};
	int attr_mask = IBV_QP_PKEY_INDEX | IBV_QP_PORT;
	int rc;

	rc = ibv_query_qp(qp->verbs_qp, &attr, attr_mask, &init_attr);
	if (rc) {
		SPDK_ERRLOG("Failed to query qp %p %u\n", qp, qp->hw.qp_num);
		return rc;
	}
	conn_caps->port = attr.port_num;
	conn_caps->pkey_idx = attr.pkey_index;

	return 0;
}

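/* Verify that the port's link layer is supported: IB with local addressing only, or Ethernet (RoCE) */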
static int
mlx5_check_port(struct ibv_context *ctx, struct mlx5_qp_conn_caps *conn_caps)
{
	struct ibv_port_attr port_attr = {};
	int rc;

	conn_caps->port_ib_enabled = false;

	rc = ibv_query_port(ctx, conn_caps->port, &port_attr);
	if (rc) {
		return rc;
	}

	if (port_attr.link_layer == IBV_LINK_LAYER_INFINIBAND) {
		/* we only support local IB addressing for now */
		if (port_attr.flags & IBV_QPF_GRH_REQUIRED) {
			SPDK_ERRLOG("IB enabled and GRH addressing is required but only local addressing is supported\n");
			return -1;
		}
		conn_caps->mtu = port_attr.active_mtu;
		conn_caps->port_ib_enabled = true;
		return 0;
	}

	if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET) {
		return -1;
	}

	conn_caps->mtu = IBV_MTU_4096;

	return 0;
}

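/* Query RoCE and force-loopback capabilities of the device via the DEVX QUERY_HCA_CAP command */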
static int
mlx5_fill_qp_conn_caps(struct ibv_context *context,
		       struct mlx5_qp_conn_caps *conn_caps)
{
	uint8_t in[DEVX_ST_SZ_BYTES(query_hca_cap_in)] = {0};
	uint8_t out[DEVX_ST_SZ_BYTES(query_hca_cap_out)] = {0};
	int rc;

	DEVX_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
	DEVX_SET(query_hca_cap_in, in, op_mod,
		 MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
	rc = mlx5dv_devx_general_cmd(context, in, sizeof(in), out,
				     sizeof(out));
	if (rc) {
		return rc;
	}

	conn_caps->resources_on_nvme_emulation_manager =
		DEVX_GET(query_hca_cap_out, out,
			 capability.cmd_hca_cap.resources_on_nvme_emulation_manager);
	conn_caps->fl_when_roce_disabled = DEVX_GET(query_hca_cap_out, out,
					   capability.cmd_hca_cap.fl_rc_qp_when_roce_disabled);
	conn_caps->roce_enabled = DEVX_GET(query_hca_cap_out, out,
					   capability.cmd_hca_cap.roce);
	if (!conn_caps->roce_enabled) {
		goto out;
	}

	memset(in, 0, sizeof(in));
	memset(out, 0, sizeof(out));
	DEVX_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
	DEVX_SET(query_hca_cap_in, in, op_mod, MLX5_SET_HCA_CAP_OP_MOD_ROCE);
	rc = mlx5dv_devx_general_cmd(context, in, sizeof(in), out,
				     sizeof(out));
	if (rc) {
		return rc;
	}

	conn_caps->roce_version = DEVX_GET(query_hca_cap_out, out,
					   capability.roce_caps.roce_version);
	conn_caps->fl_when_roce_enabled = DEVX_GET(query_hca_cap_out,
					  out, capability.roce_caps.fl_rc_qp_when_roce_enabled);
out:
	SPDK_DEBUGLOG(mlx5, "RoCE Caps: enabled %d ver %d fl allowed %d\n",
		      conn_caps->roce_enabled, conn_caps->roce_version,
		      conn_caps->roce_enabled ? conn_caps->fl_when_roce_enabled :
		      conn_caps->fl_when_roce_disabled);
	return 0;
}

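/* The QP is connected to itself, so both sides of the connection are moved through the
 * RESET -> INIT -> RTR -> RTS state machine manually via DEVX commands.
 */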
static int
mlx5_qp_loopback_conn_rst_2_init(struct spdk_mlx5_qp *qp, struct ibv_qp_attr *qp_attr,
				 int attr_mask)
{
	uint8_t in[DEVX_ST_SZ_BYTES(rst2init_qp_in)] = {0};
	uint8_t out[DEVX_ST_SZ_BYTES(rst2init_qp_out)] = {0};
	void *qpc = DEVX_ADDR_OF(rst2init_qp_in, in, qpc);
	int rc;

	DEVX_SET(rst2init_qp_in, in, opcode, MLX5_CMD_OP_RST2INIT_QP);
	DEVX_SET(rst2init_qp_in, in, qpn, qp->hw.qp_num);
	DEVX_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);

	if (attr_mask & IBV_QP_PKEY_INDEX) {
		DEVX_SET(qpc, qpc, primary_address_path.pkey_index,
			 qp_attr->pkey_index);
	}

	if (attr_mask & IBV_QP_PORT) {
		DEVX_SET(qpc, qpc, primary_address_path.vhca_port_num,
			 qp_attr->port_num);
	}

	if (attr_mask & IBV_QP_ACCESS_FLAGS) {
		if (qp_attr->qp_access_flags & IBV_ACCESS_REMOTE_READ) {
			DEVX_SET(qpc, qpc, rre, 1);
		}
		if (qp_attr->qp_access_flags & IBV_ACCESS_REMOTE_WRITE) {
			DEVX_SET(qpc, qpc, rwe, 1);
		}
	}

	rc = mlx5dv_devx_qp_modify(qp->verbs_qp, in, sizeof(in), out, sizeof(out));
	if (rc) {
		SPDK_ERRLOG("failed to modify qp to init, errno = %d\n", rc);
	}

	return rc;
}

static int
mlx5_qp_loopback_conn_init_2_rtr(struct spdk_mlx5_qp *qp, struct ibv_qp_attr *qp_attr,
				 int attr_mask)
{
	uint8_t in[DEVX_ST_SZ_BYTES(init2rtr_qp_in)] = {0};
	uint8_t out[DEVX_ST_SZ_BYTES(init2rtr_qp_out)] = {0};
	void *qpc = DEVX_ADDR_OF(init2rtr_qp_in, in, qpc);
	int rc;

	DEVX_SET(init2rtr_qp_in, in, opcode, MLX5_CMD_OP_INIT2RTR_QP);
	DEVX_SET(init2rtr_qp_in, in, qpn, qp->hw.qp_num);

	/* 30 is the maximum value for InfiniBand QPs */
	DEVX_SET(qpc, qpc, log_msg_max, 30);

	/* TODO: add more attributes */
	if (attr_mask & IBV_QP_PATH_MTU) {
		DEVX_SET(qpc, qpc, mtu, qp_attr->path_mtu);
	}
	if (attr_mask & IBV_QP_DEST_QPN) {
		DEVX_SET(qpc, qpc, remote_qpn, qp_attr->dest_qp_num);
	}
	if (attr_mask & IBV_QP_RQ_PSN) {
		DEVX_SET(qpc, qpc, next_rcv_psn, qp_attr->rq_psn & 0xffffff);
	}
	if (attr_mask & IBV_QP_TIMEOUT) {
		DEVX_SET(qpc, qpc, primary_address_path.ack_timeout,
			 qp_attr->timeout);
	}
	if (attr_mask & IBV_QP_PKEY_INDEX) {
		DEVX_SET(qpc, qpc, primary_address_path.pkey_index,
			 qp_attr->pkey_index);
	}
	if (attr_mask & IBV_QP_PORT) {
		DEVX_SET(qpc, qpc, primary_address_path.vhca_port_num,
			 qp_attr->port_num);
	}
	if (attr_mask & IBV_QP_MAX_DEST_RD_ATOMIC) {
		DEVX_SET(qpc, qpc, log_rra_max,
			 spdk_u32log2(qp_attr->max_dest_rd_atomic));
	}
	if (attr_mask & IBV_QP_MIN_RNR_TIMER) {
		DEVX_SET(qpc, qpc, min_rnr_nak, qp_attr->min_rnr_timer);
	}
	if (attr_mask & IBV_QP_AV) {
		/* fl (force loopback) routes packets back to the sending port */
		DEVX_SET(qpc, qpc, primary_address_path.fl, 1);
	}

	rc = mlx5dv_devx_qp_modify(qp->verbs_qp, in, sizeof(in), out, sizeof(out));
	if (rc) {
		SPDK_ERRLOG("failed to modify qp to rtr with errno = %d\n", rc);
	}

	return rc;
}

static int
mlx5_qp_loopback_conn_rtr_2_rts(struct spdk_mlx5_qp *qp, struct ibv_qp_attr *qp_attr, int attr_mask)
{
	uint8_t in[DEVX_ST_SZ_BYTES(rtr2rts_qp_in)] = {0};
	uint8_t out[DEVX_ST_SZ_BYTES(rtr2rts_qp_out)] = {0};
	void *qpc = DEVX_ADDR_OF(rtr2rts_qp_in, in, qpc);
	int rc;

	DEVX_SET(rtr2rts_qp_in, in, opcode, MLX5_CMD_OP_RTR2RTS_QP);
	DEVX_SET(rtr2rts_qp_in, in, qpn, qp->hw.qp_num);

	if (attr_mask & IBV_QP_TIMEOUT) {
		DEVX_SET(qpc, qpc, primary_address_path.ack_timeout,
			 qp_attr->timeout);
	}
	if (attr_mask & IBV_QP_RETRY_CNT) {
		DEVX_SET(qpc, qpc, retry_count, qp_attr->retry_cnt);
	}
	if (attr_mask & IBV_QP_SQ_PSN) {
		DEVX_SET(qpc, qpc, next_send_psn, qp_attr->sq_psn & 0xffffff);
	}
	if (attr_mask & IBV_QP_RNR_RETRY) {
		DEVX_SET(qpc, qpc, rnr_retry, qp_attr->rnr_retry);
	}
	if (attr_mask & IBV_QP_MAX_QP_RD_ATOMIC) {
		DEVX_SET(qpc, qpc, log_sra_max,
			 spdk_u32log2(qp_attr->max_rd_atomic));
	}

	rc = mlx5dv_devx_qp_modify(qp->verbs_qp, in, sizeof(in), out, sizeof(out));
	if (rc) {
		SPDK_ERRLOG("failed to modify qp to rts with errno = %d\n", rc);
	}

	return rc;
}

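/* Connect the QP to itself: move it through INIT, RTR and RTS with force-loopback addressing */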
static int
mlx5_qp_loopback_conn(struct spdk_mlx5_qp *qp, struct mlx5_qp_conn_caps *caps)
{
	struct ibv_qp_attr qp_attr = {};
	int rc, attr_mask = IBV_QP_STATE |
			    IBV_QP_PKEY_INDEX |
			    IBV_QP_PORT |
			    IBV_QP_ACCESS_FLAGS;

	qp_attr.qp_state = IBV_QPS_INIT;
	qp_attr.pkey_index = caps->pkey_idx;
	qp_attr.port_num = caps->port;
	qp_attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ;

	rc = mlx5_qp_loopback_conn_rst_2_init(qp, &qp_attr, attr_mask);
	if (rc) {
		return rc;
	}

	memset(&qp_attr, 0, sizeof(qp_attr));
	qp_attr.dest_qp_num = qp->hw.qp_num;
	qp_attr.qp_state = IBV_QPS_RTR;
	qp_attr.path_mtu = caps->mtu;
	qp_attr.rq_psn = MLX5_QP_RQ_PSN;
	qp_attr.max_dest_rd_atomic = MLX5_QP_MAX_DEST_RD_ATOMIC;
	qp_attr.min_rnr_timer = MLX5_QP_RNR_TIMER;
	qp_attr.ah_attr.port_num = caps->port;
	qp_attr.ah_attr.grh.hop_limit = MLX5_QP_HOP_LIMIT;

	attr_mask = IBV_QP_STATE              |
		    IBV_QP_AV                 |
		    IBV_QP_PATH_MTU           |
		    IBV_QP_DEST_QPN           |
		    IBV_QP_RQ_PSN             |
		    IBV_QP_MAX_DEST_RD_ATOMIC |
		    IBV_QP_MIN_RNR_TIMER;

	rc = mlx5_qp_loopback_conn_init_2_rtr(qp, &qp_attr, attr_mask);
	if (rc) {
		return rc;
	}

	memset(&qp_attr, 0, sizeof(qp_attr));
	qp_attr.qp_state = IBV_QPS_RTS;
	qp_attr.timeout = MLX5_QP_TIMEOUT;
	qp_attr.retry_cnt = MLX5_QP_RETRY_COUNT;
	qp_attr.sq_psn = MLX5_QP_SQ_PSN;
	qp_attr.rnr_retry = MLX5_QP_RNR_RETRY;
	qp_attr.max_rd_atomic = MLX5_QP_MAX_RD_ATOMIC;
	attr_mask = IBV_QP_STATE              |
		    IBV_QP_TIMEOUT            |
		    IBV_QP_RETRY_CNT          |
		    IBV_QP_RNR_RETRY          |
		    IBV_QP_SQ_PSN             |
		    IBV_QP_MAX_QP_RD_ATOMIC;
	/* Once the QP has been moved to RTR using DEVX, it must also be moved to RTS
	 * using DEVX because the kernel doesn't know that the QP is in the RTR state
	 */
	return mlx5_qp_loopback_conn_rtr_2_rts(qp, &qp_attr, attr_mask);
}

static int
mlx5_qp_connect(struct spdk_mlx5_qp *qp)
{
	struct mlx5_qp_conn_caps conn_caps = {};
	struct ibv_context *context = qp->verbs_qp->context;
	int rc;

	rc = mlx5_qp_get_port_pkey_idx(qp, &conn_caps);
	if (rc) {
		return rc;
	}
	rc = mlx5_fill_qp_conn_caps(context, &conn_caps);
	if (rc) {
		return rc;
	}
	rc = mlx5_check_port(context, &conn_caps);
	if (rc) {
		return rc;
	}

	/* Check if force-loopback is supported */
	if (conn_caps.port_ib_enabled ||
	    (conn_caps.resources_on_nvme_emulation_manager &&
	     ((conn_caps.roce_enabled && conn_caps.fl_when_roce_enabled) ||
	      (!conn_caps.roce_enabled && conn_caps.fl_when_roce_disabled)))) {
		return mlx5_qp_loopback_conn(qp, &conn_caps);
	}

	SPDK_ERRLOG("Force-loopback QP is not supported. Cannot create queue.\n");

	return -ENOTSUP;
}

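/* QPs attached to a CQ are tracked in a two level lookup table indexed by QP number, so
 * that the owner of a completion can be found quickly when the CQ is polled.
 */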
static void
mlx5_cq_remove_qp(struct spdk_mlx5_cq *cq, struct spdk_mlx5_qp *qp)
{
	uint32_t qpn_upper = qp->hw.qp_num >> SPDK_MLX5_QP_NUM_UPPER_SHIFT;
	uint32_t qpn_mask = qp->hw.qp_num & SPDK_MLX5_QP_NUM_LOWER_MASK;

	if (cq->qps[qpn_upper].count) {
		cq->qps[qpn_upper].table[qpn_mask] = NULL;
		cq->qps[qpn_upper].count--;
		cq->qps_count--;
		if (!cq->qps[qpn_upper].count) {
			free(cq->qps[qpn_upper].table);
		}
	} else {
		SPDK_ERRLOG("incorrect count, cq %p, qp %p, qpn %u\n", cq, qp, qp->hw.qp_num);
		SPDK_UNREACHABLE();
	}
}

static int
mlx5_cq_add_qp(struct spdk_mlx5_cq *cq, struct spdk_mlx5_qp *qp)
{
	uint32_t qpn_upper = qp->hw.qp_num >> SPDK_MLX5_QP_NUM_UPPER_SHIFT;
	uint32_t qpn_mask = qp->hw.qp_num & SPDK_MLX5_QP_NUM_LOWER_MASK;

	if (!cq->qps[qpn_upper].count) {
		cq->qps[qpn_upper].table = calloc(SPDK_MLX5_QP_NUM_LUT_SIZE, sizeof(*cq->qps[qpn_upper].table));
		if (!cq->qps[qpn_upper].table) {
			return -ENOMEM;
		}
	}
	if (cq->qps[qpn_upper].table[qpn_mask]) {
		SPDK_ERRLOG("incorrect entry, cq %p, qp %p, qpn %u\n", cq, qp, qp->hw.qp_num);
		SPDK_UNREACHABLE();
	}
	cq->qps[qpn_upper].count++;
	cq->qps_count++;
	cq->qps[qpn_upper].table[qpn_mask] = qp;

	return 0;
}

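/* Typical usage of the public API below (a sketch, not taken from this file; attribute
 * values are illustrative and error handling is omitted):
 *
 *	struct spdk_mlx5_cq_attr cq_attr = { .cqe_cnt = 1024, .cqe_size = 64 };
 *	struct spdk_mlx5_qp_attr qp_attr = { .cap = { .max_send_wr = 256, .max_send_sge = 4 } };
 *	struct spdk_mlx5_cq *cq;
 *	struct spdk_mlx5_qp *qp;
 *
 *	spdk_mlx5_cq_create(pd, &cq_attr, &cq);
 *	spdk_mlx5_qp_create(pd, cq, &qp_attr, &qp);
 *	...
 *	spdk_mlx5_qp_destroy(qp);
 *	spdk_mlx5_cq_destroy(cq);
 */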
int
spdk_mlx5_cq_create(struct ibv_pd *pd, struct spdk_mlx5_cq_attr *cq_attr,
		    struct spdk_mlx5_cq **cq_out)
{
	struct spdk_mlx5_cq *cq;
	int rc;

	cq = calloc(1, sizeof(*cq));
	if (!cq) {
		return -ENOMEM;
	}

	rc = mlx5_cq_init(pd, cq_attr, cq);
	if (rc) {
		free(cq);
		return rc;
	}
	*cq_out = cq;

	return 0;
}

int
spdk_mlx5_cq_destroy(struct spdk_mlx5_cq *cq)
{
	if (cq->qps_count) {
		SPDK_ERRLOG("CQ has %u bound QPs\n", cq->qps_count);
		return -EBUSY;
	}

	mlx5_cq_deinit(cq);
	free(cq);

	return 0;
}

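/* Create an RC QP bound to the given CQ and connect it to itself, see mlx5_qp_connect() */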
int
spdk_mlx5_qp_create(struct ibv_pd *pd, struct spdk_mlx5_cq *cq, struct spdk_mlx5_qp_attr *qp_attr,
		    struct spdk_mlx5_qp **qp_out)
{
	int rc;
	struct spdk_mlx5_qp *qp;

	qp = calloc(1, sizeof(*qp));
	if (!qp) {
		return -ENOMEM;
	}

	rc = mlx5_qp_init(pd, qp_attr, cq->verbs_cq, qp);
	if (rc) {
		free(qp);
		return rc;
	}
	qp->cq = cq;
	rc = mlx5_cq_add_qp(cq, qp);
	if (rc) {
		mlx5_qp_destroy(qp);
		free(qp);
		return rc;
	}
	*qp_out = qp;

	return 0;
}

void
spdk_mlx5_qp_destroy(struct spdk_mlx5_qp *qp)
{
	mlx5_cq_remove_qp(qp->cq, qp);
	mlx5_qp_destroy(qp);
	free(qp);
}

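/* Move the QP to the ERR state so that all outstanding work requests complete with a flush error */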
int
spdk_mlx5_qp_set_error_state(struct spdk_mlx5_qp *qp)
{
	struct ibv_qp_attr attr = {
		.qp_state = IBV_QPS_ERR,
	};

	return ibv_modify_qp(qp->verbs_qp, &attr, IBV_QP_STATE);
}

struct ibv_qp *
spdk_mlx5_qp_get_verbs_qp(struct spdk_mlx5_qp *qp)
{
	return qp->verbs_qp;
}
645