xref: /spdk/lib/mlx5/mlx5_umr.c (revision 57fd99b91e71a4baa5543e19ff83958dc99d4dac)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  */
4 
5 #include <infiniband/verbs.h>
6 
7 #include "spdk/log.h"
8 #include "spdk/util.h"
9 #include "spdk/likely.h"
10 #include "spdk/thread.h"
11 #include "spdk/tree.h"
12 
13 #include "spdk_internal/rdma_utils.h"
14 #include "mlx5_priv.h"
15 #include "mlx5_ifc.h"
16 
/* Any flag bit outside this mask is rejected by the mkey pool API. */
#define MLX5_UMR_POOL_VALID_FLAGS_MASK (~(SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO))
/* BSF 'p_type' value selecting the crypto BSF layout. */
#define MLX5_CRYPTO_BSF_P_TYPE_CRYPTO (0x1)
/* Encoded BSF size field value meaning a 64-byte BSF. */
#define MLX5_CRYPTO_BSF_SIZE_64B (0x2)

/* RB tree of pool objects, ordered by mkey id (see mlx5_key_obj_compare). */
RB_HEAD(mlx5_mkeys_tree, spdk_mlx5_mkey_pool_obj);
22 
/* Relaxed ordering capabilities reported by the device via QUERY_HCA_CAP
 * (general device caps 2); used to pick the RO bits set in new mkeys. */
struct mlx5_relaxed_ordering_caps {
	bool relaxed_ordering_write_pci_enabled;
	bool relaxed_ordering_write;
	bool relaxed_ordering_read;
	bool relaxed_ordering_write_umr;
	bool relaxed_ordering_read_umr;
};
30 
/* Parameters for creating an indirect (KLM) mkey via a DEVX command. */
struct mlx5_mkey_attr {
	uint64_t addr;	/* Start address the mkey maps */
	uint64_t size;	/* Mapped length in bytes */
	uint32_t log_entity_size;	/* 0 -> KLM mode; non-zero -> fixed buffer size (KLMFBS) mode */
	struct mlx5_wqe_data_seg *klm;	/* Optional initial translation entries (may be NULL) */
	uint32_t klm_count;	/* Number of entries in 'klm' */
	/* Size of bsf in octowords. If 0 then bsf is disabled */
	uint32_t bsf_octowords;
	bool crypto_en;	/* Enable crypto offload on this mkey */
	bool relaxed_ordering_write;
	bool relaxed_ordering_read;
};
43 
/* Wrapper around a DEVX-created mkey object. */
struct mlx5_mkey {
	struct mlx5dv_devx_obj *devx_obj;	/* Owning devx object, destroyed with the mkey */
	uint32_t mkey;	/* mkey id: (mkey_index << 8) | variant byte (0x42) */
	uint64_t addr;
};
49 
/* A pool of pre-created UMR-capable mkeys for one (pd, flags) pair. */
struct spdk_mlx5_mkey_pool {
	struct ibv_pd *pd;
	struct spdk_mempool *mpool;	/* Elements are struct spdk_mlx5_mkey_pool_obj */
	struct mlx5_mkeys_tree tree;	/* Lookup of pool objects by mkey id */
	struct mlx5_mkey **mkeys;	/* Backing devx mkeys, one per mempool element */
	uint32_t num_mkeys;
	uint32_t refcnt;	/* References taken via spdk_mlx5_mkey_pool_get_ref() */
	uint32_t flags;	/* SPDK_MLX5_MKEY_POOL_FLAG_* */
	TAILQ_ENTRY(spdk_mlx5_mkey_pool) link;	/* Entry in g_mkey_pools */
};
60 
61 static int
62 mlx5_key_obj_compare(struct spdk_mlx5_mkey_pool_obj *key1, struct spdk_mlx5_mkey_pool_obj *key2)
63 {
64 	return key1->mkey < key2->mkey ? -1 : key1->mkey > key2->mkey;
65 }
66 
/* Generate the static RB tree helpers used for mkey-id lookup. */
RB_GENERATE_STATIC(mlx5_mkeys_tree, spdk_mlx5_mkey_pool_obj, node, mlx5_key_obj_compare);

/* All registered mkey pools; both list and pool refcounts are guarded by
 * g_mkey_pool_lock. */
static TAILQ_HEAD(mlx5_mkey_pool_head,
		  spdk_mlx5_mkey_pool) g_mkey_pools = TAILQ_HEAD_INITIALIZER(g_mkey_pools);
static pthread_mutex_t g_mkey_pool_lock = PTHREAD_MUTEX_INITIALIZER;

/* Max translation entries advertised in the mkey's translation table size. */
#define SPDK_KLM_MAX_TRANSLATION_ENTRIES_NUM   128
74 
75 static struct mlx5_mkey *
76 mlx5_mkey_create(struct ibv_pd *pd, struct mlx5_mkey_attr *attr)
77 {
78 	struct mlx5_wqe_data_seg *klms = attr->klm;
79 	uint32_t klm_count = attr->klm_count;
80 	int in_size_dw = DEVX_ST_SZ_DW(create_mkey_in) +
81 			 (klm_count ? SPDK_ALIGN_CEIL(klm_count, 4) : 0) * DEVX_ST_SZ_DW(klm);
82 	uint32_t in[in_size_dw];
83 	uint32_t out[DEVX_ST_SZ_DW(create_mkey_out)] = {0};
84 	void *mkc;
85 	uint32_t translation_size;
86 	struct mlx5_mkey *cmkey;
87 	struct ibv_context *ctx = pd->context;
88 	uint32_t pd_id = 0;
89 	uint32_t i;
90 	uint8_t *klm;
91 
92 	cmkey = calloc(1, sizeof(*cmkey));
93 	if (!cmkey) {
94 		SPDK_ERRLOG("failed to alloc cross_mkey\n");
95 		return NULL;
96 	}
97 
98 	memset(in, 0, in_size_dw * 4);
99 	DEVX_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
100 	mkc = DEVX_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
101 
102 	if (klm_count > 0) {
103 		klm = (uint8_t *)DEVX_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
104 		translation_size = SPDK_ALIGN_CEIL(klm_count, 4);
105 
106 		for (i = 0; i < klm_count; i++) {
107 			DEVX_SET(klm, klm, byte_count, klms[i].byte_count);
108 			DEVX_SET(klm, klm, mkey, klms[i].lkey);
109 			DEVX_SET64(klm, klm, address, klms[i].addr);
110 			klms += DEVX_ST_SZ_BYTES(klm);
111 		}
112 
113 		for (; i < translation_size; i++) {
114 			DEVX_SET(klm, klms, byte_count, 0x0);
115 			DEVX_SET(klm, klms, mkey, 0x0);
116 			DEVX_SET64(klm, klms, address, 0x0);
117 			klm += DEVX_ST_SZ_BYTES(klm);
118 		}
119 	}
120 
121 	DEVX_SET(mkc, mkc, access_mode_1_0, attr->log_entity_size ?
122 		 MLX5_MKC_ACCESS_MODE_KLMFBS :
123 		 MLX5_MKC_ACCESS_MODE_KLMS);
124 	DEVX_SET(mkc, mkc, log_page_size, attr->log_entity_size);
125 
126 	mlx5_get_pd_id(pd, &pd_id);
127 	DEVX_SET(create_mkey_in, in, translations_octword_actual_size, klm_count);
128 	if (klm_count == 0) {
129 		DEVX_SET(mkc, mkc, free, 0x1);
130 	}
131 	DEVX_SET(mkc, mkc, lw, 0x1);
132 	DEVX_SET(mkc, mkc, lr, 0x1);
133 	DEVX_SET(mkc, mkc, rw, 0x1);
134 	DEVX_SET(mkc, mkc, rr, 0x1);
135 	DEVX_SET(mkc, mkc, umr_en, 1);
136 	DEVX_SET(mkc, mkc, qpn, 0xffffff);
137 	DEVX_SET(mkc, mkc, pd, pd_id);
138 	DEVX_SET(mkc, mkc, translations_octword_size,
139 		 SPDK_KLM_MAX_TRANSLATION_ENTRIES_NUM);
140 	DEVX_SET(mkc, mkc, relaxed_ordering_write,
141 		 attr->relaxed_ordering_write);
142 	DEVX_SET(mkc, mkc, relaxed_ordering_read,
143 		 attr->relaxed_ordering_read);
144 	DEVX_SET64(mkc, mkc, start_addr, attr->addr);
145 	DEVX_SET64(mkc, mkc, len, attr->size);
146 	DEVX_SET(mkc, mkc, mkey_7_0, 0x42);
147 	if (attr->crypto_en) {
148 		DEVX_SET(mkc, mkc, crypto_en, 1);
149 	}
150 	if (attr->bsf_octowords) {
151 		DEVX_SET(mkc, mkc, bsf_en, 1);
152 		DEVX_SET(mkc, mkc, bsf_octword_size, attr->bsf_octowords);
153 	}
154 
155 	cmkey->devx_obj = mlx5dv_devx_obj_create(ctx, in, sizeof(in), out,
156 			  sizeof(out));
157 	if (!cmkey->devx_obj) {
158 		SPDK_ERRLOG("mlx5dv_devx_obj_create() failed to create mkey, errno:%d\n", errno);
159 		goto out_err;
160 	}
161 
162 	cmkey->mkey = DEVX_GET(create_mkey_out, out, mkey_index) << 8 | 0x42;
163 	return cmkey;
164 
165 out_err:
166 	free(cmkey);
167 	return NULL;
168 }
169 
170 static int
171 mlx5_mkey_destroy(struct mlx5_mkey *mkey)
172 {
173 	int ret = 0;
174 
175 	if (mkey->devx_obj) {
176 		ret = mlx5dv_devx_obj_destroy(mkey->devx_obj);
177 	}
178 
179 	free(mkey);
180 
181 	return ret;
182 }
183 
184 static int
185 mlx5_query_relaxed_ordering_caps(struct ibv_context *context,
186 				 struct mlx5_relaxed_ordering_caps *caps)
187 {
188 	uint8_t in[DEVX_ST_SZ_BYTES(query_hca_cap_in)] = {};
189 	uint8_t out[DEVX_ST_SZ_BYTES(query_hca_cap_out)] = {};
190 	int ret;
191 
192 	DEVX_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
193 	DEVX_SET(query_hca_cap_in, in, op_mod,
194 		 MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE_CAP_2);
195 	ret = mlx5dv_devx_general_cmd(context, in, sizeof(in),
196 				      out, sizeof(out));
197 	if (ret) {
198 		return ret;
199 	}
200 
201 	caps->relaxed_ordering_write_pci_enabled = DEVX_GET(query_hca_cap_out,
202 			out, capability.cmd_hca_cap.relaxed_ordering_write_pci_enabled);
203 	caps->relaxed_ordering_write = DEVX_GET(query_hca_cap_out, out,
204 						capability.cmd_hca_cap.relaxed_ordering_write);
205 	caps->relaxed_ordering_read = DEVX_GET(query_hca_cap_out, out,
206 					       capability.cmd_hca_cap.relaxed_ordering_read);
207 	caps->relaxed_ordering_write_umr = DEVX_GET(query_hca_cap_out,
208 					   out, capability.cmd_hca_cap.relaxed_ordering_write_umr);
209 	caps->relaxed_ordering_read_umr = DEVX_GET(query_hca_cap_out,
210 					  out, capability.cmd_hca_cap.relaxed_ordering_read_umr);
211 	return 0;
212 }
213 
214 static int
215 mlx5_mkey_pool_create_mkey(struct mlx5_mkey **_mkey, struct ibv_pd *pd,
216 			   struct mlx5_relaxed_ordering_caps *caps, uint32_t flags)
217 {
218 	struct mlx5_mkey *mkey;
219 	struct mlx5_mkey_attr mkey_attr = {};
220 	uint32_t bsf_size = 0;
221 
222 	mkey_attr.addr = 0;
223 	mkey_attr.size = 0;
224 	mkey_attr.log_entity_size = 0;
225 	mkey_attr.relaxed_ordering_write = caps->relaxed_ordering_write;
226 	mkey_attr.relaxed_ordering_read = caps->relaxed_ordering_read;
227 	mkey_attr.klm_count = 0;
228 	mkey_attr.klm = NULL;
229 	if (flags & SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO) {
230 		mkey_attr.crypto_en = true;
231 		bsf_size += 64;
232 	}
233 	mkey_attr.bsf_octowords = bsf_size / 16;
234 
235 	mkey = mlx5_mkey_create(pd, &mkey_attr);
236 	if (!mkey) {
237 		SPDK_ERRLOG("Failed to create mkey on dev %s\n", pd->context->device->name);
238 		return -EINVAL;
239 	}
240 	*_mkey = mkey;
241 
242 	return 0;
243 }
244 
245 static void
246 mlx5_set_mkey_in_pool(struct spdk_mempool *mp, void *cb_arg, void *_mkey, unsigned obj_idx)
247 {
248 	struct spdk_mlx5_mkey_pool_obj *mkey = _mkey;
249 	struct spdk_mlx5_mkey_pool *pool = cb_arg;
250 
251 	assert(obj_idx < pool->num_mkeys);
252 	assert(pool->mkeys[obj_idx] != NULL);
253 	mkey->mkey = pool->mkeys[obj_idx]->mkey;
254 	mkey->pool_flag = pool->flags & 0xf;
255 	mkey->sig.sigerr_count = 1;
256 	mkey->sig.sigerr = false;
257 
258 	RB_INSERT(mlx5_mkeys_tree, &pool->tree, mkey);
259 }
260 
/* Pool-name suffixes indexed by pool flag value, used to build the mempool
 * name. NOTE(review): there is no entry for flags == 0, which would yield a
 * NULL name in mlx5_mkey_pools_init() — confirm callers always pass a flag. */
static const char *g_mkey_pool_names[] = {
	[SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO] = "crypto",
};
264 
265 static void
266 mlx5_mkey_pool_destroy(struct spdk_mlx5_mkey_pool *pool)
267 {
268 	uint32_t i;
269 
270 	if (pool->mpool) {
271 		spdk_mempool_free(pool->mpool);
272 	}
273 	if (pool->mkeys) {
274 		for (i = 0; i < pool->num_mkeys; i++) {
275 			if (pool->mkeys[i]) {
276 				mlx5_mkey_destroy(pool->mkeys[i]);
277 				pool->mkeys[i] = NULL;
278 			}
279 		}
280 		free(pool->mkeys);
281 	}
282 	TAILQ_REMOVE(&g_mkey_pools, pool, link);
283 	free(pool);
284 }
285 
286 static int
287 mlx5_mkey_pools_init(struct spdk_mlx5_mkey_pool_param *params, struct ibv_pd *pd)
288 {
289 	struct spdk_mlx5_mkey_pool *new_pool;
290 	struct mlx5_mkey **mkeys;
291 	struct mlx5_relaxed_ordering_caps caps;
292 	uint32_t j, pdn;
293 	int rc;
294 	char pool_name[32];
295 
296 	new_pool = calloc(1, sizeof(*new_pool));
297 	if (!new_pool) {
298 		rc = -ENOMEM;
299 		goto err;
300 	}
301 	TAILQ_INSERT_TAIL(&g_mkey_pools, new_pool, link);
302 	rc = mlx5_query_relaxed_ordering_caps(pd->context, &caps);
303 	if (rc) {
304 		SPDK_ERRLOG("Failed to get relaxed ordering capabilities, dev %s\n",
305 			    pd->context->device->dev_name);
306 		goto err;
307 	}
308 	mkeys = calloc(params->mkey_count, sizeof(struct mlx5_mkey *));
309 	if (!mkeys) {
310 		rc = -ENOMEM;
311 		goto err;
312 	}
313 	new_pool->mkeys = mkeys;
314 	new_pool->num_mkeys = params->mkey_count;
315 	new_pool->pd = pd;
316 	new_pool->flags = params->flags;
317 	for (j = 0; j < params->mkey_count; j++) {
318 		rc = mlx5_mkey_pool_create_mkey(&mkeys[j], pd, &caps, params->flags);
319 		if (rc) {
320 			goto err;
321 		}
322 	}
323 	rc = mlx5_get_pd_id(pd, &pdn);
324 	if (rc) {
325 		SPDK_ERRLOG("Failed to get pdn, pd %p\n", pd);
326 		goto err;
327 	}
328 	rc = snprintf(pool_name, 32, "%s_%s_%04u", pd->context->device->name,
329 		      g_mkey_pool_names[new_pool->flags], pdn);
330 	if (rc < 0) {
331 		goto err;
332 	}
333 	RB_INIT(&new_pool->tree);
334 	new_pool->mpool = spdk_mempool_create_ctor(pool_name, params->mkey_count,
335 			  sizeof(struct spdk_mlx5_mkey_pool_obj),
336 			  params->cache_per_thread, SPDK_ENV_SOCKET_ID_ANY,
337 			  mlx5_set_mkey_in_pool, new_pool);
338 	if (!new_pool->mpool) {
339 		SPDK_ERRLOG("Failed to create mempool\n");
340 		rc = -ENOMEM;
341 		goto err;
342 	}
343 
344 	return 0;
345 
346 err:
347 	mlx5_mkey_pool_destroy(new_pool);
348 
349 	return rc;
350 }
351 
352 static struct spdk_mlx5_mkey_pool *
353 mlx5_mkey_pool_get(struct ibv_pd *pd, uint32_t flags)
354 {
355 	struct spdk_mlx5_mkey_pool *pool;
356 
357 	TAILQ_FOREACH(pool, &g_mkey_pools, link) {
358 		if (pool->pd == pd && pool->flags == flags) {
359 			return pool;
360 		}
361 	}
362 
363 	return NULL;
364 }
365 
366 int
367 spdk_mlx5_mkey_pool_init(struct spdk_mlx5_mkey_pool_param *params, struct ibv_pd *pd)
368 {
369 	int rc;
370 
371 	if (!pd) {
372 		return -EINVAL;
373 	}
374 
375 	if (!params || !params->mkey_count) {
376 		return -EINVAL;
377 	}
378 	if ((params->flags & MLX5_UMR_POOL_VALID_FLAGS_MASK) != 0) {
379 		SPDK_ERRLOG("Invalid flags %x\n", params->flags);
380 		return -EINVAL;
381 	}
382 	if (params->cache_per_thread > params->mkey_count || !params->cache_per_thread) {
383 		params->cache_per_thread = params->mkey_count * 3 / 4 / spdk_env_get_core_count();
384 	}
385 
386 	pthread_mutex_lock(&g_mkey_pool_lock);
387 	if (mlx5_mkey_pool_get(pd, params->flags) != NULL) {
388 		pthread_mutex_unlock(&g_mkey_pool_lock);
389 		return -EEXIST;
390 	}
391 
392 	rc = mlx5_mkey_pools_init(params, pd);
393 	pthread_mutex_unlock(&g_mkey_pool_lock);
394 
395 	return rc;
396 }
397 
398 int
399 spdk_mlx5_mkey_pool_destroy(uint32_t flags, struct ibv_pd *pd)
400 {
401 	struct spdk_mlx5_mkey_pool *pool;
402 	int rc = 0;
403 
404 	if (!pd) {
405 		return -EINVAL;
406 	}
407 
408 	if ((flags & MLX5_UMR_POOL_VALID_FLAGS_MASK) != 0) {
409 		SPDK_ERRLOG("Invalid flags %x\n", flags);
410 		return -EINVAL;
411 	}
412 
413 	pthread_mutex_lock(&g_mkey_pool_lock);
414 	pool = mlx5_mkey_pool_get(pd, flags);
415 	if (!pool) {
416 		SPDK_ERRLOG("Cant find a pool for PD %p, flags %x\n", pd, flags);
417 		pthread_mutex_unlock(&g_mkey_pool_lock);
418 		return -ENODEV;
419 	}
420 	if (pool->refcnt) {
421 		SPDK_WARNLOG("Can't delete pool pd %p, dev %s\n", pool->pd, pool->pd->context->device->dev_name);
422 		rc = -EAGAIN;
423 	} else {
424 		mlx5_mkey_pool_destroy(pool);
425 	}
426 	pthread_mutex_unlock(&g_mkey_pool_lock);
427 
428 	return rc;
429 }
430 
431 struct spdk_mlx5_mkey_pool *
432 spdk_mlx5_mkey_pool_get_ref(struct ibv_pd *pd, uint32_t flags)
433 {
434 	struct spdk_mlx5_mkey_pool *pool;
435 
436 	if ((flags & MLX5_UMR_POOL_VALID_FLAGS_MASK) != 0) {
437 		SPDK_ERRLOG("Invalid flags %x\n", flags);
438 		return NULL;
439 	}
440 
441 	pthread_mutex_lock(&g_mkey_pool_lock);
442 	pool = mlx5_mkey_pool_get(pd, flags);
443 	if (pool) {
444 		pool->refcnt++;
445 	}
446 	pthread_mutex_unlock(&g_mkey_pool_lock);
447 
448 	return pool;
449 }
450 
451 void
452 spdk_mlx5_mkey_pool_put_ref(struct spdk_mlx5_mkey_pool *pool)
453 {
454 	pthread_mutex_lock(&g_mkey_pool_lock);
455 	pool->refcnt--;
456 	pthread_mutex_unlock(&g_mkey_pool_lock);
457 }
458 
459 int
460 spdk_mlx5_mkey_pool_get_bulk(struct spdk_mlx5_mkey_pool *pool,
461 			     struct spdk_mlx5_mkey_pool_obj **mkeys, uint32_t mkeys_count)
462 {
463 	assert(pool->mpool);
464 
465 	return spdk_mempool_get_bulk(pool->mpool, (void **)mkeys, mkeys_count);
466 }
467 
468 void
469 spdk_mlx5_mkey_pool_put_bulk(struct spdk_mlx5_mkey_pool *pool,
470 			     struct spdk_mlx5_mkey_pool_obj **mkeys, uint32_t mkeys_count)
471 {
472 	assert(pool->mpool);
473 
474 	spdk_mempool_put_bulk(pool->mpool, (void **)mkeys, mkeys_count);
475 }
476 
477 static inline void
478 _mlx5_set_umr_ctrl_seg_mtt(struct mlx5_wqe_umr_ctrl_seg *ctrl, uint32_t klms_octowords,
479 			   uint64_t mkey_mask)
480 {
481 	ctrl->flags |= MLX5_WQE_UMR_CTRL_FLAG_INLINE;
482 	ctrl->klm_octowords = htobe16(klms_octowords);
483 	/*
484 	 * Going to modify two properties of KLM mkey:
485 	 *  1. 'free' field: change this mkey from in free to in use
486 	 *  2. 'len' field: to include the total bytes in iovec
487 	 */
488 	mkey_mask |= MLX5_WQE_UMR_CTRL_MKEY_MASK_FREE | MLX5_WQE_UMR_CTRL_MKEY_MASK_LEN;
489 
490 	ctrl->mkey_mask |= htobe64(mkey_mask);
491 }
492 
/* Inline-MTT UMR ctrl setup with no additional mkey mask bits. */
static inline void
mlx5_set_umr_ctrl_seg_mtt(struct mlx5_wqe_umr_ctrl_seg *ctrl, uint32_t klms_octowords)
{
	_mlx5_set_umr_ctrl_seg_mtt(ctrl, klms_octowords, 0);
}
498 
499 static inline void
500 mlx5_set_umr_ctrl_seg_bsf_size(struct mlx5_wqe_umr_ctrl_seg *ctrl, int bsf_size)
501 {
502 	ctrl->bsf_octowords = htobe16(SPDK_ALIGN_CEIL(SPDK_CEIL_DIV(bsf_size, 16), 4));
503 }
504 
/* Set the mkey context 'len' field to the total UMR length (big endian). */
static inline void
mlx5_set_umr_mkey_seg_mtt(struct mlx5_wqe_mkey_context_seg *mkey,
			  struct spdk_mlx5_umr_attr *umr_attr)
{
	mkey->len = htobe64(umr_attr->umr_len);
}
511 
/* Build the 64-byte mkey context segment: zeroed except for the length. */
static void
mlx5_set_umr_mkey_seg(struct mlx5_wqe_mkey_context_seg *mkey,
		      struct spdk_mlx5_umr_attr *umr_attr)
{
	memset(mkey, 0, 64);
	mlx5_set_umr_mkey_seg_mtt(mkey, umr_attr);
}
519 
520 static inline void
521 mlx5_set_umr_inline_klm_seg(struct mlx5_wqe_umr_klm_seg *klm, struct ibv_sge *sge)
522 {
523 	klm->byte_count = htobe32(sge->length);
524 	klm->mkey = htobe32(sge->lkey);
525 	klm->address = htobe64(sge->addr);
526 }
527 
/*
 * Write the inline KLM translation table into the send queue, one WQE
 * building block (4 KLM entries, 64 bytes) at a time; send queue wrap-around
 * is handled by mlx5_qp_get_next_wqebb(). The tail (sge_count % 4) entries
 * are followed by zeroed pad KLMs so the table stays 64-byte aligned.
 *
 * Returns a pointer to the WQE building block right after the table, where
 * the next segment (e.g. the crypto BSF) is to be written.
 */
static void *
mlx5_build_inline_mtt(struct mlx5_hw_qp *qp, uint32_t *to_end, struct mlx5_wqe_umr_klm_seg *dst_klm,
		      struct spdk_mlx5_umr_attr *umr_attr)
{
	struct ibv_sge *src_sge = umr_attr->sge;
	int num_wqebbs = umr_attr->sge_count / 4;
	int tail = umr_attr->sge_count & 0x3;
	int i;

	/* Full building blocks: 4 KLMs each. */
	for (i = 0; i < num_wqebbs; i++) {
		mlx5_set_umr_inline_klm_seg(&dst_klm[0], src_sge++);
		mlx5_set_umr_inline_klm_seg(&dst_klm[1], src_sge++);
		mlx5_set_umr_inline_klm_seg(&dst_klm[2], src_sge++);
		mlx5_set_umr_inline_klm_seg(&dst_klm[3], src_sge++);
		/* sizeof(*dst_klm) * 4 == MLX5_SEND_WQE_BB */
		dst_klm = mlx5_qp_get_next_wqebb(qp, to_end, dst_klm);
	}

	if (!tail) {
		return dst_klm;
	}

	/* Partial last block: 'tail' real entries ... */
	for (i = 0; i < tail; i++) {
		mlx5_set_umr_inline_klm_seg(&dst_klm[i], src_sge++);
	}

	/* Fill PAD entries to make whole mtt aligned to 64B(MLX5_SEND_WQE_BB) */
	memset(&dst_klm[i], 0, MLX5_SEND_WQE_BB - sizeof(struct mlx5_wqe_umr_klm_seg) * tail);

	return mlx5_qp_get_next_wqebb(qp, to_end, dst_klm);
}
559 
560 static inline void
561 mlx5_set_umr_crypto_bsf_seg(struct mlx5_crypto_bsf_seg *bsf, struct spdk_mlx5_umr_crypto_attr *attr,
562 			    uint32_t raw_data_size, uint8_t bsf_size)
563 {
564 	uint64_t *iv = (void *)bsf->xts_initial_tweak;
565 
566 	memset(bsf, 0, sizeof(*bsf));
567 	switch (attr->tweak_mode) {
568 	case SPDK_MLX5_CRYPTO_KEY_TWEAK_MODE_SIMPLE_LBA_LE:
569 		iv[0] = htole64(attr->xts_iv);
570 		iv[1] = 0;
571 		break;
572 	case SPDK_MLX5_CRYPTO_KEY_TWEAK_MODE_SIMPLE_LBA_BE:
573 		iv[0] = 0;
574 		iv[1] = htobe64(attr->xts_iv);
575 		break;
576 	default:
577 		assert(false && "unsupported tweak mode");
578 		break;
579 	}
580 
581 	bsf->size_type = (bsf_size << 6) | MLX5_CRYPTO_BSF_P_TYPE_CRYPTO;
582 	bsf->enc_order = attr->enc_order;
583 	bsf->raw_data_size = htobe32(raw_data_size);
584 	bsf->crypto_block_size_pointer = attr->bs_selector;
585 	bsf->dek_pointer = htobe32(attr->dek_obj_id);
586 	*((uint64_t *)bsf->keytag) = attr->keytag;
587 }
588 
/*
 * Build and post a crypto UMR WQE when it does not fit contiguously before
 * the end of the send queue buffer: every segment past the first building
 * block is located via mlx5_qp_get_next_wqebb(), which wraps to the queue
 * start. The caller has already verified umr_wqe_n_bb blocks are available.
 */
static inline void
mlx5_umr_configure_with_wrap_around_crypto(struct spdk_mlx5_qp *qp,
		struct spdk_mlx5_umr_attr *umr_attr, struct spdk_mlx5_umr_crypto_attr *crypto_attr, uint64_t wr_id,
		uint32_t flags, uint32_t wqe_size, uint32_t umr_wqe_n_bb, uint32_t mtt_size)
{
	struct mlx5_hw_qp *hw = &qp->hw;
	struct mlx5_wqe_ctrl_seg *ctrl;
	struct mlx5_wqe_ctrl_seg *gen_ctrl;
	struct mlx5_wqe_umr_ctrl_seg *umr_ctrl;
	struct mlx5_wqe_mkey_context_seg *mkey;
	struct mlx5_wqe_umr_klm_seg *klm;
	struct mlx5_crypto_bsf_seg *bsf;
	uint8_t fm_ce_se;
	uint32_t pi, to_end;

	fm_ce_se = mlx5_qp_fm_ce_se_update(qp, (uint8_t)flags);

	ctrl = (struct mlx5_wqe_ctrl_seg *)mlx5_qp_get_wqe_bb(hw);
	pi = hw->sq_pi & (hw->sq_wqe_cnt - 1);
	to_end = (hw->sq_wqe_cnt - pi) * MLX5_SEND_WQE_BB;

	/*
	 * sizeof(gen_ctrl) + sizeof(umr_ctrl) == MLX5_SEND_WQE_BB,
	 * so do not need to worry about wqe buffer wrap around.
	 *
	 * build genenal ctrl segment
	 */
	gen_ctrl = ctrl;
	mlx5_set_ctrl_seg(gen_ctrl, hw->sq_pi, MLX5_OPCODE_UMR, 0,
			  hw->qp_num, fm_ce_se,
			  SPDK_CEIL_DIV(wqe_size, 16), 0,
			  htobe32(umr_attr->mkey));

	/* build umr ctrl segment */
	umr_ctrl = (struct mlx5_wqe_umr_ctrl_seg *)(gen_ctrl + 1);
	memset(umr_ctrl, 0, sizeof(*umr_ctrl));
	mlx5_set_umr_ctrl_seg_mtt(umr_ctrl, mtt_size);
	mlx5_set_umr_ctrl_seg_bsf_size(umr_ctrl, sizeof(struct mlx5_crypto_bsf_seg));

	/* build mkey context segment */
	mkey = mlx5_qp_get_next_wqebb(hw, &to_end, ctrl);
	mlx5_set_umr_mkey_seg(mkey, umr_attr);

	/* inline KLM table, then the crypto BSF directly after it */
	klm = mlx5_qp_get_next_wqebb(hw, &to_end, mkey);
	bsf = mlx5_build_inline_mtt(hw, &to_end, klm, umr_attr);

	mlx5_set_umr_crypto_bsf_seg(bsf, crypto_attr, umr_attr->umr_len, MLX5_CRYPTO_BSF_SIZE_64B);

	mlx5_qp_wqe_submit(qp, ctrl, umr_wqe_n_bb, pi);

	mlx5_qp_set_comp(qp, pi, wr_id, fm_ce_se, umr_wqe_n_bb);
	assert(qp->tx_available >= umr_wqe_n_bb);
	qp->tx_available -= umr_wqe_n_bb;
}
643 
/*
 * Build and post a crypto UMR WQE that fits contiguously in the send queue
 * (no wrap-around): general ctrl, UMR ctrl, mkey context, inline KLM table
 * (zero-padded to a 64B multiple) and the crypto BSF, laid out back to back.
 */
static inline void
mlx5_umr_configure_full_crypto(struct spdk_mlx5_qp *dv_qp, struct spdk_mlx5_umr_attr *umr_attr,
			       struct spdk_mlx5_umr_crypto_attr *crypto_attr, uint64_t wr_id,
			       uint32_t flags, uint32_t wqe_size, uint32_t umr_wqe_n_bb,
			       uint32_t mtt_size)
{
	struct mlx5_hw_qp *hw = &dv_qp->hw;
	struct mlx5_wqe_ctrl_seg *ctrl;
	struct mlx5_wqe_ctrl_seg *gen_ctrl;
	struct mlx5_wqe_umr_ctrl_seg *umr_ctrl;
	struct mlx5_wqe_mkey_context_seg *mkey;
	struct mlx5_wqe_umr_klm_seg *klm;
	struct mlx5_crypto_bsf_seg *bsf;
	uint8_t fm_ce_se;
	uint32_t pi;
	uint32_t i;

	fm_ce_se = mlx5_qp_fm_ce_se_update(dv_qp, (uint8_t)flags);

	ctrl = (struct mlx5_wqe_ctrl_seg *)mlx5_qp_get_wqe_bb(hw);
	pi = hw->sq_pi & (hw->sq_wqe_cnt - 1);
	gen_ctrl = ctrl;
	mlx5_set_ctrl_seg(gen_ctrl, hw->sq_pi, MLX5_OPCODE_UMR, 0,
			  hw->qp_num, fm_ce_se,
			  SPDK_CEIL_DIV(wqe_size, 16), 0,
			  htobe32(umr_attr->mkey));

	/* build umr ctrl segment */
	umr_ctrl = (struct mlx5_wqe_umr_ctrl_seg *)(gen_ctrl + 1);
	memset(umr_ctrl, 0, sizeof(*umr_ctrl));
	mlx5_set_umr_ctrl_seg_mtt(umr_ctrl, mtt_size);
	mlx5_set_umr_ctrl_seg_bsf_size(umr_ctrl, sizeof(struct mlx5_crypto_bsf_seg));

	/* build mkey context segment */
	mkey = (struct mlx5_wqe_mkey_context_seg *)(umr_ctrl + 1);
	mlx5_set_umr_mkey_seg(mkey, umr_attr);

	klm = (struct mlx5_wqe_umr_klm_seg *)(mkey + 1);
	for (i = 0; i < umr_attr->sge_count; i++) {
		mlx5_set_umr_inline_klm_seg(klm, &umr_attr->sge[i]);
		/* sizeof(*klm) * 4 == MLX5_SEND_WQE_BB */
		klm = klm + 1;
	}
	/* fill PAD if existing */
	/* PAD entries is to make whole mtt aligned to 64B(MLX5_SEND_WQE_BB),
	 * So it will not happen wrap around during fill PAD entries. */
	for (; i < mtt_size; i++) {
		memset(klm, 0, sizeof(*klm));
		klm = klm + 1;
	}

	bsf = (struct mlx5_crypto_bsf_seg *)klm;
	mlx5_set_umr_crypto_bsf_seg(bsf, crypto_attr, umr_attr->umr_len, MLX5_CRYPTO_BSF_SIZE_64B);

	mlx5_qp_wqe_submit(dv_qp, ctrl, umr_wqe_n_bb, pi);

	mlx5_qp_set_comp(dv_qp, pi, wr_id, fm_ce_se, umr_wqe_n_bb);
	assert(dv_qp->tx_available >= umr_wqe_n_bb);
	dv_qp->tx_available -= umr_wqe_n_bb;
}
704 
705 int
706 spdk_mlx5_umr_configure_crypto(struct spdk_mlx5_qp *qp, struct spdk_mlx5_umr_attr *umr_attr,
707 			       struct spdk_mlx5_umr_crypto_attr *crypto_attr, uint64_t wr_id, uint32_t flags)
708 {
709 	struct mlx5_hw_qp *hw = &qp->hw;
710 	uint32_t pi, to_end, umr_wqe_n_bb;
711 	uint32_t wqe_size, mtt_size;
712 	uint32_t inline_klm_size;
713 
714 	if (!spdk_unlikely(umr_attr->sge_count)) {
715 		return -EINVAL;
716 	}
717 
718 	pi = hw->sq_pi & (hw->sq_wqe_cnt - 1);
719 	to_end = (hw->sq_wqe_cnt - pi) * MLX5_SEND_WQE_BB;
720 
721 	/*
722 	 * UMR WQE LAYOUT:
723 	 * -----------------------------------------------------------------------
724 	 * | gen_ctrl | umr_ctrl | mkey_ctx | inline klm mtt | inline crypto bsf |
725 	 * -----------------------------------------------------------------------
726 	 *   16bytes    48bytes    64bytes   sge_count*16 bytes      64 bytes
727 	 *
728 	 * Note: size of inline klm mtt should be aligned to 64 bytes.
729 	 */
730 	wqe_size = sizeof(struct mlx5_wqe_ctrl_seg) + sizeof(struct mlx5_wqe_umr_ctrl_seg) +
731 		   sizeof(struct mlx5_wqe_mkey_context_seg);
732 	mtt_size = SPDK_ALIGN_CEIL(umr_attr->sge_count, 4);
733 	inline_klm_size = mtt_size * sizeof(struct mlx5_wqe_umr_klm_seg);
734 	wqe_size += inline_klm_size;
735 	wqe_size += sizeof(struct mlx5_crypto_bsf_seg);
736 
737 	umr_wqe_n_bb = SPDK_CEIL_DIV(wqe_size, MLX5_SEND_WQE_BB);
738 	if (spdk_unlikely(umr_wqe_n_bb > qp->tx_available)) {
739 		return -ENOMEM;
740 	}
741 	if (spdk_unlikely(umr_attr->sge_count > qp->max_send_sge)) {
742 		return -E2BIG;
743 	}
744 
745 	if (spdk_unlikely(to_end < wqe_size)) {
746 		mlx5_umr_configure_with_wrap_around_crypto(qp, umr_attr, crypto_attr, wr_id, flags, wqe_size,
747 				umr_wqe_n_bb,
748 				mtt_size);
749 	} else {
750 		mlx5_umr_configure_full_crypto(qp, umr_attr, crypto_attr, wr_id, flags, wqe_size, umr_wqe_n_bb,
751 					       mtt_size);
752 	}
753 
754 	return 0;
755 }
756