/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#include "spdk/env.h"
#include "spdk/thread.h"
#include "spdk/queue.h"
#include "spdk/log.h"
#include "spdk/string.h"
#include "spdk/likely.h"
#include "spdk/dma.h"
#include "spdk/json.h"
#include "spdk/util.h"

#include "spdk_internal/mlx5.h"
#include "spdk_internal/rdma_utils.h"
#include "spdk/accel_module.h"
#include "spdk_internal/assert.h"
#include "spdk_internal/sgl.h"
#include "accel_mlx5.h"

#include <infiniband/mlx5dv.h>
#include <rdma/rdma_cma.h>

#define ACCEL_MLX5_QP_SIZE (256u)
#define ACCEL_MLX5_NUM_REQUESTS (2048u - 1)
#define ACCEL_MLX5_RECOVER_POLLER_PERIOD_US (10000)
#define ACCEL_MLX5_MAX_SGE (16u)
#define ACCEL_MLX5_MAX_WC (64u)
#define ACCEL_MLX5_MAX_MKEYS_IN_TASK (16u)

/* Assume we have up to 16 devices */
#define ACCEL_MLX5_ALLOWED_DEVS_MAX_LEN ((SPDK_MLX5_DEV_MAX_NAME_LEN + 1) * 16)

#define ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, task)	\
do {							\
	assert((qp)->wrs_submitted < (qp)->wrs_max);	\
	(qp)->wrs_submitted++;				\
	assert((task)->num_wrs < UINT16_MAX);		\
	(task)->num_wrs++;				\
} while (0)

#define ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, task)	\
do {									\
	assert((dev)->wrs_in_cq < (dev)->wrs_in_cq_max);		\
	(dev)->wrs_in_cq++;						\
	assert((qp)->wrs_submitted < (qp)->wrs_max);			\
	(qp)->wrs_submitted++;						\
	assert((task)->num_wrs < UINT16_MAX);				\
	(task)->num_wrs++;						\
} while (0)

struct accel_mlx5_io_channel;
struct accel_mlx5_task;

struct accel_mlx5_dev_ctx {
	struct ibv_context *context;
	struct ibv_pd *pd;
	struct spdk_memory_domain *domain;
	TAILQ_ENTRY(accel_mlx5_dev_ctx) link;
	bool crypto_mkeys;
	bool crypto_multi_block;
};

struct accel_mlx5_module {
	struct spdk_accel_module_if module;
	struct accel_mlx5_dev_ctx *dev_ctxs;
	uint32_t num_ctxs;
	struct accel_mlx5_attr attr;
	char **allowed_devs;
	size_t allowed_devs_count;
	bool initialized;
	bool enabled;
	bool crypto_supported;
};

struct accel_mlx5_sge {
	uint32_t src_sge_count;
	uint32_t dst_sge_count;
	struct ibv_sge src_sge[ACCEL_MLX5_MAX_SGE];
	struct ibv_sge dst_sge[ACCEL_MLX5_MAX_SGE];
};

struct accel_mlx5_iov_sgl {
	struct iovec *iov;
	uint32_t iovcnt;
	uint32_t iov_offset;
};

struct accel_mlx5_task {
	struct spdk_accel_task base;
	struct accel_mlx5_iov_sgl src;
	struct accel_mlx5_iov_sgl dst;
	struct accel_mlx5_qp *qp;
	STAILQ_ENTRY(accel_mlx5_task) link;
	uint16_t num_reqs;
	uint16_t num_completed_reqs;
	uint16_t num_submitted_reqs;
	uint16_t num_ops; /* number of allocated mkeys */
	uint16_t blocks_per_req;
	uint16_t num_processed_blocks;
	uint16_t num_blocks;
	uint16_t num_wrs; /* Number of outstanding operations which consume a qp slot */
	union {
		uint8_t raw;
		struct {
			uint8_t inplace : 1;
			uint8_t enc_order : 2;
		};
	};
	/* Keep this array last since not all elements might be accessed; this reduces the
	 * amount of data to be cached */
	struct spdk_mlx5_mkey_pool_obj *mkeys[ACCEL_MLX5_MAX_MKEYS_IN_TASK];
};

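/*
 * Software state of the mlx5 QP used by a device within an io_channel: wrs_submitted is
 * compared against wrs_max to limit the number of outstanding WQEs, and "recovering"
 * marks a QP that hit an error completion and is being re-created.
 */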
struct accel_mlx5_qp {
	struct spdk_mlx5_qp *qp;
	struct ibv_qp *verbs_qp;
	struct accel_mlx5_dev *dev;
	struct accel_mlx5_io_channel *ch;
	/* Tasks submitted to HW. We can't complete a task, even in the error case, until we reap
	 * completions for all submitted requests */
	STAILQ_HEAD(, accel_mlx5_task) in_hw;
	uint16_t wrs_submitted;
	uint16_t wrs_max;
	bool recovering;
	struct spdk_poller *recover_poller;
};

struct accel_mlx5_dev {
	struct accel_mlx5_qp qp;
	struct spdk_mlx5_cq *cq;
	struct spdk_mlx5_mkey_pool *crypto_mkeys;
	struct spdk_rdma_utils_mem_map *mmap;
	struct accel_mlx5_dev_ctx *dev_ctx;
	uint16_t wrs_in_cq;
	uint16_t wrs_in_cq_max;
	uint16_t crypto_split_blocks;
	bool crypto_multi_block;
	/* Pending tasks waiting for request resources */
	STAILQ_HEAD(, accel_mlx5_task) nomem;
	TAILQ_ENTRY(accel_mlx5_dev) link;
};

struct accel_mlx5_io_channel {
	struct accel_mlx5_dev *devs;
	struct spdk_poller *poller;
	uint32_t num_devs;
	/* Index in \b devs to be used for operations in a round-robin manner */
	uint32_t dev_idx;
};

static struct accel_mlx5_module g_accel_mlx5;

static inline void
accel_mlx5_iov_sgl_init(struct accel_mlx5_iov_sgl *s, struct iovec *iov, uint32_t iovcnt)
{
	s->iov = iov;
	s->iovcnt = iovcnt;
	s->iov_offset = 0;
}

static inline void
accel_mlx5_iov_sgl_advance(struct accel_mlx5_iov_sgl *s, uint32_t step)
{
	s->iov_offset += step;
	while (s->iovcnt > 0) {
		assert(s->iov != NULL);
		if (s->iov_offset < s->iov->iov_len) {
			break;
		}

		s->iov_offset -= s->iov->iov_len;
		s->iov++;
		s->iovcnt--;
	}
}

static inline void
accel_mlx5_iov_sgl_unwind(struct accel_mlx5_iov_sgl *s, uint32_t max_iovs, uint32_t step)
{
	SPDK_DEBUGLOG(accel_mlx5, "iov %p, iovcnt %u, max %u, offset %u, step %u\n", s->iov, s->iovcnt,
		      max_iovs, s->iov_offset, step);
	while (s->iovcnt <= max_iovs) {
		assert(s->iov != NULL);
		if (s->iov_offset >= step) {
			s->iov_offset -= step;
			SPDK_DEBUGLOG(accel_mlx5, "\tEND, iov %p, iovcnt %u, offset %u\n", s->iov, s->iovcnt,
				      s->iov_offset);
			return;
		}
		step -= s->iov_offset;
		s->iov--;
		s->iovcnt++;
		s->iov_offset = s->iov->iov_len;
		SPDK_DEBUGLOG(accel_mlx5, "\tiov %p, iovcnt %u, offset %u, step %u\n", s->iov, s->iovcnt,
			      s->iov_offset, step);
	}

	SPDK_ERRLOG("Can't unwind iovs, remaining %u\n", step);
	assert(0);
}

static inline int
accel_mlx5_sge_unwind(struct ibv_sge *sge, uint32_t sge_count, uint32_t step)
{
	int i;

	assert(sge_count > 0);
	SPDK_DEBUGLOG(accel_mlx5, "sge %p, count %u, step %u\n", sge, sge_count, step);
	for (i = (int)sge_count - 1; i >= 0; i--) {
		if (sge[i].length > step) {
			sge[i].length -= step;
			SPDK_DEBUGLOG(accel_mlx5, "\tsge[%u] len %u, step %u\n", i, sge[i].length, step);
			return (int)i + 1;
		}
		SPDK_DEBUGLOG(accel_mlx5, "\tsge[%u] len %u, step %u\n", i, sge[i].length, step);
		step -= sge[i].length;
	}

	SPDK_ERRLOG("Can't unwind sge, remaining %u\n", step);
	assert(step == 0);

	return 0;
}

static inline void
accel_mlx5_task_complete(struct accel_mlx5_task *task)
{
	struct accel_mlx5_dev *dev = task->qp->dev;

	assert(task->num_reqs == task->num_completed_reqs);
	SPDK_DEBUGLOG(accel_mlx5, "Complete task %p, opc %d\n", task, task->base.op_code);

	if (task->num_ops) {
		spdk_mlx5_mkey_pool_put_bulk(dev->crypto_mkeys, task->mkeys, task->num_ops);
	}
	spdk_accel_task_complete(&task->base, 0);
}

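/* Complete the task with a negative errno, returning any reserved mkeys to the pool first. */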
static inline void
accel_mlx5_task_fail(struct accel_mlx5_task *task, int rc)
{
	struct accel_mlx5_dev *dev = task->qp->dev;

	assert(task->num_reqs == task->num_completed_reqs);
	SPDK_DEBUGLOG(accel_mlx5, "Fail task %p, opc %d, rc %d\n", task, task->base.op_code, rc);

	if (task->num_ops) {
		spdk_mlx5_mkey_pool_put_bulk(dev->crypto_mkeys, task->mkeys, task->num_ops);
	}
	spdk_accel_task_complete(&task->base, rc);
}

static int
accel_mlx5_translate_addr(void *addr, size_t size, struct spdk_memory_domain *domain,
			  void *domain_ctx, struct accel_mlx5_dev *dev, struct ibv_sge *sge)
{
	struct spdk_rdma_utils_memory_translation map_translation;
	struct spdk_memory_domain_translation_result domain_translation;
	struct spdk_memory_domain_translation_ctx local_ctx;
	int rc;

	if (domain) {
		domain_translation.size = sizeof(struct spdk_memory_domain_translation_result);
		local_ctx.size = sizeof(local_ctx);
		local_ctx.rdma.ibv_qp = dev->qp.verbs_qp;
		rc = spdk_memory_domain_translate_data(domain, domain_ctx, dev->dev_ctx->domain,
						       &local_ctx, addr, size, &domain_translation);
		if (spdk_unlikely(rc || domain_translation.iov_count != 1)) {
			SPDK_ERRLOG("Memory domain translation failed, addr %p, length %zu, iovcnt %u\n", addr, size,
				    domain_translation.iov_count);
			if (rc == 0) {
				rc = -EINVAL;
			}

			return rc;
		}
		sge->lkey = domain_translation.rdma.lkey;
		sge->addr = (uint64_t) domain_translation.iov.iov_base;
		sge->length = domain_translation.iov.iov_len;
	} else {
		rc = spdk_rdma_utils_get_translation(dev->mmap, addr, size,
						     &map_translation);
		if (spdk_unlikely(rc)) {
			SPDK_ERRLOG("Memory translation failed, addr %p, length %zu\n", addr, size);
			return rc;
		}
		sge->lkey = spdk_rdma_utils_memory_translation_get_lkey(&map_translation);
		sge->addr = (uint64_t)addr;
		sge->length = size;
	}

	return 0;
}

static inline int
accel_mlx5_fill_block_sge(struct accel_mlx5_dev *dev, struct ibv_sge *sge,
			  struct accel_mlx5_iov_sgl *iovs, uint32_t len, uint32_t *_remaining,
			  struct spdk_memory_domain *domain, void *domain_ctx)
{
	void *addr;
	uint32_t remaining = len;
	uint32_t size;
	int i = 0;
	int rc;

	while (remaining && i < (int)ACCEL_MLX5_MAX_SGE) {
		size = spdk_min(remaining, iovs->iov->iov_len - iovs->iov_offset);
		addr = (void *)iovs->iov->iov_base + iovs->iov_offset;
		rc = accel_mlx5_translate_addr(addr, size, domain, domain_ctx, dev, &sge[i]);
		if (spdk_unlikely(rc)) {
			return rc;
		}
		SPDK_DEBUGLOG(accel_mlx5, "\t sge[%d]: lkey %u, len %u, addr %"PRIx64"\n", i, sge[i].lkey,
			      sge[i].length, sge[i].addr);
		accel_mlx5_iov_sgl_advance(iovs, size);
		i++;
		assert(remaining >= size);
		remaining -= size;
	}
	*_remaining = remaining;

	return i;
}

static inline bool
accel_mlx5_compare_iovs(struct iovec *v1, struct iovec *v2, uint32_t iovcnt)
{
	return memcmp(v1, v2, sizeof(*v1) * iovcnt) == 0;
}

static inline uint16_t
accel_mlx5_dev_get_available_slots(struct accel_mlx5_dev *dev, struct accel_mlx5_qp *qp)
{
	assert(qp->wrs_max >= qp->wrs_submitted);
	assert(dev->wrs_in_cq_max >= dev->wrs_in_cq);

	/* Each time we produce only 1 CQE, so we need 1 CQ slot */
	if (spdk_unlikely(dev->wrs_in_cq == dev->wrs_in_cq_max)) {
		return 0;
	}

	return qp->wrs_max - qp->wrs_submitted;
}

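/*
 * Reserve up to ACCEL_MLX5_MAX_MKEYS_IN_TASK crypto mkeys for the task's remaining
 * requests. Returns the number of mkeys reserved, or 0 if the pool is exhausted.
 */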
static inline uint32_t
accel_mlx5_task_alloc_mkeys(struct accel_mlx5_task *task)
{
	struct accel_mlx5_dev *dev = task->qp->dev;
	uint32_t num_ops;
	int rc;

	assert(task->num_reqs > task->num_completed_reqs);
	num_ops = task->num_reqs - task->num_completed_reqs;
	num_ops = spdk_min(num_ops, ACCEL_MLX5_MAX_MKEYS_IN_TASK);
	if (!num_ops) {
		return 0;
	}
	rc = spdk_mlx5_mkey_pool_get_bulk(dev->crypto_mkeys, task->mkeys, num_ops);
	if (spdk_unlikely(rc)) {
		return 0;
	}
	assert(num_ops <= UINT16_MAX);
	task->num_ops = num_ops;

	return num_ops;
}

static inline uint8_t
bs_to_bs_selector(uint32_t bs)
{
	switch (bs) {
	case 512:
		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_512;
	case 520:
		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_520;
	case 4096:
		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_4096;
	case 4160:
		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_4160;
	default:
		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_RESERVED;
	}
}

static inline int
accel_mlx5_configure_crypto_umr(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_sge *sge,
				uint32_t mkey, uint32_t num_blocks, struct spdk_mlx5_crypto_dek_data *dek_data)
{
	struct spdk_mlx5_umr_crypto_attr cattr;
	struct spdk_mlx5_umr_attr umr_attr;
	struct accel_mlx5_qp *qp = mlx5_task->qp;
	struct accel_mlx5_dev *dev = qp->dev;
	struct spdk_accel_task *task = &mlx5_task->base;
	uint32_t length, remaining = 0, block_size = task->block_size;
	int rc;

	length = num_blocks * block_size;
	SPDK_DEBUGLOG(accel_mlx5, "task %p, domain %p, len %u, blocks %u\n", task, task->src_domain, length,
		      num_blocks);
	rc = accel_mlx5_fill_block_sge(dev, sge->src_sge, &mlx5_task->src, length, &remaining,
				       task->src_domain, task->src_domain_ctx);
	if (spdk_unlikely(rc <= 0)) {
		if (rc == 0) {
			rc = -EINVAL;
		}
		SPDK_ERRLOG("failed set src sge, rc %d\n", rc);
		return rc;
	}
	sge->src_sge_count = rc;
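	/*
	 * If not all requested bytes fit into ACCEL_MLX5_MAX_SGE entries, trim the UMR to a
	 * whole number of blocks and unwind the SGL; the remainder is handled by an extra
	 * request later.
	 */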
	if (spdk_unlikely(remaining)) {
		uint32_t new_len = length - remaining;
		uint32_t aligned_len, updated_num_blocks;

		SPDK_DEBUGLOG(accel_mlx5, "Incorrect src iovs, handled %u out of %u bytes\n", new_len, length);
		if (new_len < block_size) {
			/* We need to process at least 1 block. If the buffer is too fragmented, we can't do
			 * anything */
			return -ERANGE;
		}

		/* Regular integer division, we need to round down to the previous block boundary */
		updated_num_blocks = new_len / block_size;
		assert(updated_num_blocks);
		assert(updated_num_blocks < num_blocks);
		aligned_len = updated_num_blocks * block_size;

		if (aligned_len < new_len) {
			uint32_t dt = new_len - aligned_len;

			/* We can't process a partial block, so unwind the src iov_sgl and sge to the
			 * previous block boundary */
			SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind src sge for %u bytes\n", task, dt);
			accel_mlx5_iov_sgl_unwind(&mlx5_task->src, task->s.iovcnt, dt);
			sge->src_sge_count = accel_mlx5_sge_unwind(sge->src_sge, sge->src_sge_count, dt);
			if (!sge->src_sge_count) {
				return -ERANGE;
			}
		}
		SPDK_DEBUGLOG(accel_mlx5, "task %p, UMR len %u -> %u\n", task, length, aligned_len);
		length = aligned_len;
		num_blocks = updated_num_blocks;
	}

	cattr.xts_iv = task->iv + mlx5_task->num_processed_blocks;
	cattr.keytag = 0;
	cattr.dek_obj_id = dek_data->dek_obj_id;
	cattr.tweak_mode = dek_data->tweak_mode;
	cattr.enc_order = mlx5_task->enc_order;
	cattr.bs_selector = bs_to_bs_selector(mlx5_task->base.block_size);
	if (spdk_unlikely(cattr.bs_selector == SPDK_MLX5_BLOCK_SIZE_SELECTOR_RESERVED)) {
		SPDK_ERRLOG("unsupported block size %u\n", mlx5_task->base.block_size);
		return -EINVAL;
	}
	umr_attr.mkey = mkey;
	umr_attr.sge = sge->src_sge;

	if (!mlx5_task->inplace) {
		SPDK_DEBUGLOG(accel_mlx5, "task %p, dst sge, domain %p, len %u\n", task, task->dst_domain, length);
		rc = accel_mlx5_fill_block_sge(dev, sge->dst_sge, &mlx5_task->dst, length, &remaining,
					       task->dst_domain, task->dst_domain_ctx);
		if (spdk_unlikely(rc <= 0)) {
			if (rc == 0) {
				rc = -EINVAL;
			}
			SPDK_ERRLOG("failed set dst sge, rc %d\n", rc);
			return rc;
		}
		sge->dst_sge_count = rc;
		if (spdk_unlikely(remaining)) {
			uint32_t new_len = length - remaining;
			uint32_t aligned_len, updated_num_blocks, dt;

			SPDK_DEBUGLOG(accel_mlx5, "Incorrect dst iovs, handled %u out of %u bytes\n", new_len, length);
			if (new_len < block_size) {
				/* We need to process at least 1 block. If the buffer is too fragmented, we can't do
				 * anything */
				return -ERANGE;
			}

			/* Regular integer division, we need to round down to the previous block boundary */
			updated_num_blocks = new_len / block_size;
			assert(updated_num_blocks);
			assert(updated_num_blocks < num_blocks);
			aligned_len = updated_num_blocks * block_size;

			if (aligned_len < new_len) {
				dt = new_len - aligned_len;
				assert(dt > 0 && dt < length);
				/* We can't process a partial block, so unwind the src and dst iov_sgl and sge to the
				 * previous block boundary */
				SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind dst sge for %u bytes\n", task, dt);
				accel_mlx5_iov_sgl_unwind(&mlx5_task->dst, task->d.iovcnt, dt);
				sge->dst_sge_count = accel_mlx5_sge_unwind(sge->dst_sge, sge->dst_sge_count, dt);
				assert(sge->dst_sge_count > 0 && sge->dst_sge_count <= ACCEL_MLX5_MAX_SGE);
				if (!sge->dst_sge_count) {
					return -ERANGE;
				}
			}
			assert(length > aligned_len);
			dt = length - aligned_len;
			SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind src sge for %u bytes\n", task, dt);
			/* The same for the src iov_sgl and sge. In the worst case we can unwind SRC twice */
			accel_mlx5_iov_sgl_unwind(&mlx5_task->src, task->s.iovcnt, dt);
			sge->src_sge_count = accel_mlx5_sge_unwind(sge->src_sge, sge->src_sge_count, dt);
			assert(sge->src_sge_count > 0 && sge->src_sge_count <= ACCEL_MLX5_MAX_SGE);
			if (!sge->src_sge_count) {
				return -ERANGE;
			}
			SPDK_DEBUGLOG(accel_mlx5, "task %p, UMR len %u -> %u\n", task, length, aligned_len);
			length = aligned_len;
			num_blocks = updated_num_blocks;
		}
	}

	SPDK_DEBUGLOG(accel_mlx5,
		      "task %p: bs %u, iv %"PRIu64", enc_on_tx %d, tweak_mode %d, len %u, mkey %x, blocks %u\n",
		      mlx5_task, task->block_size, cattr.xts_iv, mlx5_task->enc_order, cattr.tweak_mode, length, mkey,
		      num_blocks);

	umr_attr.sge_count = sge->src_sge_count;
	umr_attr.umr_len = length;
	assert((uint32_t)mlx5_task->num_processed_blocks + num_blocks <= UINT16_MAX);
	mlx5_task->num_processed_blocks += num_blocks;

	rc = spdk_mlx5_umr_configure_crypto(qp->qp, &umr_attr, &cattr, 0, 0);

	return rc;
}

static inline int
accel_mlx5_task_process(struct accel_mlx5_task *mlx5_task)
{
	struct accel_mlx5_sge sges[ACCEL_MLX5_MAX_MKEYS_IN_TASK];
	struct spdk_mlx5_crypto_dek_data dek_data;
	struct accel_mlx5_qp *qp = mlx5_task->qp;
	struct accel_mlx5_dev *dev = qp->dev;
	/* First RDMA after UMR must have a SMALL_FENCE */
	uint32_t first_rdma_fence = SPDK_MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE;
	uint16_t num_blocks;
	uint16_t num_ops = spdk_min(mlx5_task->num_reqs - mlx5_task->num_completed_reqs,
				    mlx5_task->num_ops);
	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
	uint16_t i;
	int rc;

	assert(qp_slot > 1);
	num_ops = spdk_min(num_ops, qp_slot >> 1);
	if (spdk_unlikely(!num_ops)) {
		return -EINVAL;
	}

	rc = spdk_mlx5_crypto_get_dek_data(mlx5_task->base.crypto_key->priv, dev->dev_ctx->pd, &dek_data);
	if (spdk_unlikely(rc)) {
		return rc;
	}

	mlx5_task->num_wrs = 0;
	SPDK_DEBUGLOG(accel_mlx5, "begin, task, %p, reqs: total %u, submitted %u, completed %u\n",
		      mlx5_task, mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs);
	for (i = 0; i < num_ops; i++) {
		if (mlx5_task->num_submitted_reqs + i + 1 == mlx5_task->num_reqs) {
			/* Last request may consume less than calculated if crypto_multi_block is true */
			assert(mlx5_task->num_blocks > mlx5_task->num_submitted_reqs);
			num_blocks = mlx5_task->num_blocks - mlx5_task->num_processed_blocks;
		} else {
			num_blocks = mlx5_task->blocks_per_req;
		}

		rc = accel_mlx5_configure_crypto_umr(mlx5_task, &sges[i], mlx5_task->mkeys[i]->mkey, num_blocks,
						     &dek_data);
		if (spdk_unlikely(rc)) {
			SPDK_ERRLOG("UMR configure failed with %d\n", rc);
			return rc;
		}
		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
	}

	/* Loop over `num_ops - 1` operations for easier flags handling */
	for (i = 0; i < num_ops - 1; i++) {
		/* UMR is used as a destination for RDMA_READ - from UMR to sge */
		if (mlx5_task->inplace) {
			rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].src_sge, sges[i].src_sge_count, 0,
						    mlx5_task->mkeys[i]->mkey, 0, first_rdma_fence);
		} else {
			rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].dst_sge, sges[i].dst_sge_count, 0,
						    mlx5_task->mkeys[i]->mkey, 0, first_rdma_fence);
		}
		if (spdk_unlikely(rc)) {
			SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc);
			return rc;
		}

		first_rdma_fence = 0;
		assert(mlx5_task->num_submitted_reqs < mlx5_task->num_reqs);
		assert(mlx5_task->num_submitted_reqs < UINT16_MAX);
		mlx5_task->num_submitted_reqs++;
		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
	}

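	/*
	 * The last RDMA_READ in the batch is signaled (CQ_UPDATE) and carries the task pointer
	 * as its wr_id, so the whole batch generates a single CQE.
	 */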
	if (mlx5_task->inplace) {
		rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].src_sge, sges[i].src_sge_count, 0,
					    mlx5_task->mkeys[i]->mkey, (uint64_t)mlx5_task, first_rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
	} else {
		rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].dst_sge, sges[i].dst_sge_count, 0,
					    mlx5_task->mkeys[i]->mkey, (uint64_t)mlx5_task, first_rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
	}
	if (spdk_unlikely(rc)) {
		SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc);
		return rc;
	}

	assert(mlx5_task->num_submitted_reqs < mlx5_task->num_reqs);
	assert(mlx5_task->num_submitted_reqs < UINT16_MAX);
	mlx5_task->num_submitted_reqs++;
	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task);
	STAILQ_INSERT_TAIL(&qp->in_hw, mlx5_task, link);

	if (spdk_unlikely(mlx5_task->num_submitted_reqs == mlx5_task->num_reqs &&
			  mlx5_task->num_blocks > mlx5_task->num_processed_blocks)) {
		/* We hit the "out of sge entries" case with a highly fragmented payload, so
		 * accel_mlx5_configure_crypto_umr() handled fewer data blocks than expected.
		 * That means we need at least 1 more request to complete this task; it will be
		 * executed once all submitted requests are completed */
		SPDK_DEBUGLOG(accel_mlx5, "task %p, processed %u/%u blocks, add extra req\n", mlx5_task,
			      mlx5_task->num_processed_blocks, mlx5_task->num_blocks);
		mlx5_task->num_reqs++;
	}

	SPDK_DEBUGLOG(accel_mlx5, "end, task, %p, reqs: total %u, submitted %u, completed %u\n", mlx5_task,
		      mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs);

	return 0;
}

static inline int
accel_mlx5_task_continue(struct accel_mlx5_task *task)
{
	struct accel_mlx5_qp *qp = task->qp;
	struct accel_mlx5_dev *dev = qp->dev;
	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);

	if (spdk_unlikely(qp->recovering)) {
		STAILQ_INSERT_TAIL(&dev->nomem, task, link);
		return 0;
	}

	assert(task->num_reqs > task->num_completed_reqs);
	if (task->num_ops == 0) {
		/* No mkeys allocated, try to allocate now */
		if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(task))) {
			/* Pool is empty, queue this task */
			STAILQ_INSERT_TAIL(&dev->nomem, task, link);
			return -ENOMEM;
		}
	}
	/* We need to post at least 1 UMR and 1 RDMA operation */
	if (spdk_unlikely(qp_slot < 2)) {
		/* QP is full, queue this task */
		STAILQ_INSERT_TAIL(&dev->nomem, task, link);
		return -ENOMEM;
	}

	return accel_mlx5_task_process(task);
}

static inline int
accel_mlx5_task_init(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_dev *dev)
{
	struct spdk_accel_task *task = &mlx5_task->base;
	uint64_t src_nbytes = task->nbytes;
#ifdef DEBUG
	uint64_t dst_nbytes;
	uint32_t i;
#endif
	switch (task->op_code) {
	case SPDK_ACCEL_OPC_ENCRYPT:
		mlx5_task->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_WIRE;
		break;
	case SPDK_ACCEL_OPC_DECRYPT:
		mlx5_task->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_MEMORY;
		break;
	default:
		SPDK_ERRLOG("Unsupported accel opcode %d\n", task->op_code);
		return -ENOTSUP;
	}

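	/* The payload must be a whole number of crypto blocks */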
	if (spdk_unlikely(src_nbytes % mlx5_task->base.block_size != 0)) {
		return -EINVAL;
	}

	mlx5_task->qp = &dev->qp;
	mlx5_task->num_completed_reqs = 0;
	mlx5_task->num_submitted_reqs = 0;
	mlx5_task->num_ops = 0;
	mlx5_task->num_processed_blocks = 0;
	assert(src_nbytes / mlx5_task->base.block_size <= UINT16_MAX);
	mlx5_task->num_blocks = src_nbytes / mlx5_task->base.block_size;
	accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt);
	if (task->d.iovcnt == 0 || (task->d.iovcnt == task->s.iovcnt &&
				    accel_mlx5_compare_iovs(task->d.iovs, task->s.iovs, task->s.iovcnt))) {
		mlx5_task->inplace = 1;
	} else {
#ifdef DEBUG
		dst_nbytes = 0;
		for (i = 0; i < task->d.iovcnt; i++) {
			dst_nbytes += task->d.iovs[i].iov_len;
		}

		if (spdk_unlikely(src_nbytes != dst_nbytes)) {
			return -EINVAL;
		}
#endif
		mlx5_task->inplace = 0;
		accel_mlx5_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt);
	}

	if (dev->crypto_multi_block) {
		if (dev->crypto_split_blocks) {
			assert(SPDK_CEIL_DIV(mlx5_task->num_blocks, dev->crypto_split_blocks) <= UINT16_MAX);
			mlx5_task->num_reqs = SPDK_CEIL_DIV(mlx5_task->num_blocks, dev->crypto_split_blocks);
			/* Last req may consume fewer blocks */
			mlx5_task->blocks_per_req = spdk_min(mlx5_task->num_blocks, dev->crypto_split_blocks);
		} else {
			if (task->s.iovcnt > ACCEL_MLX5_MAX_SGE || task->d.iovcnt > ACCEL_MLX5_MAX_SGE) {
				uint32_t max_sge_count = spdk_max(task->s.iovcnt, task->d.iovcnt);

				assert(SPDK_CEIL_DIV(max_sge_count, ACCEL_MLX5_MAX_SGE) <= UINT16_MAX);
				mlx5_task->num_reqs = SPDK_CEIL_DIV(max_sge_count, ACCEL_MLX5_MAX_SGE);
				mlx5_task->blocks_per_req = SPDK_CEIL_DIV(mlx5_task->num_blocks, mlx5_task->num_reqs);
			} else {
				mlx5_task->num_reqs = 1;
				mlx5_task->blocks_per_req = mlx5_task->num_blocks;
			}
		}
	} else {
		mlx5_task->num_reqs = mlx5_task->num_blocks;
		mlx5_task->blocks_per_req = 1;
	}

	if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(mlx5_task))) {
		/* Pool is empty, queue this task */
		SPDK_DEBUGLOG(accel_mlx5, "no reqs in pool, dev %s\n", dev->dev_ctx->context->device->name);
		return -ENOMEM;
	}
	if (spdk_unlikely(accel_mlx5_dev_get_available_slots(dev, &dev->qp) < 2)) {
		/* Queue is full, queue this task */
		SPDK_DEBUGLOG(accel_mlx5, "dev %s qp %p is full\n", dev->dev_ctx->context->device->name,
			      mlx5_task->qp);
		return -ENOMEM;
	}

	SPDK_DEBUGLOG(accel_mlx5, "task %p, src_iovs %u, dst_iovs %u, num_reqs %u, "
		      "blocks/req %u, blocks %u, inplace %d\n", task, task->s.iovcnt, task->d.iovcnt,
		      mlx5_task->num_reqs, mlx5_task->blocks_per_req, mlx5_task->num_blocks, mlx5_task->inplace);

	return 0;
}

static int
accel_mlx5_submit_tasks(struct spdk_io_channel *_ch, struct spdk_accel_task *task)
{
	struct accel_mlx5_io_channel *ch = spdk_io_channel_get_ctx(_ch);
	struct accel_mlx5_task *mlx5_task = SPDK_CONTAINEROF(task, struct accel_mlx5_task, base);
	struct accel_mlx5_dev *dev;
	int rc;

	if (!g_accel_mlx5.enabled || !task->crypto_key ||
	    task->crypto_key->module_if != &g_accel_mlx5.module ||
	    !task->crypto_key->priv) {
		return -EINVAL;
	}
	dev = &ch->devs[ch->dev_idx];
	ch->dev_idx++;
	if (ch->dev_idx == ch->num_devs) {
		ch->dev_idx = 0;
	}

	rc = accel_mlx5_task_init(mlx5_task, dev);
	if (spdk_unlikely(rc)) {
		if (rc == -ENOMEM) {
			SPDK_DEBUGLOG(accel_mlx5, "no reqs to handle new task %p (required %u), put to queue\n", mlx5_task,
				      mlx5_task->num_reqs);
			STAILQ_INSERT_TAIL(&dev->nomem, mlx5_task, link);
			return 0;
		}
		return rc;
	}

	if (spdk_unlikely(mlx5_task->qp->recovering)) {
		STAILQ_INSERT_TAIL(&dev->nomem, mlx5_task, link);
		return 0;
	}

	return accel_mlx5_task_process(mlx5_task);
}

static void accel_mlx5_recover_qp(struct accel_mlx5_qp *qp);

static int
accel_mlx5_recover_qp_poller(void *arg)
{
	struct accel_mlx5_qp *qp = arg;

	spdk_poller_unregister(&qp->recover_poller);
	accel_mlx5_recover_qp(qp);
	return SPDK_POLLER_BUSY;
}

static void
accel_mlx5_recover_qp(struct accel_mlx5_qp *qp)
{
	struct accel_mlx5_dev *dev = qp->dev;
	struct spdk_mlx5_qp_attr mlx5_qp_attr = {};
	int rc;

	SPDK_NOTICELOG("Recovering qp %p, core %u\n", qp, spdk_env_get_current_core());
	if (qp->qp) {
		spdk_mlx5_qp_destroy(qp->qp);
		qp->qp = NULL;
	}

	mlx5_qp_attr.cap.max_send_wr = g_accel_mlx5.attr.qp_size;
	mlx5_qp_attr.cap.max_recv_wr = 0;
	mlx5_qp_attr.cap.max_send_sge = ACCEL_MLX5_MAX_SGE;
	mlx5_qp_attr.cap.max_inline_data = sizeof(struct ibv_sge) * ACCEL_MLX5_MAX_SGE;

	rc = spdk_mlx5_qp_create(dev->dev_ctx->pd, dev->cq, &mlx5_qp_attr, &qp->qp);
	if (rc) {
		SPDK_ERRLOG("Failed to create mlx5 dma QP, rc %d. Retry in %d usec\n",
			    rc, ACCEL_MLX5_RECOVER_POLLER_PERIOD_US);
		qp->recover_poller = SPDK_POLLER_REGISTER(accel_mlx5_recover_qp_poller, qp,
				     ACCEL_MLX5_RECOVER_POLLER_PERIOD_US);
		return;
	}

	qp->recovering = false;
}

static inline void
accel_mlx5_process_error_cpl(struct spdk_mlx5_cq_completion *wc, struct accel_mlx5_task *task)
{
	struct accel_mlx5_qp *qp = task->qp;

	if (wc->status != IBV_WC_WR_FLUSH_ERR) {
		SPDK_WARNLOG("RDMA: qp %p, task %p, WC status %d, core %u\n",
			     qp, task, wc->status, spdk_env_get_current_core());
	} else {
		SPDK_DEBUGLOG(accel_mlx5,
			      "RDMA: qp %p, task %p, WC status %d, core %u\n",
			      qp, task, wc->status, spdk_env_get_current_core());
	}

	qp->recovering = true;
	assert(task->num_completed_reqs <= task->num_submitted_reqs);
	if (task->num_completed_reqs == task->num_submitted_reqs) {
		STAILQ_REMOVE_HEAD(&qp->in_hw, link);
		accel_mlx5_task_fail(task, -EIO);
	}
}

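/*
 * Reap completions on the device CQ. Each CQE with a non-zero wr_id completes all of that
 * task's outstanding submitted requests. Finished tasks are completed, tasks with remaining
 * requests are continued, and a QP that reported an error is recovered once all of its
 * outstanding WRs have been reaped.
 */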
static inline int64_t
accel_mlx5_poll_cq(struct accel_mlx5_dev *dev)
{
	struct spdk_mlx5_cq_completion wc[ACCEL_MLX5_MAX_WC];
	struct accel_mlx5_task *task;
	struct accel_mlx5_qp *qp;
	int reaped, i, rc;
	uint16_t completed;

	reaped = spdk_mlx5_cq_poll_completions(dev->cq, wc, ACCEL_MLX5_MAX_WC);
	if (spdk_unlikely(reaped < 0)) {
		SPDK_ERRLOG("Error polling CQ! (%d): %s\n", errno, spdk_strerror(errno));
		return reaped;
	} else if (reaped == 0) {
		return 0;
	}

	SPDK_DEBUGLOG(accel_mlx5, "Reaped %d cpls on dev %s\n", reaped,
		      dev->dev_ctx->context->device->name);

	for (i = 0; i < reaped; i++) {
		if (spdk_unlikely(!wc[i].wr_id)) {
			/* Unsignaled completion with error, ignore */
			continue;
		}
		task = (struct accel_mlx5_task *)wc[i].wr_id;
		qp = task->qp;
		assert(task == STAILQ_FIRST(&qp->in_hw) && "submission mismatch");
		assert(task->num_submitted_reqs > task->num_completed_reqs);
		completed = task->num_submitted_reqs - task->num_completed_reqs;
		assert((uint32_t)task->num_completed_reqs + completed <= UINT16_MAX);
		task->num_completed_reqs += completed;
		assert(qp->wrs_submitted >= task->num_wrs);
		qp->wrs_submitted -= task->num_wrs;
		assert(dev->wrs_in_cq > 0);
		dev->wrs_in_cq--;

		if (wc[i].status) {
			accel_mlx5_process_error_cpl(&wc[i], task);
			if (qp->wrs_submitted == 0) {
				assert(STAILQ_EMPTY(&qp->in_hw));
				accel_mlx5_recover_qp(qp);
			}
			continue;
		}

		SPDK_DEBUGLOG(accel_mlx5, "task %p, remaining %u\n", task,
			      task->num_reqs - task->num_completed_reqs);
		if (task->num_completed_reqs == task->num_reqs) {
			STAILQ_REMOVE_HEAD(&qp->in_hw, link);
			accel_mlx5_task_complete(task);
		} else {
			assert(task->num_submitted_reqs < task->num_reqs);
			assert(task->num_completed_reqs == task->num_submitted_reqs);
			STAILQ_REMOVE_HEAD(&qp->in_hw, link);
			rc = accel_mlx5_task_continue(task);
			if (spdk_unlikely(rc)) {
				if (rc != -ENOMEM) {
					accel_mlx5_task_fail(task, rc);
				}
			}
		}
	}

	return reaped;
}

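/*
 * Retry tasks that previously could not get mkeys or QP slots. The tail of the list is
 * remembered before the loop so that tasks re-queued during this pass are not retried
 * again within the same pass.
 */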
static inline void
accel_mlx5_resubmit_nomem_tasks(struct accel_mlx5_dev *dev)
{
	struct accel_mlx5_task *task, *tmp, *last;
	int rc;

	last = STAILQ_LAST(&dev->nomem, accel_mlx5_task, link);
	STAILQ_FOREACH_SAFE(task, &dev->nomem, link, tmp) {
		STAILQ_REMOVE_HEAD(&dev->nomem, link);
		rc = accel_mlx5_task_continue(task);
		if (spdk_unlikely(rc)) {
			if (rc != -ENOMEM) {
				accel_mlx5_task_fail(task, rc);
			}
			break;
		}
		/* If the qpair is recovering, the task is added back to the nomem list and 0 is returned.
		 * In that case we need a special condition to iterate the list only once and stop this
		 * FOREACH loop */
		if (task == last) {
			break;
		}
	}
}

static int
accel_mlx5_poller(void *ctx)
{
	struct accel_mlx5_io_channel *ch = ctx;
	struct accel_mlx5_dev *dev;

	int64_t completions = 0, rc;
	uint32_t i;

	for (i = 0; i < ch->num_devs; i++) {
		dev = &ch->devs[i];
		if (dev->wrs_in_cq) {
			rc = accel_mlx5_poll_cq(dev);
			if (spdk_unlikely(rc < 0)) {
				SPDK_ERRLOG("Error %"PRId64" on CQ, dev %s\n", rc, dev->dev_ctx->context->device->name);
			}
			completions += rc;
			if (dev->qp.wrs_submitted) {
				spdk_mlx5_qp_complete_send(dev->qp.qp);
			}
		}
		if (!STAILQ_EMPTY(&dev->nomem)) {
			accel_mlx5_resubmit_nomem_tasks(dev);
		}
	}

	return !!completions;
}

static bool
accel_mlx5_supports_opcode(enum spdk_accel_opcode opc)
{
	assert(g_accel_mlx5.enabled);

	switch (opc) {
	case SPDK_ACCEL_OPC_ENCRYPT:
	case SPDK_ACCEL_OPC_DECRYPT:
		return g_accel_mlx5.crypto_supported;
	default:
		return false;
	}
}

static struct spdk_io_channel *
accel_mlx5_get_io_channel(void)
{
	assert(g_accel_mlx5.enabled);
	return spdk_get_io_channel(&g_accel_mlx5);
}

static int
accel_mlx5_create_qp(struct accel_mlx5_dev *dev, struct accel_mlx5_qp *qp)
{
	struct spdk_mlx5_qp_attr mlx5_qp_attr = {};
	int rc;

	mlx5_qp_attr.cap.max_send_wr = g_accel_mlx5.attr.qp_size;
	mlx5_qp_attr.cap.max_recv_wr = 0;
	mlx5_qp_attr.cap.max_send_sge = ACCEL_MLX5_MAX_SGE;
	mlx5_qp_attr.cap.max_inline_data = sizeof(struct ibv_sge) * ACCEL_MLX5_MAX_SGE;

	rc = spdk_mlx5_qp_create(dev->dev_ctx->pd, dev->cq, &mlx5_qp_attr, &qp->qp);
	if (rc) {
		return rc;
	}

	STAILQ_INIT(&qp->in_hw);
	qp->dev = dev;
	qp->verbs_qp = spdk_mlx5_qp_get_verbs_qp(qp->qp);
	assert(qp->verbs_qp);
	qp->wrs_max = g_accel_mlx5.attr.qp_size;

	return 0;
}

static void
accel_mlx5_destroy_cb(void *io_device, void *ctx_buf)
{
	struct accel_mlx5_io_channel *ch = ctx_buf;
	struct accel_mlx5_dev *dev;
	uint32_t i;

	spdk_poller_unregister(&ch->poller);
	for (i = 0; i < ch->num_devs; i++) {
		dev = &ch->devs[i];
		spdk_mlx5_qp_destroy(dev->qp.qp);
		if (dev->cq) {
			spdk_mlx5_cq_destroy(dev->cq);
		}
		spdk_poller_unregister(&dev->qp.recover_poller);
		if (dev->crypto_mkeys) {
			spdk_mlx5_mkey_pool_put_ref(dev->crypto_mkeys);
		}
		spdk_rdma_utils_free_mem_map(&dev->mmap);
	}
	free(ch->devs);
}

static int
accel_mlx5_create_cb(void *io_device, void *ctx_buf)
{
	struct spdk_mlx5_cq_attr cq_attr = {};
	struct accel_mlx5_io_channel *ch = ctx_buf;
	struct accel_mlx5_dev_ctx *dev_ctx;
	struct accel_mlx5_dev *dev;
	uint32_t i;
	int rc;

	ch->devs = calloc(g_accel_mlx5.num_ctxs, sizeof(*ch->devs));
	if (!ch->devs) {
		SPDK_ERRLOG("Memory allocation failed\n");
		return -ENOMEM;
	}

	for (i = 0; i < g_accel_mlx5.num_ctxs; i++) {
		dev_ctx = &g_accel_mlx5.dev_ctxs[i];
		dev = &ch->devs[i];
		dev->dev_ctx = dev_ctx;

		if (dev_ctx->crypto_mkeys) {
			dev->crypto_mkeys = spdk_mlx5_mkey_pool_get_ref(dev_ctx->pd, SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO);
			if (!dev->crypto_mkeys) {
				SPDK_ERRLOG("Failed to get crypto mkey pool channel, dev %s\n",
					    dev_ctx->context->device->name);
				/* Should not happen since mkey pool is created on accel_mlx5 initialization.
				 * We should not be here if pool creation failed */
				assert(0);
				goto err_out;
			}
		}

		memset(&cq_attr, 0, sizeof(cq_attr));
		cq_attr.cqe_cnt = g_accel_mlx5.attr.qp_size;
		cq_attr.cqe_size = 64;
		cq_attr.cq_context = dev;

		ch->num_devs++;
		rc = spdk_mlx5_cq_create(dev_ctx->pd, &cq_attr, &dev->cq);
		if (rc) {
			SPDK_ERRLOG("Failed to create mlx5 CQ, rc %d\n", rc);
			goto err_out;
		}

		rc = accel_mlx5_create_qp(dev, &dev->qp);
		if (rc) {
			SPDK_ERRLOG("Failed to create mlx5 QP, rc %d\n", rc);
			goto err_out;
		}

		dev->mmap = spdk_rdma_utils_create_mem_map(dev_ctx->pd, NULL,
				IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE);
		if (!dev->mmap) {
			SPDK_ERRLOG("Failed to create memory map\n");
			rc = -ENOMEM;
			goto err_out;
		}
		dev->crypto_multi_block = dev_ctx->crypto_multi_block;
		dev->crypto_split_blocks = dev_ctx->crypto_multi_block ? g_accel_mlx5.attr.crypto_split_blocks : 0;
		dev->wrs_in_cq_max = g_accel_mlx5.attr.qp_size;
		STAILQ_INIT(&dev->nomem);
	}

	ch->poller = SPDK_POLLER_REGISTER(accel_mlx5_poller, ch, 0);

	return 0;

err_out:
	accel_mlx5_destroy_cb(&g_accel_mlx5, ctx_buf);
	return rc;
}

void
accel_mlx5_get_default_attr(struct accel_mlx5_attr *attr)
{
	assert(attr);

	attr->qp_size = ACCEL_MLX5_QP_SIZE;
	attr->num_requests = ACCEL_MLX5_NUM_REQUESTS;
	attr->allowed_devs = NULL;
	attr->crypto_split_blocks = 0;
}

static void
accel_mlx5_allowed_devs_free(void)
{
	size_t i;

	if (!g_accel_mlx5.allowed_devs) {
		return;
	}

	for (i = 0; i < g_accel_mlx5.allowed_devs_count; i++) {
		free(g_accel_mlx5.allowed_devs[i]);
	}
	free(g_accel_mlx5.attr.allowed_devs);
	free(g_accel_mlx5.allowed_devs);
	g_accel_mlx5.attr.allowed_devs = NULL;
	g_accel_mlx5.allowed_devs = NULL;
	g_accel_mlx5.allowed_devs_count = 0;
}

static int
accel_mlx5_allowed_devs_parse(const char *allowed_devs)
{
	char *str, *tmp, *tok;
	size_t devs_count = 0;

	str = strdup(allowed_devs);
	if (!str) {
		return -ENOMEM;
	}

	accel_mlx5_allowed_devs_free();

	tmp = str;
	while ((tmp = strchr(tmp, ',')) != NULL) {
		tmp++;
		devs_count++;
	}
	devs_count++;

	g_accel_mlx5.allowed_devs = calloc(devs_count, sizeof(char *));
	if (!g_accel_mlx5.allowed_devs) {
		free(str);
		return -ENOMEM;
	}

	devs_count = 0;
	tok = strtok(str, ",");
	while (tok) {
		g_accel_mlx5.allowed_devs[devs_count] = strdup(tok);
		if (!g_accel_mlx5.allowed_devs[devs_count]) {
			free(str);
			accel_mlx5_allowed_devs_free();
			return -ENOMEM;
		}
		tok = strtok(NULL, ",");
		devs_count++;
		g_accel_mlx5.allowed_devs_count++;
	}

	free(str);

	return 0;
}

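/*
 * Enable the module and store its attributes. When the caller provides an allowed_devs
 * string, it is duplicated, parsed into a list of device names and passed to the mlx5
 * crypto library as the set of allowed devices.
 */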
int
accel_mlx5_enable(struct accel_mlx5_attr *attr)
{
	int rc;

	if (g_accel_mlx5.enabled) {
		return -EEXIST;
	}
	if (attr) {
		g_accel_mlx5.attr = *attr;
		g_accel_mlx5.attr.allowed_devs = NULL;

		if (attr->allowed_devs) {
			/* Contains a copy of user's string */
			g_accel_mlx5.attr.allowed_devs = strndup(attr->allowed_devs, ACCEL_MLX5_ALLOWED_DEVS_MAX_LEN);
			if (!g_accel_mlx5.attr.allowed_devs) {
				return -ENOMEM;
			}
			rc = accel_mlx5_allowed_devs_parse(g_accel_mlx5.attr.allowed_devs);
			if (rc) {
				return rc;
			}
			rc = spdk_mlx5_crypto_devs_allow((const char *const *)g_accel_mlx5.allowed_devs,
							 g_accel_mlx5.allowed_devs_count);
			if (rc) {
				accel_mlx5_allowed_devs_free();
				return rc;
			}
		}
	} else {
		accel_mlx5_get_default_attr(&g_accel_mlx5.attr);
	}

	g_accel_mlx5.enabled = true;
	spdk_accel_module_list_add(&g_accel_mlx5.module);

	return 0;
}

static void
accel_mlx5_free_resources(void)
{
	struct accel_mlx5_dev_ctx *dev_ctx;
	uint32_t i;

	for (i = 0; i < g_accel_mlx5.num_ctxs; i++) {
		dev_ctx = &g_accel_mlx5.dev_ctxs[i];
		if (dev_ctx->pd) {
			if (dev_ctx->crypto_mkeys) {
				spdk_mlx5_mkey_pool_destroy(SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO, dev_ctx->pd);
			}
			spdk_rdma_utils_put_pd(dev_ctx->pd);
		}
		if (dev_ctx->domain) {
			spdk_rdma_utils_put_memory_domain(dev_ctx->domain);
		}
	}

	free(g_accel_mlx5.dev_ctxs);
	g_accel_mlx5.dev_ctxs = NULL;
	g_accel_mlx5.initialized = false;
}

static void
accel_mlx5_deinit_cb(void *ctx)
{
	accel_mlx5_free_resources();
	spdk_accel_module_finish();
}

static void
accel_mlx5_deinit(void *ctx)
{
	if (g_accel_mlx5.allowed_devs) {
		accel_mlx5_allowed_devs_free();
	}
	spdk_mlx5_crypto_devs_allow(NULL, 0);
	if (g_accel_mlx5.initialized) {
		spdk_io_device_unregister(&g_accel_mlx5, accel_mlx5_deinit_cb);
	} else {
		spdk_accel_module_finish();
	}
}

static int
accel_mlx5_mkeys_create(struct ibv_pd *pd, uint32_t num_mkeys, uint32_t flags)
{
	struct spdk_mlx5_mkey_pool_param pool_param = {};

	pool_param.mkey_count = num_mkeys;
	pool_param.cache_per_thread = num_mkeys * 3 / 4 / spdk_env_get_core_count();
	pool_param.flags = flags;

	return spdk_mlx5_mkey_pool_init(&pool_param, pd);
}

static int
accel_mlx5_dev_ctx_init(struct accel_mlx5_dev_ctx *dev_ctx, struct ibv_context *dev,
			struct spdk_mlx5_device_caps *caps)
{
	struct ibv_pd *pd;
	int rc;

	pd = spdk_rdma_utils_get_pd(dev);
	if (!pd) {
		SPDK_ERRLOG("Failed to get PD for context %p, dev %s\n", dev, dev->device->name);
		return -EINVAL;
	}
	dev_ctx->context = dev;
	dev_ctx->pd = pd;
	dev_ctx->domain = spdk_rdma_utils_get_memory_domain(pd);
	if (!dev_ctx->domain) {
		return -ENOMEM;
	}

	if (g_accel_mlx5.crypto_supported) {
		dev_ctx->crypto_multi_block = caps->crypto.multi_block_be_tweak;
		if (!dev_ctx->crypto_multi_block && g_accel_mlx5.attr.crypto_split_blocks) {
			SPDK_WARNLOG("\"crypto_split_blocks\" is set but dev %s doesn't support multi block crypto\n",
				     dev->device->name);
		}
		rc = accel_mlx5_mkeys_create(pd, g_accel_mlx5.attr.num_requests, SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO);
		if (rc) {
			SPDK_ERRLOG("Failed to create crypto mkeys pool, rc %d, dev %s\n", rc, dev->device->name);
			return rc;
		}
		dev_ctx->crypto_mkeys = true;
	}

	return 0;
}

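/*
 * Return a NULL-terminated array of Mellanox ibv contexts, filtered by the allowed_devs
 * list when one is configured. The caller is responsible for freeing the array.
 */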
static struct ibv_context **
accel_mlx5_get_devices(int *_num_devs)
{
	struct ibv_context **rdma_devs, **rdma_devs_out = NULL, *dev;
	struct ibv_device_attr dev_attr;
	size_t j;
	int num_devs = 0, i, rc;
	int num_devs_out = 0;
	bool dev_allowed;

	rdma_devs = rdma_get_devices(&num_devs);
	if (!rdma_devs || !num_devs) {
		*_num_devs = 0;
		return NULL;
	}

	rdma_devs_out = calloc(num_devs + 1, sizeof(struct ibv_context *));
	if (!rdma_devs_out) {
		SPDK_ERRLOG("Memory allocation failed\n");
		rdma_free_devices(rdma_devs);
		*_num_devs = 0;
		return NULL;
	}

	for (i = 0; i < num_devs; i++) {
		dev = rdma_devs[i];
		rc = ibv_query_device(dev, &dev_attr);
		if (rc) {
			SPDK_ERRLOG("Failed to query dev %s, skipping\n", dev->device->name);
			continue;
		}
		if (dev_attr.vendor_id != SPDK_MLX5_VENDOR_ID_MELLANOX) {
			SPDK_DEBUGLOG(accel_mlx5, "dev %s is not Mellanox device, skipping\n", dev->device->name);
			continue;
		}

		if (g_accel_mlx5.allowed_devs_count) {
			dev_allowed = false;
			for (j = 0; j < g_accel_mlx5.allowed_devs_count; j++) {
				if (strcmp(g_accel_mlx5.allowed_devs[j], dev->device->name) == 0) {
					dev_allowed = true;
					break;
				}
			}
			if (!dev_allowed) {
				continue;
			}
		}

		rdma_devs_out[num_devs_out] = dev;
		num_devs_out++;
	}

	rdma_free_devices(rdma_devs);
	*_num_devs = num_devs_out;

	return rdma_devs_out;
}

static inline bool
accel_mlx5_dev_supports_crypto(struct spdk_mlx5_device_caps *caps)
{
	return caps->crypto_supported && !caps->crypto.wrapped_import_method_aes_xts &&
	       (caps->crypto.single_block_le_tweak ||
		caps->crypto.multi_block_le_tweak || caps->crypto.multi_block_be_tweak);
}

static int
accel_mlx5_init(void)
{
	struct spdk_mlx5_device_caps *caps;
	struct ibv_context **rdma_devs, *dev;
	int num_devs = 0, rc = 0, i;
	int best_dev = -1, first_dev = 0;
	bool supports_crypto;
	bool find_best_dev = g_accel_mlx5.allowed_devs_count == 0;

	if (!g_accel_mlx5.enabled) {
		return -EINVAL;
	}

	rdma_devs = accel_mlx5_get_devices(&num_devs);
	if (!rdma_devs || !num_devs) {
		return -ENODEV;
	}
	caps = calloc(num_devs, sizeof(*caps));
	if (!caps) {
		rc = -ENOMEM;
		goto cleanup;
	}

	g_accel_mlx5.crypto_supported = true;
	g_accel_mlx5.num_ctxs = 0;

	/* Iterate devices. We support an offload only if all devices support it */
	for (i = 0; i < num_devs; i++) {
		dev = rdma_devs[i];

		rc = spdk_mlx5_device_query_caps(dev, &caps[i]);
		if (rc) {
			SPDK_ERRLOG("Failed to get crypto caps, dev %s\n", dev->device->name);
			goto cleanup;
		}
		supports_crypto = accel_mlx5_dev_supports_crypto(&caps[i]);
		if (!supports_crypto) {
			SPDK_DEBUGLOG(accel_mlx5, "Disable crypto support because dev %s doesn't support it\n",
				      rdma_devs[i]->device->name);
			g_accel_mlx5.crypto_supported = false;
		}
		if (find_best_dev) {
			if (supports_crypto && best_dev == -1) {
				best_dev = i;
			}
		}
	}

	/* The user didn't specify devices to use, try to select the best one */
	if (find_best_dev) {
		if (best_dev == -1) {
			best_dev = 0;
		}
		supports_crypto = accel_mlx5_dev_supports_crypto(&caps[best_dev]);
		SPDK_NOTICELOG("Select dev %s, crypto %d\n", rdma_devs[best_dev]->device->name, supports_crypto);
		g_accel_mlx5.crypto_supported = supports_crypto;
		first_dev = best_dev;
		num_devs = 1;
		if (supports_crypto) {
			const char *const dev_name[] = { rdma_devs[best_dev]->device->name };
			/* Let the mlx5 library know which device to use */
			spdk_mlx5_crypto_devs_allow(dev_name, 1);
		}
	} else {
		SPDK_NOTICELOG("Found %d devices, crypto %d\n", num_devs, g_accel_mlx5.crypto_supported);
	}

	if (!g_accel_mlx5.crypto_supported) {
		/* For now accel_mlx5 supports only crypto, so exit if no device supports crypto offload */
		rc = -ENODEV;
		goto cleanup;
	}

	g_accel_mlx5.dev_ctxs = calloc(num_devs, sizeof(*g_accel_mlx5.dev_ctxs));
	if (!g_accel_mlx5.dev_ctxs) {
		SPDK_ERRLOG("Memory allocation failed\n");
		rc = -ENOMEM;
		goto cleanup;
	}

	for (i = first_dev; i < first_dev + num_devs; i++) {
		rc = accel_mlx5_dev_ctx_init(&g_accel_mlx5.dev_ctxs[g_accel_mlx5.num_ctxs++],
					     rdma_devs[i], &caps[i]);
		if (rc) {
			goto cleanup;
		}
	}

	SPDK_NOTICELOG("Accel framework mlx5 initialized, found %d devices.\n", num_devs);
	spdk_io_device_register(&g_accel_mlx5, accel_mlx5_create_cb, accel_mlx5_destroy_cb,
				sizeof(struct accel_mlx5_io_channel), "accel_mlx5");
	g_accel_mlx5.initialized = true;
	free(rdma_devs);
	free(caps);

	return 0;

cleanup:
	free(rdma_devs);
	free(caps);
	accel_mlx5_free_resources();

	return rc;
}

static void
accel_mlx5_write_config_json(struct spdk_json_write_ctx *w)
{
	if (g_accel_mlx5.enabled) {
		spdk_json_write_object_begin(w);
		spdk_json_write_named_string(w, "method", "mlx5_scan_accel_module");
		spdk_json_write_named_object_begin(w, "params");
		spdk_json_write_named_uint16(w, "qp_size", g_accel_mlx5.attr.qp_size);
		spdk_json_write_named_uint32(w, "num_requests", g_accel_mlx5.attr.num_requests);
		if (g_accel_mlx5.attr.allowed_devs) {
			spdk_json_write_named_string(w, "allowed_devs", g_accel_mlx5.attr.allowed_devs);
		}
		spdk_json_write_named_uint16(w, "crypto_split_blocks", g_accel_mlx5.attr.crypto_split_blocks);
		spdk_json_write_object_end(w);
		spdk_json_write_object_end(w);
	}
}

static size_t
accel_mlx5_get_ctx_size(void)
{
	return sizeof(struct accel_mlx5_task);
}

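/*
 * Create an mlx5 DEK from the two AES-XTS key halves. The concatenated key material is
 * zeroized and freed right after the keytag creation attempt; on success the keytag is
 * stored in key->priv and later released by accel_mlx5_crypto_key_deinit().
 */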
static int
accel_mlx5_crypto_key_init(struct spdk_accel_crypto_key *key)
{
	struct spdk_mlx5_crypto_dek_create_attr attr = {};
	struct spdk_mlx5_crypto_keytag *keytag;
	int rc;

	if (!key || !key->key || !key->key2 || !key->key_size || !key->key2_size) {
		return -EINVAL;
	}

	attr.dek = calloc(1, key->key_size + key->key2_size);
	if (!attr.dek) {
		return -ENOMEM;
	}

	memcpy(attr.dek, key->key, key->key_size);
	memcpy(attr.dek + key->key_size, key->key2, key->key2_size);
	attr.dek_len = key->key_size + key->key2_size;

	rc = spdk_mlx5_crypto_keytag_create(&attr, &keytag);
	spdk_memset_s(attr.dek, attr.dek_len, 0, attr.dek_len);
	free(attr.dek);
	if (rc) {
		SPDK_ERRLOG("Failed to create a keytag, rc %d\n", rc);
		return rc;
	}

	key->priv = keytag;

	return 0;
}

static void
accel_mlx5_crypto_key_deinit(struct spdk_accel_crypto_key *key)
{
	if (!key || key->module_if != &g_accel_mlx5.module || !key->priv) {
		return;
	}

	spdk_mlx5_crypto_keytag_destroy(key->priv);
}

static bool
accel_mlx5_crypto_supports_cipher(enum spdk_accel_cipher cipher, size_t key_size)
{
	switch (cipher) {
	case SPDK_ACCEL_CIPHER_AES_XTS:
		return key_size == SPDK_ACCEL_AES_XTS_128_KEY_SIZE || key_size == SPDK_ACCEL_AES_XTS_256_KEY_SIZE;
	default:
		return false;
	}
}

static int
accel_mlx5_get_memory_domains(struct spdk_memory_domain **domains, int array_size)
{
	int i, size;

	if (!domains || !array_size) {
		return (int)g_accel_mlx5.num_ctxs;
	}

	size = spdk_min(array_size, (int)g_accel_mlx5.num_ctxs);

	for (i = 0; i < size; i++) {
		domains[i] = g_accel_mlx5.dev_ctxs[i].domain;
	}

	return (int)g_accel_mlx5.num_ctxs;
}

static struct accel_mlx5_module g_accel_mlx5 = {
	.module = {
		.module_init = accel_mlx5_init,
		.module_fini = accel_mlx5_deinit,
		.write_config_json = accel_mlx5_write_config_json,
		.get_ctx_size = accel_mlx5_get_ctx_size,
		.name = "mlx5",
		.supports_opcode = accel_mlx5_supports_opcode,
		.get_io_channel = accel_mlx5_get_io_channel,
		.submit_tasks = accel_mlx5_submit_tasks,
		.crypto_key_init = accel_mlx5_crypto_key_init,
		.crypto_key_deinit = accel_mlx5_crypto_key_deinit,
		.crypto_supports_cipher = accel_mlx5_crypto_supports_cipher,
		.get_memory_domains = accel_mlx5_get_memory_domains,
	}
};

SPDK_LOG_REGISTER_COMPONENT(accel_mlx5)