1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 */ 4 5 #include "spdk/env.h" 6 #include "spdk/thread.h" 7 #include "spdk/queue.h" 8 #include "spdk/log.h" 9 #include "spdk/string.h" 10 #include "spdk/likely.h" 11 #include "spdk/dma.h" 12 #include "spdk/json.h" 13 #include "spdk/util.h" 14 15 #include "spdk_internal/mlx5.h" 16 #include "spdk_internal/rdma_utils.h" 17 #include "spdk/accel_module.h" 18 #include "spdk_internal/assert.h" 19 #include "spdk_internal/sgl.h" 20 #include "accel_mlx5.h" 21 22 #include <infiniband/mlx5dv.h> 23 #include <rdma/rdma_cma.h> 24 25 #define ACCEL_MLX5_QP_SIZE (256u) 26 #define ACCEL_MLX5_NUM_REQUESTS (2048u - 1) 27 #define ACCEL_MLX5_RECOVER_POLLER_PERIOD_US (10000) 28 #define ACCEL_MLX5_MAX_SGE (16u) 29 #define ACCEL_MLX5_MAX_WC (64u) 30 #define ACCEL_MLX5_MAX_MKEYS_IN_TASK (16u) 31 32 /* Assume we have up to 16 devices */ 33 #define ACCEL_MLX5_ALLOWED_DEVS_MAX_LEN ((SPDK_MLX5_DEV_MAX_NAME_LEN + 1) * 16) 34 35 #define ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, task) \ 36 do { \ 37 assert((qp)->wrs_submitted < (qp)->wrs_max); \ 38 (qp)->wrs_submitted++; \ 39 assert((task)->num_wrs < UINT16_MAX); \ 40 (task)->num_wrs++; \ 41 } while (0) 42 43 #define ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, task) \ 44 do { \ 45 assert((dev)->wrs_in_cq < (dev)->wrs_in_cq_max); \ 46 (dev)->wrs_in_cq++; \ 47 assert((qp)->wrs_submitted < (qp)->wrs_max); \ 48 (qp)->wrs_submitted++; \ 49 assert((task)->num_wrs < UINT16_MAX); \ 50 (task)->num_wrs++; \ 51 } while (0) 52 53 struct accel_mlx5_io_channel; 54 struct accel_mlx5_task; 55 56 struct accel_mlx5_dev_ctx { 57 struct ibv_context *context; 58 struct ibv_pd *pd; 59 struct spdk_memory_domain *domain; 60 TAILQ_ENTRY(accel_mlx5_dev_ctx) link; 61 bool crypto_mkeys; 62 bool crypto_multi_block; 63 }; 64 65 struct accel_mlx5_module { 66 struct spdk_accel_module_if module; 67 struct accel_mlx5_dev_ctx *dev_ctxs; 68 uint32_t num_ctxs; 69 struct accel_mlx5_attr attr; 70 char **allowed_devs; 71 size_t allowed_devs_count; 72 bool initialized; 73 bool enabled; 74 bool crypto_supported; 75 }; 76 77 struct accel_mlx5_sge { 78 uint32_t src_sge_count; 79 uint32_t dst_sge_count; 80 struct ibv_sge src_sge[ACCEL_MLX5_MAX_SGE]; 81 struct ibv_sge dst_sge[ACCEL_MLX5_MAX_SGE]; 82 }; 83 84 struct accel_mlx5_iov_sgl { 85 struct iovec *iov; 86 uint32_t iovcnt; 87 uint32_t iov_offset; 88 }; 89 90 enum accel_mlx5_opcode { 91 ACCEL_MLX5_OPC_COPY, 92 ACCEL_MLX5_OPC_CRYPTO, 93 ACCEL_MLX5_OPC_LAST 94 }; 95 96 struct accel_mlx5_task { 97 struct spdk_accel_task base; 98 struct accel_mlx5_iov_sgl src; 99 struct accel_mlx5_iov_sgl dst; 100 struct accel_mlx5_qp *qp; 101 STAILQ_ENTRY(accel_mlx5_task) link; 102 uint16_t num_reqs; 103 uint16_t num_completed_reqs; 104 uint16_t num_submitted_reqs; 105 uint16_t num_ops; /* number of allocated mkeys or number of operations */ 106 uint16_t blocks_per_req; 107 uint16_t num_processed_blocks; 108 uint16_t num_blocks; 109 uint16_t num_wrs; /* Number of outstanding operations which consume qp slot */ 110 union { 111 uint8_t raw; 112 struct { 113 uint8_t inplace : 1; 114 uint8_t enc_order : 2; 115 uint8_t mlx5_opcode: 5; 116 }; 117 }; 118 /* Keep this array last since not all elements might be accessed, this reduces amount of data to be 119 * cached */ 120 struct spdk_mlx5_mkey_pool_obj *mkeys[ACCEL_MLX5_MAX_MKEYS_IN_TASK]; 121 }; 122 123 struct accel_mlx5_qp { 124 struct spdk_mlx5_qp *qp; 125 struct ibv_qp *verbs_qp; 126 struct accel_mlx5_dev *dev; 
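	/* io_channel this qp belongs to */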
127 struct accel_mlx5_io_channel *ch; 128 /* tasks submitted to HW. We can't complete a task even in error case until we reap completions for all 129 * submitted requests */ 130 STAILQ_HEAD(, accel_mlx5_task) in_hw; 131 uint16_t wrs_submitted; 132 uint16_t wrs_max; 133 bool recovering; 134 struct spdk_poller *recover_poller; 135 }; 136 137 struct accel_mlx5_dev { 138 struct accel_mlx5_qp qp; 139 struct spdk_mlx5_cq *cq; 140 struct spdk_mlx5_mkey_pool *crypto_mkeys; 141 struct spdk_rdma_utils_mem_map *mmap; 142 struct accel_mlx5_dev_ctx *dev_ctx; 143 uint16_t wrs_in_cq; 144 uint16_t wrs_in_cq_max; 145 uint16_t crypto_split_blocks; 146 bool crypto_multi_block; 147 /* Pending tasks waiting for requests resources */ 148 STAILQ_HEAD(, accel_mlx5_task) nomem; 149 TAILQ_ENTRY(accel_mlx5_dev) link; 150 }; 151 152 struct accel_mlx5_io_channel { 153 struct accel_mlx5_dev *devs; 154 struct spdk_poller *poller; 155 uint32_t num_devs; 156 /* Index in \b devs to be used for operations in round-robin way */ 157 uint32_t dev_idx; 158 }; 159 160 struct accel_mlx5_task_operations { 161 int (*init)(struct accel_mlx5_task *task); 162 int (*process)(struct accel_mlx5_task *task); 163 int (*cont)(struct accel_mlx5_task *task); 164 void (*complete)(struct accel_mlx5_task *task); 165 }; 166 167 static struct accel_mlx5_module g_accel_mlx5; 168 169 static inline void 170 accel_mlx5_iov_sgl_init(struct accel_mlx5_iov_sgl *s, struct iovec *iov, uint32_t iovcnt) 171 { 172 s->iov = iov; 173 s->iovcnt = iovcnt; 174 s->iov_offset = 0; 175 } 176 177 static inline void 178 accel_mlx5_iov_sgl_advance(struct accel_mlx5_iov_sgl *s, uint32_t step) 179 { 180 s->iov_offset += step; 181 while (s->iovcnt > 0) { 182 assert(s->iov != NULL); 183 if (s->iov_offset < s->iov->iov_len) { 184 break; 185 } 186 187 s->iov_offset -= s->iov->iov_len; 188 s->iov++; 189 s->iovcnt--; 190 } 191 } 192 193 static inline void 194 accel_mlx5_iov_sgl_unwind(struct accel_mlx5_iov_sgl *s, uint32_t max_iovs, uint32_t step) 195 { 196 SPDK_DEBUGLOG(accel_mlx5, "iov %p, iovcnt %u, max %u, offset %u, step %u\n", s->iov, s->iovcnt, 197 max_iovs, s->iov_offset, step); 198 while (s->iovcnt <= max_iovs) { 199 assert(s->iov != NULL); 200 if (s->iov_offset >= step) { 201 s->iov_offset -= step; 202 SPDK_DEBUGLOG(accel_mlx5, "\tEND, iov %p, iovcnt %u, offset %u\n", s->iov, s->iovcnt, 203 s->iov_offset); 204 return; 205 } 206 step -= s->iov_offset; 207 s->iov--; 208 s->iovcnt++; 209 s->iov_offset = s->iov->iov_len; 210 SPDK_DEBUGLOG(accel_mlx5, "\tiov %p, iovcnt %u, offset %u, step %u\n", s->iov, s->iovcnt, 211 s->iov_offset, step); 212 } 213 214 SPDK_ERRLOG("Can't unwind iovs, remaining %u\n", step); 215 assert(0); 216 } 217 218 static inline int 219 accel_mlx5_sge_unwind(struct ibv_sge *sge, uint32_t sge_count, uint32_t step) 220 { 221 int i; 222 223 assert(sge_count > 0); 224 SPDK_DEBUGLOG(accel_mlx5, "sge %p, count %u, step %u\n", sge, sge_count, step); 225 for (i = (int)sge_count - 1; i >= 0; i--) { 226 if (sge[i].length > step) { 227 sge[i].length -= step; 228 SPDK_DEBUGLOG(accel_mlx5, "\tsge[%u] len %u, step %u\n", i, sge[i].length, step); 229 return (int)i + 1; 230 } 231 SPDK_DEBUGLOG(accel_mlx5, "\tsge[%u] len %u, step %u\n", i, sge[i].length, step); 232 step -= sge[i].length; 233 } 234 235 SPDK_ERRLOG("Can't unwind sge, remaining %u\n", step); 236 assert(step == 0); 237 238 return 0; 239 } 240 241 static inline void 242 accel_mlx5_crypto_task_complete(struct accel_mlx5_task *task) 243 { 244 struct accel_mlx5_dev *dev = task->qp->dev; 245 246 
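	/* Crypto mkeys taken for this task are returned to the device's pool before the task is completed in the accel framework */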
assert(task->num_ops); 247 spdk_mlx5_mkey_pool_put_bulk(dev->crypto_mkeys, task->mkeys, task->num_ops); 248 spdk_accel_task_complete(&task->base, 0); 249 } 250 251 static inline void 252 accel_mlx5_task_fail(struct accel_mlx5_task *task, int rc) 253 { 254 struct accel_mlx5_dev *dev = task->qp->dev; 255 256 assert(task->num_reqs == task->num_completed_reqs); 257 SPDK_DEBUGLOG(accel_mlx5, "Fail task %p, opc %d, rc %d\n", task, task->base.op_code, rc); 258 259 if (task->num_ops) { 260 if (task->mlx5_opcode == ACCEL_MLX5_OPC_CRYPTO) { 261 spdk_mlx5_mkey_pool_put_bulk(dev->crypto_mkeys, task->mkeys, task->num_ops); 262 } 263 } 264 spdk_accel_task_complete(&task->base, rc); 265 } 266 267 static int 268 accel_mlx5_translate_addr(void *addr, size_t size, struct spdk_memory_domain *domain, 269 void *domain_ctx, struct accel_mlx5_dev *dev, struct ibv_sge *sge) 270 { 271 struct spdk_rdma_utils_memory_translation map_translation; 272 struct spdk_memory_domain_translation_result domain_translation; 273 struct spdk_memory_domain_translation_ctx local_ctx; 274 int rc; 275 276 if (domain) { 277 domain_translation.size = sizeof(struct spdk_memory_domain_translation_result); 278 local_ctx.size = sizeof(local_ctx); 279 local_ctx.rdma.ibv_qp = dev->qp.verbs_qp; 280 rc = spdk_memory_domain_translate_data(domain, domain_ctx, dev->dev_ctx->domain, 281 &local_ctx, addr, size, &domain_translation); 282 if (spdk_unlikely(rc || domain_translation.iov_count != 1)) { 283 SPDK_ERRLOG("Memory domain translation failed, addr %p, length %zu, iovcnt %u\n", addr, size, 284 domain_translation.iov_count); 285 if (rc == 0) { 286 rc = -EINVAL; 287 } 288 289 return rc; 290 } 291 sge->lkey = domain_translation.rdma.lkey; 292 sge->addr = (uint64_t) domain_translation.iov.iov_base; 293 sge->length = domain_translation.iov.iov_len; 294 } else { 295 rc = spdk_rdma_utils_get_translation(dev->mmap, addr, size, 296 &map_translation); 297 if (spdk_unlikely(rc)) { 298 SPDK_ERRLOG("Memory translation failed, addr %p, length %zu\n", addr, size); 299 return rc; 300 } 301 sge->lkey = spdk_rdma_utils_memory_translation_get_lkey(&map_translation); 302 sge->addr = (uint64_t)addr; 303 sge->length = size; 304 } 305 306 return 0; 307 } 308 309 static inline int 310 accel_mlx5_fill_block_sge(struct accel_mlx5_dev *dev, struct ibv_sge *sge, 311 struct accel_mlx5_iov_sgl *iovs, uint32_t len, uint32_t *_remaining, 312 struct spdk_memory_domain *domain, void *domain_ctx) 313 { 314 void *addr; 315 uint32_t remaining = len; 316 uint32_t size; 317 int i = 0; 318 int rc; 319 320 while (remaining && i < (int)ACCEL_MLX5_MAX_SGE) { 321 size = spdk_min(remaining, iovs->iov->iov_len - iovs->iov_offset); 322 addr = (void *)iovs->iov->iov_base + iovs->iov_offset; 323 rc = accel_mlx5_translate_addr(addr, size, domain, domain_ctx, dev, &sge[i]); 324 if (spdk_unlikely(rc)) { 325 return rc; 326 } 327 SPDK_DEBUGLOG(accel_mlx5, "\t sge[%d]: lkey %u, len %u, addr %"PRIx64"\n", i, sge[i].lkey, 328 sge[i].length, sge[i].addr); 329 accel_mlx5_iov_sgl_advance(iovs, size); 330 i++; 331 assert(remaining >= size); 332 remaining -= size; 333 } 334 *_remaining = remaining; 335 336 return i; 337 } 338 339 static inline bool 340 accel_mlx5_compare_iovs(struct iovec *v1, struct iovec *v2, uint32_t iovcnt) 341 { 342 return memcmp(v1, v2, sizeof(*v1) * iovcnt) == 0; 343 } 344 345 static inline uint16_t 346 accel_mlx5_dev_get_available_slots(struct accel_mlx5_dev *dev, struct accel_mlx5_qp *qp) 347 { 348 assert(qp->wrs_max >= qp->wrs_submitted); 349 assert(dev->wrs_in_cq_max >= 
dev->wrs_in_cq); 350 351 /* Each time we produce only 1 CQE, so we need 1 CQ slot */ 352 if (spdk_unlikely(dev->wrs_in_cq == dev->wrs_in_cq_max)) { 353 return 0; 354 } 355 356 return qp->wrs_max - qp->wrs_submitted; 357 } 358 359 static inline uint32_t 360 accel_mlx5_task_alloc_mkeys(struct accel_mlx5_task *task) 361 { 362 struct accel_mlx5_dev *dev = task->qp->dev; 363 uint32_t num_ops; 364 int rc; 365 366 assert(task->num_reqs > task->num_completed_reqs); 367 num_ops = task->num_reqs - task->num_completed_reqs; 368 num_ops = spdk_min(num_ops, ACCEL_MLX5_MAX_MKEYS_IN_TASK); 369 if (!num_ops) { 370 return 0; 371 } 372 rc = spdk_mlx5_mkey_pool_get_bulk(dev->crypto_mkeys, task->mkeys, num_ops); 373 if (spdk_unlikely(rc)) { 374 return 0; 375 } 376 assert(num_ops <= UINT16_MAX); 377 task->num_ops = num_ops; 378 379 return num_ops; 380 } 381 382 static inline uint8_t 383 bs_to_bs_selector(uint32_t bs) 384 { 385 switch (bs) { 386 case 512: 387 return SPDK_MLX5_BLOCK_SIZE_SELECTOR_512; 388 case 520: 389 return SPDK_MLX5_BLOCK_SIZE_SELECTOR_520; 390 case 4096: 391 return SPDK_MLX5_BLOCK_SIZE_SELECTOR_4096; 392 case 4160: 393 return SPDK_MLX5_BLOCK_SIZE_SELECTOR_4160; 394 default: 395 return SPDK_MLX5_BLOCK_SIZE_SELECTOR_RESERVED; 396 } 397 } 398 399 static inline int 400 accel_mlx5_configure_crypto_umr(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_sge *sge, 401 uint32_t mkey, uint32_t num_blocks, struct spdk_mlx5_crypto_dek_data *dek_data) 402 { 403 struct spdk_mlx5_umr_crypto_attr cattr; 404 struct spdk_mlx5_umr_attr umr_attr; 405 struct accel_mlx5_qp *qp = mlx5_task->qp; 406 struct accel_mlx5_dev *dev = qp->dev; 407 struct spdk_accel_task *task = &mlx5_task->base; 408 uint32_t length, remaining = 0, block_size = task->block_size; 409 int rc; 410 411 length = num_blocks * block_size; 412 SPDK_DEBUGLOG(accel_mlx5, "task %p, domain %p, len %u, blocks %u\n", task, task->src_domain, length, 413 num_blocks); 414 rc = accel_mlx5_fill_block_sge(dev, sge->src_sge, &mlx5_task->src, length, &remaining, 415 task->src_domain, task->src_domain_ctx); 416 if (spdk_unlikely(rc <= 0)) { 417 if (rc == 0) { 418 rc = -EINVAL; 419 } 420 SPDK_ERRLOG("failed set src sge, rc %d\n", rc); 421 return rc; 422 } 423 sge->src_sge_count = rc; 424 if (spdk_unlikely(remaining)) { 425 uint32_t new_len = length - remaining; 426 uint32_t aligned_len, updated_num_blocks; 427 428 SPDK_DEBUGLOG(accel_mlx5, "Incorrect src iovs, handled %u out of %u bytes\n", new_len, length); 429 if (new_len < block_size) { 430 /* We need to process at least 1 block. 
If buffer is too fragmented, we can't do 431 * anything */ 432 return -ERANGE; 433 } 434 435 /* Regular integer division, we need to round down to prev block size */ 436 updated_num_blocks = new_len / block_size; 437 assert(updated_num_blocks); 438 assert(updated_num_blocks < num_blocks); 439 aligned_len = updated_num_blocks * block_size; 440 441 if (aligned_len < new_len) { 442 uint32_t dt = new_len - aligned_len; 443 444 /* We can't process part of block, need to unwind src iov_sgl and sge to the 445 * prev block boundary */ 446 SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind src sge for %u bytes\n", task, dt); 447 accel_mlx5_iov_sgl_unwind(&mlx5_task->src, task->s.iovcnt, dt); 448 sge->src_sge_count = accel_mlx5_sge_unwind(sge->src_sge, sge->src_sge_count, dt); 449 if (!sge->src_sge_count) { 450 return -ERANGE; 451 } 452 } 453 SPDK_DEBUGLOG(accel_mlx5, "task %p, UMR len %u -> %u\n", task, length, aligned_len); 454 length = aligned_len; 455 num_blocks = updated_num_blocks; 456 } 457 458 cattr.xts_iv = task->iv + mlx5_task->num_processed_blocks; 459 cattr.keytag = 0; 460 cattr.dek_obj_id = dek_data->dek_obj_id; 461 cattr.tweak_mode = dek_data->tweak_mode; 462 cattr.enc_order = mlx5_task->enc_order; 463 cattr.bs_selector = bs_to_bs_selector(mlx5_task->base.block_size); 464 if (spdk_unlikely(cattr.bs_selector == SPDK_MLX5_BLOCK_SIZE_SELECTOR_RESERVED)) { 465 SPDK_ERRLOG("unsupported block size %u\n", mlx5_task->base.block_size); 466 return -EINVAL; 467 } 468 umr_attr.mkey = mkey; 469 umr_attr.sge = sge->src_sge; 470 471 if (!mlx5_task->inplace) { 472 SPDK_DEBUGLOG(accel_mlx5, "task %p, dst sge, domain %p, len %u\n", task, task->dst_domain, length); 473 rc = accel_mlx5_fill_block_sge(dev, sge->dst_sge, &mlx5_task->dst, length, &remaining, 474 task->dst_domain, task->dst_domain_ctx); 475 if (spdk_unlikely(rc <= 0)) { 476 if (rc == 0) { 477 rc = -EINVAL; 478 } 479 SPDK_ERRLOG("failed set dst sge, rc %d\n", rc); 480 return rc; 481 } 482 sge->dst_sge_count = rc; 483 if (spdk_unlikely(remaining)) { 484 uint32_t new_len = length - remaining; 485 uint32_t aligned_len, updated_num_blocks, dt; 486 487 SPDK_DEBUGLOG(accel_mlx5, "Incorrect dst iovs, handled %u out of %u bytes\n", new_len, length); 488 if (new_len < block_size) { 489 /* We need to process at least 1 block. If buffer is too fragmented, we can't do 490 * anything */ 491 return -ERANGE; 492 } 493 494 /* Regular integer division, we need to round down to prev block size */ 495 updated_num_blocks = new_len / block_size; 496 assert(updated_num_blocks); 497 assert(updated_num_blocks < num_blocks); 498 aligned_len = updated_num_blocks * block_size; 499 500 if (aligned_len < new_len) { 501 dt = new_len - aligned_len; 502 assert(dt > 0 && dt < length); 503 /* We can't process part of block, need to unwind src and dst iov_sgl and sge to the 504 * prev block boundary */ 505 SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind dst sge for %u bytes\n", task, dt); 506 accel_mlx5_iov_sgl_unwind(&mlx5_task->dst, task->d.iovcnt, dt); 507 sge->dst_sge_count = accel_mlx5_sge_unwind(sge->dst_sge, sge->dst_sge_count, dt); 508 assert(sge->dst_sge_count > 0 && sge->dst_sge_count <= ACCEL_MLX5_MAX_SGE); 509 if (!sge->dst_sge_count) { 510 return -ERANGE; 511 } 512 } 513 assert(length > aligned_len); 514 dt = length - aligned_len; 515 SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind src sge for %u bytes\n", task, dt); 516 /* The same for src iov_sgl and sge. 
In worst case we can unwind SRC 2 times */ 517 accel_mlx5_iov_sgl_unwind(&mlx5_task->src, task->s.iovcnt, dt); 518 sge->src_sge_count = accel_mlx5_sge_unwind(sge->src_sge, sge->src_sge_count, dt); 519 assert(sge->src_sge_count > 0 && sge->src_sge_count <= ACCEL_MLX5_MAX_SGE); 520 if (!sge->src_sge_count) { 521 return -ERANGE; 522 } 523 SPDK_DEBUGLOG(accel_mlx5, "task %p, UMR len %u -> %u\n", task, length, aligned_len); 524 length = aligned_len; 525 num_blocks = updated_num_blocks; 526 } 527 } 528 529 SPDK_DEBUGLOG(accel_mlx5, 530 "task %p: bs %u, iv %"PRIu64", enc_on_tx %d, tweak_mode %d, len %u, mkey %x, blocks %u\n", 531 mlx5_task, task->block_size, cattr.xts_iv, mlx5_task->enc_order, cattr.tweak_mode, length, mkey, 532 num_blocks); 533 534 umr_attr.sge_count = sge->src_sge_count; 535 umr_attr.umr_len = length; 536 assert((uint32_t)mlx5_task->num_processed_blocks + num_blocks <= UINT16_MAX); 537 mlx5_task->num_processed_blocks += num_blocks; 538 539 rc = spdk_mlx5_umr_configure_crypto(qp->qp, &umr_attr, &cattr, 0, 0); 540 541 return rc; 542 } 543 544 static inline int 545 accel_mlx5_crypto_task_process(struct accel_mlx5_task *mlx5_task) 546 { 547 struct accel_mlx5_sge sges[ACCEL_MLX5_MAX_MKEYS_IN_TASK]; 548 struct spdk_mlx5_crypto_dek_data dek_data; 549 struct accel_mlx5_qp *qp = mlx5_task->qp; 550 struct accel_mlx5_dev *dev = qp->dev; 551 /* First RDMA after UMR must have a SMALL_FENCE */ 552 uint32_t first_rdma_fence = SPDK_MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE; 553 uint16_t num_blocks; 554 uint16_t num_ops = spdk_min(mlx5_task->num_reqs - mlx5_task->num_completed_reqs, 555 mlx5_task->num_ops); 556 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp); 557 uint16_t i; 558 int rc; 559 560 assert(qp_slot > 1); 561 num_ops = spdk_min(num_ops, qp_slot >> 1); 562 if (spdk_unlikely(!num_ops)) { 563 return -EINVAL; 564 } 565 566 rc = spdk_mlx5_crypto_get_dek_data(mlx5_task->base.crypto_key->priv, dev->dev_ctx->pd, &dek_data); 567 if (spdk_unlikely(rc)) { 568 return rc; 569 } 570 571 mlx5_task->num_wrs = 0; 572 SPDK_DEBUGLOG(accel_mlx5, "begin, task, %p, reqs: total %u, submitted %u, completed %u\n", 573 mlx5_task, mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs); 574 for (i = 0; i < num_ops; i++) { 575 if (mlx5_task->num_submitted_reqs + i + 1 == mlx5_task->num_reqs) { 576 /* Last request may consume less than calculated if crypto_multi_block is true */ 577 assert(mlx5_task->num_blocks > mlx5_task->num_submitted_reqs); 578 num_blocks = mlx5_task->num_blocks - mlx5_task->num_processed_blocks; 579 } else { 580 num_blocks = mlx5_task->blocks_per_req; 581 } 582 583 rc = accel_mlx5_configure_crypto_umr(mlx5_task, &sges[i], mlx5_task->mkeys[i]->mkey, num_blocks, 584 &dek_data); 585 if (spdk_unlikely(rc)) { 586 SPDK_ERRLOG("UMR configure failed with %d\n", rc); 587 return rc; 588 } 589 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task); 590 } 591 592 /* Loop `num_ops - 1` for easy flags handling */ 593 for (i = 0; i < num_ops - 1; i++) { 594 /* UMR is used as a destination for RDMA_READ - from UMR to sge */ 595 if (mlx5_task->inplace) { 596 rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].src_sge, sges[i].src_sge_count, 0, 597 mlx5_task->mkeys[i]->mkey, 0, first_rdma_fence); 598 } else { 599 rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].dst_sge, sges[i].dst_sge_count, 0, 600 mlx5_task->mkeys[i]->mkey, 0, first_rdma_fence); 601 } 602 if (spdk_unlikely(rc)) { 603 SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc); 604 return rc; 605 } 606 607 first_rdma_fence = 0; 608 
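		/* Reads posted in this loop are unsignaled (wr_id 0, no CQ update flag); only the final read after the loop requests a CQE and carries the task pointer as wr_id */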
assert(mlx5_task->num_submitted_reqs < mlx5_task->num_reqs); 609 assert(mlx5_task->num_submitted_reqs < UINT16_MAX); 610 mlx5_task->num_submitted_reqs++; 611 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task); 612 } 613 614 if (mlx5_task->inplace) { 615 rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].src_sge, sges[i].src_sge_count, 0, 616 mlx5_task->mkeys[i]->mkey, (uint64_t)mlx5_task, first_rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE); 617 } else { 618 rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].dst_sge, sges[i].dst_sge_count, 0, 619 mlx5_task->mkeys[i]->mkey, (uint64_t)mlx5_task, first_rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE); 620 } 621 if (spdk_unlikely(rc)) { 622 SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc); 623 return rc; 624 } 625 626 assert(mlx5_task->num_submitted_reqs < mlx5_task->num_reqs); 627 assert(mlx5_task->num_submitted_reqs < UINT16_MAX); 628 mlx5_task->num_submitted_reqs++; 629 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task); 630 STAILQ_INSERT_TAIL(&qp->in_hw, mlx5_task, link); 631 632 if (spdk_unlikely(mlx5_task->num_submitted_reqs == mlx5_task->num_reqs && 633 mlx5_task->num_blocks > mlx5_task->num_processed_blocks)) { 634 /* We hit "out of sge 635 * entries" case with highly fragmented payload. In that case 636 * accel_mlx5_configure_crypto_umr function handled fewer data blocks than expected 637 * That means we need at least 1 more request to complete this task, this request will be 638 * executed once all submitted ones are completed */ 639 SPDK_DEBUGLOG(accel_mlx5, "task %p, processed %u/%u blocks, add extra req\n", mlx5_task, 640 mlx5_task->num_processed_blocks, mlx5_task->num_blocks); 641 mlx5_task->num_reqs++; 642 } 643 644 SPDK_DEBUGLOG(accel_mlx5, "end, task, %p, reqs: total %u, submitted %u, completed %u\n", mlx5_task, 645 mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs); 646 647 return 0; 648 } 649 650 static inline int 651 accel_mlx5_crypto_task_continue(struct accel_mlx5_task *task) 652 { 653 struct accel_mlx5_qp *qp = task->qp; 654 struct accel_mlx5_dev *dev = qp->dev; 655 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp); 656 657 assert(task->num_reqs > task->num_completed_reqs); 658 if (task->num_ops == 0) { 659 /* No mkeys allocated, try to allocate now */ 660 if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(task))) { 661 /* Pool is empty, queue this task */ 662 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 663 return -ENOMEM; 664 } 665 } 666 /* We need to post at least 1 UMR and 1 RDMA operation */ 667 if (spdk_unlikely(qp_slot < 2)) { 668 /* QP is full, queue this task */ 669 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 670 return -ENOMEM; 671 } 672 673 return accel_mlx5_crypto_task_process(task); 674 } 675 676 static inline int 677 accel_mlx5_crypto_task_init(struct accel_mlx5_task *mlx5_task) 678 { 679 struct spdk_accel_task *task = &mlx5_task->base; 680 struct accel_mlx5_dev *dev = mlx5_task->qp->dev; 681 uint64_t src_nbytes = task->nbytes; 682 #ifdef DEBUG 683 uint64_t dst_nbytes; 684 uint32_t i; 685 #endif 686 bool crypto_key_ok; 687 688 crypto_key_ok = (task->crypto_key && task->crypto_key->module_if == &g_accel_mlx5.module && 689 task->crypto_key->priv); 690 if (spdk_unlikely((task->nbytes % mlx5_task->base.block_size != 0) || !crypto_key_ok)) { 691 if (crypto_key_ok) { 692 SPDK_ERRLOG("src length %"PRIu64" is not a multiple of the block size %u\n", task->nbytes, 693 mlx5_task->base.block_size); 694 } else { 695 SPDK_ERRLOG("Wrong crypto key provided\n"); 696 } 697 return 
-EINVAL; 698 } 699 700 assert(src_nbytes / mlx5_task->base.block_size <= UINT16_MAX); 701 mlx5_task->num_blocks = src_nbytes / mlx5_task->base.block_size; 702 accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt); 703 if (task->d.iovcnt == 0 || (task->d.iovcnt == task->s.iovcnt && 704 accel_mlx5_compare_iovs(task->d.iovs, task->s.iovs, task->s.iovcnt))) { 705 mlx5_task->inplace = 1; 706 } else { 707 #ifdef DEBUG 708 dst_nbytes = 0; 709 for (i = 0; i < task->d.iovcnt; i++) { 710 dst_nbytes += task->d.iovs[i].iov_len; 711 } 712 713 if (spdk_unlikely(src_nbytes != dst_nbytes)) { 714 return -EINVAL; 715 } 716 #endif 717 mlx5_task->inplace = 0; 718 accel_mlx5_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt); 719 } 720 721 if (dev->crypto_multi_block) { 722 if (dev->crypto_split_blocks) { 723 assert(SPDK_CEIL_DIV(mlx5_task->num_blocks, dev->crypto_split_blocks) <= UINT16_MAX); 724 mlx5_task->num_reqs = SPDK_CEIL_DIV(mlx5_task->num_blocks, dev->crypto_split_blocks); 725 /* Last req may consume less blocks */ 726 mlx5_task->blocks_per_req = spdk_min(mlx5_task->num_blocks, dev->crypto_split_blocks); 727 } else { 728 if (task->s.iovcnt > ACCEL_MLX5_MAX_SGE || task->d.iovcnt > ACCEL_MLX5_MAX_SGE) { 729 uint32_t max_sge_count = spdk_max(task->s.iovcnt, task->d.iovcnt); 730 731 assert(SPDK_CEIL_DIV(max_sge_count, ACCEL_MLX5_MAX_SGE) <= UINT16_MAX); 732 mlx5_task->num_reqs = SPDK_CEIL_DIV(max_sge_count, ACCEL_MLX5_MAX_SGE); 733 mlx5_task->blocks_per_req = SPDK_CEIL_DIV(mlx5_task->num_blocks, mlx5_task->num_reqs); 734 } else { 735 mlx5_task->num_reqs = 1; 736 mlx5_task->blocks_per_req = mlx5_task->num_blocks; 737 } 738 } 739 } else { 740 mlx5_task->num_reqs = mlx5_task->num_blocks; 741 mlx5_task->blocks_per_req = 1; 742 } 743 744 if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(mlx5_task))) { 745 /* Pool is empty, queue this task */ 746 SPDK_DEBUGLOG(accel_mlx5, "no reqs in pool, dev %s\n", dev->dev_ctx->context->device->name); 747 return -ENOMEM; 748 } 749 if (spdk_unlikely(accel_mlx5_dev_get_available_slots(dev, &dev->qp) < 2)) { 750 /* Queue is full, queue this task */ 751 SPDK_DEBUGLOG(accel_mlx5, "dev %s qp %p is full\n", dev->dev_ctx->context->device->name, 752 mlx5_task->qp); 753 return -ENOMEM; 754 } 755 756 SPDK_DEBUGLOG(accel_mlx5, "task %p, src_iovs %u, dst_iovs %u, num_reqs %u, " 757 "blocks/req %u, blocks %u, inplace %d\n", task, task->s.iovcnt, task->d.iovcnt, 758 mlx5_task->num_reqs, mlx5_task->blocks_per_req, mlx5_task->num_blocks, mlx5_task->inplace); 759 760 return 0; 761 } 762 763 static inline void 764 accel_mlx5_copy_task_complete(struct accel_mlx5_task *mlx5_task) 765 { 766 spdk_accel_task_complete(&mlx5_task->base, 0); 767 } 768 769 static inline int 770 accel_mlx5_copy_task_process_one(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_qp *qp, 771 uint64_t wrid, uint32_t fence) 772 { 773 struct spdk_accel_task *task = &mlx5_task->base; 774 struct accel_mlx5_sge sge; 775 uint32_t remaining; 776 uint32_t dst_len; 777 int rc; 778 779 /* Limit one RDMA_WRITE by length of dst buffer. Not all src buffers may fit into one dst buffer due to 780 * limitation on ACCEL_MLX5_MAX_SGE. 
If this is the case then remaining is not zero */ 781 assert(mlx5_task->dst.iov->iov_len > mlx5_task->dst.iov_offset); 782 dst_len = mlx5_task->dst.iov->iov_len - mlx5_task->dst.iov_offset; 783 rc = accel_mlx5_fill_block_sge(qp->dev, sge.src_sge, &mlx5_task->src, dst_len, &remaining, 784 task->src_domain, task->src_domain_ctx); 785 if (spdk_unlikely(rc <= 0)) { 786 if (rc == 0) { 787 rc = -EINVAL; 788 } 789 SPDK_ERRLOG("failed set src sge, rc %d\n", rc); 790 return rc; 791 } 792 sge.src_sge_count = rc; 793 assert(dst_len > remaining); 794 dst_len -= remaining; 795 796 rc = accel_mlx5_fill_block_sge(qp->dev, sge.dst_sge, &mlx5_task->dst, dst_len, &remaining, 797 task->dst_domain, task->dst_domain_ctx); 798 if (spdk_unlikely(rc != 1)) { 799 /* We use single dst entry, any result other than 1 is an error */ 800 if (rc == 0) { 801 rc = -EINVAL; 802 } 803 SPDK_ERRLOG("failed set dst sge, rc %d\n", rc); 804 return rc; 805 } 806 if (spdk_unlikely(remaining)) { 807 SPDK_ERRLOG("Incorrect dst length, remaining %u\n", remaining); 808 assert(0); 809 return -EINVAL; 810 } 811 812 rc = spdk_mlx5_qp_rdma_write(mlx5_task->qp->qp, sge.src_sge, sge.src_sge_count, 813 sge.dst_sge[0].addr, sge.dst_sge[0].lkey, wrid, fence); 814 if (spdk_unlikely(rc)) { 815 SPDK_ERRLOG("new RDMA WRITE failed with %d\n", rc); 816 return rc; 817 } 818 819 return 0; 820 } 821 822 static inline int 823 accel_mlx5_copy_task_process(struct accel_mlx5_task *mlx5_task) 824 { 825 826 struct accel_mlx5_qp *qp = mlx5_task->qp; 827 struct accel_mlx5_dev *dev = qp->dev; 828 uint16_t i; 829 int rc; 830 831 mlx5_task->num_wrs = 0; 832 assert(mlx5_task->num_reqs > 0); 833 assert(mlx5_task->num_ops > 0); 834 835 /* Handle n-1 reqs in order to simplify wrid and fence handling */ 836 for (i = 0; i < mlx5_task->num_ops - 1; i++) { 837 rc = accel_mlx5_copy_task_process_one(mlx5_task, qp, 0, 0); 838 if (spdk_unlikely(rc)) { 839 return rc; 840 } 841 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task); 842 mlx5_task->num_submitted_reqs++; 843 } 844 845 rc = accel_mlx5_copy_task_process_one(mlx5_task, qp, (uint64_t)mlx5_task, 846 SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE); 847 if (spdk_unlikely(rc)) { 848 return rc; 849 } 850 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task); 851 mlx5_task->num_submitted_reqs++; 852 STAILQ_INSERT_TAIL(&qp->in_hw, mlx5_task, link); 853 854 SPDK_DEBUGLOG(accel_mlx5, "end, copy task, %p\n", mlx5_task); 855 856 return 0; 857 } 858 859 static inline int 860 accel_mlx5_copy_task_continue(struct accel_mlx5_task *task) 861 { 862 struct accel_mlx5_qp *qp = task->qp; 863 struct accel_mlx5_dev *dev = qp->dev; 864 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp); 865 866 task->num_ops = spdk_min(qp_slot, task->num_reqs - task->num_completed_reqs); 867 if (spdk_unlikely(task->num_ops == 0)) { 868 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 869 return -ENOMEM; 870 } 871 return accel_mlx5_copy_task_process(task); 872 } 873 874 static inline uint32_t 875 accel_mlx5_get_copy_task_count(struct iovec *src_iov, uint32_t src_iovcnt, 876 struct iovec *dst_iov, uint32_t dst_iovcnt) 877 { 878 uint32_t src = 0; 879 uint32_t dst = 0; 880 uint64_t src_offset = 0; 881 uint64_t dst_offset = 0; 882 uint32_t num_ops = 0; 883 uint32_t src_sge_count = 0; 884 885 while (src < src_iovcnt && dst < dst_iovcnt) { 886 uint64_t src_len = src_iov[src].iov_len - src_offset; 887 uint64_t dst_len = dst_iov[dst].iov_len - dst_offset; 888 889 if (dst_len < src_len) { 890 dst_offset = 0; 891 src_offset += dst_len; 892 dst++; 893 num_ops++; 894 
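			/* Current dst buffer is exhausted and a new RDMA_WRITE starts here, so reset the accumulated src SGE count */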
src_sge_count = 0; 895 } else if (src_len < dst_len) { 896 dst_offset += src_len; 897 src_offset = 0; 898 src++; 899 if (++src_sge_count >= ACCEL_MLX5_MAX_SGE) { 900 num_ops++; 901 src_sge_count = 0; 902 } 903 } else { 904 dst_offset = 0; 905 src_offset = 0; 906 dst++; 907 src++; 908 num_ops++; 909 src_sge_count = 0; 910 } 911 } 912 913 assert(src == src_iovcnt); 914 assert(dst == dst_iovcnt); 915 assert(src_offset == 0); 916 assert(dst_offset == 0); 917 return num_ops; 918 } 919 920 static inline int 921 accel_mlx5_copy_task_init(struct accel_mlx5_task *mlx5_task) 922 { 923 struct spdk_accel_task *task = &mlx5_task->base; 924 struct accel_mlx5_qp *qp = mlx5_task->qp; 925 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(qp->dev, qp); 926 927 if (spdk_likely(task->s.iovcnt <= ACCEL_MLX5_MAX_SGE)) { 928 mlx5_task->num_reqs = task->d.iovcnt; 929 } else if (task->d.iovcnt == 1) { 930 mlx5_task->num_reqs = SPDK_CEIL_DIV(task->s.iovcnt, ACCEL_MLX5_MAX_SGE); 931 } else { 932 mlx5_task->num_reqs = accel_mlx5_get_copy_task_count(task->s.iovs, task->s.iovcnt, 933 task->d.iovs, task->d.iovcnt); 934 } 935 mlx5_task->inplace = 0; 936 accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt); 937 accel_mlx5_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt); 938 mlx5_task->num_ops = spdk_min(qp_slot, mlx5_task->num_reqs); 939 if (spdk_unlikely(!mlx5_task->num_ops)) { 940 return -ENOMEM; 941 } 942 SPDK_DEBUGLOG(accel_mlx5, "copy task num_reqs %u, num_ops %u\n", mlx5_task->num_reqs, 943 mlx5_task->num_ops); 944 945 return 0; 946 } 947 948 static int 949 accel_mlx5_task_op_not_implemented(struct accel_mlx5_task *mlx5_task) 950 { 951 SPDK_ERRLOG("wrong function called\n"); 952 SPDK_UNREACHABLE(); 953 } 954 955 static void 956 accel_mlx5_task_op_not_implemented_v(struct accel_mlx5_task *mlx5_task) 957 { 958 SPDK_ERRLOG("wrong function called\n"); 959 SPDK_UNREACHABLE(); 960 } 961 962 static int 963 accel_mlx5_task_op_not_supported(struct accel_mlx5_task *mlx5_task) 964 { 965 SPDK_ERRLOG("Unsupported opcode %d\n", mlx5_task->base.op_code); 966 967 return -ENOTSUP; 968 } 969 970 static struct accel_mlx5_task_operations g_accel_mlx5_tasks_ops[] = { 971 [ACCEL_MLX5_OPC_COPY] = { 972 .init = accel_mlx5_copy_task_init, 973 .process = accel_mlx5_copy_task_process, 974 .cont = accel_mlx5_copy_task_continue, 975 .complete = accel_mlx5_copy_task_complete, 976 }, 977 [ACCEL_MLX5_OPC_CRYPTO] = { 978 .init = accel_mlx5_crypto_task_init, 979 .process = accel_mlx5_crypto_task_process, 980 .cont = accel_mlx5_crypto_task_continue, 981 .complete = accel_mlx5_crypto_task_complete, 982 }, 983 [ACCEL_MLX5_OPC_LAST] = { 984 .init = accel_mlx5_task_op_not_supported, 985 .process = accel_mlx5_task_op_not_implemented, 986 .cont = accel_mlx5_task_op_not_implemented, 987 .complete = accel_mlx5_task_op_not_implemented_v 988 }, 989 }; 990 991 static inline void 992 accel_mlx5_task_complete(struct accel_mlx5_task *task) 993 { 994 assert(task->num_reqs == task->num_completed_reqs); 995 SPDK_DEBUGLOG(accel_mlx5, "Complete task %p, opc %d\n", task, task->base.op_code); 996 997 g_accel_mlx5_tasks_ops[task->mlx5_opcode].complete(task); 998 } 999 1000 static inline int 1001 accel_mlx5_task_continue(struct accel_mlx5_task *task) 1002 { 1003 struct accel_mlx5_qp *qp = task->qp; 1004 struct accel_mlx5_dev *dev = qp->dev; 1005 1006 if (spdk_unlikely(qp->recovering)) { 1007 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 1008 return 0; 1009 } 1010 1011 return g_accel_mlx5_tasks_ops[task->mlx5_opcode].cont(task); 1012 } 1013 
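/*
 * Task lifecycle, as implemented by the functions above and below:
 * accel_mlx5_submit_tasks() picks a dev from the io_channel in round-robin order, resets the
 * task, maps the accel opcode to an mlx5 opcode and calls ops->init() followed by ops->process().
 * Completions are reaped in accel_mlx5_poller() -> accel_mlx5_poll_cq(); a task with outstanding
 * requests is re-driven via ops->cont(), while a finished task ends in ops->complete() or
 * accel_mlx5_task_fail(). Tasks that can't get mkeys or qp slots wait on dev->nomem and are
 * resubmitted by accel_mlx5_resubmit_nomem_tasks().
 */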
static inline void 1014 accel_mlx5_task_init_opcode(struct accel_mlx5_task *mlx5_task) 1015 { 1016 uint8_t base_opcode = mlx5_task->base.op_code; 1017 1018 switch (base_opcode) { 1019 case SPDK_ACCEL_OPC_COPY: 1020 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_COPY; 1021 break; 1022 case SPDK_ACCEL_OPC_ENCRYPT: 1023 assert(g_accel_mlx5.crypto_supported); 1024 mlx5_task->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_WIRE; 1025 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO; 1026 break; 1027 case SPDK_ACCEL_OPC_DECRYPT: 1028 assert(g_accel_mlx5.crypto_supported); 1029 mlx5_task->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_MEMORY; 1030 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO; 1031 break; 1032 default: 1033 SPDK_ERRLOG("wrong opcode %d\n", base_opcode); 1034 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_LAST; 1035 } 1036 } 1037 1038 static inline void 1039 accel_mlx5_task_reset(struct accel_mlx5_task *mlx5_task) 1040 { 1041 mlx5_task->num_completed_reqs = 0; 1042 mlx5_task->num_submitted_reqs = 0; 1043 mlx5_task->num_ops = 0; 1044 mlx5_task->num_processed_blocks = 0; 1045 mlx5_task->raw = 0; 1046 } 1047 1048 static int 1049 accel_mlx5_submit_tasks(struct spdk_io_channel *_ch, struct spdk_accel_task *task) 1050 { 1051 struct accel_mlx5_io_channel *ch = spdk_io_channel_get_ctx(_ch); 1052 struct accel_mlx5_task *mlx5_task = SPDK_CONTAINEROF(task, struct accel_mlx5_task, base); 1053 struct accel_mlx5_dev *dev; 1054 int rc; 1055 1056 /* We should not receive any tasks if the module was not enabled */ 1057 assert(g_accel_mlx5.enabled); 1058 1059 dev = &ch->devs[ch->dev_idx]; 1060 ch->dev_idx++; 1061 if (ch->dev_idx == ch->num_devs) { 1062 ch->dev_idx = 0; 1063 } 1064 1065 mlx5_task->qp = &dev->qp; 1066 accel_mlx5_task_reset(mlx5_task); 1067 accel_mlx5_task_init_opcode(mlx5_task); 1068 1069 rc = g_accel_mlx5_tasks_ops[mlx5_task->mlx5_opcode].init(mlx5_task); 1070 if (spdk_unlikely(rc)) { 1071 if (rc == -ENOMEM) { 1072 SPDK_DEBUGLOG(accel_mlx5, "no reqs to handle new task %p (required %u), put to queue\n", mlx5_task, 1073 mlx5_task->num_reqs); 1074 STAILQ_INSERT_TAIL(&dev->nomem, mlx5_task, link); 1075 return 0; 1076 } 1077 SPDK_ERRLOG("Task opc %d init failed, rc %d\n", task->op_code, rc); 1078 return rc; 1079 } 1080 1081 if (spdk_unlikely(mlx5_task->qp->recovering)) { 1082 STAILQ_INSERT_TAIL(&dev->nomem, mlx5_task, link); 1083 return 0; 1084 } 1085 1086 return g_accel_mlx5_tasks_ops[mlx5_task->mlx5_opcode].process(mlx5_task); 1087 } 1088 1089 static void accel_mlx5_recover_qp(struct accel_mlx5_qp *qp); 1090 1091 static int 1092 accel_mlx5_recover_qp_poller(void *arg) 1093 { 1094 struct accel_mlx5_qp *qp = arg; 1095 1096 spdk_poller_unregister(&qp->recover_poller); 1097 accel_mlx5_recover_qp(qp); 1098 return SPDK_POLLER_BUSY; 1099 } 1100 1101 static void 1102 accel_mlx5_recover_qp(struct accel_mlx5_qp *qp) 1103 { 1104 struct accel_mlx5_dev *dev = qp->dev; 1105 struct spdk_mlx5_qp_attr mlx5_qp_attr = {}; 1106 int rc; 1107 1108 SPDK_NOTICELOG("Recovering qp %p, core %u\n", qp, spdk_env_get_current_core()); 1109 if (qp->qp) { 1110 spdk_mlx5_qp_destroy(qp->qp); 1111 qp->qp = NULL; 1112 } 1113 1114 mlx5_qp_attr.cap.max_send_wr = g_accel_mlx5.attr.qp_size; 1115 mlx5_qp_attr.cap.max_recv_wr = 0; 1116 mlx5_qp_attr.cap.max_send_sge = ACCEL_MLX5_MAX_SGE; 1117 mlx5_qp_attr.cap.max_inline_data = sizeof(struct ibv_sge) * ACCEL_MLX5_MAX_SGE; 1118 1119 rc = spdk_mlx5_qp_create(dev->dev_ctx->pd, dev->cq, &mlx5_qp_attr, &qp->qp); 1120 if (rc) { 1121 SPDK_ERRLOG("Failed to create mlx5 dma QP, rc %d. 
Retry in %d usec\n", 1122 rc, ACCEL_MLX5_RECOVER_POLLER_PERIOD_US); 1123 qp->recover_poller = SPDK_POLLER_REGISTER(accel_mlx5_recover_qp_poller, qp, 1124 ACCEL_MLX5_RECOVER_POLLER_PERIOD_US); 1125 return; 1126 } 1127 1128 qp->recovering = false; 1129 } 1130 1131 static inline void 1132 accel_mlx5_process_error_cpl(struct spdk_mlx5_cq_completion *wc, struct accel_mlx5_task *task) 1133 { 1134 struct accel_mlx5_qp *qp = task->qp; 1135 1136 if (wc->status != IBV_WC_WR_FLUSH_ERR) { 1137 SPDK_WARNLOG("RDMA: qp %p, task %p, WC status %d, core %u\n", 1138 qp, task, wc->status, spdk_env_get_current_core()); 1139 } else { 1140 SPDK_DEBUGLOG(accel_mlx5, 1141 "RDMA: qp %p, task %p, WC status %d, core %u\n", 1142 qp, task, wc->status, spdk_env_get_current_core()); 1143 } 1144 1145 qp->recovering = true; 1146 assert(task->num_completed_reqs <= task->num_submitted_reqs); 1147 if (task->num_completed_reqs == task->num_submitted_reqs) { 1148 STAILQ_REMOVE_HEAD(&qp->in_hw, link); 1149 accel_mlx5_task_fail(task, -EIO); 1150 } 1151 } 1152 1153 static inline int64_t 1154 accel_mlx5_poll_cq(struct accel_mlx5_dev *dev) 1155 { 1156 struct spdk_mlx5_cq_completion wc[ACCEL_MLX5_MAX_WC]; 1157 struct accel_mlx5_task *task; 1158 struct accel_mlx5_qp *qp; 1159 int reaped, i, rc; 1160 uint16_t completed; 1161 1162 reaped = spdk_mlx5_cq_poll_completions(dev->cq, wc, ACCEL_MLX5_MAX_WC); 1163 if (spdk_unlikely(reaped < 0)) { 1164 SPDK_ERRLOG("Error polling CQ! (%d): %s\n", errno, spdk_strerror(errno)); 1165 return reaped; 1166 } else if (reaped == 0) { 1167 return 0; 1168 } 1169 1170 SPDK_DEBUGLOG(accel_mlx5, "Reaped %d cpls on dev %s\n", reaped, 1171 dev->dev_ctx->context->device->name); 1172 1173 for (i = 0; i < reaped; i++) { 1174 if (spdk_unlikely(!wc[i].wr_id)) { 1175 /* Unsignaled completion with error, ignore */ 1176 continue; 1177 } 1178 task = (struct accel_mlx5_task *)wc[i].wr_id; 1179 qp = task->qp; 1180 assert(task == STAILQ_FIRST(&qp->in_hw) && "submission mismatch"); 1181 assert(task->num_submitted_reqs > task->num_completed_reqs); 1182 completed = task->num_submitted_reqs - task->num_completed_reqs; 1183 assert((uint32_t)task->num_completed_reqs + completed <= UINT16_MAX); 1184 task->num_completed_reqs += completed; 1185 assert(qp->wrs_submitted >= task->num_wrs); 1186 qp->wrs_submitted -= task->num_wrs; 1187 assert(dev->wrs_in_cq > 0); 1188 dev->wrs_in_cq--; 1189 1190 if (wc[i].status) { 1191 accel_mlx5_process_error_cpl(&wc[i], task); 1192 if (qp->wrs_submitted == 0) { 1193 assert(STAILQ_EMPTY(&qp->in_hw)); 1194 accel_mlx5_recover_qp(qp); 1195 } 1196 continue; 1197 } 1198 1199 SPDK_DEBUGLOG(accel_mlx5, "task %p, remaining %u\n", task, 1200 task->num_reqs - task->num_completed_reqs); 1201 if (task->num_completed_reqs == task->num_reqs) { 1202 STAILQ_REMOVE_HEAD(&qp->in_hw, link); 1203 accel_mlx5_task_complete(task); 1204 } else { 1205 assert(task->num_submitted_reqs < task->num_reqs); 1206 assert(task->num_completed_reqs == task->num_submitted_reqs); 1207 STAILQ_REMOVE_HEAD(&qp->in_hw, link); 1208 rc = accel_mlx5_task_continue(task); 1209 if (spdk_unlikely(rc)) { 1210 if (rc != -ENOMEM) { 1211 accel_mlx5_task_fail(task, rc); 1212 } 1213 } 1214 } 1215 } 1216 1217 return reaped; 1218 } 1219 1220 static inline void 1221 accel_mlx5_resubmit_nomem_tasks(struct accel_mlx5_dev *dev) 1222 { 1223 struct accel_mlx5_task *task, *tmp, *last; 1224 int rc; 1225 1226 last = STAILQ_LAST(&dev->nomem, accel_mlx5_task, link); 1227 STAILQ_FOREACH_SAFE(task, &dev->nomem, link, tmp) { 1228 STAILQ_REMOVE_HEAD(&dev->nomem, link); 
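		/* Tasks are always taken from the head; if resources are still missing, accel_mlx5_task_continue() re-queues the task at the tail of dev->nomem */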
1229 rc = accel_mlx5_task_continue(task); 1230 if (spdk_unlikely(rc)) { 1231 if (rc != -ENOMEM) { 1232 accel_mlx5_task_fail(task, rc); 1233 } 1234 break; 1235 } 1236 /* If qpair is recovering, task is added back to the nomem list and 0 is returned. In that case we 1237 * need a special condition to iterate the list once and stop this FOREACH loop */ 1238 if (task == last) { 1239 break; 1240 } 1241 } 1242 } 1243 1244 static int 1245 accel_mlx5_poller(void *ctx) 1246 { 1247 struct accel_mlx5_io_channel *ch = ctx; 1248 struct accel_mlx5_dev *dev; 1249 1250 int64_t completions = 0, rc; 1251 uint32_t i; 1252 1253 for (i = 0; i < ch->num_devs; i++) { 1254 dev = &ch->devs[i]; 1255 if (dev->wrs_in_cq) { 1256 rc = accel_mlx5_poll_cq(dev); 1257 if (spdk_unlikely(rc < 0)) { 1258 SPDK_ERRLOG("Error %"PRId64" on CQ, dev %s\n", rc, dev->dev_ctx->context->device->name); 1259 } 1260 completions += rc; 1261 if (dev->qp.wrs_submitted) { 1262 spdk_mlx5_qp_complete_send(dev->qp.qp); 1263 } 1264 } 1265 if (!STAILQ_EMPTY(&dev->nomem)) { 1266 accel_mlx5_resubmit_nomem_tasks(dev); 1267 } 1268 } 1269 1270 return !!completions; 1271 } 1272 1273 static bool 1274 accel_mlx5_supports_opcode(enum spdk_accel_opcode opc) 1275 { 1276 assert(g_accel_mlx5.enabled); 1277 1278 switch (opc) { 1279 case SPDK_ACCEL_OPC_COPY: 1280 return true; 1281 case SPDK_ACCEL_OPC_ENCRYPT: 1282 case SPDK_ACCEL_OPC_DECRYPT: 1283 return g_accel_mlx5.crypto_supported; 1284 default: 1285 return false; 1286 } 1287 } 1288 1289 static struct spdk_io_channel * 1290 accel_mlx5_get_io_channel(void) 1291 { 1292 assert(g_accel_mlx5.enabled); 1293 return spdk_get_io_channel(&g_accel_mlx5); 1294 } 1295 1296 static int 1297 accel_mlx5_create_qp(struct accel_mlx5_dev *dev, struct accel_mlx5_qp *qp) 1298 { 1299 struct spdk_mlx5_qp_attr mlx5_qp_attr = {}; 1300 int rc; 1301 1302 mlx5_qp_attr.cap.max_send_wr = g_accel_mlx5.attr.qp_size; 1303 mlx5_qp_attr.cap.max_recv_wr = 0; 1304 mlx5_qp_attr.cap.max_send_sge = ACCEL_MLX5_MAX_SGE; 1305 mlx5_qp_attr.cap.max_inline_data = sizeof(struct ibv_sge) * ACCEL_MLX5_MAX_SGE; 1306 1307 rc = spdk_mlx5_qp_create(dev->dev_ctx->pd, dev->cq, &mlx5_qp_attr, &qp->qp); 1308 if (rc) { 1309 return rc; 1310 } 1311 1312 STAILQ_INIT(&qp->in_hw); 1313 qp->dev = dev; 1314 qp->verbs_qp = spdk_mlx5_qp_get_verbs_qp(qp->qp); 1315 assert(qp->verbs_qp); 1316 qp->wrs_max = g_accel_mlx5.attr.qp_size; 1317 1318 return 0; 1319 } 1320 1321 static void 1322 accel_mlx5_destroy_cb(void *io_device, void *ctx_buf) 1323 { 1324 struct accel_mlx5_io_channel *ch = ctx_buf; 1325 struct accel_mlx5_dev *dev; 1326 uint32_t i; 1327 1328 spdk_poller_unregister(&ch->poller); 1329 for (i = 0; i < ch->num_devs; i++) { 1330 dev = &ch->devs[i]; 1331 spdk_mlx5_qp_destroy(dev->qp.qp); 1332 if (dev->cq) { 1333 spdk_mlx5_cq_destroy(dev->cq); 1334 } 1335 spdk_poller_unregister(&dev->qp.recover_poller); 1336 if (dev->crypto_mkeys) { 1337 spdk_mlx5_mkey_pool_put_ref(dev->crypto_mkeys); 1338 } 1339 spdk_rdma_utils_free_mem_map(&dev->mmap); 1340 } 1341 free(ch->devs); 1342 } 1343 1344 static int 1345 accel_mlx5_create_cb(void *io_device, void *ctx_buf) 1346 { 1347 struct spdk_mlx5_cq_attr cq_attr = {}; 1348 struct accel_mlx5_io_channel *ch = ctx_buf; 1349 struct accel_mlx5_dev_ctx *dev_ctx; 1350 struct accel_mlx5_dev *dev; 1351 uint32_t i; 1352 int rc; 1353 1354 ch->devs = calloc(g_accel_mlx5.num_ctxs, sizeof(*ch->devs)); 1355 if (!ch->devs) { 1356 SPDK_ERRLOG("Memory allocation failed\n"); 1357 return -ENOMEM; 1358 } 1359 1360 for (i = 0; i < g_accel_mlx5.num_ctxs; i++) { 1361 
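		/* Set up per-channel resources for each device context: crypto mkey pool reference, CQ, QP and memory map */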
dev_ctx = &g_accel_mlx5.dev_ctxs[i]; 1362 dev = &ch->devs[i]; 1363 dev->dev_ctx = dev_ctx; 1364 1365 if (dev_ctx->crypto_mkeys) { 1366 dev->crypto_mkeys = spdk_mlx5_mkey_pool_get_ref(dev_ctx->pd, SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO); 1367 if (!dev->crypto_mkeys) { 1368 SPDK_ERRLOG("Failed to get crypto mkey pool channel, dev %s\n", dev_ctx->context->device->name); 1369 /* Should not happen since mkey pool is created on accel_mlx5 initialization. 1370 * We should not be here if pool creation failed */ 1371 assert(0); 1372 goto err_out; 1373 } 1374 } 1375 1376 memset(&cq_attr, 0, sizeof(cq_attr)); 1377 cq_attr.cqe_cnt = g_accel_mlx5.attr.qp_size; 1378 cq_attr.cqe_size = 64; 1379 cq_attr.cq_context = dev; 1380 1381 ch->num_devs++; 1382 rc = spdk_mlx5_cq_create(dev_ctx->pd, &cq_attr, &dev->cq); 1383 if (rc) { 1384 SPDK_ERRLOG("Failed to create mlx5 CQ, rc %d\n", rc); 1385 goto err_out; 1386 } 1387 1388 rc = accel_mlx5_create_qp(dev, &dev->qp); 1389 if (rc) { 1390 SPDK_ERRLOG("Failed to create mlx5 QP, rc %d\n", rc); 1391 goto err_out; 1392 } 1393 1394 dev->mmap = spdk_rdma_utils_create_mem_map(dev_ctx->pd, NULL, 1395 IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE); 1396 if (!dev->mmap) { 1397 SPDK_ERRLOG("Failed to create memory map\n"); 1398 rc = -ENOMEM; 1399 goto err_out; 1400 } 1401 dev->crypto_multi_block = dev_ctx->crypto_multi_block; 1402 dev->crypto_split_blocks = dev_ctx->crypto_multi_block ? g_accel_mlx5.attr.crypto_split_blocks : 0; 1403 dev->wrs_in_cq_max = g_accel_mlx5.attr.qp_size; 1404 STAILQ_INIT(&dev->nomem); 1405 } 1406 1407 ch->poller = SPDK_POLLER_REGISTER(accel_mlx5_poller, ch, 0); 1408 1409 return 0; 1410 1411 err_out: 1412 accel_mlx5_destroy_cb(&g_accel_mlx5, ctx_buf); 1413 return rc; 1414 } 1415 1416 void 1417 accel_mlx5_get_default_attr(struct accel_mlx5_attr *attr) 1418 { 1419 assert(attr); 1420 1421 attr->qp_size = ACCEL_MLX5_QP_SIZE; 1422 attr->num_requests = ACCEL_MLX5_NUM_REQUESTS; 1423 attr->allowed_devs = NULL; 1424 attr->crypto_split_blocks = 0; 1425 } 1426 1427 static void 1428 accel_mlx5_allowed_devs_free(void) 1429 { 1430 size_t i; 1431 1432 if (!g_accel_mlx5.allowed_devs) { 1433 return; 1434 } 1435 1436 for (i = 0; i < g_accel_mlx5.allowed_devs_count; i++) { 1437 free(g_accel_mlx5.allowed_devs[i]); 1438 } 1439 free(g_accel_mlx5.attr.allowed_devs); 1440 free(g_accel_mlx5.allowed_devs); 1441 g_accel_mlx5.attr.allowed_devs = NULL; 1442 g_accel_mlx5.allowed_devs = NULL; 1443 g_accel_mlx5.allowed_devs_count = 0; 1444 } 1445 1446 static int 1447 accel_mlx5_allowed_devs_parse(const char *allowed_devs) 1448 { 1449 char *str, *tmp, *tok; 1450 size_t devs_count = 0; 1451 1452 str = strdup(allowed_devs); 1453 if (!str) { 1454 return -ENOMEM; 1455 } 1456 1457 accel_mlx5_allowed_devs_free(); 1458 1459 tmp = str; 1460 while ((tmp = strchr(tmp, ',')) != NULL) { 1461 tmp++; 1462 devs_count++; 1463 } 1464 devs_count++; 1465 1466 g_accel_mlx5.allowed_devs = calloc(devs_count, sizeof(char *)); 1467 if (!g_accel_mlx5.allowed_devs) { 1468 free(str); 1469 return -ENOMEM; 1470 } 1471 1472 devs_count = 0; 1473 tok = strtok(str, ","); 1474 while (tok) { 1475 g_accel_mlx5.allowed_devs[devs_count] = strdup(tok); 1476 if (!g_accel_mlx5.allowed_devs[devs_count]) { 1477 free(str); 1478 accel_mlx5_allowed_devs_free(); 1479 return -ENOMEM; 1480 } 1481 tok = strtok(NULL, ","); 1482 devs_count++; 1483 g_accel_mlx5.allowed_devs_count++; 1484 } 1485 1486 free(str); 1487 1488 return 0; 1489 } 1490 1491 int 1492 accel_mlx5_enable(struct accel_mlx5_attr *attr) 1493 { 1494 
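	/* Typically reached via the mlx5_scan_accel_module RPC: copy user attributes, duplicate the allowed device list and register the module with the accel framework */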
int rc; 1495 1496 if (g_accel_mlx5.enabled) { 1497 return -EEXIST; 1498 } 1499 if (attr) { 1500 g_accel_mlx5.attr = *attr; 1501 g_accel_mlx5.attr.allowed_devs = NULL; 1502 1503 if (attr->allowed_devs) { 1504 /* Contains a copy of user's string */ 1505 g_accel_mlx5.attr.allowed_devs = strndup(attr->allowed_devs, ACCEL_MLX5_ALLOWED_DEVS_MAX_LEN); 1506 if (!g_accel_mlx5.attr.allowed_devs) { 1507 return -ENOMEM; 1508 } 1509 rc = accel_mlx5_allowed_devs_parse(g_accel_mlx5.attr.allowed_devs); 1510 if (rc) { 1511 return rc; 1512 } 1513 rc = spdk_mlx5_crypto_devs_allow((const char *const *)g_accel_mlx5.allowed_devs, 1514 g_accel_mlx5.allowed_devs_count); 1515 if (rc) { 1516 accel_mlx5_allowed_devs_free(); 1517 return rc; 1518 } 1519 } 1520 } else { 1521 accel_mlx5_get_default_attr(&g_accel_mlx5.attr); 1522 } 1523 1524 g_accel_mlx5.enabled = true; 1525 spdk_accel_module_list_add(&g_accel_mlx5.module); 1526 1527 return 0; 1528 } 1529 1530 static void 1531 accel_mlx5_free_resources(void) 1532 { 1533 struct accel_mlx5_dev_ctx *dev_ctx; 1534 uint32_t i; 1535 1536 for (i = 0; i < g_accel_mlx5.num_ctxs; i++) { 1537 dev_ctx = &g_accel_mlx5.dev_ctxs[i]; 1538 if (dev_ctx->pd) { 1539 if (dev_ctx->crypto_mkeys) { 1540 spdk_mlx5_mkey_pool_destroy(SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO, dev_ctx->pd); 1541 } 1542 spdk_rdma_utils_put_pd(dev_ctx->pd); 1543 } 1544 if (dev_ctx->domain) { 1545 spdk_rdma_utils_put_memory_domain(dev_ctx->domain); 1546 } 1547 } 1548 1549 free(g_accel_mlx5.dev_ctxs); 1550 g_accel_mlx5.dev_ctxs = NULL; 1551 g_accel_mlx5.initialized = false; 1552 } 1553 1554 static void 1555 accel_mlx5_deinit_cb(void *ctx) 1556 { 1557 accel_mlx5_free_resources(); 1558 spdk_accel_module_finish(); 1559 } 1560 1561 static void 1562 accel_mlx5_deinit(void *ctx) 1563 { 1564 if (g_accel_mlx5.allowed_devs) { 1565 accel_mlx5_allowed_devs_free(); 1566 } 1567 spdk_mlx5_crypto_devs_allow(NULL, 0); 1568 if (g_accel_mlx5.initialized) { 1569 spdk_io_device_unregister(&g_accel_mlx5, accel_mlx5_deinit_cb); 1570 } else { 1571 spdk_accel_module_finish(); 1572 } 1573 } 1574 1575 static int 1576 accel_mlx5_mkeys_create(struct ibv_pd *pd, uint32_t num_mkeys, uint32_t flags) 1577 { 1578 struct spdk_mlx5_mkey_pool_param pool_param = {}; 1579 1580 pool_param.mkey_count = num_mkeys; 1581 pool_param.cache_per_thread = num_mkeys * 3 / 4 / spdk_env_get_core_count(); 1582 pool_param.flags = flags; 1583 1584 return spdk_mlx5_mkey_pool_init(&pool_param, pd); 1585 } 1586 1587 static int 1588 accel_mlx5_dev_ctx_init(struct accel_mlx5_dev_ctx *dev_ctx, struct ibv_context *dev, 1589 struct spdk_mlx5_device_caps *caps) 1590 { 1591 struct ibv_pd *pd; 1592 int rc; 1593 1594 pd = spdk_rdma_utils_get_pd(dev); 1595 if (!pd) { 1596 SPDK_ERRLOG("Failed to get PD for context %p, dev %s\n", dev, dev->device->name); 1597 return -EINVAL; 1598 } 1599 dev_ctx->context = dev; 1600 dev_ctx->pd = pd; 1601 dev_ctx->domain = spdk_rdma_utils_get_memory_domain(pd); 1602 if (!dev_ctx->domain) { 1603 return -ENOMEM; 1604 } 1605 1606 if (g_accel_mlx5.crypto_supported) { 1607 dev_ctx->crypto_multi_block = caps->crypto.multi_block_be_tweak; 1608 if (!dev_ctx->crypto_multi_block && g_accel_mlx5.attr.crypto_split_blocks) { 1609 SPDK_WARNLOG("\"crypto_split_blocks\" is set but dev %s doesn't support multi block crypto\n", 1610 dev->device->name); 1611 } 1612 rc = accel_mlx5_mkeys_create(pd, g_accel_mlx5.attr.num_requests, SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO); 1613 if (rc) { 1614 SPDK_ERRLOG("Failed to create crypto mkeys pool, rc %d, dev %s\n", rc, dev->device->name); 1615 return 
rc; 1616 } 1617 dev_ctx->crypto_mkeys = true; 1618 } 1619 1620 return 0; 1621 } 1622 1623 static struct ibv_context ** 1624 accel_mlx5_get_devices(int *_num_devs) 1625 { 1626 struct ibv_context **rdma_devs, **rdma_devs_out = NULL, *dev; 1627 struct ibv_device_attr dev_attr; 1628 size_t j; 1629 int num_devs = 0, i, rc; 1630 int num_devs_out = 0; 1631 bool dev_allowed; 1632 1633 rdma_devs = rdma_get_devices(&num_devs); 1634 if (!rdma_devs || !num_devs) { 1635 *_num_devs = 0; 1636 return NULL; 1637 } 1638 1639 rdma_devs_out = calloc(num_devs + 1, sizeof(struct ibv_context *)); 1640 if (!rdma_devs_out) { 1641 SPDK_ERRLOG("Memory allocation failed\n"); 1642 rdma_free_devices(rdma_devs); 1643 *_num_devs = 0; 1644 return NULL; 1645 } 1646 1647 for (i = 0; i < num_devs; i++) { 1648 dev = rdma_devs[i]; 1649 rc = ibv_query_device(dev, &dev_attr); 1650 if (rc) { 1651 SPDK_ERRLOG("Failed to query dev %s, skipping\n", dev->device->name); 1652 continue; 1653 } 1654 if (dev_attr.vendor_id != SPDK_MLX5_VENDOR_ID_MELLANOX) { 1655 SPDK_DEBUGLOG(accel_mlx5, "dev %s is not Mellanox device, skipping\n", dev->device->name); 1656 continue; 1657 } 1658 1659 if (g_accel_mlx5.allowed_devs_count) { 1660 dev_allowed = false; 1661 for (j = 0; j < g_accel_mlx5.allowed_devs_count; j++) { 1662 if (strcmp(g_accel_mlx5.allowed_devs[j], dev->device->name) == 0) { 1663 dev_allowed = true; 1664 break; 1665 } 1666 } 1667 if (!dev_allowed) { 1668 continue; 1669 } 1670 } 1671 1672 rdma_devs_out[num_devs_out] = dev; 1673 num_devs_out++; 1674 } 1675 1676 rdma_free_devices(rdma_devs); 1677 *_num_devs = num_devs_out; 1678 1679 return rdma_devs_out; 1680 } 1681 1682 static inline bool 1683 accel_mlx5_dev_supports_crypto(struct spdk_mlx5_device_caps *caps) 1684 { 1685 return caps->crypto_supported && !caps->crypto.wrapped_import_method_aes_xts && 1686 (caps->crypto.single_block_le_tweak || 1687 caps->crypto.multi_block_le_tweak || caps->crypto.multi_block_be_tweak); 1688 } 1689 1690 static int 1691 accel_mlx5_init(void) 1692 { 1693 struct spdk_mlx5_device_caps *caps; 1694 struct ibv_context **rdma_devs, *dev; 1695 int num_devs = 0, rc = 0, i; 1696 int best_dev = -1, first_dev = 0; 1697 bool supports_crypto; 1698 bool find_best_dev = g_accel_mlx5.allowed_devs_count == 0; 1699 1700 if (!g_accel_mlx5.enabled) { 1701 return -EINVAL; 1702 } 1703 1704 rdma_devs = accel_mlx5_get_devices(&num_devs); 1705 if (!rdma_devs || !num_devs) { 1706 return -ENODEV; 1707 } 1708 caps = calloc(num_devs, sizeof(*caps)); 1709 if (!caps) { 1710 rc = -ENOMEM; 1711 goto cleanup; 1712 } 1713 1714 g_accel_mlx5.crypto_supported = true; 1715 g_accel_mlx5.num_ctxs = 0; 1716 1717 /* Iterate devices. 
We support an offload if all devices support it */ 1718 for (i = 0; i < num_devs; i++) { 1719 dev = rdma_devs[i]; 1720 1721 rc = spdk_mlx5_device_query_caps(dev, &caps[i]); 1722 if (rc) { 1723 SPDK_ERRLOG("Failed to get crypto caps, dev %s\n", dev->device->name); 1724 goto cleanup; 1725 } 1726 supports_crypto = accel_mlx5_dev_supports_crypto(&caps[i]); 1727 if (!supports_crypto) { 1728 SPDK_DEBUGLOG(accel_mlx5, "Disable crypto support because dev %s doesn't support it\n", 1729 rdma_devs[i]->device->name); 1730 g_accel_mlx5.crypto_supported = false; 1731 } 1732 if (find_best_dev) { 1733 if (supports_crypto && best_dev == -1) { 1734 best_dev = i; 1735 } 1736 } 1737 } 1738 1739 /* User didn't specify devices to use, try to select the best one */ 1740 if (find_best_dev) { 1741 if (best_dev == -1) { 1742 best_dev = 0; 1743 } 1744 supports_crypto = accel_mlx5_dev_supports_crypto(&caps[best_dev]); 1745 SPDK_NOTICELOG("Select dev %s, crypto %d\n", rdma_devs[best_dev]->device->name, supports_crypto); 1746 g_accel_mlx5.crypto_supported = supports_crypto; 1747 first_dev = best_dev; 1748 num_devs = 1; 1749 if (supports_crypto) { 1750 const char *const dev_name[] = { rdma_devs[best_dev]->device->name }; 1751 /* Let mlx5 library know which device to use */ 1752 spdk_mlx5_crypto_devs_allow(dev_name, 1); 1753 } 1754 } else { 1755 SPDK_NOTICELOG("Found %d devices, crypto %d\n", num_devs, g_accel_mlx5.crypto_supported); 1756 } 1757 1758 g_accel_mlx5.dev_ctxs = calloc(num_devs, sizeof(*g_accel_mlx5.dev_ctxs)); 1759 if (!g_accel_mlx5.dev_ctxs) { 1760 SPDK_ERRLOG("Memory allocation failed\n"); 1761 rc = -ENOMEM; 1762 goto cleanup; 1763 } 1764 1765 for (i = first_dev; i < first_dev + num_devs; i++) { 1766 rc = accel_mlx5_dev_ctx_init(&g_accel_mlx5.dev_ctxs[g_accel_mlx5.num_ctxs++], 1767 rdma_devs[i], &caps[i]); 1768 if (rc) { 1769 goto cleanup; 1770 } 1771 } 1772 1773 SPDK_NOTICELOG("Accel framework mlx5 initialized, found %d devices.\n", num_devs); 1774 spdk_io_device_register(&g_accel_mlx5, accel_mlx5_create_cb, accel_mlx5_destroy_cb, 1775 sizeof(struct accel_mlx5_io_channel), "accel_mlx5"); 1776 g_accel_mlx5.initialized = true; 1777 free(rdma_devs); 1778 free(caps); 1779 1780 return 0; 1781 1782 cleanup: 1783 free(rdma_devs); 1784 free(caps); 1785 accel_mlx5_free_resources(); 1786 1787 return rc; 1788 } 1789 1790 static void 1791 accel_mlx5_write_config_json(struct spdk_json_write_ctx *w) 1792 { 1793 if (g_accel_mlx5.enabled) { 1794 spdk_json_write_object_begin(w); 1795 spdk_json_write_named_string(w, "method", "mlx5_scan_accel_module"); 1796 spdk_json_write_named_object_begin(w, "params"); 1797 spdk_json_write_named_uint16(w, "qp_size", g_accel_mlx5.attr.qp_size); 1798 spdk_json_write_named_uint32(w, "num_requests", g_accel_mlx5.attr.num_requests); 1799 if (g_accel_mlx5.attr.allowed_devs) { 1800 spdk_json_write_named_string(w, "allowed_devs", g_accel_mlx5.attr.allowed_devs); 1801 } 1802 spdk_json_write_named_uint16(w, "crypto_split_blocks", g_accel_mlx5.attr.crypto_split_blocks); 1803 spdk_json_write_object_end(w); 1804 spdk_json_write_object_end(w); 1805 } 1806 } 1807 1808 static size_t 1809 accel_mlx5_get_ctx_size(void) 1810 { 1811 return sizeof(struct accel_mlx5_task); 1812 } 1813 1814 static int 1815 accel_mlx5_crypto_key_init(struct spdk_accel_crypto_key *key) 1816 { 1817 struct spdk_mlx5_crypto_dek_create_attr attr = {}; 1818 struct spdk_mlx5_crypto_keytag *keytag; 1819 int rc; 1820 1821 if (!key || !key->key || !key->key2 || !key->key_size || !key->key2_size) { 1822 return -EINVAL; 1823 } 1824 1825 
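	/* The AES-XTS DEK is passed to the mlx5 library as key1 and key2 concatenated in one buffer; it is zeroed and freed right after the keytag is created */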
attr.dek = calloc(1, key->key_size + key->key2_size); 1826 if (!attr.dek) { 1827 return -ENOMEM; 1828 } 1829 1830 memcpy(attr.dek, key->key, key->key_size); 1831 memcpy(attr.dek + key->key_size, key->key2, key->key2_size); 1832 attr.dek_len = key->key_size + key->key2_size; 1833 1834 rc = spdk_mlx5_crypto_keytag_create(&attr, &keytag); 1835 spdk_memset_s(attr.dek, attr.dek_len, 0, attr.dek_len); 1836 free(attr.dek); 1837 if (rc) { 1838 SPDK_ERRLOG("Failed to create a keytag, rc %d\n", rc); 1839 return rc; 1840 } 1841 1842 key->priv = keytag; 1843 1844 return 0; 1845 } 1846 1847 static void 1848 accel_mlx5_crypto_key_deinit(struct spdk_accel_crypto_key *key) 1849 { 1850 if (!key || key->module_if != &g_accel_mlx5.module || !key->priv) { 1851 return; 1852 } 1853 1854 spdk_mlx5_crypto_keytag_destroy(key->priv); 1855 } 1856 1857 static bool 1858 accel_mlx5_crypto_supports_cipher(enum spdk_accel_cipher cipher, size_t key_size) 1859 { 1860 switch (cipher) { 1861 case SPDK_ACCEL_CIPHER_AES_XTS: 1862 return key_size == SPDK_ACCEL_AES_XTS_128_KEY_SIZE || key_size == SPDK_ACCEL_AES_XTS_256_KEY_SIZE; 1863 default: 1864 return false; 1865 } 1866 } 1867 1868 static int 1869 accel_mlx5_get_memory_domains(struct spdk_memory_domain **domains, int array_size) 1870 { 1871 int i, size; 1872 1873 if (!domains || !array_size) { 1874 return (int)g_accel_mlx5.num_ctxs; 1875 } 1876 1877 size = spdk_min(array_size, (int)g_accel_mlx5.num_ctxs); 1878 1879 for (i = 0; i < size; i++) { 1880 domains[i] = g_accel_mlx5.dev_ctxs[i].domain; 1881 } 1882 1883 return (int)g_accel_mlx5.num_ctxs; 1884 } 1885 1886 static struct accel_mlx5_module g_accel_mlx5 = { 1887 .module = { 1888 .module_init = accel_mlx5_init, 1889 .module_fini = accel_mlx5_deinit, 1890 .write_config_json = accel_mlx5_write_config_json, 1891 .get_ctx_size = accel_mlx5_get_ctx_size, 1892 .name = "mlx5", 1893 .supports_opcode = accel_mlx5_supports_opcode, 1894 .get_io_channel = accel_mlx5_get_io_channel, 1895 .submit_tasks = accel_mlx5_submit_tasks, 1896 .crypto_key_init = accel_mlx5_crypto_key_init, 1897 .crypto_key_deinit = accel_mlx5_crypto_key_deinit, 1898 .crypto_supports_cipher = accel_mlx5_crypto_supports_cipher, 1899 .get_memory_domains = accel_mlx5_get_memory_domains, 1900 } 1901 }; 1902 1903 SPDK_LOG_REGISTER_COMPONENT(accel_mlx5) 1904
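
/*
 * Illustrative example only: JSON produced by accel_mlx5_write_config_json() when the module is
 * enabled with default attributes (ACCEL_MLX5_QP_SIZE, ACCEL_MLX5_NUM_REQUESTS, no allowed_devs,
 * crypto_split_blocks 0). Exact values follow the attributes passed to accel_mlx5_enable().
 *
 * {
 *   "method": "mlx5_scan_accel_module",
 *   "params": {
 *     "qp_size": 256,
 *     "num_requests": 2047,
 *     "crypto_split_blocks": 0
 *   }
 * }
 */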