1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 */ 4 5 #include "spdk/env.h" 6 #include "spdk/thread.h" 7 #include "spdk/queue.h" 8 #include "spdk/log.h" 9 #include "spdk/string.h" 10 #include "spdk/likely.h" 11 #include "spdk/dma.h" 12 #include "spdk/json.h" 13 #include "spdk/util.h" 14 15 #include "spdk_internal/mlx5.h" 16 #include "spdk_internal/rdma_utils.h" 17 #include "spdk/accel_module.h" 18 #include "spdk_internal/assert.h" 19 #include "spdk_internal/sgl.h" 20 #include "accel_mlx5.h" 21 22 #include <infiniband/mlx5dv.h> 23 #include <rdma/rdma_cma.h> 24 25 #define ACCEL_MLX5_QP_SIZE (256u) 26 #define ACCEL_MLX5_NUM_REQUESTS (2048u - 1) 27 #define ACCEL_MLX5_RECOVER_POLLER_PERIOD_US (10000) 28 #define ACCEL_MLX5_MAX_SGE (16u) 29 #define ACCEL_MLX5_MAX_WC (64u) 30 #define ACCEL_MLX5_MAX_MKEYS_IN_TASK (16u) 31 32 /* Assume we have up to 16 devices */ 33 #define ACCEL_MLX5_ALLOWED_DEVS_MAX_LEN ((SPDK_MLX5_DEV_MAX_NAME_LEN + 1) * 16) 34 35 #define ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, task) \ 36 do { \ 37 assert((qp)->wrs_submitted < (qp)->wrs_max); \ 38 (qp)->wrs_submitted++; \ 39 assert((task)->num_wrs < UINT16_MAX); \ 40 (task)->num_wrs++; \ 41 } while (0) 42 43 #define ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, task) \ 44 do { \ 45 assert((dev)->wrs_in_cq < (dev)->wrs_in_cq_max); \ 46 (dev)->wrs_in_cq++; \ 47 assert((qp)->wrs_submitted < (qp)->wrs_max); \ 48 (qp)->wrs_submitted++; \ 49 assert((task)->num_wrs < UINT16_MAX); \ 50 (task)->num_wrs++; \ 51 } while (0) 52 53 struct accel_mlx5_io_channel; 54 struct accel_mlx5_task; 55 56 struct accel_mlx5_dev_ctx { 57 struct ibv_context *context; 58 struct ibv_pd *pd; 59 struct spdk_memory_domain *domain; 60 struct spdk_mempool *psv_pool; 61 TAILQ_ENTRY(accel_mlx5_dev_ctx) link; 62 struct spdk_mlx5_psv **psvs; 63 bool crypto_mkeys; 64 bool sig_mkeys; 65 bool crypto_multi_block; 66 }; 67 68 enum accel_mlx5_opcode { 69 ACCEL_MLX5_OPC_COPY, 70 ACCEL_MLX5_OPC_CRYPTO, 71 ACCEL_MLX5_OPC_CRC32C, 72 ACCEL_MLX5_OPC_LAST 73 }; 74 75 struct accel_mlx5_stats { 76 uint64_t crypto_umrs; 77 uint64_t sig_umrs; 78 uint64_t rdma_reads; 79 uint64_t rdma_writes; 80 uint64_t polls; 81 uint64_t idle_polls; 82 uint64_t completions; 83 uint64_t nomem_qdepth; 84 uint64_t nomem_mkey; 85 uint64_t opcodes[ACCEL_MLX5_OPC_LAST]; 86 }; 87 88 struct accel_mlx5_module { 89 struct spdk_accel_module_if module; 90 struct accel_mlx5_stats stats; 91 struct spdk_spinlock lock; 92 struct accel_mlx5_dev_ctx *dev_ctxs; 93 uint32_t num_ctxs; 94 struct accel_mlx5_attr attr; 95 char **allowed_devs; 96 size_t allowed_devs_count; 97 bool initialized; 98 bool enabled; 99 bool crypto_supported; 100 bool crc32c_supported; 101 }; 102 103 struct accel_mlx5_sge { 104 uint32_t src_sge_count; 105 uint32_t dst_sge_count; 106 struct ibv_sge src_sge[ACCEL_MLX5_MAX_SGE]; 107 struct ibv_sge dst_sge[ACCEL_MLX5_MAX_SGE]; 108 }; 109 110 struct accel_mlx5_iov_sgl { 111 struct iovec *iov; 112 uint32_t iovcnt; 113 uint32_t iov_offset; 114 }; 115 116 struct accel_mlx5_psv_wrapper { 117 uint32_t psv_index; 118 struct { 119 uint32_t error : 1; 120 uint32_t reserved : 31; 121 } bits; 122 /* mlx5 engine requires DMAable memory, use this member to copy user's crc value since we don't know which 123 * memory it is in */ 124 uint32_t crc; 125 uint32_t crc_lkey; 126 }; 127 128 struct accel_mlx5_task { 129 struct spdk_accel_task base; 130 struct accel_mlx5_iov_sgl src; 131 struct accel_mlx5_iov_sgl dst; 132 struct accel_mlx5_qp *qp; 133 STAILQ_ENTRY(accel_mlx5_task) link; 134 uint16_t num_reqs; 135 uint16_t num_completed_reqs; 136 uint16_t num_submitted_reqs; 137 uint16_t num_ops; /* number of allocated mkeys or number of operations */ 138 uint16_t num_wrs; /* Number of outstanding operations which consume qp slot */ 139 union { 140 struct { 141 uint16_t blocks_per_req; 142 uint16_t num_processed_blocks; 143 uint16_t num_blocks; 144 }; 145 struct { 146 struct accel_mlx5_psv_wrapper *psv; 147 uint32_t last_umr_len; 148 uint8_t last_mkey_idx; 149 }; 150 }; 151 union { 152 uint8_t raw; 153 struct { 154 uint8_t inplace : 1; 155 uint8_t enc_order : 2; 156 uint8_t mlx5_opcode: 5; 157 }; 158 }; 159 /* Keep this array last since not all elements might be accessed, this reduces amount of data to be 160 * cached */ 161 struct spdk_mlx5_mkey_pool_obj *mkeys[ACCEL_MLX5_MAX_MKEYS_IN_TASK]; 162 }; 163 164 SPDK_STATIC_ASSERT(ACCEL_MLX5_MAX_MKEYS_IN_TASK <= UINT8_MAX, "uint8_t is used to iterate mkeys"); 165 166 struct accel_mlx5_qp { 167 struct spdk_mlx5_qp *qp; 168 struct ibv_qp *verbs_qp; 169 struct accel_mlx5_dev *dev; 170 struct accel_mlx5_io_channel *ch; 171 /* tasks submitted to HW. We can't complete a task even in error case until we reap completions for all 172 * submitted requests */ 173 STAILQ_HEAD(, accel_mlx5_task) in_hw; 174 uint16_t wrs_submitted; 175 uint16_t wrs_max; 176 bool recovering; 177 struct spdk_poller *recover_poller; 178 }; 179 180 struct accel_mlx5_dev { 181 struct accel_mlx5_qp qp; 182 struct spdk_mlx5_cq *cq; 183 struct spdk_mlx5_mkey_pool *crypto_mkeys; 184 struct spdk_mlx5_mkey_pool *sig_mkeys; 185 struct spdk_rdma_utils_mem_map *mmap; 186 struct accel_mlx5_dev_ctx *dev_ctx; 187 uint16_t wrs_in_cq; 188 uint16_t wrs_in_cq_max; 189 uint16_t crypto_split_blocks; 190 bool crypto_multi_block; 191 /* Pending tasks waiting for requests resources */ 192 STAILQ_HEAD(, accel_mlx5_task) nomem; 193 TAILQ_ENTRY(accel_mlx5_dev) link; 194 struct accel_mlx5_stats stats; 195 }; 196 197 struct accel_mlx5_io_channel { 198 struct accel_mlx5_dev *devs; 199 struct spdk_poller *poller; 200 uint32_t num_devs; 201 /* Index in \b devs to be used for operations in round-robin way */ 202 uint32_t dev_idx; 203 }; 204 205 struct accel_mlx5_task_operations { 206 int (*init)(struct accel_mlx5_task *task); 207 int (*process)(struct accel_mlx5_task *task); 208 int (*cont)(struct accel_mlx5_task *task); 209 void (*complete)(struct accel_mlx5_task *task); 210 }; 211 212 struct accel_mlx5_psv_pool_iter_cb_args { 213 struct accel_mlx5_dev_ctx *dev; 214 struct spdk_rdma_utils_mem_map *map; 215 int rc; 216 }; 217 218 struct accel_mlx5_dump_stats_ctx { 219 struct accel_mlx5_stats total; 220 struct spdk_json_write_ctx *w; 221 enum accel_mlx5_dump_state_level level; 222 accel_mlx5_dump_stat_done_cb cb; 223 void *ctx; 224 }; 225 226 static struct accel_mlx5_module g_accel_mlx5; 227 228 static inline void 229 accel_mlx5_iov_sgl_init(struct accel_mlx5_iov_sgl *s, struct iovec *iov, uint32_t iovcnt) 230 { 231 s->iov = iov; 232 s->iovcnt = iovcnt; 233 s->iov_offset = 0; 234 } 235 236 static inline void 237 accel_mlx5_iov_sgl_advance(struct accel_mlx5_iov_sgl *s, uint32_t step) 238 { 239 s->iov_offset += step; 240 while (s->iovcnt > 0) { 241 assert(s->iov != NULL); 242 if (s->iov_offset < s->iov->iov_len) { 243 break; 244 } 245 246 s->iov_offset -= s->iov->iov_len; 247 s->iov++; 248 s->iovcnt--; 249 } 250 } 251 252 static inline void 253 accel_mlx5_iov_sgl_unwind(struct accel_mlx5_iov_sgl *s, uint32_t max_iovs, uint32_t step) 254 { 255 SPDK_DEBUGLOG(accel_mlx5, "iov %p, iovcnt %u, max %u, offset %u, step %u\n", s->iov, s->iovcnt, 256 max_iovs, s->iov_offset, step); 257 while (s->iovcnt <= max_iovs) { 258 assert(s->iov != NULL); 259 if (s->iov_offset >= step) { 260 s->iov_offset -= step; 261 SPDK_DEBUGLOG(accel_mlx5, "\tEND, iov %p, iovcnt %u, offset %u\n", s->iov, s->iovcnt, 262 s->iov_offset); 263 return; 264 } 265 step -= s->iov_offset; 266 s->iov--; 267 s->iovcnt++; 268 s->iov_offset = s->iov->iov_len; 269 SPDK_DEBUGLOG(accel_mlx5, "\tiov %p, iovcnt %u, offset %u, step %u\n", s->iov, s->iovcnt, 270 s->iov_offset, step); 271 } 272 273 SPDK_ERRLOG("Can't unwind iovs, remaining %u\n", step); 274 assert(0); 275 } 276 277 static inline int 278 accel_mlx5_sge_unwind(struct ibv_sge *sge, uint32_t sge_count, uint32_t step) 279 { 280 int i; 281 282 assert(sge_count > 0); 283 SPDK_DEBUGLOG(accel_mlx5, "sge %p, count %u, step %u\n", sge, sge_count, step); 284 for (i = (int)sge_count - 1; i >= 0; i--) { 285 if (sge[i].length > step) { 286 sge[i].length -= step; 287 SPDK_DEBUGLOG(accel_mlx5, "\tsge[%u] len %u, step %u\n", i, sge[i].length, step); 288 return (int)i + 1; 289 } 290 SPDK_DEBUGLOG(accel_mlx5, "\tsge[%u] len %u, step %u\n", i, sge[i].length, step); 291 step -= sge[i].length; 292 } 293 294 SPDK_ERRLOG("Can't unwind sge, remaining %u\n", step); 295 assert(step == 0); 296 297 return 0; 298 } 299 300 static inline void 301 accel_mlx5_crypto_task_complete(struct accel_mlx5_task *task) 302 { 303 struct accel_mlx5_dev *dev = task->qp->dev; 304 305 assert(task->num_ops); 306 spdk_mlx5_mkey_pool_put_bulk(dev->crypto_mkeys, task->mkeys, task->num_ops); 307 spdk_accel_task_complete(&task->base, 0); 308 } 309 310 static inline void 311 accel_mlx5_task_fail(struct accel_mlx5_task *task, int rc) 312 { 313 struct accel_mlx5_dev *dev = task->qp->dev; 314 315 assert(task->num_reqs == task->num_completed_reqs); 316 SPDK_DEBUGLOG(accel_mlx5, "Fail task %p, opc %d, rc %d\n", task, task->base.op_code, rc); 317 318 if (task->num_ops) { 319 if (task->mlx5_opcode == ACCEL_MLX5_OPC_CRYPTO) { 320 spdk_mlx5_mkey_pool_put_bulk(dev->crypto_mkeys, task->mkeys, task->num_ops); 321 } 322 if (task->mlx5_opcode == ACCEL_MLX5_OPC_CRC32C) { 323 spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, task->mkeys, task->num_ops); 324 spdk_mempool_put(dev->dev_ctx->psv_pool, task->psv); 325 } 326 } 327 spdk_accel_task_complete(&task->base, rc); 328 } 329 330 static int 331 accel_mlx5_translate_addr(void *addr, size_t size, struct spdk_memory_domain *domain, 332 void *domain_ctx, struct accel_mlx5_dev *dev, struct ibv_sge *sge) 333 { 334 struct spdk_rdma_utils_memory_translation map_translation; 335 struct spdk_memory_domain_translation_result domain_translation; 336 struct spdk_memory_domain_translation_ctx local_ctx; 337 int rc; 338 339 if (domain) { 340 domain_translation.size = sizeof(struct spdk_memory_domain_translation_result); 341 local_ctx.size = sizeof(local_ctx); 342 local_ctx.rdma.ibv_qp = dev->qp.verbs_qp; 343 rc = spdk_memory_domain_translate_data(domain, domain_ctx, dev->dev_ctx->domain, 344 &local_ctx, addr, size, &domain_translation); 345 if (spdk_unlikely(rc || domain_translation.iov_count != 1)) { 346 SPDK_ERRLOG("Memory domain translation failed, addr %p, length %zu, iovcnt %u\n", addr, size, 347 domain_translation.iov_count); 348 if (rc == 0) { 349 rc = -EINVAL; 350 } 351 352 return rc; 353 } 354 sge->lkey = domain_translation.rdma.lkey; 355 sge->addr = (uint64_t) domain_translation.iov.iov_base; 356 sge->length = domain_translation.iov.iov_len; 357 } else { 358 rc = spdk_rdma_utils_get_translation(dev->mmap, addr, size, 359 &map_translation); 360 if (spdk_unlikely(rc)) { 361 SPDK_ERRLOG("Memory translation failed, addr %p, length %zu\n", addr, size); 362 return rc; 363 } 364 sge->lkey = spdk_rdma_utils_memory_translation_get_lkey(&map_translation); 365 sge->addr = (uint64_t)addr; 366 sge->length = size; 367 } 368 369 return 0; 370 } 371 372 static inline int 373 accel_mlx5_fill_block_sge(struct accel_mlx5_dev *dev, struct ibv_sge *sge, 374 struct accel_mlx5_iov_sgl *iovs, uint32_t len, uint32_t *_remaining, 375 struct spdk_memory_domain *domain, void *domain_ctx) 376 { 377 void *addr; 378 uint32_t remaining = len; 379 uint32_t size; 380 int i = 0; 381 int rc; 382 383 while (remaining && i < (int)ACCEL_MLX5_MAX_SGE) { 384 size = spdk_min(remaining, iovs->iov->iov_len - iovs->iov_offset); 385 addr = (void *)iovs->iov->iov_base + iovs->iov_offset; 386 rc = accel_mlx5_translate_addr(addr, size, domain, domain_ctx, dev, &sge[i]); 387 if (spdk_unlikely(rc)) { 388 return rc; 389 } 390 SPDK_DEBUGLOG(accel_mlx5, "\t sge[%d]: lkey %u, len %u, addr %"PRIx64"\n", i, sge[i].lkey, 391 sge[i].length, sge[i].addr); 392 accel_mlx5_iov_sgl_advance(iovs, size); 393 i++; 394 assert(remaining >= size); 395 remaining -= size; 396 } 397 *_remaining = remaining; 398 399 return i; 400 } 401 402 static inline bool 403 accel_mlx5_compare_iovs(struct iovec *v1, struct iovec *v2, uint32_t iovcnt) 404 { 405 return memcmp(v1, v2, sizeof(*v1) * iovcnt) == 0; 406 } 407 408 static inline uint16_t 409 accel_mlx5_dev_get_available_slots(struct accel_mlx5_dev *dev, struct accel_mlx5_qp *qp) 410 { 411 assert(qp->wrs_max >= qp->wrs_submitted); 412 assert(dev->wrs_in_cq_max >= dev->wrs_in_cq); 413 414 /* Each time we produce only 1 CQE, so we need 1 CQ slot */ 415 if (spdk_unlikely(dev->wrs_in_cq == dev->wrs_in_cq_max)) { 416 return 0; 417 } 418 419 return qp->wrs_max - qp->wrs_submitted; 420 } 421 422 static inline uint32_t 423 accel_mlx5_task_alloc_mkeys(struct accel_mlx5_task *task, struct spdk_mlx5_mkey_pool *pool) 424 { 425 uint32_t num_ops; 426 int rc; 427 428 assert(task->num_reqs > task->num_completed_reqs); 429 num_ops = task->num_reqs - task->num_completed_reqs; 430 num_ops = spdk_min(num_ops, ACCEL_MLX5_MAX_MKEYS_IN_TASK); 431 if (!num_ops) { 432 return 0; 433 } 434 rc = spdk_mlx5_mkey_pool_get_bulk(pool, task->mkeys, num_ops); 435 if (spdk_unlikely(rc)) { 436 return 0; 437 } 438 assert(num_ops <= UINT16_MAX); 439 task->num_ops = num_ops; 440 441 return num_ops; 442 } 443 444 static inline uint8_t 445 bs_to_bs_selector(uint32_t bs) 446 { 447 switch (bs) { 448 case 512: 449 return SPDK_MLX5_BLOCK_SIZE_SELECTOR_512; 450 case 520: 451 return SPDK_MLX5_BLOCK_SIZE_SELECTOR_520; 452 case 4096: 453 return SPDK_MLX5_BLOCK_SIZE_SELECTOR_4096; 454 case 4160: 455 return SPDK_MLX5_BLOCK_SIZE_SELECTOR_4160; 456 default: 457 return SPDK_MLX5_BLOCK_SIZE_SELECTOR_RESERVED; 458 } 459 } 460 461 static inline int 462 accel_mlx5_configure_crypto_umr(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_sge *sge, 463 uint32_t mkey, uint32_t num_blocks, struct spdk_mlx5_crypto_dek_data *dek_data) 464 { 465 struct spdk_mlx5_umr_crypto_attr cattr; 466 struct spdk_mlx5_umr_attr umr_attr; 467 struct accel_mlx5_qp *qp = mlx5_task->qp; 468 struct accel_mlx5_dev *dev = qp->dev; 469 struct spdk_accel_task *task = &mlx5_task->base; 470 uint32_t length, remaining = 0, block_size = task->block_size; 471 int rc; 472 473 length = num_blocks * block_size; 474 SPDK_DEBUGLOG(accel_mlx5, "task %p, domain %p, len %u, blocks %u\n", task, task->src_domain, length, 475 num_blocks); 476 rc = accel_mlx5_fill_block_sge(dev, sge->src_sge, &mlx5_task->src, length, &remaining, 477 task->src_domain, task->src_domain_ctx); 478 if (spdk_unlikely(rc <= 0)) { 479 if (rc == 0) { 480 rc = -EINVAL; 481 } 482 SPDK_ERRLOG("failed set src sge, rc %d\n", rc); 483 return rc; 484 } 485 sge->src_sge_count = rc; 486 if (spdk_unlikely(remaining)) { 487 uint32_t new_len = length - remaining; 488 uint32_t aligned_len, updated_num_blocks; 489 490 SPDK_DEBUGLOG(accel_mlx5, "Incorrect src iovs, handled %u out of %u bytes\n", new_len, length); 491 if (new_len < block_size) { 492 /* We need to process at least 1 block. If buffer is too fragmented, we can't do 493 * anything */ 494 return -ERANGE; 495 } 496 497 /* Regular integer division, we need to round down to prev block size */ 498 updated_num_blocks = new_len / block_size; 499 assert(updated_num_blocks); 500 assert(updated_num_blocks < num_blocks); 501 aligned_len = updated_num_blocks * block_size; 502 503 if (aligned_len < new_len) { 504 uint32_t dt = new_len - aligned_len; 505 506 /* We can't process part of block, need to unwind src iov_sgl and sge to the 507 * prev block boundary */ 508 SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind src sge for %u bytes\n", task, dt); 509 accel_mlx5_iov_sgl_unwind(&mlx5_task->src, task->s.iovcnt, dt); 510 sge->src_sge_count = accel_mlx5_sge_unwind(sge->src_sge, sge->src_sge_count, dt); 511 if (!sge->src_sge_count) { 512 return -ERANGE; 513 } 514 } 515 SPDK_DEBUGLOG(accel_mlx5, "task %p, UMR len %u -> %u\n", task, length, aligned_len); 516 length = aligned_len; 517 num_blocks = updated_num_blocks; 518 } 519 520 cattr.xts_iv = task->iv + mlx5_task->num_processed_blocks; 521 cattr.keytag = 0; 522 cattr.dek_obj_id = dek_data->dek_obj_id; 523 cattr.tweak_mode = dek_data->tweak_mode; 524 cattr.enc_order = mlx5_task->enc_order; 525 cattr.bs_selector = bs_to_bs_selector(mlx5_task->base.block_size); 526 if (spdk_unlikely(cattr.bs_selector == SPDK_MLX5_BLOCK_SIZE_SELECTOR_RESERVED)) { 527 SPDK_ERRLOG("unsupported block size %u\n", mlx5_task->base.block_size); 528 return -EINVAL; 529 } 530 umr_attr.mkey = mkey; 531 umr_attr.sge = sge->src_sge; 532 533 if (!mlx5_task->inplace) { 534 SPDK_DEBUGLOG(accel_mlx5, "task %p, dst sge, domain %p, len %u\n", task, task->dst_domain, length); 535 rc = accel_mlx5_fill_block_sge(dev, sge->dst_sge, &mlx5_task->dst, length, &remaining, 536 task->dst_domain, task->dst_domain_ctx); 537 if (spdk_unlikely(rc <= 0)) { 538 if (rc == 0) { 539 rc = -EINVAL; 540 } 541 SPDK_ERRLOG("failed set dst sge, rc %d\n", rc); 542 return rc; 543 } 544 sge->dst_sge_count = rc; 545 if (spdk_unlikely(remaining)) { 546 uint32_t new_len = length - remaining; 547 uint32_t aligned_len, updated_num_blocks, dt; 548 549 SPDK_DEBUGLOG(accel_mlx5, "Incorrect dst iovs, handled %u out of %u bytes\n", new_len, length); 550 if (new_len < block_size) { 551 /* We need to process at least 1 block. If buffer is too fragmented, we can't do 552 * anything */ 553 return -ERANGE; 554 } 555 556 /* Regular integer division, we need to round down to prev block size */ 557 updated_num_blocks = new_len / block_size; 558 assert(updated_num_blocks); 559 assert(updated_num_blocks < num_blocks); 560 aligned_len = updated_num_blocks * block_size; 561 562 if (aligned_len < new_len) { 563 dt = new_len - aligned_len; 564 assert(dt > 0 && dt < length); 565 /* We can't process part of block, need to unwind src and dst iov_sgl and sge to the 566 * prev block boundary */ 567 SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind dst sge for %u bytes\n", task, dt); 568 accel_mlx5_iov_sgl_unwind(&mlx5_task->dst, task->d.iovcnt, dt); 569 sge->dst_sge_count = accel_mlx5_sge_unwind(sge->dst_sge, sge->dst_sge_count, dt); 570 assert(sge->dst_sge_count > 0 && sge->dst_sge_count <= ACCEL_MLX5_MAX_SGE); 571 if (!sge->dst_sge_count) { 572 return -ERANGE; 573 } 574 } 575 assert(length > aligned_len); 576 dt = length - aligned_len; 577 SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind src sge for %u bytes\n", task, dt); 578 /* The same for src iov_sgl and sge. In worst case we can unwind SRC 2 times */ 579 accel_mlx5_iov_sgl_unwind(&mlx5_task->src, task->s.iovcnt, dt); 580 sge->src_sge_count = accel_mlx5_sge_unwind(sge->src_sge, sge->src_sge_count, dt); 581 assert(sge->src_sge_count > 0 && sge->src_sge_count <= ACCEL_MLX5_MAX_SGE); 582 if (!sge->src_sge_count) { 583 return -ERANGE; 584 } 585 SPDK_DEBUGLOG(accel_mlx5, "task %p, UMR len %u -> %u\n", task, length, aligned_len); 586 length = aligned_len; 587 num_blocks = updated_num_blocks; 588 } 589 } 590 591 SPDK_DEBUGLOG(accel_mlx5, 592 "task %p: bs %u, iv %"PRIu64", enc_on_tx %d, tweak_mode %d, len %u, mkey %x, blocks %u\n", 593 mlx5_task, task->block_size, cattr.xts_iv, mlx5_task->enc_order, cattr.tweak_mode, length, mkey, 594 num_blocks); 595 596 umr_attr.sge_count = sge->src_sge_count; 597 umr_attr.umr_len = length; 598 assert((uint32_t)mlx5_task->num_processed_blocks + num_blocks <= UINT16_MAX); 599 mlx5_task->num_processed_blocks += num_blocks; 600 601 rc = spdk_mlx5_umr_configure_crypto(qp->qp, &umr_attr, &cattr, 0, 0); 602 603 return rc; 604 } 605 606 static inline int 607 accel_mlx5_crypto_task_process(struct accel_mlx5_task *mlx5_task) 608 { 609 struct accel_mlx5_sge sges[ACCEL_MLX5_MAX_MKEYS_IN_TASK]; 610 struct spdk_mlx5_crypto_dek_data dek_data; 611 struct accel_mlx5_qp *qp = mlx5_task->qp; 612 struct accel_mlx5_dev *dev = qp->dev; 613 /* First RDMA after UMR must have a SMALL_FENCE */ 614 uint32_t first_rdma_fence = SPDK_MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE; 615 uint16_t num_blocks; 616 uint16_t num_ops = spdk_min(mlx5_task->num_reqs - mlx5_task->num_completed_reqs, 617 mlx5_task->num_ops); 618 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp); 619 uint16_t i; 620 int rc; 621 622 assert(qp_slot > 1); 623 num_ops = spdk_min(num_ops, qp_slot >> 1); 624 if (spdk_unlikely(!num_ops)) { 625 return -EINVAL; 626 } 627 628 rc = spdk_mlx5_crypto_get_dek_data(mlx5_task->base.crypto_key->priv, dev->dev_ctx->pd, &dek_data); 629 if (spdk_unlikely(rc)) { 630 return rc; 631 } 632 633 mlx5_task->num_wrs = 0; 634 SPDK_DEBUGLOG(accel_mlx5, "begin, task, %p, reqs: total %u, submitted %u, completed %u\n", 635 mlx5_task, mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs); 636 for (i = 0; i < num_ops; i++) { 637 if (mlx5_task->num_submitted_reqs + i + 1 == mlx5_task->num_reqs) { 638 /* Last request may consume less than calculated if crypto_multi_block is true */ 639 assert(mlx5_task->num_blocks > mlx5_task->num_submitted_reqs); 640 num_blocks = mlx5_task->num_blocks - mlx5_task->num_processed_blocks; 641 } else { 642 num_blocks = mlx5_task->blocks_per_req; 643 } 644 645 rc = accel_mlx5_configure_crypto_umr(mlx5_task, &sges[i], mlx5_task->mkeys[i]->mkey, num_blocks, 646 &dek_data); 647 if (spdk_unlikely(rc)) { 648 SPDK_ERRLOG("UMR configure failed with %d\n", rc); 649 return rc; 650 } 651 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task); 652 dev->stats.crypto_umrs++; 653 } 654 655 /* Loop `num_ops - 1` for easy flags handling */ 656 for (i = 0; i < num_ops - 1; i++) { 657 /* UMR is used as a destination for RDMA_READ - from UMR to sge */ 658 if (mlx5_task->inplace) { 659 rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].src_sge, sges[i].src_sge_count, 0, 660 mlx5_task->mkeys[i]->mkey, 0, first_rdma_fence); 661 } else { 662 rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].dst_sge, sges[i].dst_sge_count, 0, 663 mlx5_task->mkeys[i]->mkey, 0, first_rdma_fence); 664 } 665 if (spdk_unlikely(rc)) { 666 SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc); 667 return rc; 668 } 669 670 first_rdma_fence = 0; 671 assert(mlx5_task->num_submitted_reqs < mlx5_task->num_reqs); 672 assert(mlx5_task->num_submitted_reqs < UINT16_MAX); 673 mlx5_task->num_submitted_reqs++; 674 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task); 675 dev->stats.rdma_reads++; 676 } 677 678 if (mlx5_task->inplace) { 679 rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].src_sge, sges[i].src_sge_count, 0, 680 mlx5_task->mkeys[i]->mkey, (uint64_t)mlx5_task, first_rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE); 681 } else { 682 rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].dst_sge, sges[i].dst_sge_count, 0, 683 mlx5_task->mkeys[i]->mkey, (uint64_t)mlx5_task, first_rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE); 684 } 685 if (spdk_unlikely(rc)) { 686 SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc); 687 return rc; 688 } 689 690 assert(mlx5_task->num_submitted_reqs < mlx5_task->num_reqs); 691 assert(mlx5_task->num_submitted_reqs < UINT16_MAX); 692 mlx5_task->num_submitted_reqs++; 693 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task); 694 dev->stats.rdma_reads++; 695 STAILQ_INSERT_TAIL(&qp->in_hw, mlx5_task, link); 696 697 if (spdk_unlikely(mlx5_task->num_submitted_reqs == mlx5_task->num_reqs && 698 mlx5_task->num_blocks > mlx5_task->num_processed_blocks)) { 699 /* We hit "out of sge 700 * entries" case with highly fragmented payload. In that case 701 * accel_mlx5_configure_crypto_umr function handled fewer data blocks than expected 702 * That means we need at least 1 more request to complete this task, this request will be 703 * executed once all submitted ones are completed */ 704 SPDK_DEBUGLOG(accel_mlx5, "task %p, processed %u/%u blocks, add extra req\n", mlx5_task, 705 mlx5_task->num_processed_blocks, mlx5_task->num_blocks); 706 mlx5_task->num_reqs++; 707 } 708 709 SPDK_DEBUGLOG(accel_mlx5, "end, task, %p, reqs: total %u, submitted %u, completed %u\n", mlx5_task, 710 mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs); 711 712 return 0; 713 } 714 715 static inline int 716 accel_mlx5_crypto_task_continue(struct accel_mlx5_task *task) 717 { 718 struct accel_mlx5_qp *qp = task->qp; 719 struct accel_mlx5_dev *dev = qp->dev; 720 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp); 721 722 assert(task->num_reqs > task->num_completed_reqs); 723 if (task->num_ops == 0) { 724 /* No mkeys allocated, try to allocate now */ 725 if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(task, dev->crypto_mkeys))) { 726 /* Pool is empty, queue this task */ 727 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 728 dev->stats.nomem_mkey++; 729 return -ENOMEM; 730 } 731 } 732 /* We need to post at least 1 UMR and 1 RDMA operation */ 733 if (spdk_unlikely(qp_slot < 2)) { 734 /* QP is full, queue this task */ 735 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 736 task->qp->dev->stats.nomem_qdepth++; 737 return -ENOMEM; 738 } 739 740 return accel_mlx5_crypto_task_process(task); 741 } 742 743 static inline int 744 accel_mlx5_crypto_task_init(struct accel_mlx5_task *mlx5_task) 745 { 746 struct spdk_accel_task *task = &mlx5_task->base; 747 struct accel_mlx5_dev *dev = mlx5_task->qp->dev; 748 uint64_t src_nbytes = task->nbytes; 749 #ifdef DEBUG 750 uint64_t dst_nbytes; 751 uint32_t i; 752 #endif 753 bool crypto_key_ok; 754 755 crypto_key_ok = (task->crypto_key && task->crypto_key->module_if == &g_accel_mlx5.module && 756 task->crypto_key->priv); 757 if (spdk_unlikely((task->nbytes % mlx5_task->base.block_size != 0) || !crypto_key_ok)) { 758 if (crypto_key_ok) { 759 SPDK_ERRLOG("src length %"PRIu64" is not a multiple of the block size %u\n", task->nbytes, 760 mlx5_task->base.block_size); 761 } else { 762 SPDK_ERRLOG("Wrong crypto key provided\n"); 763 } 764 return -EINVAL; 765 } 766 767 assert(src_nbytes / mlx5_task->base.block_size <= UINT16_MAX); 768 mlx5_task->num_blocks = src_nbytes / mlx5_task->base.block_size; 769 accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt); 770 if (task->d.iovcnt == 0 || (task->d.iovcnt == task->s.iovcnt && 771 accel_mlx5_compare_iovs(task->d.iovs, task->s.iovs, task->s.iovcnt))) { 772 mlx5_task->inplace = 1; 773 } else { 774 #ifdef DEBUG 775 dst_nbytes = 0; 776 for (i = 0; i < task->d.iovcnt; i++) { 777 dst_nbytes += task->d.iovs[i].iov_len; 778 } 779 780 if (spdk_unlikely(src_nbytes != dst_nbytes)) { 781 return -EINVAL; 782 } 783 #endif 784 mlx5_task->inplace = 0; 785 accel_mlx5_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt); 786 } 787 788 if (dev->crypto_multi_block) { 789 if (dev->crypto_split_blocks) { 790 assert(SPDK_CEIL_DIV(mlx5_task->num_blocks, dev->crypto_split_blocks) <= UINT16_MAX); 791 mlx5_task->num_reqs = SPDK_CEIL_DIV(mlx5_task->num_blocks, dev->crypto_split_blocks); 792 /* Last req may consume less blocks */ 793 mlx5_task->blocks_per_req = spdk_min(mlx5_task->num_blocks, dev->crypto_split_blocks); 794 } else { 795 if (task->s.iovcnt > ACCEL_MLX5_MAX_SGE || task->d.iovcnt > ACCEL_MLX5_MAX_SGE) { 796 uint32_t max_sge_count = spdk_max(task->s.iovcnt, task->d.iovcnt); 797 798 assert(SPDK_CEIL_DIV(max_sge_count, ACCEL_MLX5_MAX_SGE) <= UINT16_MAX); 799 mlx5_task->num_reqs = SPDK_CEIL_DIV(max_sge_count, ACCEL_MLX5_MAX_SGE); 800 mlx5_task->blocks_per_req = SPDK_CEIL_DIV(mlx5_task->num_blocks, mlx5_task->num_reqs); 801 } else { 802 mlx5_task->num_reqs = 1; 803 mlx5_task->blocks_per_req = mlx5_task->num_blocks; 804 } 805 } 806 } else { 807 mlx5_task->num_reqs = mlx5_task->num_blocks; 808 mlx5_task->blocks_per_req = 1; 809 } 810 811 if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(mlx5_task, dev->crypto_mkeys))) { 812 /* Pool is empty, queue this task */ 813 SPDK_DEBUGLOG(accel_mlx5, "no reqs in pool, dev %s\n", dev->dev_ctx->context->device->name); 814 dev->stats.nomem_mkey++; 815 return -ENOMEM; 816 } 817 if (spdk_unlikely(accel_mlx5_dev_get_available_slots(dev, &dev->qp) < 2)) { 818 /* Queue is full, queue this task */ 819 SPDK_DEBUGLOG(accel_mlx5, "dev %s qp %p is full\n", dev->dev_ctx->context->device->name, 820 mlx5_task->qp); 821 dev->stats.nomem_qdepth++; 822 return -ENOMEM; 823 } 824 825 SPDK_DEBUGLOG(accel_mlx5, "task %p, src_iovs %u, dst_iovs %u, num_reqs %u, " 826 "blocks/req %u, blocks %u, inplace %d\n", task, task->s.iovcnt, task->d.iovcnt, 827 mlx5_task->num_reqs, mlx5_task->blocks_per_req, mlx5_task->num_blocks, mlx5_task->inplace); 828 829 return 0; 830 } 831 832 static inline void 833 accel_mlx5_copy_task_complete(struct accel_mlx5_task *mlx5_task) 834 { 835 spdk_accel_task_complete(&mlx5_task->base, 0); 836 } 837 838 static inline int 839 accel_mlx5_copy_task_process_one(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_qp *qp, 840 uint64_t wrid, uint32_t fence) 841 { 842 struct spdk_accel_task *task = &mlx5_task->base; 843 struct accel_mlx5_sge sge; 844 uint32_t remaining = 0; 845 uint32_t dst_len; 846 int rc; 847 848 /* Limit one RDMA_WRITE by length of dst buffer. Not all src buffers may fit into one dst buffer due to 849 * limitation on ACCEL_MLX5_MAX_SGE. If this is the case then remaining is not zero */ 850 assert(mlx5_task->dst.iov->iov_len > mlx5_task->dst.iov_offset); 851 dst_len = mlx5_task->dst.iov->iov_len - mlx5_task->dst.iov_offset; 852 rc = accel_mlx5_fill_block_sge(qp->dev, sge.src_sge, &mlx5_task->src, dst_len, &remaining, 853 task->src_domain, task->src_domain_ctx); 854 if (spdk_unlikely(rc <= 0)) { 855 if (rc == 0) { 856 rc = -EINVAL; 857 } 858 SPDK_ERRLOG("failed set src sge, rc %d\n", rc); 859 return rc; 860 } 861 sge.src_sge_count = rc; 862 assert(dst_len > remaining); 863 dst_len -= remaining; 864 865 rc = accel_mlx5_fill_block_sge(qp->dev, sge.dst_sge, &mlx5_task->dst, dst_len, &remaining, 866 task->dst_domain, task->dst_domain_ctx); 867 if (spdk_unlikely(rc != 1)) { 868 /* We use single dst entry, any result other than 1 is an error */ 869 if (rc == 0) { 870 rc = -EINVAL; 871 } 872 SPDK_ERRLOG("failed set dst sge, rc %d\n", rc); 873 return rc; 874 } 875 if (spdk_unlikely(remaining)) { 876 SPDK_ERRLOG("Incorrect dst length, remaining %u\n", remaining); 877 assert(0); 878 return -EINVAL; 879 } 880 881 rc = spdk_mlx5_qp_rdma_write(mlx5_task->qp->qp, sge.src_sge, sge.src_sge_count, 882 sge.dst_sge[0].addr, sge.dst_sge[0].lkey, wrid, fence); 883 if (spdk_unlikely(rc)) { 884 SPDK_ERRLOG("new RDMA WRITE failed with %d\n", rc); 885 return rc; 886 } 887 qp->dev->stats.rdma_writes++; 888 889 return 0; 890 } 891 892 static inline int 893 accel_mlx5_copy_task_process(struct accel_mlx5_task *mlx5_task) 894 { 895 896 struct accel_mlx5_qp *qp = mlx5_task->qp; 897 struct accel_mlx5_dev *dev = qp->dev; 898 uint16_t i; 899 int rc; 900 901 mlx5_task->num_wrs = 0; 902 assert(mlx5_task->num_reqs > 0); 903 assert(mlx5_task->num_ops > 0); 904 905 /* Handle n-1 reqs in order to simplify wrid and fence handling */ 906 for (i = 0; i < mlx5_task->num_ops - 1; i++) { 907 rc = accel_mlx5_copy_task_process_one(mlx5_task, qp, 0, 0); 908 if (spdk_unlikely(rc)) { 909 return rc; 910 } 911 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task); 912 mlx5_task->num_submitted_reqs++; 913 } 914 915 rc = accel_mlx5_copy_task_process_one(mlx5_task, qp, (uint64_t)mlx5_task, 916 SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE); 917 if (spdk_unlikely(rc)) { 918 return rc; 919 } 920 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task); 921 mlx5_task->num_submitted_reqs++; 922 STAILQ_INSERT_TAIL(&qp->in_hw, mlx5_task, link); 923 924 SPDK_DEBUGLOG(accel_mlx5, "end, copy task, %p\n", mlx5_task); 925 926 return 0; 927 } 928 929 static inline int 930 accel_mlx5_copy_task_continue(struct accel_mlx5_task *task) 931 { 932 struct accel_mlx5_qp *qp = task->qp; 933 struct accel_mlx5_dev *dev = qp->dev; 934 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp); 935 936 task->num_ops = spdk_min(qp_slot, task->num_reqs - task->num_completed_reqs); 937 if (spdk_unlikely(task->num_ops == 0)) { 938 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 939 dev->stats.nomem_qdepth++; 940 return -ENOMEM; 941 } 942 return accel_mlx5_copy_task_process(task); 943 } 944 945 static inline uint32_t 946 accel_mlx5_get_copy_task_count(struct iovec *src_iov, uint32_t src_iovcnt, 947 struct iovec *dst_iov, uint32_t dst_iovcnt) 948 { 949 uint32_t src = 0; 950 uint32_t dst = 0; 951 uint64_t src_offset = 0; 952 uint64_t dst_offset = 0; 953 uint32_t num_ops = 0; 954 uint32_t src_sge_count = 0; 955 956 while (src < src_iovcnt && dst < dst_iovcnt) { 957 uint64_t src_len = src_iov[src].iov_len - src_offset; 958 uint64_t dst_len = dst_iov[dst].iov_len - dst_offset; 959 960 if (dst_len < src_len) { 961 dst_offset = 0; 962 src_offset += dst_len; 963 dst++; 964 num_ops++; 965 src_sge_count = 0; 966 } else if (src_len < dst_len) { 967 dst_offset += src_len; 968 src_offset = 0; 969 src++; 970 if (++src_sge_count >= ACCEL_MLX5_MAX_SGE) { 971 num_ops++; 972 src_sge_count = 0; 973 } 974 } else { 975 dst_offset = 0; 976 src_offset = 0; 977 dst++; 978 src++; 979 num_ops++; 980 src_sge_count = 0; 981 } 982 } 983 984 assert(src == src_iovcnt); 985 assert(dst == dst_iovcnt); 986 assert(src_offset == 0); 987 assert(dst_offset == 0); 988 return num_ops; 989 } 990 991 static inline int 992 accel_mlx5_copy_task_init(struct accel_mlx5_task *mlx5_task) 993 { 994 struct spdk_accel_task *task = &mlx5_task->base; 995 struct accel_mlx5_qp *qp = mlx5_task->qp; 996 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(qp->dev, qp); 997 998 if (spdk_likely(task->s.iovcnt <= ACCEL_MLX5_MAX_SGE)) { 999 mlx5_task->num_reqs = task->d.iovcnt; 1000 } else if (task->d.iovcnt == 1) { 1001 mlx5_task->num_reqs = SPDK_CEIL_DIV(task->s.iovcnt, ACCEL_MLX5_MAX_SGE); 1002 } else { 1003 mlx5_task->num_reqs = accel_mlx5_get_copy_task_count(task->s.iovs, task->s.iovcnt, 1004 task->d.iovs, task->d.iovcnt); 1005 } 1006 mlx5_task->inplace = 0; 1007 accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt); 1008 accel_mlx5_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt); 1009 mlx5_task->num_ops = spdk_min(qp_slot, mlx5_task->num_reqs); 1010 if (spdk_unlikely(!mlx5_task->num_ops)) { 1011 qp->dev->stats.nomem_qdepth++; 1012 return -ENOMEM; 1013 } 1014 SPDK_DEBUGLOG(accel_mlx5, "copy task num_reqs %u, num_ops %u\n", mlx5_task->num_reqs, 1015 mlx5_task->num_ops); 1016 1017 return 0; 1018 } 1019 1020 static inline uint32_t 1021 accel_mlx5_advance_iovec(struct iovec *iov, uint32_t iovcnt, size_t *iov_offset, size_t *len) 1022 { 1023 uint32_t i; 1024 size_t iov_len; 1025 1026 for (i = 0; *len != 0 && i < iovcnt; i++) { 1027 iov_len = iov[i].iov_len - *iov_offset; 1028 1029 if (iov_len < *len) { 1030 *iov_offset = 0; 1031 *len -= iov_len; 1032 continue; 1033 } 1034 if (iov_len == *len) { 1035 *iov_offset = 0; 1036 i++; 1037 } else { /* iov_len > *len */ 1038 *iov_offset += *len; 1039 } 1040 *len = 0; 1041 break; 1042 } 1043 1044 return i; 1045 } 1046 1047 static inline void 1048 accel_mlx5_crc_task_complete(struct accel_mlx5_task *mlx5_task) 1049 { 1050 struct accel_mlx5_dev *dev = mlx5_task->qp->dev; 1051 1052 *mlx5_task->base.crc_dst = mlx5_task->psv->crc ^ UINT32_MAX; 1053 /* Normal task completion without allocated mkeys is not possible */ 1054 assert(mlx5_task->num_ops); 1055 spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, mlx5_task->mkeys, mlx5_task->num_ops); 1056 spdk_mempool_put(dev->dev_ctx->psv_pool, mlx5_task->psv); 1057 spdk_accel_task_complete(&mlx5_task->base, 0); 1058 } 1059 1060 static inline int 1061 accel_mlx5_crc_task_configure_umr(struct accel_mlx5_task *mlx5_task, struct ibv_sge *sge, 1062 uint32_t sge_count, struct spdk_mlx5_mkey_pool_obj *mkey, 1063 enum spdk_mlx5_umr_sig_domain sig_domain, uint32_t umr_len, 1064 bool sig_init, bool sig_check_gen) 1065 { 1066 struct spdk_mlx5_umr_sig_attr sattr = { 1067 .seed = mlx5_task->base.seed ^ UINT32_MAX, 1068 .psv_index = mlx5_task->psv->psv_index, 1069 .domain = sig_domain, 1070 .sigerr_count = mkey->sig.sigerr_count, 1071 .raw_data_size = umr_len, 1072 .init = sig_init, 1073 .check_gen = sig_check_gen, 1074 }; 1075 struct spdk_mlx5_umr_attr umr_attr = { 1076 .mkey = mkey->mkey, 1077 .umr_len = umr_len, 1078 .sge_count = sge_count, 1079 .sge = sge, 1080 }; 1081 1082 return spdk_mlx5_umr_configure_sig(mlx5_task->qp->qp, &umr_attr, &sattr, 0, 0); 1083 } 1084 1085 static inline int 1086 accel_mlx5_crc_task_fill_sge(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_sge *sge) 1087 { 1088 struct spdk_accel_task *task = &mlx5_task->base; 1089 struct accel_mlx5_qp *qp = mlx5_task->qp; 1090 struct accel_mlx5_dev *dev = qp->dev; 1091 uint32_t remaining; 1092 int rc; 1093 1094 rc = accel_mlx5_fill_block_sge(dev, sge->src_sge, &mlx5_task->src, task->nbytes, &remaining, 1095 task->src_domain, task->src_domain_ctx); 1096 if (spdk_unlikely(rc <= 0)) { 1097 if (rc == 0) { 1098 rc = -EINVAL; 1099 } 1100 SPDK_ERRLOG("failed set src sge, rc %d\n", rc); 1101 return rc; 1102 } 1103 assert(remaining == 0); 1104 sge->src_sge_count = rc; 1105 1106 if (!mlx5_task->inplace) { 1107 rc = accel_mlx5_fill_block_sge(dev, sge->dst_sge, &mlx5_task->dst, task->nbytes, &remaining, 1108 task->dst_domain, task->dst_domain_ctx); 1109 if (spdk_unlikely(rc <= 0)) { 1110 if (rc == 0) { 1111 rc = -EINVAL; 1112 } 1113 SPDK_ERRLOG("failed set dst sge, rc %d\n", rc); 1114 return rc; 1115 } 1116 assert(remaining == 0); 1117 sge->dst_sge_count = rc; 1118 } 1119 1120 return 0; 1121 } 1122 1123 static inline int 1124 accel_mlx5_crc_task_process_one_req(struct accel_mlx5_task *mlx5_task) 1125 { 1126 struct accel_mlx5_sge sges; 1127 struct accel_mlx5_qp *qp = mlx5_task->qp; 1128 struct accel_mlx5_dev *dev = qp->dev; 1129 uint32_t num_ops = spdk_min(mlx5_task->num_reqs - mlx5_task->num_completed_reqs, 1130 mlx5_task->num_ops); 1131 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp); 1132 uint32_t rdma_fence = SPDK_MLX5_WQE_CTRL_STRONG_ORDERING; 1133 struct ibv_sge *sge; 1134 int rc; 1135 uint16_t sge_count; 1136 1137 num_ops = spdk_min(num_ops, qp_slot >> 1); 1138 if (spdk_unlikely(!num_ops)) { 1139 return -EINVAL; 1140 } 1141 1142 mlx5_task->num_wrs = 0; 1143 /* At this moment we have as many requests as can be submitted to a qp */ 1144 rc = accel_mlx5_crc_task_fill_sge(mlx5_task, &sges); 1145 if (spdk_unlikely(rc)) { 1146 return rc; 1147 } 1148 rc = accel_mlx5_crc_task_configure_umr(mlx5_task, sges.src_sge, sges.src_sge_count, 1149 mlx5_task->mkeys[0], SPDK_MLX5_UMR_SIG_DOMAIN_WIRE, mlx5_task->base.nbytes, true, true); 1150 if (spdk_unlikely(rc)) { 1151 SPDK_ERRLOG("UMR configure failed with %d\n", rc); 1152 return rc; 1153 } 1154 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task); 1155 dev->stats.sig_umrs++; 1156 1157 if (mlx5_task->inplace) { 1158 sge = sges.src_sge; 1159 sge_count = sges.src_sge_count; 1160 } else { 1161 sge = sges.dst_sge; 1162 sge_count = sges.dst_sge_count; 1163 } 1164 1165 /* 1166 * Add the crc destination to the end of sges. A free entry must be available for CRC 1167 * because the task init function reserved it. 1168 */ 1169 assert(sge_count < ACCEL_MLX5_MAX_SGE); 1170 sge[sge_count].lkey = mlx5_task->psv->crc_lkey; 1171 sge[sge_count].addr = (uintptr_t)&mlx5_task->psv->crc; 1172 sge[sge_count++].length = sizeof(uint32_t); 1173 1174 if (spdk_unlikely(mlx5_task->psv->bits.error)) { 1175 rc = spdk_mlx5_qp_set_psv(qp->qp, mlx5_task->psv->psv_index, *mlx5_task->base.crc_dst, 0, 0); 1176 if (spdk_unlikely(rc)) { 1177 SPDK_ERRLOG("SET_PSV failed with %d\n", rc); 1178 return rc; 1179 } 1180 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task); 1181 } 1182 1183 rc = spdk_mlx5_qp_rdma_read(qp->qp, sge, sge_count, 0, mlx5_task->mkeys[0]->mkey, 1184 (uint64_t)mlx5_task, rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE); 1185 if (spdk_unlikely(rc)) { 1186 SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc); 1187 return rc; 1188 } 1189 mlx5_task->num_submitted_reqs++; 1190 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task); 1191 dev->stats.rdma_reads++; 1192 1193 return 0; 1194 } 1195 1196 static inline int 1197 accel_mlx5_crc_task_fill_umr_sge(struct accel_mlx5_qp *qp, struct ibv_sge *sge, 1198 struct accel_mlx5_iov_sgl *umr_iovs, struct spdk_memory_domain *domain, 1199 void *domain_ctx, struct accel_mlx5_iov_sgl *rdma_iovs, size_t *len) 1200 { 1201 int umr_idx = 0; 1202 int rdma_idx = 0; 1203 int umr_iovcnt = spdk_min(umr_iovs->iovcnt, (int)ACCEL_MLX5_MAX_SGE); 1204 int rdma_iovcnt = spdk_min(rdma_iovs->iovcnt, (int)ACCEL_MLX5_MAX_SGE); 1205 size_t umr_iov_offset; 1206 size_t rdma_iov_offset; 1207 size_t umr_len = 0; 1208 void *sge_addr; 1209 size_t sge_len; 1210 size_t umr_sge_len; 1211 size_t rdma_sge_len; 1212 int rc; 1213 1214 umr_iov_offset = umr_iovs->iov_offset; 1215 rdma_iov_offset = rdma_iovs->iov_offset; 1216 1217 while (umr_idx < umr_iovcnt && rdma_idx < rdma_iovcnt) { 1218 umr_sge_len = umr_iovs->iov[umr_idx].iov_len - umr_iov_offset; 1219 rdma_sge_len = rdma_iovs->iov[rdma_idx].iov_len - rdma_iov_offset; 1220 sge_addr = umr_iovs->iov[umr_idx].iov_base + umr_iov_offset; 1221 1222 if (umr_sge_len == rdma_sge_len) { 1223 rdma_idx++; 1224 umr_iov_offset = 0; 1225 rdma_iov_offset = 0; 1226 sge_len = umr_sge_len; 1227 } else if (umr_sge_len < rdma_sge_len) { 1228 umr_iov_offset = 0; 1229 rdma_iov_offset += umr_sge_len; 1230 sge_len = umr_sge_len; 1231 } else { 1232 size_t remaining; 1233 1234 remaining = umr_sge_len - rdma_sge_len; 1235 while (remaining) { 1236 rdma_idx++; 1237 if (rdma_idx == (int)ACCEL_MLX5_MAX_SGE) { 1238 break; 1239 } 1240 rdma_sge_len = rdma_iovs->iov[rdma_idx].iov_len; 1241 if (remaining == rdma_sge_len) { 1242 rdma_idx++; 1243 rdma_iov_offset = 0; 1244 umr_iov_offset = 0; 1245 remaining = 0; 1246 break; 1247 } 1248 if (remaining < rdma_sge_len) { 1249 rdma_iov_offset = remaining; 1250 umr_iov_offset = 0; 1251 remaining = 0; 1252 break; 1253 } 1254 remaining -= rdma_sge_len; 1255 } 1256 sge_len = umr_sge_len - remaining; 1257 } 1258 rc = accel_mlx5_translate_addr(sge_addr, sge_len, domain, domain_ctx, qp->dev, &sge[umr_idx]); 1259 if (spdk_unlikely(rc)) { 1260 return -EINVAL; 1261 } 1262 SPDK_DEBUGLOG(accel_mlx5, "\t sge[%d] lkey %u, addr %p, len %u\n", umr_idx, sge[umr_idx].lkey, 1263 (void *)sge[umr_idx].addr, sge[umr_idx].length); 1264 umr_len += sge_len; 1265 umr_idx++; 1266 } 1267 accel_mlx5_iov_sgl_advance(umr_iovs, umr_len); 1268 accel_mlx5_iov_sgl_advance(rdma_iovs, umr_len); 1269 *len = umr_len; 1270 1271 return umr_idx; 1272 } 1273 1274 static inline int 1275 accel_mlx5_crc_task_process_multi_req(struct accel_mlx5_task *mlx5_task) 1276 { 1277 size_t umr_len[ACCEL_MLX5_MAX_MKEYS_IN_TASK]; 1278 struct ibv_sge sges[ACCEL_MLX5_MAX_SGE]; 1279 struct spdk_accel_task *task = &mlx5_task->base; 1280 struct accel_mlx5_qp *qp = mlx5_task->qp; 1281 struct accel_mlx5_dev *dev = qp->dev; 1282 struct accel_mlx5_iov_sgl umr_sgl; 1283 struct accel_mlx5_iov_sgl *umr_sgl_ptr; 1284 struct accel_mlx5_iov_sgl rdma_sgl; 1285 uint64_t umr_offset; 1286 uint32_t rdma_fence = SPDK_MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE; 1287 int sge_count; 1288 uint32_t remaining; 1289 int rc; 1290 uint16_t i; 1291 uint16_t num_ops = spdk_min(mlx5_task->num_reqs - mlx5_task->num_completed_reqs, 1292 mlx5_task->num_ops); 1293 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp); 1294 bool sig_init, sig_check_gen = false; 1295 1296 num_ops = spdk_min(num_ops, qp_slot >> 1); 1297 if (spdk_unlikely(!num_ops)) { 1298 return -EINVAL; 1299 } 1300 /* Init signature on the first UMR */ 1301 sig_init = !mlx5_task->num_submitted_reqs; 1302 1303 /* 1304 * accel_mlx5_crc_task_fill_umr_sge() and accel_mlx5_fill_block_sge() advance an IOV during iteration 1305 * on it. We must copy accel_mlx5_iov_sgl to iterate twice or more on the same IOV. 1306 * 1307 * In the in-place case, we iterate on the source IOV three times. That's why we need two copies of 1308 * the source accel_mlx5_iov_sgl. 1309 * 1310 * In the out-of-place case, we iterate on the source IOV once and on the destination IOV two times. 1311 * So, we need one copy of the destination accel_mlx5_iov_sgl. 1312 */ 1313 if (mlx5_task->inplace) { 1314 accel_mlx5_iov_sgl_init(&umr_sgl, mlx5_task->src.iov, mlx5_task->src.iovcnt); 1315 umr_sgl_ptr = &umr_sgl; 1316 accel_mlx5_iov_sgl_init(&rdma_sgl, mlx5_task->src.iov, mlx5_task->src.iovcnt); 1317 } else { 1318 umr_sgl_ptr = &mlx5_task->src; 1319 accel_mlx5_iov_sgl_init(&rdma_sgl, mlx5_task->dst.iov, mlx5_task->dst.iovcnt); 1320 } 1321 mlx5_task->num_wrs = 0; 1322 for (i = 0; i < num_ops; i++) { 1323 /* 1324 * The last request may have only CRC. Skip UMR in this case because the MKey from 1325 * the previous request is used. 1326 */ 1327 if (umr_sgl_ptr->iovcnt == 0) { 1328 assert((mlx5_task->num_completed_reqs + i + 1) == mlx5_task->num_reqs); 1329 break; 1330 } 1331 sge_count = accel_mlx5_crc_task_fill_umr_sge(qp, sges, umr_sgl_ptr, task->src_domain, 1332 task->src_domain_ctx, &rdma_sgl, &umr_len[i]); 1333 if (spdk_unlikely(sge_count <= 0)) { 1334 rc = (sge_count == 0) ? -EINVAL : sge_count; 1335 SPDK_ERRLOG("failed set UMR sge, rc %d\n", rc); 1336 return rc; 1337 } 1338 if (umr_sgl_ptr->iovcnt == 0) { 1339 /* 1340 * We post RDMA without UMR if the last request has only CRC. We use an MKey from 1341 * the last UMR in this case. Since the last request can be postponed to the next 1342 * call of this function, we must save the MKey to the task structure. 1343 */ 1344 mlx5_task->last_umr_len = umr_len[i]; 1345 mlx5_task->last_mkey_idx = i; 1346 sig_check_gen = true; 1347 } 1348 rc = accel_mlx5_crc_task_configure_umr(mlx5_task, sges, sge_count, mlx5_task->mkeys[i], 1349 SPDK_MLX5_UMR_SIG_DOMAIN_WIRE, umr_len[i], sig_init, 1350 sig_check_gen); 1351 if (spdk_unlikely(rc)) { 1352 SPDK_ERRLOG("UMR configure failed with %d\n", rc); 1353 return rc; 1354 } 1355 sig_init = false; 1356 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task); 1357 dev->stats.sig_umrs++; 1358 } 1359 1360 if (spdk_unlikely(mlx5_task->psv->bits.error)) { 1361 rc = spdk_mlx5_qp_set_psv(qp->qp, mlx5_task->psv->psv_index, *mlx5_task->base.crc_dst, 0, 0); 1362 if (spdk_unlikely(rc)) { 1363 SPDK_ERRLOG("SET_PSV failed with %d\n", rc); 1364 return rc; 1365 } 1366 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task); 1367 } 1368 1369 for (i = 0; i < num_ops - 1; i++) { 1370 if (mlx5_task->inplace) { 1371 sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->src, umr_len[i], &remaining, 1372 task->src_domain, task->src_domain_ctx); 1373 } else { 1374 sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->dst, umr_len[i], &remaining, 1375 task->dst_domain, task->dst_domain_ctx); 1376 } 1377 if (spdk_unlikely(sge_count <= 0)) { 1378 rc = (sge_count == 0) ? -EINVAL : sge_count; 1379 SPDK_ERRLOG("failed set RDMA sge, rc %d\n", rc); 1380 return rc; 1381 } 1382 rc = spdk_mlx5_qp_rdma_read(qp->qp, sges, sge_count, 0, mlx5_task->mkeys[i]->mkey, 1383 0, rdma_fence); 1384 if (spdk_unlikely(rc)) { 1385 SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc); 1386 return rc; 1387 } 1388 mlx5_task->num_submitted_reqs++; 1389 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task); 1390 dev->stats.rdma_reads++; 1391 rdma_fence = SPDK_MLX5_WQE_CTRL_STRONG_ORDERING; 1392 } 1393 if ((mlx5_task->inplace && mlx5_task->src.iovcnt == 0) || (!mlx5_task->inplace && 1394 mlx5_task->dst.iovcnt == 0)) { 1395 /* 1396 * The last RDMA does not have any data, only CRC. It also does not have a paired Mkey. 1397 * The CRC is handled in the previous MKey in this case. 1398 */ 1399 sge_count = 0; 1400 umr_offset = mlx5_task->last_umr_len; 1401 } else { 1402 umr_offset = 0; 1403 mlx5_task->last_mkey_idx = i; 1404 if (mlx5_task->inplace) { 1405 sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->src, umr_len[i], &remaining, 1406 task->src_domain, task->src_domain_ctx); 1407 } else { 1408 sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->dst, umr_len[i], &remaining, 1409 task->dst_domain, task->dst_domain_ctx); 1410 } 1411 if (spdk_unlikely(sge_count <= 0)) { 1412 rc = (sge_count == 0) ? -EINVAL : sge_count; 1413 SPDK_ERRLOG("failed set RDMA sge, rc %d\n", rc); 1414 return rc; 1415 } 1416 assert(remaining == 0); 1417 } 1418 if ((mlx5_task->num_completed_reqs + i + 1) == mlx5_task->num_reqs) { 1419 /* Ensure that there is a free sge for the CRC destination. */ 1420 assert(sge_count < (int)ACCEL_MLX5_MAX_SGE); 1421 /* Add the crc destination to the end of sges. */ 1422 sges[sge_count].lkey = mlx5_task->psv->crc_lkey; 1423 sges[sge_count].addr = (uintptr_t)&mlx5_task->psv->crc; 1424 sges[sge_count++].length = sizeof(uint32_t); 1425 } 1426 rdma_fence |= SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE; 1427 rc = spdk_mlx5_qp_rdma_read(qp->qp, sges, sge_count, umr_offset, 1428 mlx5_task->mkeys[mlx5_task->last_mkey_idx]->mkey, 1429 (uint64_t)mlx5_task, rdma_fence); 1430 if (spdk_unlikely(rc)) { 1431 SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc); 1432 return rc; 1433 } 1434 mlx5_task->num_submitted_reqs++; 1435 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task); 1436 dev->stats.rdma_reads++; 1437 1438 return 0; 1439 } 1440 1441 static inline int 1442 accel_mlx5_crc_task_process(struct accel_mlx5_task *mlx5_task) 1443 { 1444 int rc; 1445 1446 assert(mlx5_task->mlx5_opcode == ACCEL_MLX5_OPC_CRC32C); 1447 1448 SPDK_DEBUGLOG(accel_mlx5, "begin, crc task, %p, reqs: total %u, submitted %u, completed %u\n", 1449 mlx5_task, mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs); 1450 1451 if (mlx5_task->num_reqs == 1) { 1452 rc = accel_mlx5_crc_task_process_one_req(mlx5_task); 1453 } else { 1454 rc = accel_mlx5_crc_task_process_multi_req(mlx5_task); 1455 } 1456 1457 if (rc == 0) { 1458 STAILQ_INSERT_TAIL(&mlx5_task->qp->in_hw, mlx5_task, link); 1459 SPDK_DEBUGLOG(accel_mlx5, "end, crc task, %p, reqs: total %u, submitted %u, completed %u\n", 1460 mlx5_task, mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, 1461 mlx5_task->num_completed_reqs); 1462 } 1463 1464 return rc; 1465 } 1466 1467 static inline int 1468 accel_mlx5_task_alloc_crc_ctx(struct accel_mlx5_task *task, uint32_t qp_slot) 1469 { 1470 struct accel_mlx5_qp *qp = task->qp; 1471 struct accel_mlx5_dev *dev = qp->dev; 1472 1473 if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(task, dev->sig_mkeys))) { 1474 SPDK_DEBUGLOG(accel_mlx5, "no mkeys in signature mkey pool, dev %s\n", 1475 dev->dev_ctx->context->device->name); 1476 dev->stats.nomem_mkey++; 1477 return -ENOMEM; 1478 } 1479 task->psv = spdk_mempool_get(dev->dev_ctx->psv_pool); 1480 if (spdk_unlikely(!task->psv)) { 1481 SPDK_DEBUGLOG(accel_mlx5, "no reqs in psv pool, dev %s\n", dev->dev_ctx->context->device->name); 1482 spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, task->mkeys, task->num_ops); 1483 task->num_ops = 0; 1484 dev->stats.nomem_mkey++; 1485 return -ENOMEM; 1486 } 1487 /* One extra slot is needed for SET_PSV WQE to reset the error state in PSV. */ 1488 if (spdk_unlikely(task->psv->bits.error)) { 1489 uint32_t n_slots = task->num_ops * 2 + 1; 1490 1491 if (qp_slot < n_slots) { 1492 spdk_mempool_put(dev->dev_ctx->psv_pool, task->psv); 1493 spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, task->mkeys, task->num_ops); 1494 dev->stats.nomem_qdepth++; 1495 task->num_ops = 0; 1496 return -ENOMEM; 1497 } 1498 } 1499 1500 return 0; 1501 } 1502 1503 static inline int 1504 accel_mlx5_crc_task_continue(struct accel_mlx5_task *task) 1505 { 1506 struct accel_mlx5_qp *qp = task->qp; 1507 struct accel_mlx5_dev *dev = qp->dev; 1508 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp); 1509 int rc; 1510 1511 assert(task->num_reqs > task->num_completed_reqs); 1512 if (task->num_ops == 0) { 1513 /* No mkeys allocated, try to allocate now. */ 1514 rc = accel_mlx5_task_alloc_crc_ctx(task, qp_slot); 1515 if (spdk_unlikely(rc)) { 1516 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 1517 return -ENOMEM; 1518 } 1519 } 1520 /* We need to post at least 1 UMR and 1 RDMA operation */ 1521 if (spdk_unlikely(qp_slot < 2)) { 1522 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 1523 dev->stats.nomem_qdepth++; 1524 return -ENOMEM; 1525 } 1526 1527 return accel_mlx5_crc_task_process(task); 1528 } 1529 1530 static inline uint32_t 1531 accel_mlx5_get_crc_task_count(struct iovec *src_iov, uint32_t src_iovcnt, struct iovec *dst_iov, 1532 uint32_t dst_iovcnt) 1533 { 1534 uint32_t src_idx = 0; 1535 uint32_t dst_idx = 0; 1536 uint32_t num_ops = 1; 1537 uint32_t num_src_sge = 1; 1538 uint32_t num_dst_sge = 1; 1539 size_t src_offset = 0; 1540 size_t dst_offset = 0; 1541 uint32_t num_sge; 1542 size_t src_len; 1543 size_t dst_len; 1544 1545 /* One operation is enough if both iovs fit into ACCEL_MLX5_MAX_SGE. One SGE is reserved for CRC on dst_iov. */ 1546 if (src_iovcnt <= ACCEL_MLX5_MAX_SGE && (dst_iovcnt + 1) <= ACCEL_MLX5_MAX_SGE) { 1547 return 1; 1548 } 1549 1550 while (src_idx < src_iovcnt && dst_idx < dst_iovcnt) { 1551 if (num_src_sge > ACCEL_MLX5_MAX_SGE || num_dst_sge > ACCEL_MLX5_MAX_SGE) { 1552 num_ops++; 1553 num_src_sge = 1; 1554 num_dst_sge = 1; 1555 } 1556 src_len = src_iov[src_idx].iov_len - src_offset; 1557 dst_len = dst_iov[dst_idx].iov_len - dst_offset; 1558 1559 if (src_len == dst_len) { 1560 num_src_sge++; 1561 num_dst_sge++; 1562 src_offset = 0; 1563 dst_offset = 0; 1564 src_idx++; 1565 dst_idx++; 1566 continue; 1567 } 1568 if (src_len < dst_len) { 1569 /* Advance src_iov to reach the point that corresponds to the end of the current dst_iov. */ 1570 num_sge = accel_mlx5_advance_iovec(&src_iov[src_idx], 1571 spdk_min(ACCEL_MLX5_MAX_SGE + 1 - num_src_sge, 1572 src_iovcnt - src_idx), 1573 &src_offset, &dst_len); 1574 src_idx += num_sge; 1575 num_src_sge += num_sge; 1576 if (dst_len != 0) { 1577 /* 1578 * ACCEL_MLX5_MAX_SGE is reached on src_iov, and dst_len bytes 1579 * are left on the current dst_iov. 1580 */ 1581 dst_offset = dst_iov[dst_idx].iov_len - dst_len; 1582 } else { 1583 /* The src_iov advance is completed, shift to the next dst_iov. */ 1584 dst_idx++; 1585 num_dst_sge++; 1586 dst_offset = 0; 1587 } 1588 } else { /* src_len > dst_len */ 1589 /* Advance dst_iov to reach the point that corresponds to the end of the current src_iov. */ 1590 num_sge = accel_mlx5_advance_iovec(&dst_iov[dst_idx], 1591 spdk_min(ACCEL_MLX5_MAX_SGE + 1 - num_dst_sge, 1592 dst_iovcnt - dst_idx), 1593 &dst_offset, &src_len); 1594 dst_idx += num_sge; 1595 num_dst_sge += num_sge; 1596 if (src_len != 0) { 1597 /* 1598 * ACCEL_MLX5_MAX_SGE is reached on dst_iov, and src_len bytes 1599 * are left on the current src_iov. 1600 */ 1601 src_offset = src_iov[src_idx].iov_len - src_len; 1602 } else { 1603 /* The dst_iov advance is completed, shift to the next src_iov. */ 1604 src_idx++; 1605 num_src_sge++; 1606 src_offset = 0; 1607 } 1608 } 1609 } 1610 /* An extra operation is needed if no space is left on dst_iov because CRC takes one SGE. */ 1611 if (num_dst_sge > ACCEL_MLX5_MAX_SGE) { 1612 num_ops++; 1613 } 1614 1615 /* The above loop must reach the end of both iovs simultaneously because their size is the same. */ 1616 assert(src_idx == src_iovcnt); 1617 assert(dst_idx == dst_iovcnt); 1618 assert(src_offset == 0); 1619 assert(dst_offset == 0); 1620 1621 return num_ops; 1622 } 1623 1624 static inline int 1625 accel_mlx5_crc_task_init(struct accel_mlx5_task *mlx5_task) 1626 { 1627 struct spdk_accel_task *task = &mlx5_task->base; 1628 struct accel_mlx5_qp *qp = mlx5_task->qp; 1629 uint32_t qp_slot = accel_mlx5_dev_get_available_slots(qp->dev, qp); 1630 int rc; 1631 1632 accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt); 1633 if (mlx5_task->inplace) { 1634 /* One entry is reserved for CRC */ 1635 mlx5_task->num_reqs = SPDK_CEIL_DIV(mlx5_task->src.iovcnt + 1, ACCEL_MLX5_MAX_SGE); 1636 } else { 1637 accel_mlx5_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt); 1638 mlx5_task->num_reqs = accel_mlx5_get_crc_task_count(mlx5_task->src.iov, mlx5_task->src.iovcnt, 1639 mlx5_task->dst.iov, mlx5_task->dst.iovcnt); 1640 } 1641 1642 rc = accel_mlx5_task_alloc_crc_ctx(mlx5_task, qp_slot); 1643 if (spdk_unlikely(rc)) { 1644 return rc; 1645 } 1646 1647 if (spdk_unlikely(qp_slot < 2)) { 1648 /* Queue is full, queue this task */ 1649 SPDK_DEBUGLOG(accel_mlx5, "dev %s qp %p is full\n", qp->dev->dev_ctx->context->device->name, 1650 mlx5_task->qp); 1651 qp->dev->stats.nomem_qdepth++; 1652 return -ENOMEM; 1653 } 1654 return 0; 1655 } 1656 1657 static int 1658 accel_mlx5_task_op_not_implemented(struct accel_mlx5_task *mlx5_task) 1659 { 1660 SPDK_ERRLOG("wrong function called\n"); 1661 SPDK_UNREACHABLE(); 1662 } 1663 1664 static void 1665 accel_mlx5_task_op_not_implemented_v(struct accel_mlx5_task *mlx5_task) 1666 { 1667 SPDK_ERRLOG("wrong function called\n"); 1668 SPDK_UNREACHABLE(); 1669 } 1670 1671 static int 1672 accel_mlx5_task_op_not_supported(struct accel_mlx5_task *mlx5_task) 1673 { 1674 SPDK_ERRLOG("Unsupported opcode %d\n", mlx5_task->base.op_code); 1675 1676 return -ENOTSUP; 1677 } 1678 1679 static struct accel_mlx5_task_operations g_accel_mlx5_tasks_ops[] = { 1680 [ACCEL_MLX5_OPC_COPY] = { 1681 .init = accel_mlx5_copy_task_init, 1682 .process = accel_mlx5_copy_task_process, 1683 .cont = accel_mlx5_copy_task_continue, 1684 .complete = accel_mlx5_copy_task_complete, 1685 }, 1686 [ACCEL_MLX5_OPC_CRYPTO] = { 1687 .init = accel_mlx5_crypto_task_init, 1688 .process = accel_mlx5_crypto_task_process, 1689 .cont = accel_mlx5_crypto_task_continue, 1690 .complete = accel_mlx5_crypto_task_complete, 1691 }, 1692 [ACCEL_MLX5_OPC_CRC32C] = { 1693 .init = accel_mlx5_crc_task_init, 1694 .process = accel_mlx5_crc_task_process, 1695 .cont = accel_mlx5_crc_task_continue, 1696 .complete = accel_mlx5_crc_task_complete, 1697 }, 1698 [ACCEL_MLX5_OPC_LAST] = { 1699 .init = accel_mlx5_task_op_not_supported, 1700 .process = accel_mlx5_task_op_not_implemented, 1701 .cont = accel_mlx5_task_op_not_implemented, 1702 .complete = accel_mlx5_task_op_not_implemented_v 1703 }, 1704 }; 1705 1706 static inline void 1707 accel_mlx5_task_complete(struct accel_mlx5_task *task) 1708 { 1709 assert(task->num_reqs == task->num_completed_reqs); 1710 SPDK_DEBUGLOG(accel_mlx5, "Complete task %p, opc %d\n", task, task->base.op_code); 1711 1712 g_accel_mlx5_tasks_ops[task->mlx5_opcode].complete(task); 1713 } 1714 1715 static inline int 1716 accel_mlx5_task_continue(struct accel_mlx5_task *task) 1717 { 1718 struct accel_mlx5_qp *qp = task->qp; 1719 struct accel_mlx5_dev *dev = qp->dev; 1720 1721 if (spdk_unlikely(qp->recovering)) { 1722 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 1723 return 0; 1724 } 1725 1726 return g_accel_mlx5_tasks_ops[task->mlx5_opcode].cont(task); 1727 } 1728 static inline void 1729 accel_mlx5_task_init_opcode(struct accel_mlx5_task *mlx5_task) 1730 { 1731 uint8_t base_opcode = mlx5_task->base.op_code; 1732 1733 switch (base_opcode) { 1734 case SPDK_ACCEL_OPC_COPY: 1735 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_COPY; 1736 break; 1737 case SPDK_ACCEL_OPC_ENCRYPT: 1738 assert(g_accel_mlx5.crypto_supported); 1739 mlx5_task->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_WIRE; 1740 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO; 1741 break; 1742 case SPDK_ACCEL_OPC_DECRYPT: 1743 assert(g_accel_mlx5.crypto_supported); 1744 mlx5_task->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_MEMORY; 1745 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO; 1746 break; 1747 case SPDK_ACCEL_OPC_CRC32C: 1748 mlx5_task->inplace = 1; 1749 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRC32C; 1750 break; 1751 case SPDK_ACCEL_OPC_COPY_CRC32C: 1752 mlx5_task->inplace = 0; 1753 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRC32C; 1754 break; 1755 default: 1756 SPDK_ERRLOG("wrong opcode %d\n", base_opcode); 1757 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_LAST; 1758 } 1759 } 1760 1761 static inline int 1762 _accel_mlx5_submit_tasks(struct accel_mlx5_io_channel *accel_ch, struct spdk_accel_task *task) 1763 { 1764 struct accel_mlx5_task *mlx5_task = SPDK_CONTAINEROF(task, struct accel_mlx5_task, base); 1765 struct accel_mlx5_dev *dev = mlx5_task->qp->dev; 1766 int rc; 1767 1768 /* We should not receive any tasks if the module was not enabled */ 1769 assert(g_accel_mlx5.enabled); 1770 1771 dev->stats.opcodes[mlx5_task->mlx5_opcode]++; 1772 rc = g_accel_mlx5_tasks_ops[mlx5_task->mlx5_opcode].init(mlx5_task); 1773 if (spdk_unlikely(rc)) { 1774 if (rc == -ENOMEM) { 1775 SPDK_DEBUGLOG(accel_mlx5, "no reqs to handle new task %p (required %u), put to queue\n", mlx5_task, 1776 mlx5_task->num_reqs); 1777 STAILQ_INSERT_TAIL(&dev->nomem, mlx5_task, link); 1778 return 0; 1779 } 1780 SPDK_ERRLOG("Task opc %d init failed, rc %d\n", task->op_code, rc); 1781 return rc; 1782 } 1783 1784 if (spdk_unlikely(mlx5_task->qp->recovering)) { 1785 STAILQ_INSERT_TAIL(&dev->nomem, mlx5_task, link); 1786 return 0; 1787 } 1788 1789 return g_accel_mlx5_tasks_ops[mlx5_task->mlx5_opcode].process(mlx5_task); 1790 } 1791 1792 static inline void 1793 accel_mlx5_task_assign_qp(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_io_channel *accel_ch) 1794 { 1795 struct accel_mlx5_dev *dev; 1796 1797 dev = &accel_ch->devs[accel_ch->dev_idx]; 1798 accel_ch->dev_idx++; 1799 if (accel_ch->dev_idx == accel_ch->num_devs) { 1800 accel_ch->dev_idx = 0; 1801 } 1802 1803 mlx5_task->qp = &dev->qp; 1804 } 1805 1806 static inline void 1807 accel_mlx5_task_reset(struct accel_mlx5_task *mlx5_task) 1808 { 1809 mlx5_task->num_completed_reqs = 0; 1810 mlx5_task->num_submitted_reqs = 0; 1811 mlx5_task->num_ops = 0; 1812 mlx5_task->num_processed_blocks = 0; 1813 mlx5_task->raw = 0; 1814 } 1815 1816 static int 1817 accel_mlx5_submit_tasks(struct spdk_io_channel *ch, struct spdk_accel_task *task) 1818 { 1819 struct accel_mlx5_task *mlx5_task = SPDK_CONTAINEROF(task, struct accel_mlx5_task, base); 1820 struct accel_mlx5_io_channel *accel_ch = spdk_io_channel_get_ctx(ch); 1821 1822 accel_mlx5_task_assign_qp(mlx5_task, accel_ch); 1823 accel_mlx5_task_reset(mlx5_task); 1824 accel_mlx5_task_init_opcode(mlx5_task); 1825 1826 return _accel_mlx5_submit_tasks(accel_ch, task); 1827 } 1828 1829 static void accel_mlx5_recover_qp(struct accel_mlx5_qp *qp); 1830 1831 static int 1832 accel_mlx5_recover_qp_poller(void *arg) 1833 { 1834 struct accel_mlx5_qp *qp = arg; 1835 1836 spdk_poller_unregister(&qp->recover_poller); 1837 accel_mlx5_recover_qp(qp); 1838 return SPDK_POLLER_BUSY; 1839 } 1840 1841 static void 1842 accel_mlx5_recover_qp(struct accel_mlx5_qp *qp) 1843 { 1844 struct accel_mlx5_dev *dev = qp->dev; 1845 struct spdk_mlx5_qp_attr mlx5_qp_attr = {}; 1846 int rc; 1847 1848 SPDK_NOTICELOG("Recovering qp %p, core %u\n", qp, spdk_env_get_current_core()); 1849 if (qp->qp) { 1850 spdk_mlx5_qp_destroy(qp->qp); 1851 qp->qp = NULL; 1852 } 1853 1854 mlx5_qp_attr.cap.max_send_wr = g_accel_mlx5.attr.qp_size; 1855 mlx5_qp_attr.cap.max_recv_wr = 0; 1856 mlx5_qp_attr.cap.max_send_sge = ACCEL_MLX5_MAX_SGE; 1857 mlx5_qp_attr.cap.max_inline_data = sizeof(struct ibv_sge) * ACCEL_MLX5_MAX_SGE; 1858 1859 rc = spdk_mlx5_qp_create(dev->dev_ctx->pd, dev->cq, &mlx5_qp_attr, &qp->qp); 1860 if (rc) { 1861 SPDK_ERRLOG("Failed to create mlx5 dma QP, rc %d. Retry in %d usec\n", 1862 rc, ACCEL_MLX5_RECOVER_POLLER_PERIOD_US); 1863 qp->recover_poller = SPDK_POLLER_REGISTER(accel_mlx5_recover_qp_poller, qp, 1864 ACCEL_MLX5_RECOVER_POLLER_PERIOD_US); 1865 return; 1866 } 1867 1868 qp->recovering = false; 1869 } 1870 1871 static inline void 1872 accel_mlx5_process_error_cpl(struct spdk_mlx5_cq_completion *wc, struct accel_mlx5_task *task) 1873 { 1874 struct accel_mlx5_qp *qp = task->qp; 1875 1876 if (wc->status != IBV_WC_WR_FLUSH_ERR) { 1877 SPDK_WARNLOG("RDMA: qp %p, task %p, WC status %d, core %u\n", 1878 qp, task, wc->status, spdk_env_get_current_core()); 1879 } else { 1880 SPDK_DEBUGLOG(accel_mlx5, 1881 "RDMA: qp %p, task %p, WC status %d, core %u\n", 1882 qp, task, wc->status, spdk_env_get_current_core()); 1883 } 1884 1885 qp->recovering = true; 1886 assert(task->num_completed_reqs <= task->num_submitted_reqs); 1887 if (task->num_completed_reqs == task->num_submitted_reqs) { 1888 STAILQ_REMOVE_HEAD(&qp->in_hw, link); 1889 accel_mlx5_task_fail(task, -EIO); 1890 } 1891 } 1892 1893 static inline int64_t 1894 accel_mlx5_poll_cq(struct accel_mlx5_dev *dev) 1895 { 1896 struct spdk_mlx5_cq_completion wc[ACCEL_MLX5_MAX_WC]; 1897 struct accel_mlx5_task *task; 1898 struct accel_mlx5_qp *qp; 1899 int reaped, i, rc; 1900 uint16_t completed; 1901 1902 dev->stats.polls++; 1903 reaped = spdk_mlx5_cq_poll_completions(dev->cq, wc, ACCEL_MLX5_MAX_WC); 1904 if (spdk_unlikely(reaped < 0)) { 1905 SPDK_ERRLOG("Error polling CQ! (%d): %s\n", errno, spdk_strerror(errno)); 1906 return reaped; 1907 } else if (reaped == 0) { 1908 dev->stats.idle_polls++; 1909 return 0; 1910 } 1911 dev->stats.completions += reaped; 1912 1913 SPDK_DEBUGLOG(accel_mlx5, "Reaped %d cpls on dev %s\n", reaped, 1914 dev->dev_ctx->context->device->name); 1915 1916 for (i = 0; i < reaped; i++) { 1917 if (spdk_unlikely(!wc[i].wr_id)) { 1918 /* Unsignaled completion with error, ignore */ 1919 continue; 1920 } 1921 task = (struct accel_mlx5_task *)wc[i].wr_id; 1922 qp = task->qp; 1923 assert(task == STAILQ_FIRST(&qp->in_hw) && "submission mismatch"); 1924 assert(task->num_submitted_reqs > task->num_completed_reqs); 1925 completed = task->num_submitted_reqs - task->num_completed_reqs; 1926 assert((uint32_t)task->num_completed_reqs + completed <= UINT16_MAX); 1927 task->num_completed_reqs += completed; 1928 assert(qp->wrs_submitted >= task->num_wrs); 1929 qp->wrs_submitted -= task->num_wrs; 1930 assert(dev->wrs_in_cq > 0); 1931 dev->wrs_in_cq--; 1932 1933 if (spdk_unlikely(wc[i].status)) { 1934 accel_mlx5_process_error_cpl(&wc[i], task); 1935 if (qp->wrs_submitted == 0) { 1936 assert(STAILQ_EMPTY(&qp->in_hw)); 1937 accel_mlx5_recover_qp(qp); 1938 } 1939 continue; 1940 } 1941 1942 SPDK_DEBUGLOG(accel_mlx5, "task %p, remaining %u\n", task, 1943 task->num_reqs - task->num_completed_reqs); 1944 if (task->num_completed_reqs == task->num_reqs) { 1945 STAILQ_REMOVE_HEAD(&qp->in_hw, link); 1946 accel_mlx5_task_complete(task); 1947 } else { 1948 assert(task->num_submitted_reqs < task->num_reqs); 1949 assert(task->num_completed_reqs == task->num_submitted_reqs); 1950 STAILQ_REMOVE_HEAD(&qp->in_hw, link); 1951 rc = accel_mlx5_task_continue(task); 1952 if (spdk_unlikely(rc)) { 1953 if (rc != -ENOMEM) { 1954 accel_mlx5_task_fail(task, rc); 1955 } 1956 } 1957 } 1958 } 1959 1960 return reaped; 1961 } 1962 1963 static inline void 1964 accel_mlx5_resubmit_nomem_tasks(struct accel_mlx5_dev *dev) 1965 { 1966 struct accel_mlx5_task *task, *tmp, *last; 1967 int rc; 1968 1969 last = STAILQ_LAST(&dev->nomem, accel_mlx5_task, link); 1970 STAILQ_FOREACH_SAFE(task, &dev->nomem, link, tmp) { 1971 STAILQ_REMOVE_HEAD(&dev->nomem, link); 1972 rc = accel_mlx5_task_continue(task); 1973 if (spdk_unlikely(rc)) { 1974 if (rc != -ENOMEM) { 1975 accel_mlx5_task_fail(task, rc); 1976 } 1977 break; 1978 } 1979 /* If qpair is recovering, task is added back to the nomem list and 0 is returned. In that case we 1980 * need a special condition to iterate the list once and stop this FOREACH loop */ 1981 if (task == last) { 1982 break; 1983 } 1984 } 1985 } 1986 1987 static int 1988 accel_mlx5_poller(void *ctx) 1989 { 1990 struct accel_mlx5_io_channel *ch = ctx; 1991 struct accel_mlx5_dev *dev; 1992 1993 int64_t completions = 0, rc; 1994 uint32_t i; 1995 1996 for (i = 0; i < ch->num_devs; i++) { 1997 dev = &ch->devs[i]; 1998 if (dev->wrs_in_cq) { 1999 rc = accel_mlx5_poll_cq(dev); 2000 if (spdk_unlikely(rc < 0)) { 2001 SPDK_ERRLOG("Error %"PRId64" on CQ, dev %s\n", rc, dev->dev_ctx->context->device->name); 2002 } 2003 completions += rc; 2004 if (dev->qp.wrs_submitted) { 2005 spdk_mlx5_qp_complete_send(dev->qp.qp); 2006 } 2007 } 2008 if (!STAILQ_EMPTY(&dev->nomem)) { 2009 accel_mlx5_resubmit_nomem_tasks(dev); 2010 } 2011 } 2012 2013 return !!completions; 2014 } 2015 2016 static bool 2017 accel_mlx5_supports_opcode(enum spdk_accel_opcode opc) 2018 { 2019 assert(g_accel_mlx5.enabled); 2020 2021 switch (opc) { 2022 case SPDK_ACCEL_OPC_COPY: 2023 return true; 2024 case SPDK_ACCEL_OPC_ENCRYPT: 2025 case SPDK_ACCEL_OPC_DECRYPT: 2026 return g_accel_mlx5.crypto_supported; 2027 case SPDK_ACCEL_OPC_CRC32C: 2028 case SPDK_ACCEL_OPC_COPY_CRC32C: 2029 return g_accel_mlx5.crc32c_supported; 2030 default: 2031 return false; 2032 } 2033 } 2034 2035 static struct spdk_io_channel * 2036 accel_mlx5_get_io_channel(void) 2037 { 2038 assert(g_accel_mlx5.enabled); 2039 return spdk_get_io_channel(&g_accel_mlx5); 2040 } 2041 2042 static int 2043 accel_mlx5_create_qp(struct accel_mlx5_dev *dev, struct accel_mlx5_qp *qp) 2044 { 2045 struct spdk_mlx5_qp_attr mlx5_qp_attr = {}; 2046 int rc; 2047 2048 mlx5_qp_attr.cap.max_send_wr = g_accel_mlx5.attr.qp_size; 2049 mlx5_qp_attr.cap.max_recv_wr = 0; 2050 mlx5_qp_attr.cap.max_send_sge = ACCEL_MLX5_MAX_SGE; 2051 mlx5_qp_attr.cap.max_inline_data = sizeof(struct ibv_sge) * ACCEL_MLX5_MAX_SGE; 2052 2053 rc = spdk_mlx5_qp_create(dev->dev_ctx->pd, dev->cq, &mlx5_qp_attr, &qp->qp); 2054 if (rc) { 2055 return rc; 2056 } 2057 2058 STAILQ_INIT(&qp->in_hw); 2059 qp->dev = dev; 2060 qp->verbs_qp = spdk_mlx5_qp_get_verbs_qp(qp->qp); 2061 assert(qp->verbs_qp); 2062 qp->wrs_max = g_accel_mlx5.attr.qp_size; 2063 2064 return 0; 2065 } 2066 2067 static void 2068 accel_mlx5_add_stats(struct accel_mlx5_stats *stats, const struct accel_mlx5_stats *to_add) 2069 { 2070 int i; 2071 2072 stats->crypto_umrs += to_add->crypto_umrs; 2073 stats->sig_umrs += to_add->sig_umrs; 2074 stats->rdma_reads += to_add->rdma_reads; 2075 stats->rdma_writes += to_add->rdma_writes; 2076 stats->polls += to_add->polls; 2077 stats->idle_polls += to_add->idle_polls; 2078 stats->completions += to_add->completions; 2079 stats->nomem_qdepth += to_add->nomem_qdepth; 2080 stats->nomem_mkey += to_add->nomem_mkey; 2081 for (i = 0; i < ACCEL_MLX5_OPC_LAST; i++) { 2082 stats->opcodes[i] += to_add->opcodes[i]; 2083 } 2084 } 2085 2086 static void 2087 accel_mlx5_destroy_cb(void *io_device, void *ctx_buf) 2088 { 2089 struct accel_mlx5_io_channel *ch = ctx_buf; 2090 struct accel_mlx5_dev *dev; 2091 uint32_t i; 2092 2093 spdk_poller_unregister(&ch->poller); 2094 for (i = 0; i < ch->num_devs; i++) { 2095 dev = &ch->devs[i]; 2096 spdk_mlx5_qp_destroy(dev->qp.qp); 2097 if (dev->cq) { 2098 spdk_mlx5_cq_destroy(dev->cq); 2099 } 2100 spdk_poller_unregister(&dev->qp.recover_poller); 2101 if (dev->crypto_mkeys) { 2102 spdk_mlx5_mkey_pool_put_ref(dev->crypto_mkeys); 2103 } 2104 if (dev->sig_mkeys) { 2105 spdk_mlx5_mkey_pool_put_ref(dev->sig_mkeys); 2106 } 2107 spdk_rdma_utils_free_mem_map(&dev->mmap); 2108 spdk_spin_lock(&g_accel_mlx5.lock); 2109 accel_mlx5_add_stats(&g_accel_mlx5.stats, &dev->stats); 2110 spdk_spin_unlock(&g_accel_mlx5.lock); 2111 } 2112 free(ch->devs); 2113 } 2114 2115 static int 2116 accel_mlx5_create_cb(void *io_device, void *ctx_buf) 2117 { 2118 struct spdk_mlx5_cq_attr cq_attr = {}; 2119 struct accel_mlx5_io_channel *ch = ctx_buf; 2120 struct accel_mlx5_dev_ctx *dev_ctx; 2121 struct accel_mlx5_dev *dev; 2122 uint32_t i; 2123 int rc; 2124 2125 ch->devs = calloc(g_accel_mlx5.num_ctxs, sizeof(*ch->devs)); 2126 if (!ch->devs) { 2127 SPDK_ERRLOG("Memory allocation failed\n"); 2128 return -ENOMEM; 2129 } 2130 2131 for (i = 0; i < g_accel_mlx5.num_ctxs; i++) { 2132 dev_ctx = &g_accel_mlx5.dev_ctxs[i]; 2133 dev = &ch->devs[i]; 2134 dev->dev_ctx = dev_ctx; 2135 2136 if (dev_ctx->crypto_mkeys) { 2137 dev->crypto_mkeys = spdk_mlx5_mkey_pool_get_ref(dev_ctx->pd, SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO); 2138 if (!dev->crypto_mkeys) { 2139 SPDK_ERRLOG("Failed to get crypto mkey pool channel, dev %s\n", dev_ctx->context->device->name); 2140 /* Should not happen since mkey pool is created on accel_mlx5 initialization. 2141 * We should not be here if pool creation failed */ 2142 assert(0); 2143 goto err_out; 2144 } 2145 } 2146 if (dev_ctx->sig_mkeys) { 2147 dev->sig_mkeys = spdk_mlx5_mkey_pool_get_ref(dev_ctx->pd, SPDK_MLX5_MKEY_POOL_FLAG_SIGNATURE); 2148 if (!dev->sig_mkeys) { 2149 SPDK_ERRLOG("Failed to get sig mkey pool channel, dev %s\n", dev_ctx->context->device->name); 2150 /* Should not happen since mkey pool is created on accel_mlx5 initialization. 2151 * We should not be here if pool creation failed */ 2152 assert(0); 2153 goto err_out; 2154 } 2155 } 2156 2157 memset(&cq_attr, 0, sizeof(cq_attr)); 2158 cq_attr.cqe_cnt = g_accel_mlx5.attr.qp_size; 2159 cq_attr.cqe_size = 64; 2160 cq_attr.cq_context = dev; 2161 2162 ch->num_devs++; 2163 rc = spdk_mlx5_cq_create(dev_ctx->pd, &cq_attr, &dev->cq); 2164 if (rc) { 2165 SPDK_ERRLOG("Failed to create mlx5 CQ, rc %d\n", rc); 2166 goto err_out; 2167 } 2168 2169 rc = accel_mlx5_create_qp(dev, &dev->qp); 2170 if (rc) { 2171 SPDK_ERRLOG("Failed to create mlx5 QP, rc %d\n", rc); 2172 goto err_out; 2173 } 2174 2175 dev->mmap = spdk_rdma_utils_create_mem_map(dev_ctx->pd, NULL, 2176 IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE); 2177 if (!dev->mmap) { 2178 SPDK_ERRLOG("Failed to create memory map\n"); 2179 rc = -ENOMEM; 2180 goto err_out; 2181 } 2182 dev->crypto_multi_block = dev_ctx->crypto_multi_block; 2183 dev->crypto_split_blocks = dev_ctx->crypto_multi_block ? g_accel_mlx5.attr.crypto_split_blocks : 0; 2184 dev->wrs_in_cq_max = g_accel_mlx5.attr.qp_size; 2185 STAILQ_INIT(&dev->nomem); 2186 } 2187 2188 ch->poller = SPDK_POLLER_REGISTER(accel_mlx5_poller, ch, 0); 2189 2190 return 0; 2191 2192 err_out: 2193 accel_mlx5_destroy_cb(&g_accel_mlx5, ctx_buf); 2194 return rc; 2195 } 2196 2197 void 2198 accel_mlx5_get_default_attr(struct accel_mlx5_attr *attr) 2199 { 2200 assert(attr); 2201 2202 attr->qp_size = ACCEL_MLX5_QP_SIZE; 2203 attr->num_requests = ACCEL_MLX5_NUM_REQUESTS; 2204 attr->allowed_devs = NULL; 2205 attr->crypto_split_blocks = 0; 2206 } 2207 2208 static void 2209 accel_mlx5_allowed_devs_free(void) 2210 { 2211 size_t i; 2212 2213 if (!g_accel_mlx5.allowed_devs) { 2214 return; 2215 } 2216 2217 for (i = 0; i < g_accel_mlx5.allowed_devs_count; i++) { 2218 free(g_accel_mlx5.allowed_devs[i]); 2219 } 2220 free(g_accel_mlx5.attr.allowed_devs); 2221 free(g_accel_mlx5.allowed_devs); 2222 g_accel_mlx5.attr.allowed_devs = NULL; 2223 g_accel_mlx5.allowed_devs = NULL; 2224 g_accel_mlx5.allowed_devs_count = 0; 2225 } 2226 2227 static int 2228 accel_mlx5_allowed_devs_parse(const char *allowed_devs) 2229 { 2230 char *str, *tmp, *tok; 2231 size_t devs_count = 0; 2232 2233 str = strdup(allowed_devs); 2234 if (!str) { 2235 return -ENOMEM; 2236 } 2237 2238 accel_mlx5_allowed_devs_free(); 2239 2240 tmp = str; 2241 while ((tmp = strchr(tmp, ',')) != NULL) { 2242 tmp++; 2243 devs_count++; 2244 } 2245 devs_count++; 2246 2247 g_accel_mlx5.allowed_devs = calloc(devs_count, sizeof(char *)); 2248 if (!g_accel_mlx5.allowed_devs) { 2249 free(str); 2250 return -ENOMEM; 2251 } 2252 2253 devs_count = 0; 2254 tok = strtok(str, ","); 2255 while (tok) { 2256 g_accel_mlx5.allowed_devs[devs_count] = strdup(tok); 2257 if (!g_accel_mlx5.allowed_devs[devs_count]) { 2258 free(str); 2259 accel_mlx5_allowed_devs_free(); 2260 return -ENOMEM; 2261 } 2262 tok = strtok(NULL, ","); 2263 devs_count++; 2264 g_accel_mlx5.allowed_devs_count++; 2265 } 2266 2267 free(str); 2268 2269 return 0; 2270 } 2271 2272 int 2273 accel_mlx5_enable(struct accel_mlx5_attr *attr) 2274 { 2275 int rc; 2276 2277 if (g_accel_mlx5.enabled) { 2278 return -EEXIST; 2279 } 2280 if (attr) { 2281 if (attr->num_requests / spdk_env_get_core_count() < ACCEL_MLX5_MAX_MKEYS_IN_TASK) { 2282 SPDK_ERRLOG("num requests per core must not be less than %u, current value %u\n", 2283 ACCEL_MLX5_MAX_MKEYS_IN_TASK, attr->num_requests / spdk_env_get_core_count()); 2284 return -EINVAL; 2285 } 2286 if (attr->qp_size < 8) { 2287 SPDK_ERRLOG("qp_size must be at least 8\n"); 2288 return -EINVAL; 2289 } 2290 g_accel_mlx5.attr = *attr; 2291 g_accel_mlx5.attr.allowed_devs = NULL; 2292 2293 if (attr->allowed_devs) { 2294 /* Contains a copy of user's string */ 2295 g_accel_mlx5.attr.allowed_devs = strndup(attr->allowed_devs, ACCEL_MLX5_ALLOWED_DEVS_MAX_LEN); 2296 if (!g_accel_mlx5.attr.allowed_devs) { 2297 return -ENOMEM; 2298 } 2299 rc = accel_mlx5_allowed_devs_parse(g_accel_mlx5.attr.allowed_devs); 2300 if (rc) { 2301 return rc; 2302 } 2303 rc = spdk_mlx5_crypto_devs_allow((const char *const *)g_accel_mlx5.allowed_devs, 2304 g_accel_mlx5.allowed_devs_count); 2305 if (rc) { 2306 accel_mlx5_allowed_devs_free(); 2307 return rc; 2308 } 2309 } 2310 } else { 2311 accel_mlx5_get_default_attr(&g_accel_mlx5.attr); 2312 } 2313 2314 g_accel_mlx5.enabled = true; 2315 spdk_accel_module_list_add(&g_accel_mlx5.module); 2316 2317 return 0; 2318 } 2319 2320 static void 2321 accel_mlx5_psvs_release(struct accel_mlx5_dev_ctx *dev_ctx) 2322 { 2323 uint32_t i, num_psvs, num_psvs_in_pool; 2324 2325 if (!dev_ctx->psvs) { 2326 return; 2327 } 2328 2329 num_psvs = g_accel_mlx5.attr.num_requests; 2330 2331 for (i = 0; i < num_psvs; i++) { 2332 if (dev_ctx->psvs[i]) { 2333 spdk_mlx5_destroy_psv(dev_ctx->psvs[i]); 2334 dev_ctx->psvs[i] = NULL; 2335 } 2336 } 2337 free(dev_ctx->psvs); 2338 2339 if (!dev_ctx->psv_pool) { 2340 return; 2341 } 2342 num_psvs_in_pool = spdk_mempool_count(dev_ctx->psv_pool); 2343 if (num_psvs_in_pool != num_psvs) { 2344 SPDK_ERRLOG("Expected %u reqs in the pool, but got only %u\n", num_psvs, num_psvs_in_pool); 2345 } 2346 spdk_mempool_free(dev_ctx->psv_pool); 2347 } 2348 2349 static void 2350 accel_mlx5_free_resources(void) 2351 { 2352 struct accel_mlx5_dev_ctx *dev_ctx; 2353 uint32_t i; 2354 2355 for (i = 0; i < g_accel_mlx5.num_ctxs; i++) { 2356 dev_ctx = &g_accel_mlx5.dev_ctxs[i]; 2357 accel_mlx5_psvs_release(dev_ctx); 2358 if (dev_ctx->pd) { 2359 if (dev_ctx->crypto_mkeys) { 2360 spdk_mlx5_mkey_pool_destroy(SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO, dev_ctx->pd); 2361 } 2362 if (dev_ctx->sig_mkeys) { 2363 spdk_mlx5_mkey_pool_destroy(SPDK_MLX5_MKEY_POOL_FLAG_SIGNATURE, dev_ctx->pd); 2364 } 2365 spdk_rdma_utils_put_pd(dev_ctx->pd); 2366 } 2367 if (dev_ctx->domain) { 2368 spdk_rdma_utils_put_memory_domain(dev_ctx->domain); 2369 } 2370 } 2371 2372 free(g_accel_mlx5.dev_ctxs); 2373 g_accel_mlx5.dev_ctxs = NULL; 2374 g_accel_mlx5.initialized = false; 2375 } 2376 2377 static void 2378 accel_mlx5_deinit_cb(void *ctx) 2379 { 2380 accel_mlx5_free_resources(); 2381 spdk_spin_destroy(&g_accel_mlx5.lock); 2382 spdk_accel_module_finish(); 2383 } 2384 2385 static void 2386 accel_mlx5_deinit(void *ctx) 2387 { 2388 if (g_accel_mlx5.allowed_devs) { 2389 accel_mlx5_allowed_devs_free(); 2390 } 2391 spdk_mlx5_crypto_devs_allow(NULL, 0); 2392 if (g_accel_mlx5.initialized) { 2393 spdk_io_device_unregister(&g_accel_mlx5, accel_mlx5_deinit_cb); 2394 } else { 2395 spdk_accel_module_finish(); 2396 } 2397 } 2398 2399 static int 2400 accel_mlx5_mkeys_create(struct ibv_pd *pd, uint32_t num_mkeys, uint32_t flags) 2401 { 2402 struct spdk_mlx5_mkey_pool_param pool_param = {}; 2403 2404 pool_param.mkey_count = num_mkeys; 2405 pool_param.cache_per_thread = num_mkeys * 3 / 4 / spdk_env_get_core_count(); 2406 pool_param.flags = flags; 2407 2408 return spdk_mlx5_mkey_pool_init(&pool_param, pd); 2409 } 2410 2411 static void 2412 accel_mlx5_set_psv_in_pool(struct spdk_mempool *mp, void *cb_arg, void *_psv, unsigned obj_idx) 2413 { 2414 struct spdk_rdma_utils_memory_translation translation = {}; 2415 struct accel_mlx5_psv_pool_iter_cb_args *args = cb_arg; 2416 struct accel_mlx5_psv_wrapper *wrapper = _psv; 2417 struct accel_mlx5_dev_ctx *dev_ctx = args->dev; 2418 int rc; 2419 2420 if (args->rc) { 2421 return; 2422 } 2423 assert(obj_idx < g_accel_mlx5.attr.num_requests); 2424 assert(dev_ctx->psvs[obj_idx] != NULL); 2425 memset(wrapper, 0, sizeof(*wrapper)); 2426 wrapper->psv_index = dev_ctx->psvs[obj_idx]->index; 2427 2428 rc = spdk_rdma_utils_get_translation(args->map, &wrapper->crc, sizeof(uint32_t), &translation); 2429 if (rc) { 2430 SPDK_ERRLOG("Memory translation failed, addr %p, length %zu\n", &wrapper->crc, sizeof(uint32_t)); 2431 args->rc = -EINVAL; 2432 } else { 2433 wrapper->crc_lkey = spdk_rdma_utils_memory_translation_get_lkey(&translation); 2434 } 2435 } 2436 2437 static int 2438 accel_mlx5_psvs_create(struct accel_mlx5_dev_ctx *dev_ctx) 2439 { 2440 struct accel_mlx5_psv_pool_iter_cb_args args = { 2441 .dev = dev_ctx 2442 }; 2443 char pool_name[32]; 2444 uint32_t i; 2445 uint32_t num_psvs = g_accel_mlx5.attr.num_requests; 2446 uint32_t cache_size; 2447 int rc; 2448 2449 dev_ctx->psvs = calloc(num_psvs, (sizeof(struct spdk_mlx5_psv *))); 2450 if (!dev_ctx->psvs) { 2451 SPDK_ERRLOG("Failed to alloc PSVs array\n"); 2452 return -ENOMEM; 2453 } 2454 for (i = 0; i < num_psvs; i++) { 2455 dev_ctx->psvs[i] = spdk_mlx5_create_psv(dev_ctx->pd); 2456 if (!dev_ctx->psvs[i]) { 2457 SPDK_ERRLOG("Failed to create PSV on dev %s\n", dev_ctx->context->device->name); 2458 return -EINVAL; 2459 } 2460 } 2461 2462 rc = snprintf(pool_name, sizeof(pool_name), "accel_psv_%s", dev_ctx->context->device->name); 2463 if (rc < 0) { 2464 assert(0); 2465 return -EINVAL; 2466 } 2467 cache_size = num_psvs * 3 / 4 / spdk_env_get_core_count(); 2468 args.map = spdk_rdma_utils_create_mem_map(dev_ctx->pd, NULL, 2469 IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE); 2470 if (!args.map) { 2471 return -ENOMEM; 2472 } 2473 dev_ctx->psv_pool = spdk_mempool_create_ctor(pool_name, num_psvs, 2474 sizeof(struct accel_mlx5_psv_wrapper), 2475 cache_size, SPDK_ENV_SOCKET_ID_ANY, 2476 accel_mlx5_set_psv_in_pool, &args); 2477 spdk_rdma_utils_free_mem_map(&args.map); 2478 if (!dev_ctx->psv_pool) { 2479 SPDK_ERRLOG("Failed to create PSV memory pool\n"); 2480 return -ENOMEM; 2481 } 2482 if (args.rc) { 2483 SPDK_ERRLOG("Failed to init PSV memory pool objects, rc %d\n", args.rc); 2484 return args.rc; 2485 } 2486 2487 return 0; 2488 } 2489 2490 2491 static int 2492 accel_mlx5_dev_ctx_init(struct accel_mlx5_dev_ctx *dev_ctx, struct ibv_context *dev, 2493 struct spdk_mlx5_device_caps *caps) 2494 { 2495 struct ibv_pd *pd; 2496 int rc; 2497 2498 pd = spdk_rdma_utils_get_pd(dev); 2499 if (!pd) { 2500 SPDK_ERRLOG("Failed to get PD for context %p, dev %s\n", dev, dev->device->name); 2501 return -EINVAL; 2502 } 2503 dev_ctx->context = dev; 2504 dev_ctx->pd = pd; 2505 dev_ctx->domain = spdk_rdma_utils_get_memory_domain(pd); 2506 if (!dev_ctx->domain) { 2507 return -ENOMEM; 2508 } 2509 2510 if (g_accel_mlx5.crypto_supported) { 2511 dev_ctx->crypto_multi_block = caps->crypto.multi_block_be_tweak; 2512 if (!dev_ctx->crypto_multi_block && g_accel_mlx5.attr.crypto_split_blocks) { 2513 SPDK_WARNLOG("\"crypto_split_blocks\" is set but dev %s doesn't support multi block crypto\n", 2514 dev->device->name); 2515 } 2516 rc = accel_mlx5_mkeys_create(pd, g_accel_mlx5.attr.num_requests, SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO); 2517 if (rc) { 2518 SPDK_ERRLOG("Failed to create crypto mkeys pool, rc %d, dev %s\n", rc, dev->device->name); 2519 return rc; 2520 } 2521 dev_ctx->crypto_mkeys = true; 2522 } 2523 if (g_accel_mlx5.crc32c_supported) { 2524 rc = accel_mlx5_mkeys_create(pd, g_accel_mlx5.attr.num_requests, 2525 SPDK_MLX5_MKEY_POOL_FLAG_SIGNATURE); 2526 if (rc) { 2527 SPDK_ERRLOG("Failed to create signature mkeys pool, rc %d, dev %s\n", rc, dev->device->name); 2528 return rc; 2529 } 2530 dev_ctx->sig_mkeys = true; 2531 rc = accel_mlx5_psvs_create(dev_ctx); 2532 if (rc) { 2533 SPDK_ERRLOG("Failed to create PSVs pool, rc %d, dev %s\n", rc, dev->device->name); 2534 return rc; 2535 } 2536 } 2537 2538 return 0; 2539 } 2540 2541 static struct ibv_context ** 2542 accel_mlx5_get_devices(int *_num_devs) 2543 { 2544 struct ibv_context **rdma_devs, **rdma_devs_out = NULL, *dev; 2545 struct ibv_device_attr dev_attr; 2546 size_t j; 2547 int num_devs = 0, i, rc; 2548 int num_devs_out = 0; 2549 bool dev_allowed; 2550 2551 rdma_devs = rdma_get_devices(&num_devs); 2552 if (!rdma_devs || !num_devs) { 2553 *_num_devs = 0; 2554 return NULL; 2555 } 2556 2557 rdma_devs_out = calloc(num_devs + 1, sizeof(struct ibv_context *)); 2558 if (!rdma_devs_out) { 2559 SPDK_ERRLOG("Memory allocation failed\n"); 2560 rdma_free_devices(rdma_devs); 2561 *_num_devs = 0; 2562 return NULL; 2563 } 2564 2565 for (i = 0; i < num_devs; i++) { 2566 dev = rdma_devs[i]; 2567 rc = ibv_query_device(dev, &dev_attr); 2568 if (rc) { 2569 SPDK_ERRLOG("Failed to query dev %s, skipping\n", dev->device->name); 2570 continue; 2571 } 2572 if (dev_attr.vendor_id != SPDK_MLX5_VENDOR_ID_MELLANOX) { 2573 SPDK_DEBUGLOG(accel_mlx5, "dev %s is not Mellanox device, skipping\n", dev->device->name); 2574 continue; 2575 } 2576 2577 if (g_accel_mlx5.allowed_devs_count) { 2578 dev_allowed = false; 2579 for (j = 0; j < g_accel_mlx5.allowed_devs_count; j++) { 2580 if (strcmp(g_accel_mlx5.allowed_devs[j], dev->device->name) == 0) { 2581 dev_allowed = true; 2582 break; 2583 } 2584 } 2585 if (!dev_allowed) { 2586 continue; 2587 } 2588 } 2589 2590 rdma_devs_out[num_devs_out] = dev; 2591 num_devs_out++; 2592 } 2593 2594 rdma_free_devices(rdma_devs); 2595 *_num_devs = num_devs_out; 2596 2597 return rdma_devs_out; 2598 } 2599 2600 static inline bool 2601 accel_mlx5_dev_supports_crypto(struct spdk_mlx5_device_caps *caps) 2602 { 2603 return caps->crypto_supported && !caps->crypto.wrapped_import_method_aes_xts && 2604 (caps->crypto.single_block_le_tweak || 2605 caps->crypto.multi_block_le_tweak || caps->crypto.multi_block_be_tweak); 2606 } 2607 2608 static int 2609 accel_mlx5_init(void) 2610 { 2611 struct spdk_mlx5_device_caps *caps; 2612 struct ibv_context **rdma_devs, *dev; 2613 int num_devs = 0, rc = 0, i; 2614 int best_dev = -1, first_dev = 0; 2615 int best_dev_stat = 0, dev_stat; 2616 bool supports_crypto; 2617 bool find_best_dev = g_accel_mlx5.allowed_devs_count == 0; 2618 2619 if (!g_accel_mlx5.enabled) { 2620 return -EINVAL; 2621 } 2622 2623 spdk_spin_init(&g_accel_mlx5.lock); 2624 rdma_devs = accel_mlx5_get_devices(&num_devs); 2625 if (!rdma_devs || !num_devs) { 2626 return -ENODEV; 2627 } 2628 caps = calloc(num_devs, sizeof(*caps)); 2629 if (!caps) { 2630 rc = -ENOMEM; 2631 goto cleanup; 2632 } 2633 2634 g_accel_mlx5.crypto_supported = true; 2635 g_accel_mlx5.crc32c_supported = true; 2636 g_accel_mlx5.num_ctxs = 0; 2637 2638 /* Iterate devices. We support an offload if all devices support it */ 2639 for (i = 0; i < num_devs; i++) { 2640 dev = rdma_devs[i]; 2641 2642 rc = spdk_mlx5_device_query_caps(dev, &caps[i]); 2643 if (rc) { 2644 SPDK_ERRLOG("Failed to get crypto caps, dev %s\n", dev->device->name); 2645 goto cleanup; 2646 } 2647 supports_crypto = accel_mlx5_dev_supports_crypto(&caps[i]); 2648 if (!supports_crypto) { 2649 SPDK_DEBUGLOG(accel_mlx5, "Disable crypto support because dev %s doesn't support it\n", 2650 rdma_devs[i]->device->name); 2651 g_accel_mlx5.crypto_supported = false; 2652 } 2653 if (!caps[i].crc32c_supported) { 2654 SPDK_DEBUGLOG(accel_mlx5, "Disable crc32c support because dev %s doesn't support it\n", 2655 rdma_devs[i]->device->name); 2656 g_accel_mlx5.crc32c_supported = false; 2657 } 2658 if (find_best_dev) { 2659 /* Find device which supports max number of offloads */ 2660 dev_stat = (int)supports_crypto + (int)caps[i].crc32c_supported; 2661 if (dev_stat > best_dev_stat) { 2662 best_dev_stat = dev_stat; 2663 best_dev = i; 2664 } 2665 } 2666 } 2667 2668 /* User didn't specify devices to use, try to select the best one */ 2669 if (find_best_dev) { 2670 if (best_dev == -1) { 2671 best_dev = 0; 2672 } 2673 g_accel_mlx5.crypto_supported = accel_mlx5_dev_supports_crypto(&caps[best_dev]); 2674 g_accel_mlx5.crc32c_supported = caps[best_dev].crc32c_supported; 2675 SPDK_NOTICELOG("Select dev %s, crypto %d, crc32c %d\n", rdma_devs[best_dev]->device->name, 2676 g_accel_mlx5.crypto_supported, g_accel_mlx5.crc32c_supported); 2677 first_dev = best_dev; 2678 num_devs = 1; 2679 if (g_accel_mlx5.crypto_supported) { 2680 const char *const dev_name[] = { rdma_devs[best_dev]->device->name }; 2681 /* Let mlx5 library know which device to use */ 2682 spdk_mlx5_crypto_devs_allow(dev_name, 1); 2683 } 2684 } else { 2685 SPDK_NOTICELOG("Found %d devices, crypto %d\n", num_devs, g_accel_mlx5.crypto_supported); 2686 } 2687 2688 g_accel_mlx5.dev_ctxs = calloc(num_devs, sizeof(*g_accel_mlx5.dev_ctxs)); 2689 if (!g_accel_mlx5.dev_ctxs) { 2690 SPDK_ERRLOG("Memory allocation failed\n"); 2691 rc = -ENOMEM; 2692 goto cleanup; 2693 } 2694 2695 for (i = first_dev; i < first_dev + num_devs; i++) { 2696 rc = accel_mlx5_dev_ctx_init(&g_accel_mlx5.dev_ctxs[g_accel_mlx5.num_ctxs++], 2697 rdma_devs[i], &caps[i]); 2698 if (rc) { 2699 goto cleanup; 2700 } 2701 } 2702 2703 SPDK_NOTICELOG("Accel framework mlx5 initialized, found %d devices.\n", num_devs); 2704 spdk_io_device_register(&g_accel_mlx5, accel_mlx5_create_cb, accel_mlx5_destroy_cb, 2705 sizeof(struct accel_mlx5_io_channel), "accel_mlx5"); 2706 g_accel_mlx5.initialized = true; 2707 free(rdma_devs); 2708 free(caps); 2709 2710 return 0; 2711 2712 cleanup: 2713 free(rdma_devs); 2714 free(caps); 2715 accel_mlx5_free_resources(); 2716 spdk_spin_destroy(&g_accel_mlx5.lock); 2717 2718 return rc; 2719 } 2720 2721 static void 2722 accel_mlx5_write_config_json(struct spdk_json_write_ctx *w) 2723 { 2724 if (g_accel_mlx5.enabled) { 2725 spdk_json_write_object_begin(w); 2726 spdk_json_write_named_string(w, "method", "mlx5_scan_accel_module"); 2727 spdk_json_write_named_object_begin(w, "params"); 2728 spdk_json_write_named_uint16(w, "qp_size", g_accel_mlx5.attr.qp_size); 2729 spdk_json_write_named_uint32(w, "num_requests", g_accel_mlx5.attr.num_requests); 2730 if (g_accel_mlx5.attr.allowed_devs) { 2731 spdk_json_write_named_string(w, "allowed_devs", g_accel_mlx5.attr.allowed_devs); 2732 } 2733 spdk_json_write_named_uint16(w, "crypto_split_blocks", g_accel_mlx5.attr.crypto_split_blocks); 2734 spdk_json_write_object_end(w); 2735 spdk_json_write_object_end(w); 2736 } 2737 } 2738 2739 static size_t 2740 accel_mlx5_get_ctx_size(void) 2741 { 2742 return sizeof(struct accel_mlx5_task); 2743 } 2744 2745 static int 2746 accel_mlx5_crypto_key_init(struct spdk_accel_crypto_key *key) 2747 { 2748 struct spdk_mlx5_crypto_dek_create_attr attr = {}; 2749 struct spdk_mlx5_crypto_keytag *keytag; 2750 int rc; 2751 2752 if (!key || !key->key || !key->key2 || !key->key_size || !key->key2_size) { 2753 return -EINVAL; 2754 } 2755 2756 attr.dek = calloc(1, key->key_size + key->key2_size); 2757 if (!attr.dek) { 2758 return -ENOMEM; 2759 } 2760 2761 memcpy(attr.dek, key->key, key->key_size); 2762 memcpy(attr.dek + key->key_size, key->key2, key->key2_size); 2763 attr.dek_len = key->key_size + key->key2_size; 2764 2765 rc = spdk_mlx5_crypto_keytag_create(&attr, &keytag); 2766 spdk_memset_s(attr.dek, attr.dek_len, 0, attr.dek_len); 2767 free(attr.dek); 2768 if (rc) { 2769 SPDK_ERRLOG("Failed to create a keytag, rc %d\n", rc); 2770 return rc; 2771 } 2772 2773 key->priv = keytag; 2774 2775 return 0; 2776 } 2777 2778 static void 2779 accel_mlx5_crypto_key_deinit(struct spdk_accel_crypto_key *key) 2780 { 2781 if (!key || key->module_if != &g_accel_mlx5.module || !key->priv) { 2782 return; 2783 } 2784 2785 spdk_mlx5_crypto_keytag_destroy(key->priv); 2786 } 2787 2788 static void 2789 accel_mlx5_dump_stats_json(struct spdk_json_write_ctx *w, const char *header, 2790 const struct accel_mlx5_stats *stats) 2791 { 2792 double idle_polls_percentage = 0; 2793 double cpls_per_poll = 0; 2794 uint64_t total_tasks = 0; 2795 int i; 2796 2797 if (stats->polls) { 2798 idle_polls_percentage = (double) stats->idle_polls * 100 / stats->polls; 2799 } 2800 if (stats->polls > stats->idle_polls) { 2801 cpls_per_poll = (double) stats->completions / (stats->polls - stats->idle_polls); 2802 } 2803 for (i = 0; i < ACCEL_MLX5_OPC_LAST; i++) { 2804 total_tasks += stats->opcodes[i]; 2805 } 2806 2807 spdk_json_write_named_object_begin(w, header); 2808 2809 spdk_json_write_named_object_begin(w, "umrs"); 2810 spdk_json_write_named_uint64(w, "crypto_umrs", stats->crypto_umrs); 2811 spdk_json_write_named_uint64(w, "sig_umrs", stats->sig_umrs); 2812 spdk_json_write_named_uint64(w, "total", stats->crypto_umrs + stats->sig_umrs); 2813 spdk_json_write_object_end(w); 2814 2815 spdk_json_write_named_object_begin(w, "rdma"); 2816 spdk_json_write_named_uint64(w, "read", stats->rdma_reads); 2817 spdk_json_write_named_uint64(w, "write", stats->rdma_writes); 2818 spdk_json_write_named_uint64(w, "total", stats->rdma_reads + stats->rdma_writes); 2819 spdk_json_write_object_end(w); 2820 2821 spdk_json_write_named_object_begin(w, "polling"); 2822 spdk_json_write_named_uint64(w, "polls", stats->polls); 2823 spdk_json_write_named_uint64(w, "idle_polls", stats->idle_polls); 2824 spdk_json_write_named_uint64(w, "completions", stats->completions); 2825 spdk_json_write_named_double(w, "idle_polls_percentage", idle_polls_percentage); 2826 spdk_json_write_named_double(w, "cpls_per_poll", cpls_per_poll); 2827 spdk_json_write_named_uint64(w, "nomem_qdepth", stats->nomem_qdepth); 2828 spdk_json_write_named_uint64(w, "nomem_mkey", stats->nomem_mkey); 2829 spdk_json_write_object_end(w); 2830 2831 spdk_json_write_named_object_begin(w, "tasks"); 2832 spdk_json_write_named_uint64(w, "copy", stats->opcodes[ACCEL_MLX5_OPC_COPY]); 2833 spdk_json_write_named_uint64(w, "crypto", stats->opcodes[ACCEL_MLX5_OPC_CRYPTO]); 2834 spdk_json_write_named_uint64(w, "crc32c", stats->opcodes[ACCEL_MLX5_OPC_CRC32C]); 2835 spdk_json_write_named_uint64(w, "total", total_tasks); 2836 spdk_json_write_object_end(w); 2837 2838 spdk_json_write_object_end(w); 2839 } 2840 2841 static void 2842 accel_mlx5_dump_channel_stat(struct spdk_io_channel_iter *i) 2843 { 2844 struct accel_mlx5_stats ch_stat = {}; 2845 struct accel_mlx5_dump_stats_ctx *ctx; 2846 struct spdk_io_channel *_ch; 2847 struct accel_mlx5_io_channel *ch; 2848 struct accel_mlx5_dev *dev; 2849 uint32_t j; 2850 2851 ctx = spdk_io_channel_iter_get_ctx(i); 2852 _ch = spdk_io_channel_iter_get_channel(i); 2853 ch = spdk_io_channel_get_ctx(_ch); 2854 2855 if (ctx->level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) { 2856 spdk_json_write_object_begin(ctx->w); 2857 spdk_json_write_named_object_begin(ctx->w, spdk_thread_get_name(spdk_get_thread())); 2858 } 2859 if (ctx->level == ACCEL_MLX5_DUMP_STAT_LEVEL_DEV) { 2860 spdk_json_write_named_array_begin(ctx->w, "devices"); 2861 } 2862 2863 for (j = 0; j < ch->num_devs; j++) { 2864 dev = &ch->devs[j]; 2865 /* Save grand total and channel stats */ 2866 accel_mlx5_add_stats(&ctx->total, &dev->stats); 2867 accel_mlx5_add_stats(&ch_stat, &dev->stats); 2868 if (ctx->level == ACCEL_MLX5_DUMP_STAT_LEVEL_DEV) { 2869 spdk_json_write_object_begin(ctx->w); 2870 accel_mlx5_dump_stats_json(ctx->w, dev->dev_ctx->context->device->name, &dev->stats); 2871 spdk_json_write_object_end(ctx->w); 2872 } 2873 } 2874 2875 if (ctx->level == ACCEL_MLX5_DUMP_STAT_LEVEL_DEV) { 2876 spdk_json_write_array_end(ctx->w); 2877 } 2878 if (ctx->level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) { 2879 accel_mlx5_dump_stats_json(ctx->w, "channel_total", &ch_stat); 2880 spdk_json_write_object_end(ctx->w); 2881 spdk_json_write_object_end(ctx->w); 2882 } 2883 2884 spdk_for_each_channel_continue(i, 0); 2885 } 2886 2887 static void 2888 accel_mlx5_dump_channel_stat_done(struct spdk_io_channel_iter *i, int status) 2889 { 2890 struct accel_mlx5_dump_stats_ctx *ctx; 2891 2892 ctx = spdk_io_channel_iter_get_ctx(i); 2893 2894 spdk_spin_lock(&g_accel_mlx5.lock); 2895 /* Add statistics from destroyed channels */ 2896 accel_mlx5_add_stats(&ctx->total, &g_accel_mlx5.stats); 2897 spdk_spin_unlock(&g_accel_mlx5.lock); 2898 2899 if (ctx->level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) { 2900 /* channels[] */ 2901 spdk_json_write_array_end(ctx->w); 2902 } 2903 2904 accel_mlx5_dump_stats_json(ctx->w, "total", &ctx->total); 2905 2906 /* Ends the whole response which was begun in accel_mlx5_dump_stats */ 2907 spdk_json_write_object_end(ctx->w); 2908 2909 ctx->cb(ctx->ctx, 0); 2910 free(ctx); 2911 } 2912 2913 int 2914 accel_mlx5_dump_stats(struct spdk_json_write_ctx *w, enum accel_mlx5_dump_state_level level, 2915 accel_mlx5_dump_stat_done_cb cb, void *ctx) 2916 { 2917 struct accel_mlx5_dump_stats_ctx *stat_ctx; 2918 2919 if (!w || !cb) { 2920 return -EINVAL; 2921 } 2922 if (!g_accel_mlx5.initialized) { 2923 return -ENODEV; 2924 } 2925 2926 stat_ctx = calloc(1, sizeof(*stat_ctx)); 2927 if (!stat_ctx) { 2928 return -ENOMEM; 2929 } 2930 stat_ctx->cb = cb; 2931 stat_ctx->ctx = ctx; 2932 stat_ctx->level = level; 2933 stat_ctx->w = w; 2934 2935 spdk_json_write_object_begin(w); 2936 2937 if (level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) { 2938 spdk_json_write_named_array_begin(w, "channels"); 2939 } 2940 2941 spdk_for_each_channel(&g_accel_mlx5, accel_mlx5_dump_channel_stat, stat_ctx, 2942 accel_mlx5_dump_channel_stat_done); 2943 2944 return 0; 2945 } 2946 2947 static bool 2948 accel_mlx5_crypto_supports_cipher(enum spdk_accel_cipher cipher, size_t key_size) 2949 { 2950 switch (cipher) { 2951 case SPDK_ACCEL_CIPHER_AES_XTS: 2952 return key_size == SPDK_ACCEL_AES_XTS_128_KEY_SIZE || key_size == SPDK_ACCEL_AES_XTS_256_KEY_SIZE; 2953 default: 2954 return false; 2955 } 2956 } 2957 2958 static int 2959 accel_mlx5_get_memory_domains(struct spdk_memory_domain **domains, int array_size) 2960 { 2961 int i, size; 2962 2963 if (!domains || !array_size) { 2964 return (int)g_accel_mlx5.num_ctxs; 2965 } 2966 2967 size = spdk_min(array_size, (int)g_accel_mlx5.num_ctxs); 2968 2969 for (i = 0; i < size; i++) { 2970 domains[i] = g_accel_mlx5.dev_ctxs[i].domain; 2971 } 2972 2973 return (int)g_accel_mlx5.num_ctxs; 2974 } 2975 2976 static struct accel_mlx5_module g_accel_mlx5 = { 2977 .module = { 2978 .module_init = accel_mlx5_init, 2979 .module_fini = accel_mlx5_deinit, 2980 .write_config_json = accel_mlx5_write_config_json, 2981 .get_ctx_size = accel_mlx5_get_ctx_size, 2982 .name = "mlx5", 2983 .supports_opcode = accel_mlx5_supports_opcode, 2984 .get_io_channel = accel_mlx5_get_io_channel, 2985 .submit_tasks = accel_mlx5_submit_tasks, 2986 .crypto_key_init = accel_mlx5_crypto_key_init, 2987 .crypto_key_deinit = accel_mlx5_crypto_key_deinit, 2988 .crypto_supports_cipher = accel_mlx5_crypto_supports_cipher, 2989 .get_memory_domains = accel_mlx5_get_memory_domains, 2990 } 2991 }; 2992 2993 SPDK_LOG_REGISTER_COMPONENT(accel_mlx5) 2994