1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 */ 4 5 #include "spdk/env.h" 6 #include "spdk/thread.h" 7 #include "spdk/queue.h" 8 #include "spdk/log.h" 9 #include "spdk/string.h" 10 #include "spdk/likely.h" 11 #include "spdk/dma.h" 12 #include "spdk/json.h" 13 #include "spdk/util.h" 14 15 #include "spdk_internal/mlx5.h" 16 #include "spdk_internal/rdma_utils.h" 17 #include "spdk/accel_module.h" 18 #include "spdk_internal/assert.h" 19 #include "spdk_internal/sgl.h" 20 #include "accel_mlx5.h" 21 22 #include <infiniband/mlx5dv.h> 23 #include <rdma/rdma_cma.h> 24 25 #define ACCEL_MLX5_QP_SIZE (256u) 26 #define ACCEL_MLX5_NUM_REQUESTS (2048u - 1) 27 #define ACCEL_MLX5_RECOVER_POLLER_PERIOD_US (10000) 28 #define ACCEL_MLX5_MAX_SGE (16u) 29 #define ACCEL_MLX5_MAX_WC (64u) 30 #define ACCEL_MLX5_MAX_MKEYS_IN_TASK (16u) 31 32 /* Assume we have up to 16 devices */ 33 #define ACCEL_MLX5_ALLOWED_DEVS_MAX_LEN ((SPDK_MLX5_DEV_MAX_NAME_LEN + 1) * 16) 34 35 #define ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, task) \ 36 do { \ 37 assert((qp)->wrs_submitted < (qp)->wrs_max); \ 38 (qp)->wrs_submitted++; \ 39 assert((task)->num_wrs < UINT16_MAX); \ 40 (task)->num_wrs++; \ 41 } while (0) 42 43 #define ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, task) \ 44 do { \ 45 assert((dev)->wrs_in_cq < (dev)->wrs_in_cq_max); \ 46 (dev)->wrs_in_cq++; \ 47 assert((qp)->wrs_submitted < (qp)->wrs_max); \ 48 (qp)->wrs_submitted++; \ 49 assert((task)->num_wrs < UINT16_MAX); \ 50 (task)->num_wrs++; \ 51 } while (0) 52 53 struct accel_mlx5_io_channel; 54 struct accel_mlx5_task; 55 56 struct accel_mlx5_dev_ctx { 57 struct ibv_context *context; 58 struct ibv_pd *pd; 59 struct spdk_memory_domain *domain; 60 struct spdk_mempool *psv_pool; 61 TAILQ_ENTRY(accel_mlx5_dev_ctx) link; 62 struct spdk_mlx5_psv **psvs; 63 bool crypto_mkeys; 64 bool sig_mkeys; 65 bool crypto_multi_block; 66 }; 67 68 enum accel_mlx5_opcode { 69 ACCEL_MLX5_OPC_COPY, 70 ACCEL_MLX5_OPC_CRYPTO, 71 ACCEL_MLX5_OPC_CRC32C, 72 ACCEL_MLX5_OPC_LAST 73 }; 74 75 struct accel_mlx5_stats { 76 uint64_t crypto_umrs; 77 uint64_t sig_umrs; 78 uint64_t rdma_reads; 79 uint64_t rdma_writes; 80 uint64_t polls; 81 uint64_t idle_polls; 82 uint64_t completions; 83 uint64_t nomem_qdepth; 84 uint64_t nomem_mkey; 85 uint64_t opcodes[ACCEL_MLX5_OPC_LAST]; 86 }; 87 88 struct accel_mlx5_module { 89 struct spdk_accel_module_if module; 90 struct accel_mlx5_stats stats; 91 struct spdk_spinlock lock; 92 struct accel_mlx5_dev_ctx *dev_ctxs; 93 uint32_t num_ctxs; 94 struct accel_mlx5_attr attr; 95 char **allowed_devs; 96 size_t allowed_devs_count; 97 bool initialized; 98 bool enabled; 99 bool crypto_supported; 100 bool crc32c_supported; 101 }; 102 103 struct accel_mlx5_sge { 104 uint32_t src_sge_count; 105 uint32_t dst_sge_count; 106 struct ibv_sge src_sge[ACCEL_MLX5_MAX_SGE]; 107 struct ibv_sge dst_sge[ACCEL_MLX5_MAX_SGE]; 108 }; 109 110 struct accel_mlx5_iov_sgl { 111 struct iovec *iov; 112 uint32_t iovcnt; 113 uint32_t iov_offset; 114 }; 115 116 struct accel_mlx5_psv_wrapper { 117 uint32_t psv_index; 118 struct { 119 uint32_t error : 1; 120 uint32_t reserved : 31; 121 } bits; 122 /* mlx5 engine requires DMAable memory, use this member to copy user's crc value since we don't know which 123 * memory it is in */ 124 uint32_t crc; 125 uint32_t crc_lkey; 126 }; 127 128 struct accel_mlx5_task { 129 struct spdk_accel_task base; 130 struct accel_mlx5_iov_sgl src; 131 struct accel_mlx5_iov_sgl dst; 132 struct accel_mlx5_qp 
*qp; 133 STAILQ_ENTRY(accel_mlx5_task) link; 134 uint16_t num_reqs; 135 uint16_t num_completed_reqs; 136 uint16_t num_submitted_reqs; 137 uint16_t num_ops; /* number of allocated mkeys or number of operations */ 138 uint16_t num_wrs; /* Number of outstanding operations which consume qp slot */ 139 union { 140 struct { 141 uint16_t blocks_per_req; 142 uint16_t num_processed_blocks; 143 uint16_t num_blocks; 144 }; 145 struct { 146 struct accel_mlx5_psv_wrapper *psv; 147 uint32_t last_umr_len; 148 uint8_t last_mkey_idx; 149 }; 150 }; 151 union { 152 uint8_t raw; 153 struct { 154 uint8_t inplace : 1; 155 uint8_t enc_order : 2; 156 uint8_t mlx5_opcode: 5; 157 }; 158 }; 159 /* Keep this array last since not all elements might be accessed, this reduces amount of data to be 160 * cached */ 161 struct spdk_mlx5_mkey_pool_obj *mkeys[ACCEL_MLX5_MAX_MKEYS_IN_TASK]; 162 }; 163 164 SPDK_STATIC_ASSERT(ACCEL_MLX5_MAX_MKEYS_IN_TASK <= UINT8_MAX, "uint8_t is used to iterate mkeys"); 165 166 struct accel_mlx5_qp { 167 struct spdk_mlx5_qp *qp; 168 struct ibv_qp *verbs_qp; 169 struct accel_mlx5_dev *dev; 170 struct accel_mlx5_io_channel *ch; 171 /* tasks submitted to HW. We can't complete a task even in error case until we reap completions for all 172 * submitted requests */ 173 STAILQ_HEAD(, accel_mlx5_task) in_hw; 174 uint16_t wrs_submitted; 175 uint16_t wrs_max; 176 bool recovering; 177 struct spdk_poller *recover_poller; 178 }; 179 180 struct accel_mlx5_dev { 181 struct accel_mlx5_qp qp; 182 struct spdk_mlx5_cq *cq; 183 struct spdk_mlx5_mkey_pool *crypto_mkeys; 184 struct spdk_mlx5_mkey_pool *sig_mkeys; 185 struct spdk_rdma_utils_mem_map *mmap; 186 struct accel_mlx5_dev_ctx *dev_ctx; 187 uint16_t wrs_in_cq; 188 uint16_t wrs_in_cq_max; 189 uint16_t crypto_split_blocks; 190 bool crypto_multi_block; 191 /* Pending tasks waiting for requests resources */ 192 STAILQ_HEAD(, accel_mlx5_task) nomem; 193 TAILQ_ENTRY(accel_mlx5_dev) link; 194 struct accel_mlx5_stats stats; 195 }; 196 197 struct accel_mlx5_io_channel { 198 struct accel_mlx5_dev *devs; 199 struct spdk_poller *poller; 200 uint32_t num_devs; 201 /* Index in \b devs to be used for operations in round-robin way */ 202 uint32_t dev_idx; 203 }; 204 205 struct accel_mlx5_task_operations { 206 int (*init)(struct accel_mlx5_task *task); 207 int (*process)(struct accel_mlx5_task *task); 208 int (*cont)(struct accel_mlx5_task *task); 209 void (*complete)(struct accel_mlx5_task *task); 210 }; 211 212 struct accel_mlx5_psv_pool_iter_cb_args { 213 struct accel_mlx5_dev_ctx *dev; 214 struct spdk_rdma_utils_mem_map *map; 215 int rc; 216 }; 217 218 struct accel_mlx5_dump_stats_ctx { 219 struct accel_mlx5_stats total; 220 struct spdk_json_write_ctx *w; 221 enum accel_mlx5_dump_state_level level; 222 accel_mlx5_dump_stat_done_cb cb; 223 void *ctx; 224 }; 225 226 static struct accel_mlx5_module g_accel_mlx5; 227 228 static inline void 229 accel_mlx5_iov_sgl_init(struct accel_mlx5_iov_sgl *s, struct iovec *iov, uint32_t iovcnt) 230 { 231 s->iov = iov; 232 s->iovcnt = iovcnt; 233 s->iov_offset = 0; 234 } 235 236 static inline void 237 accel_mlx5_iov_sgl_advance(struct accel_mlx5_iov_sgl *s, uint32_t step) 238 { 239 s->iov_offset += step; 240 while (s->iovcnt > 0) { 241 assert(s->iov != NULL); 242 if (s->iov_offset < s->iov->iov_len) { 243 break; 244 } 245 246 s->iov_offset -= s->iov->iov_len; 247 s->iov++; 248 s->iovcnt--; 249 } 250 } 251 252 static inline void 253 accel_mlx5_iov_sgl_unwind(struct accel_mlx5_iov_sgl *s, uint32_t max_iovs, uint32_t step) 254 { 255 
SPDK_DEBUGLOG(accel_mlx5, "iov %p, iovcnt %u, max %u, offset %u, step %u\n", s->iov, s->iovcnt, 256 max_iovs, s->iov_offset, step); 257 while (s->iovcnt <= max_iovs) { 258 assert(s->iov != NULL); 259 if (s->iov_offset >= step) { 260 s->iov_offset -= step; 261 SPDK_DEBUGLOG(accel_mlx5, "\tEND, iov %p, iovcnt %u, offset %u\n", s->iov, s->iovcnt, 262 s->iov_offset); 263 return; 264 } 265 step -= s->iov_offset; 266 s->iov--; 267 s->iovcnt++; 268 s->iov_offset = s->iov->iov_len; 269 SPDK_DEBUGLOG(accel_mlx5, "\tiov %p, iovcnt %u, offset %u, step %u\n", s->iov, s->iovcnt, 270 s->iov_offset, step); 271 } 272 273 SPDK_ERRLOG("Can't unwind iovs, remaining %u\n", step); 274 assert(0); 275 } 276 277 static inline int 278 accel_mlx5_sge_unwind(struct ibv_sge *sge, uint32_t sge_count, uint32_t step) 279 { 280 int i; 281 282 assert(sge_count > 0); 283 SPDK_DEBUGLOG(accel_mlx5, "sge %p, count %u, step %u\n", sge, sge_count, step); 284 for (i = (int)sge_count - 1; i >= 0; i--) { 285 if (sge[i].length > step) { 286 sge[i].length -= step; 287 SPDK_DEBUGLOG(accel_mlx5, "\tsge[%u] len %u, step %u\n", i, sge[i].length, step); 288 return (int)i + 1; 289 } 290 SPDK_DEBUGLOG(accel_mlx5, "\tsge[%u] len %u, step %u\n", i, sge[i].length, step); 291 step -= sge[i].length; 292 } 293 294 SPDK_ERRLOG("Can't unwind sge, remaining %u\n", step); 295 assert(step == 0); 296 297 return 0; 298 } 299 300 static inline void 301 accel_mlx5_crypto_task_complete(struct accel_mlx5_task *task) 302 { 303 struct accel_mlx5_dev *dev = task->qp->dev; 304 305 assert(task->num_ops); 306 spdk_mlx5_mkey_pool_put_bulk(dev->crypto_mkeys, task->mkeys, task->num_ops); 307 spdk_accel_task_complete(&task->base, 0); 308 } 309 310 static inline void 311 accel_mlx5_task_fail(struct accel_mlx5_task *task, int rc) 312 { 313 struct accel_mlx5_dev *dev = task->qp->dev; 314 315 assert(task->num_reqs == task->num_completed_reqs); 316 SPDK_DEBUGLOG(accel_mlx5, "Fail task %p, opc %d, rc %d\n", task, task->base.op_code, rc); 317 318 if (task->num_ops) { 319 if (task->mlx5_opcode == ACCEL_MLX5_OPC_CRYPTO) { 320 spdk_mlx5_mkey_pool_put_bulk(dev->crypto_mkeys, task->mkeys, task->num_ops); 321 } 322 if (task->mlx5_opcode == ACCEL_MLX5_OPC_CRC32C) { 323 spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, task->mkeys, task->num_ops); 324 spdk_mempool_put(dev->dev_ctx->psv_pool, task->psv); 325 } 326 } 327 spdk_accel_task_complete(&task->base, rc); 328 } 329 330 static int 331 accel_mlx5_translate_addr(void *addr, size_t size, struct spdk_memory_domain *domain, 332 void *domain_ctx, struct accel_mlx5_dev *dev, struct ibv_sge *sge) 333 { 334 struct spdk_rdma_utils_memory_translation map_translation; 335 struct spdk_memory_domain_translation_result domain_translation; 336 struct spdk_memory_domain_translation_ctx local_ctx; 337 int rc; 338 339 if (domain) { 340 domain_translation.size = sizeof(struct spdk_memory_domain_translation_result); 341 local_ctx.size = sizeof(local_ctx); 342 local_ctx.rdma.ibv_qp = dev->qp.verbs_qp; 343 rc = spdk_memory_domain_translate_data(domain, domain_ctx, dev->dev_ctx->domain, 344 &local_ctx, addr, size, &domain_translation); 345 if (spdk_unlikely(rc || domain_translation.iov_count != 1)) { 346 SPDK_ERRLOG("Memory domain translation failed, addr %p, length %zu, iovcnt %u\n", addr, size, 347 domain_translation.iov_count); 348 if (rc == 0) { 349 rc = -EINVAL; 350 } 351 352 return rc; 353 } 354 sge->lkey = domain_translation.rdma.lkey; 355 sge->addr = (uint64_t) domain_translation.iov.iov_base; 356 sge->length = domain_translation.iov.iov_len; 357 
} else { 358 rc = spdk_rdma_utils_get_translation(dev->mmap, addr, size, 359 &map_translation); 360 if (spdk_unlikely(rc)) { 361 SPDK_ERRLOG("Memory translation failed, addr %p, length %zu\n", addr, size); 362 return rc; 363 } 364 sge->lkey = spdk_rdma_utils_memory_translation_get_lkey(&map_translation); 365 sge->addr = (uint64_t)addr; 366 sge->length = size; 367 } 368 369 return 0; 370 } 371 372 static inline int 373 accel_mlx5_fill_block_sge(struct accel_mlx5_dev *dev, struct ibv_sge *sge, 374 struct accel_mlx5_iov_sgl *iovs, uint32_t len, uint32_t *_remaining, 375 struct spdk_memory_domain *domain, void *domain_ctx) 376 { 377 void *addr; 378 uint32_t remaining = len; 379 uint32_t size; 380 int i = 0; 381 int rc; 382 383 while (remaining && i < (int)ACCEL_MLX5_MAX_SGE) { 384 size = spdk_min(remaining, iovs->iov->iov_len - iovs->iov_offset); 385 addr = (void *)iovs->iov->iov_base + iovs->iov_offset; 386 rc = accel_mlx5_translate_addr(addr, size, domain, domain_ctx, dev, &sge[i]); 387 if (spdk_unlikely(rc)) { 388 return rc; 389 } 390 SPDK_DEBUGLOG(accel_mlx5, "\t sge[%d]: lkey %u, len %u, addr %"PRIx64"\n", i, sge[i].lkey, 391 sge[i].length, sge[i].addr); 392 accel_mlx5_iov_sgl_advance(iovs, size); 393 i++; 394 assert(remaining >= size); 395 remaining -= size; 396 } 397 *_remaining = remaining; 398 399 return i; 400 } 401 402 static inline bool 403 accel_mlx5_compare_iovs(struct iovec *v1, struct iovec *v2, uint32_t iovcnt) 404 { 405 return memcmp(v1, v2, sizeof(*v1) * iovcnt) == 0; 406 } 407 408 static inline uint16_t 409 accel_mlx5_dev_get_available_slots(struct accel_mlx5_dev *dev, struct accel_mlx5_qp *qp) 410 { 411 assert(qp->wrs_max >= qp->wrs_submitted); 412 assert(dev->wrs_in_cq_max >= dev->wrs_in_cq); 413 414 /* Each time we produce only 1 CQE, so we need 1 CQ slot */ 415 if (spdk_unlikely(dev->wrs_in_cq == dev->wrs_in_cq_max)) { 416 return 0; 417 } 418 419 return qp->wrs_max - qp->wrs_submitted; 420 } 421 422 static inline uint32_t 423 accel_mlx5_task_alloc_mkeys(struct accel_mlx5_task *task, struct spdk_mlx5_mkey_pool *pool) 424 { 425 uint32_t num_ops; 426 int rc; 427 428 assert(task->num_reqs > task->num_completed_reqs); 429 num_ops = task->num_reqs - task->num_completed_reqs; 430 num_ops = spdk_min(num_ops, ACCEL_MLX5_MAX_MKEYS_IN_TASK); 431 if (!num_ops) { 432 return 0; 433 } 434 rc = spdk_mlx5_mkey_pool_get_bulk(pool, task->mkeys, num_ops); 435 if (spdk_unlikely(rc)) { 436 return 0; 437 } 438 assert(num_ops <= UINT16_MAX); 439 task->num_ops = num_ops; 440 441 return num_ops; 442 } 443 444 static inline uint8_t 445 bs_to_bs_selector(uint32_t bs) 446 { 447 switch (bs) { 448 case 512: 449 return SPDK_MLX5_BLOCK_SIZE_SELECTOR_512; 450 case 520: 451 return SPDK_MLX5_BLOCK_SIZE_SELECTOR_520; 452 case 4096: 453 return SPDK_MLX5_BLOCK_SIZE_SELECTOR_4096; 454 case 4160: 455 return SPDK_MLX5_BLOCK_SIZE_SELECTOR_4160; 456 default: 457 return SPDK_MLX5_BLOCK_SIZE_SELECTOR_RESERVED; 458 } 459 } 460 461 static inline int 462 accel_mlx5_configure_crypto_umr(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_sge *sge, 463 uint32_t mkey, uint32_t num_blocks, struct spdk_mlx5_crypto_dek_data *dek_data) 464 { 465 struct spdk_mlx5_umr_crypto_attr cattr; 466 struct spdk_mlx5_umr_attr umr_attr; 467 struct accel_mlx5_qp *qp = mlx5_task->qp; 468 struct accel_mlx5_dev *dev = qp->dev; 469 struct spdk_accel_task *task = &mlx5_task->base; 470 uint32_t length, remaining = 0, block_size = task->block_size; 471 int rc; 472 473 length = num_blocks * block_size; 474 SPDK_DEBUGLOG(accel_mlx5, "task %p, 
domain %p, len %u, blocks %u\n", task, task->src_domain, length, 475 num_blocks); 476 rc = accel_mlx5_fill_block_sge(dev, sge->src_sge, &mlx5_task->src, length, &remaining, 477 task->src_domain, task->src_domain_ctx); 478 if (spdk_unlikely(rc <= 0)) { 479 if (rc == 0) { 480 rc = -EINVAL; 481 } 482 SPDK_ERRLOG("failed set src sge, rc %d\n", rc); 483 return rc; 484 } 485 sge->src_sge_count = rc; 486 if (spdk_unlikely(remaining)) { 487 uint32_t new_len = length - remaining; 488 uint32_t aligned_len, updated_num_blocks; 489 490 SPDK_DEBUGLOG(accel_mlx5, "Incorrect src iovs, handled %u out of %u bytes\n", new_len, length); 491 if (new_len < block_size) { 492 /* We need to process at least 1 block. If buffer is too fragmented, we can't do 493 * anything */ 494 return -ERANGE; 495 } 496 497 /* Regular integer division, we need to round down to prev block size */ 498 updated_num_blocks = new_len / block_size; 499 assert(updated_num_blocks); 500 assert(updated_num_blocks < num_blocks); 501 aligned_len = updated_num_blocks * block_size; 502 503 if (aligned_len < new_len) { 504 uint32_t dt = new_len - aligned_len; 505 506 /* We can't process part of block, need to unwind src iov_sgl and sge to the 507 * prev block boundary */ 508 SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind src sge for %u bytes\n", task, dt); 509 accel_mlx5_iov_sgl_unwind(&mlx5_task->src, task->s.iovcnt, dt); 510 sge->src_sge_count = accel_mlx5_sge_unwind(sge->src_sge, sge->src_sge_count, dt); 511 if (!sge->src_sge_count) { 512 return -ERANGE; 513 } 514 } 515 SPDK_DEBUGLOG(accel_mlx5, "task %p, UMR len %u -> %u\n", task, length, aligned_len); 516 length = aligned_len; 517 num_blocks = updated_num_blocks; 518 } 519 520 cattr.xts_iv = task->iv + mlx5_task->num_processed_blocks; 521 cattr.keytag = 0; 522 cattr.dek_obj_id = dek_data->dek_obj_id; 523 cattr.tweak_mode = dek_data->tweak_mode; 524 cattr.enc_order = mlx5_task->enc_order; 525 cattr.bs_selector = bs_to_bs_selector(mlx5_task->base.block_size); 526 if (spdk_unlikely(cattr.bs_selector == SPDK_MLX5_BLOCK_SIZE_SELECTOR_RESERVED)) { 527 SPDK_ERRLOG("unsupported block size %u\n", mlx5_task->base.block_size); 528 return -EINVAL; 529 } 530 umr_attr.mkey = mkey; 531 umr_attr.sge = sge->src_sge; 532 533 if (!mlx5_task->inplace) { 534 SPDK_DEBUGLOG(accel_mlx5, "task %p, dst sge, domain %p, len %u\n", task, task->dst_domain, length); 535 rc = accel_mlx5_fill_block_sge(dev, sge->dst_sge, &mlx5_task->dst, length, &remaining, 536 task->dst_domain, task->dst_domain_ctx); 537 if (spdk_unlikely(rc <= 0)) { 538 if (rc == 0) { 539 rc = -EINVAL; 540 } 541 SPDK_ERRLOG("failed set dst sge, rc %d\n", rc); 542 return rc; 543 } 544 sge->dst_sge_count = rc; 545 if (spdk_unlikely(remaining)) { 546 uint32_t new_len = length - remaining; 547 uint32_t aligned_len, updated_num_blocks, dt; 548 549 SPDK_DEBUGLOG(accel_mlx5, "Incorrect dst iovs, handled %u out of %u bytes\n", new_len, length); 550 if (new_len < block_size) { 551 /* We need to process at least 1 block. 
If buffer is too fragmented, we can't do 552 * anything */ 553 return -ERANGE; 554 } 555 556 /* Regular integer division, we need to round down to prev block size */ 557 updated_num_blocks = new_len / block_size; 558 assert(updated_num_blocks); 559 assert(updated_num_blocks < num_blocks); 560 aligned_len = updated_num_blocks * block_size; 561 562 if (aligned_len < new_len) { 563 dt = new_len - aligned_len; 564 assert(dt > 0 && dt < length); 565 /* We can't process part of block, need to unwind src and dst iov_sgl and sge to the 566 * prev block boundary */ 567 SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind dst sge for %u bytes\n", task, dt); 568 accel_mlx5_iov_sgl_unwind(&mlx5_task->dst, task->d.iovcnt, dt); 569 sge->dst_sge_count = accel_mlx5_sge_unwind(sge->dst_sge, sge->dst_sge_count, dt); 570 assert(sge->dst_sge_count > 0 && sge->dst_sge_count <= ACCEL_MLX5_MAX_SGE); 571 if (!sge->dst_sge_count) { 572 return -ERANGE; 573 } 574 } 575 assert(length > aligned_len); 576 dt = length - aligned_len; 577 SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind src sge for %u bytes\n", task, dt); 578 /* The same for src iov_sgl and sge. In worst case we can unwind SRC 2 times */ 579 accel_mlx5_iov_sgl_unwind(&mlx5_task->src, task->s.iovcnt, dt); 580 sge->src_sge_count = accel_mlx5_sge_unwind(sge->src_sge, sge->src_sge_count, dt); 581 assert(sge->src_sge_count > 0 && sge->src_sge_count <= ACCEL_MLX5_MAX_SGE); 582 if (!sge->src_sge_count) { 583 return -ERANGE; 584 } 585 SPDK_DEBUGLOG(accel_mlx5, "task %p, UMR len %u -> %u\n", task, length, aligned_len); 586 length = aligned_len; 587 num_blocks = updated_num_blocks; 588 } 589 } 590 591 SPDK_DEBUGLOG(accel_mlx5, 592 "task %p: bs %u, iv %"PRIu64", enc_on_tx %d, tweak_mode %d, len %u, mkey %x, blocks %u\n", 593 mlx5_task, task->block_size, cattr.xts_iv, mlx5_task->enc_order, cattr.tweak_mode, length, mkey, 594 num_blocks); 595 596 umr_attr.sge_count = sge->src_sge_count; 597 umr_attr.umr_len = length; 598 assert((uint32_t)mlx5_task->num_processed_blocks + num_blocks <= UINT16_MAX); 599 mlx5_task->num_processed_blocks += num_blocks; 600 601 rc = spdk_mlx5_umr_configure_crypto(qp->qp, &umr_attr, &cattr, 0, 0); 602 603 return rc; 604 } 605 606 static inline int 607 accel_mlx5_crypto_task_process(struct accel_mlx5_task *mlx5_task) 608 { 609 struct accel_mlx5_sge sges[ACCEL_MLX5_MAX_MKEYS_IN_TASK]; 610 struct spdk_mlx5_crypto_dek_data dek_data; 611 struct accel_mlx5_qp *qp = mlx5_task->qp; 612 struct accel_mlx5_dev *dev = qp->dev; 613 /* First RDMA after UMR must have a SMALL_FENCE */ 614 uint32_t first_rdma_fence = SPDK_MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE; 615 uint16_t num_blocks; 616 uint16_t num_ops = spdk_min(mlx5_task->num_reqs - mlx5_task->num_completed_reqs, 617 mlx5_task->num_ops); 618 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp); 619 uint16_t i; 620 int rc; 621 622 assert(qp_slot > 1); 623 num_ops = spdk_min(num_ops, qp_slot >> 1); 624 if (spdk_unlikely(!num_ops)) { 625 return -EINVAL; 626 } 627 628 rc = spdk_mlx5_crypto_get_dek_data(mlx5_task->base.crypto_key->priv, dev->dev_ctx->pd, &dek_data); 629 if (spdk_unlikely(rc)) { 630 return rc; 631 } 632 633 mlx5_task->num_wrs = 0; 634 SPDK_DEBUGLOG(accel_mlx5, "begin, task, %p, reqs: total %u, submitted %u, completed %u\n", 635 mlx5_task, mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs); 636 for (i = 0; i < num_ops; i++) { 637 if (mlx5_task->num_submitted_reqs + i + 1 == mlx5_task->num_reqs) { 638 /* Last request may consume less than calculated if crypto_multi_block 
is true */ 639 assert(mlx5_task->num_blocks > mlx5_task->num_submitted_reqs); 640 num_blocks = mlx5_task->num_blocks - mlx5_task->num_processed_blocks; 641 } else { 642 num_blocks = mlx5_task->blocks_per_req; 643 } 644 645 rc = accel_mlx5_configure_crypto_umr(mlx5_task, &sges[i], mlx5_task->mkeys[i]->mkey, num_blocks, 646 &dek_data); 647 if (spdk_unlikely(rc)) { 648 SPDK_ERRLOG("UMR configure failed with %d\n", rc); 649 return rc; 650 } 651 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task); 652 dev->stats.crypto_umrs++; 653 } 654 655 /* Loop `num_ops - 1` for easy flags handling */ 656 for (i = 0; i < num_ops - 1; i++) { 657 /* UMR is used as a destination for RDMA_READ - from UMR to sge */ 658 if (mlx5_task->inplace) { 659 rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].src_sge, sges[i].src_sge_count, 0, 660 mlx5_task->mkeys[i]->mkey, 0, first_rdma_fence); 661 } else { 662 rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].dst_sge, sges[i].dst_sge_count, 0, 663 mlx5_task->mkeys[i]->mkey, 0, first_rdma_fence); 664 } 665 if (spdk_unlikely(rc)) { 666 SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc); 667 return rc; 668 } 669 670 first_rdma_fence = 0; 671 assert(mlx5_task->num_submitted_reqs < mlx5_task->num_reqs); 672 assert(mlx5_task->num_submitted_reqs < UINT16_MAX); 673 mlx5_task->num_submitted_reqs++; 674 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task); 675 dev->stats.rdma_reads++; 676 } 677 678 if (mlx5_task->inplace) { 679 rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].src_sge, sges[i].src_sge_count, 0, 680 mlx5_task->mkeys[i]->mkey, (uint64_t)mlx5_task, first_rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE); 681 } else { 682 rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].dst_sge, sges[i].dst_sge_count, 0, 683 mlx5_task->mkeys[i]->mkey, (uint64_t)mlx5_task, first_rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE); 684 } 685 if (spdk_unlikely(rc)) { 686 SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc); 687 return rc; 688 } 689 690 assert(mlx5_task->num_submitted_reqs < mlx5_task->num_reqs); 691 assert(mlx5_task->num_submitted_reqs < UINT16_MAX); 692 mlx5_task->num_submitted_reqs++; 693 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task); 694 dev->stats.rdma_reads++; 695 STAILQ_INSERT_TAIL(&qp->in_hw, mlx5_task, link); 696 697 if (spdk_unlikely(mlx5_task->num_submitted_reqs == mlx5_task->num_reqs && 698 mlx5_task->num_blocks > mlx5_task->num_processed_blocks)) { 699 /* We hit "out of sge 700 * entries" case with highly fragmented payload. 
In that case 701 * accel_mlx5_configure_crypto_umr function handled fewer data blocks than expected 702 * That means we need at least 1 more request to complete this task, this request will be 703 * executed once all submitted ones are completed */ 704 SPDK_DEBUGLOG(accel_mlx5, "task %p, processed %u/%u blocks, add extra req\n", mlx5_task, 705 mlx5_task->num_processed_blocks, mlx5_task->num_blocks); 706 mlx5_task->num_reqs++; 707 } 708 709 SPDK_DEBUGLOG(accel_mlx5, "end, task, %p, reqs: total %u, submitted %u, completed %u\n", mlx5_task, 710 mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs); 711 712 return 0; 713 } 714 715 static inline int 716 accel_mlx5_crypto_task_continue(struct accel_mlx5_task *task) 717 { 718 struct accel_mlx5_qp *qp = task->qp; 719 struct accel_mlx5_dev *dev = qp->dev; 720 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp); 721 722 assert(task->num_reqs > task->num_completed_reqs); 723 if (task->num_ops == 0) { 724 /* No mkeys allocated, try to allocate now */ 725 if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(task, dev->crypto_mkeys))) { 726 /* Pool is empty, queue this task */ 727 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 728 dev->stats.nomem_mkey++; 729 return -ENOMEM; 730 } 731 } 732 /* We need to post at least 1 UMR and 1 RDMA operation */ 733 if (spdk_unlikely(qp_slot < 2)) { 734 /* QP is full, queue this task */ 735 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 736 task->qp->dev->stats.nomem_qdepth++; 737 return -ENOMEM; 738 } 739 740 return accel_mlx5_crypto_task_process(task); 741 } 742 743 static inline int 744 accel_mlx5_crypto_task_init(struct accel_mlx5_task *mlx5_task) 745 { 746 struct spdk_accel_task *task = &mlx5_task->base; 747 struct accel_mlx5_dev *dev = mlx5_task->qp->dev; 748 uint64_t src_nbytes = task->nbytes; 749 #ifdef DEBUG 750 uint64_t dst_nbytes; 751 uint32_t i; 752 #endif 753 bool crypto_key_ok; 754 755 crypto_key_ok = (task->crypto_key && task->crypto_key->module_if == &g_accel_mlx5.module && 756 task->crypto_key->priv); 757 if (spdk_unlikely((task->nbytes % mlx5_task->base.block_size != 0) || !crypto_key_ok)) { 758 if (crypto_key_ok) { 759 SPDK_ERRLOG("src length %"PRIu64" is not a multiple of the block size %u\n", task->nbytes, 760 mlx5_task->base.block_size); 761 } else { 762 SPDK_ERRLOG("Wrong crypto key provided\n"); 763 } 764 return -EINVAL; 765 } 766 767 assert(src_nbytes / mlx5_task->base.block_size <= UINT16_MAX); 768 mlx5_task->num_blocks = src_nbytes / mlx5_task->base.block_size; 769 accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt); 770 if (task->d.iovcnt == 0 || (task->d.iovcnt == task->s.iovcnt && 771 accel_mlx5_compare_iovs(task->d.iovs, task->s.iovs, task->s.iovcnt))) { 772 mlx5_task->inplace = 1; 773 } else { 774 #ifdef DEBUG 775 dst_nbytes = 0; 776 for (i = 0; i < task->d.iovcnt; i++) { 777 dst_nbytes += task->d.iovs[i].iov_len; 778 } 779 780 if (spdk_unlikely(src_nbytes != dst_nbytes)) { 781 return -EINVAL; 782 } 783 #endif 784 mlx5_task->inplace = 0; 785 accel_mlx5_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt); 786 } 787 788 if (dev->crypto_multi_block) { 789 if (dev->crypto_split_blocks) { 790 assert(SPDK_CEIL_DIV(mlx5_task->num_blocks, dev->crypto_split_blocks) <= UINT16_MAX); 791 mlx5_task->num_reqs = SPDK_CEIL_DIV(mlx5_task->num_blocks, dev->crypto_split_blocks); 792 /* Last req may consume less blocks */ 793 mlx5_task->blocks_per_req = spdk_min(mlx5_task->num_blocks, dev->crypto_split_blocks); 794 } else { 795 if (task->s.iovcnt > 
ACCEL_MLX5_MAX_SGE || task->d.iovcnt > ACCEL_MLX5_MAX_SGE) { 796 uint32_t max_sge_count = spdk_max(task->s.iovcnt, task->d.iovcnt); 797 798 assert(SPDK_CEIL_DIV(max_sge_count, ACCEL_MLX5_MAX_SGE) <= UINT16_MAX); 799 mlx5_task->num_reqs = SPDK_CEIL_DIV(max_sge_count, ACCEL_MLX5_MAX_SGE); 800 mlx5_task->blocks_per_req = SPDK_CEIL_DIV(mlx5_task->num_blocks, mlx5_task->num_reqs); 801 } else { 802 mlx5_task->num_reqs = 1; 803 mlx5_task->blocks_per_req = mlx5_task->num_blocks; 804 } 805 } 806 } else { 807 mlx5_task->num_reqs = mlx5_task->num_blocks; 808 mlx5_task->blocks_per_req = 1; 809 } 810 811 if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(mlx5_task, dev->crypto_mkeys))) { 812 /* Pool is empty, queue this task */ 813 SPDK_DEBUGLOG(accel_mlx5, "no reqs in pool, dev %s\n", dev->dev_ctx->context->device->name); 814 dev->stats.nomem_mkey++; 815 return -ENOMEM; 816 } 817 if (spdk_unlikely(accel_mlx5_dev_get_available_slots(dev, &dev->qp) < 2)) { 818 /* Queue is full, queue this task */ 819 SPDK_DEBUGLOG(accel_mlx5, "dev %s qp %p is full\n", dev->dev_ctx->context->device->name, 820 mlx5_task->qp); 821 dev->stats.nomem_qdepth++; 822 return -ENOMEM; 823 } 824 825 SPDK_DEBUGLOG(accel_mlx5, "task %p, src_iovs %u, dst_iovs %u, num_reqs %u, " 826 "blocks/req %u, blocks %u, inplace %d\n", task, task->s.iovcnt, task->d.iovcnt, 827 mlx5_task->num_reqs, mlx5_task->blocks_per_req, mlx5_task->num_blocks, mlx5_task->inplace); 828 829 return 0; 830 } 831 832 static inline void 833 accel_mlx5_copy_task_complete(struct accel_mlx5_task *mlx5_task) 834 { 835 spdk_accel_task_complete(&mlx5_task->base, 0); 836 } 837 838 static inline int 839 accel_mlx5_copy_task_process_one(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_qp *qp, 840 uint64_t wrid, uint32_t fence) 841 { 842 struct spdk_accel_task *task = &mlx5_task->base; 843 struct accel_mlx5_sge sge; 844 uint32_t remaining = 0; 845 uint32_t dst_len; 846 int rc; 847 848 /* Limit one RDMA_WRITE by length of dst buffer. Not all src buffers may fit into one dst buffer due to 849 * limitation on ACCEL_MLX5_MAX_SGE. 
If this is the case then remaining is not zero */ 850 assert(mlx5_task->dst.iov->iov_len > mlx5_task->dst.iov_offset); 851 dst_len = mlx5_task->dst.iov->iov_len - mlx5_task->dst.iov_offset; 852 rc = accel_mlx5_fill_block_sge(qp->dev, sge.src_sge, &mlx5_task->src, dst_len, &remaining, 853 task->src_domain, task->src_domain_ctx); 854 if (spdk_unlikely(rc <= 0)) { 855 if (rc == 0) { 856 rc = -EINVAL; 857 } 858 SPDK_ERRLOG("failed set src sge, rc %d\n", rc); 859 return rc; 860 } 861 sge.src_sge_count = rc; 862 assert(dst_len > remaining); 863 dst_len -= remaining; 864 865 rc = accel_mlx5_fill_block_sge(qp->dev, sge.dst_sge, &mlx5_task->dst, dst_len, &remaining, 866 task->dst_domain, task->dst_domain_ctx); 867 if (spdk_unlikely(rc != 1)) { 868 /* We use single dst entry, any result other than 1 is an error */ 869 if (rc == 0) { 870 rc = -EINVAL; 871 } 872 SPDK_ERRLOG("failed set dst sge, rc %d\n", rc); 873 return rc; 874 } 875 if (spdk_unlikely(remaining)) { 876 SPDK_ERRLOG("Incorrect dst length, remaining %u\n", remaining); 877 assert(0); 878 return -EINVAL; 879 } 880 881 rc = spdk_mlx5_qp_rdma_write(mlx5_task->qp->qp, sge.src_sge, sge.src_sge_count, 882 sge.dst_sge[0].addr, sge.dst_sge[0].lkey, wrid, fence); 883 if (spdk_unlikely(rc)) { 884 SPDK_ERRLOG("new RDMA WRITE failed with %d\n", rc); 885 return rc; 886 } 887 qp->dev->stats.rdma_writes++; 888 889 return 0; 890 } 891 892 static inline int 893 accel_mlx5_copy_task_process(struct accel_mlx5_task *mlx5_task) 894 { 895 896 struct accel_mlx5_qp *qp = mlx5_task->qp; 897 struct accel_mlx5_dev *dev = qp->dev; 898 uint16_t i; 899 int rc; 900 901 mlx5_task->num_wrs = 0; 902 assert(mlx5_task->num_reqs > 0); 903 assert(mlx5_task->num_ops > 0); 904 905 /* Handle n-1 reqs in order to simplify wrid and fence handling */ 906 for (i = 0; i < mlx5_task->num_ops - 1; i++) { 907 rc = accel_mlx5_copy_task_process_one(mlx5_task, qp, 0, 0); 908 if (spdk_unlikely(rc)) { 909 return rc; 910 } 911 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task); 912 mlx5_task->num_submitted_reqs++; 913 } 914 915 rc = accel_mlx5_copy_task_process_one(mlx5_task, qp, (uint64_t)mlx5_task, 916 SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE); 917 if (spdk_unlikely(rc)) { 918 return rc; 919 } 920 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task); 921 mlx5_task->num_submitted_reqs++; 922 STAILQ_INSERT_TAIL(&qp->in_hw, mlx5_task, link); 923 924 SPDK_DEBUGLOG(accel_mlx5, "end, copy task, %p\n", mlx5_task); 925 926 return 0; 927 } 928 929 static inline int 930 accel_mlx5_copy_task_continue(struct accel_mlx5_task *task) 931 { 932 struct accel_mlx5_qp *qp = task->qp; 933 struct accel_mlx5_dev *dev = qp->dev; 934 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp); 935 936 task->num_ops = spdk_min(qp_slot, task->num_reqs - task->num_completed_reqs); 937 if (spdk_unlikely(task->num_ops == 0)) { 938 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 939 dev->stats.nomem_qdepth++; 940 return -ENOMEM; 941 } 942 return accel_mlx5_copy_task_process(task); 943 } 944 945 static inline uint32_t 946 accel_mlx5_get_copy_task_count(struct iovec *src_iov, uint32_t src_iovcnt, 947 struct iovec *dst_iov, uint32_t dst_iovcnt) 948 { 949 uint32_t src = 0; 950 uint32_t dst = 0; 951 uint64_t src_offset = 0; 952 uint64_t dst_offset = 0; 953 uint32_t num_ops = 0; 954 uint32_t src_sge_count = 0; 955 956 while (src < src_iovcnt && dst < dst_iovcnt) { 957 uint64_t src_len = src_iov[src].iov_len - src_offset; 958 uint64_t dst_len = dst_iov[dst].iov_len - dst_offset; 959 960 if (dst_len < src_len) { 961 dst_offset = 
0; 962 src_offset += dst_len; 963 dst++; 964 num_ops++; 965 src_sge_count = 0; 966 } else if (src_len < dst_len) { 967 dst_offset += src_len; 968 src_offset = 0; 969 src++; 970 if (++src_sge_count >= ACCEL_MLX5_MAX_SGE) { 971 num_ops++; 972 src_sge_count = 0; 973 } 974 } else { 975 dst_offset = 0; 976 src_offset = 0; 977 dst++; 978 src++; 979 num_ops++; 980 src_sge_count = 0; 981 } 982 } 983 984 assert(src == src_iovcnt); 985 assert(dst == dst_iovcnt); 986 assert(src_offset == 0); 987 assert(dst_offset == 0); 988 return num_ops; 989 } 990 991 static inline int 992 accel_mlx5_copy_task_init(struct accel_mlx5_task *mlx5_task) 993 { 994 struct spdk_accel_task *task = &mlx5_task->base; 995 struct accel_mlx5_qp *qp = mlx5_task->qp; 996 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(qp->dev, qp); 997 998 if (spdk_likely(task->s.iovcnt <= ACCEL_MLX5_MAX_SGE)) { 999 mlx5_task->num_reqs = task->d.iovcnt; 1000 } else if (task->d.iovcnt == 1) { 1001 mlx5_task->num_reqs = SPDK_CEIL_DIV(task->s.iovcnt, ACCEL_MLX5_MAX_SGE); 1002 } else { 1003 mlx5_task->num_reqs = accel_mlx5_get_copy_task_count(task->s.iovs, task->s.iovcnt, 1004 task->d.iovs, task->d.iovcnt); 1005 } 1006 mlx5_task->inplace = 0; 1007 accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt); 1008 accel_mlx5_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt); 1009 mlx5_task->num_ops = spdk_min(qp_slot, mlx5_task->num_reqs); 1010 if (spdk_unlikely(!mlx5_task->num_ops)) { 1011 qp->dev->stats.nomem_qdepth++; 1012 return -ENOMEM; 1013 } 1014 SPDK_DEBUGLOG(accel_mlx5, "copy task num_reqs %u, num_ops %u\n", mlx5_task->num_reqs, 1015 mlx5_task->num_ops); 1016 1017 return 0; 1018 } 1019 1020 static inline uint32_t 1021 accel_mlx5_advance_iovec(struct iovec *iov, uint32_t iovcnt, size_t *iov_offset, size_t *len) 1022 { 1023 uint32_t i; 1024 size_t iov_len; 1025 1026 for (i = 0; *len != 0 && i < iovcnt; i++) { 1027 iov_len = iov[i].iov_len - *iov_offset; 1028 1029 if (iov_len < *len) { 1030 *iov_offset = 0; 1031 *len -= iov_len; 1032 continue; 1033 } 1034 if (iov_len == *len) { 1035 *iov_offset = 0; 1036 i++; 1037 } else { /* iov_len > *len */ 1038 *iov_offset += *len; 1039 } 1040 *len = 0; 1041 break; 1042 } 1043 1044 return i; 1045 } 1046 1047 static inline void 1048 accel_mlx5_crc_task_complete(struct accel_mlx5_task *mlx5_task) 1049 { 1050 struct accel_mlx5_dev *dev = mlx5_task->qp->dev; 1051 1052 *mlx5_task->base.crc_dst = mlx5_task->psv->crc ^ UINT32_MAX; 1053 /* Normal task completion without allocated mkeys is not possible */ 1054 assert(mlx5_task->num_ops); 1055 spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, mlx5_task->mkeys, mlx5_task->num_ops); 1056 spdk_mempool_put(dev->dev_ctx->psv_pool, mlx5_task->psv); 1057 spdk_accel_task_complete(&mlx5_task->base, 0); 1058 } 1059 1060 static inline int 1061 accel_mlx5_crc_task_configure_umr(struct accel_mlx5_task *mlx5_task, struct ibv_sge *sge, 1062 uint32_t sge_count, struct spdk_mlx5_mkey_pool_obj *mkey, 1063 enum spdk_mlx5_umr_sig_domain sig_domain, uint32_t umr_len, 1064 bool sig_init, bool sig_check_gen) 1065 { 1066 struct spdk_mlx5_umr_sig_attr sattr = { 1067 .seed = mlx5_task->base.seed ^ UINT32_MAX, 1068 .psv_index = mlx5_task->psv->psv_index, 1069 .domain = sig_domain, 1070 .sigerr_count = mkey->sig.sigerr_count, 1071 .raw_data_size = umr_len, 1072 .init = sig_init, 1073 .check_gen = sig_check_gen, 1074 }; 1075 struct spdk_mlx5_umr_attr umr_attr = { 1076 .mkey = mkey->mkey, 1077 .umr_len = umr_len, 1078 .sge_count = sge_count, 1079 .sge = sge, 1080 }; 1081 1082 
return spdk_mlx5_umr_configure_sig(mlx5_task->qp->qp, &umr_attr, &sattr, 0, 0); 1083 } 1084 1085 static inline int 1086 accel_mlx5_crc_task_fill_sge(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_sge *sge) 1087 { 1088 struct spdk_accel_task *task = &mlx5_task->base; 1089 struct accel_mlx5_qp *qp = mlx5_task->qp; 1090 struct accel_mlx5_dev *dev = qp->dev; 1091 uint32_t remaining; 1092 int rc; 1093 1094 rc = accel_mlx5_fill_block_sge(dev, sge->src_sge, &mlx5_task->src, task->nbytes, &remaining, 1095 task->src_domain, task->src_domain_ctx); 1096 if (spdk_unlikely(rc <= 0)) { 1097 if (rc == 0) { 1098 rc = -EINVAL; 1099 } 1100 SPDK_ERRLOG("failed set src sge, rc %d\n", rc); 1101 return rc; 1102 } 1103 assert(remaining == 0); 1104 sge->src_sge_count = rc; 1105 1106 if (!mlx5_task->inplace) { 1107 rc = accel_mlx5_fill_block_sge(dev, sge->dst_sge, &mlx5_task->dst, task->nbytes, &remaining, 1108 task->dst_domain, task->dst_domain_ctx); 1109 if (spdk_unlikely(rc <= 0)) { 1110 if (rc == 0) { 1111 rc = -EINVAL; 1112 } 1113 SPDK_ERRLOG("failed set dst sge, rc %d\n", rc); 1114 return rc; 1115 } 1116 assert(remaining == 0); 1117 sge->dst_sge_count = rc; 1118 } 1119 1120 return 0; 1121 } 1122 1123 static inline int 1124 accel_mlx5_crc_task_process_one_req(struct accel_mlx5_task *mlx5_task) 1125 { 1126 struct accel_mlx5_sge sges; 1127 struct accel_mlx5_qp *qp = mlx5_task->qp; 1128 struct accel_mlx5_dev *dev = qp->dev; 1129 uint32_t num_ops = spdk_min(mlx5_task->num_reqs - mlx5_task->num_completed_reqs, 1130 mlx5_task->num_ops); 1131 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp); 1132 uint32_t rdma_fence = SPDK_MLX5_WQE_CTRL_STRONG_ORDERING; 1133 struct ibv_sge *sge; 1134 int rc; 1135 uint16_t sge_count; 1136 1137 num_ops = spdk_min(num_ops, qp_slot >> 1); 1138 if (spdk_unlikely(!num_ops)) { 1139 return -EINVAL; 1140 } 1141 1142 mlx5_task->num_wrs = 0; 1143 /* At this moment we have as many requests as can be submitted to a qp */ 1144 rc = accel_mlx5_crc_task_fill_sge(mlx5_task, &sges); 1145 if (spdk_unlikely(rc)) { 1146 return rc; 1147 } 1148 rc = accel_mlx5_crc_task_configure_umr(mlx5_task, sges.src_sge, sges.src_sge_count, 1149 mlx5_task->mkeys[0], SPDK_MLX5_UMR_SIG_DOMAIN_WIRE, mlx5_task->base.nbytes, true, true); 1150 if (spdk_unlikely(rc)) { 1151 SPDK_ERRLOG("UMR configure failed with %d\n", rc); 1152 return rc; 1153 } 1154 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task); 1155 dev->stats.sig_umrs++; 1156 1157 if (mlx5_task->inplace) { 1158 sge = sges.src_sge; 1159 sge_count = sges.src_sge_count; 1160 } else { 1161 sge = sges.dst_sge; 1162 sge_count = sges.dst_sge_count; 1163 } 1164 1165 /* 1166 * Add the crc destination to the end of sges. A free entry must be available for CRC 1167 * because the task init function reserved it. 
1168 */ 1169 assert(sge_count < ACCEL_MLX5_MAX_SGE); 1170 sge[sge_count].lkey = mlx5_task->psv->crc_lkey; 1171 sge[sge_count].addr = (uintptr_t)&mlx5_task->psv->crc; 1172 sge[sge_count++].length = sizeof(uint32_t); 1173 1174 if (spdk_unlikely(mlx5_task->psv->bits.error)) { 1175 rc = spdk_mlx5_qp_set_psv(qp->qp, mlx5_task->psv->psv_index, *mlx5_task->base.crc_dst, 0, 0); 1176 if (spdk_unlikely(rc)) { 1177 SPDK_ERRLOG("SET_PSV failed with %d\n", rc); 1178 return rc; 1179 } 1180 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task); 1181 } 1182 1183 rc = spdk_mlx5_qp_rdma_read(qp->qp, sge, sge_count, 0, mlx5_task->mkeys[0]->mkey, 1184 (uint64_t)mlx5_task, rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE); 1185 if (spdk_unlikely(rc)) { 1186 SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc); 1187 return rc; 1188 } 1189 mlx5_task->num_submitted_reqs++; 1190 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task); 1191 dev->stats.rdma_reads++; 1192 1193 return 0; 1194 } 1195 1196 static inline int 1197 accel_mlx5_crc_task_fill_umr_sge(struct accel_mlx5_qp *qp, struct ibv_sge *sge, 1198 struct accel_mlx5_iov_sgl *umr_iovs, struct spdk_memory_domain *domain, 1199 void *domain_ctx, struct accel_mlx5_iov_sgl *rdma_iovs, size_t *len) 1200 { 1201 int umr_idx = 0; 1202 int rdma_idx = 0; 1203 int umr_iovcnt = spdk_min(umr_iovs->iovcnt, (int)ACCEL_MLX5_MAX_SGE); 1204 int rdma_iovcnt = spdk_min(rdma_iovs->iovcnt, (int)ACCEL_MLX5_MAX_SGE); 1205 size_t umr_iov_offset; 1206 size_t rdma_iov_offset; 1207 size_t umr_len = 0; 1208 void *sge_addr; 1209 size_t sge_len; 1210 size_t umr_sge_len; 1211 size_t rdma_sge_len; 1212 int rc; 1213 1214 umr_iov_offset = umr_iovs->iov_offset; 1215 rdma_iov_offset = rdma_iovs->iov_offset; 1216 1217 while (umr_idx < umr_iovcnt && rdma_idx < rdma_iovcnt) { 1218 umr_sge_len = umr_iovs->iov[umr_idx].iov_len - umr_iov_offset; 1219 rdma_sge_len = rdma_iovs->iov[rdma_idx].iov_len - rdma_iov_offset; 1220 sge_addr = umr_iovs->iov[umr_idx].iov_base + umr_iov_offset; 1221 1222 if (umr_sge_len == rdma_sge_len) { 1223 rdma_idx++; 1224 umr_iov_offset = 0; 1225 rdma_iov_offset = 0; 1226 sge_len = umr_sge_len; 1227 } else if (umr_sge_len < rdma_sge_len) { 1228 umr_iov_offset = 0; 1229 rdma_iov_offset += umr_sge_len; 1230 sge_len = umr_sge_len; 1231 } else { 1232 size_t remaining; 1233 1234 remaining = umr_sge_len - rdma_sge_len; 1235 while (remaining) { 1236 rdma_idx++; 1237 if (rdma_idx == (int)ACCEL_MLX5_MAX_SGE) { 1238 break; 1239 } 1240 rdma_sge_len = rdma_iovs->iov[rdma_idx].iov_len; 1241 if (remaining == rdma_sge_len) { 1242 rdma_idx++; 1243 rdma_iov_offset = 0; 1244 umr_iov_offset = 0; 1245 remaining = 0; 1246 break; 1247 } 1248 if (remaining < rdma_sge_len) { 1249 rdma_iov_offset = remaining; 1250 umr_iov_offset = 0; 1251 remaining = 0; 1252 break; 1253 } 1254 remaining -= rdma_sge_len; 1255 } 1256 sge_len = umr_sge_len - remaining; 1257 } 1258 rc = accel_mlx5_translate_addr(sge_addr, sge_len, domain, domain_ctx, qp->dev, &sge[umr_idx]); 1259 if (spdk_unlikely(rc)) { 1260 return -EINVAL; 1261 } 1262 SPDK_DEBUGLOG(accel_mlx5, "\t sge[%d] lkey %u, addr %p, len %u\n", umr_idx, sge[umr_idx].lkey, 1263 (void *)sge[umr_idx].addr, sge[umr_idx].length); 1264 umr_len += sge_len; 1265 umr_idx++; 1266 } 1267 accel_mlx5_iov_sgl_advance(umr_iovs, umr_len); 1268 accel_mlx5_iov_sgl_advance(rdma_iovs, umr_len); 1269 *len = umr_len; 1270 1271 return umr_idx; 1272 } 1273 1274 static inline int 1275 accel_mlx5_crc_task_process_multi_req(struct accel_mlx5_task *mlx5_task) 1276 { 1277 size_t 
umr_len[ACCEL_MLX5_MAX_MKEYS_IN_TASK]; 1278 struct ibv_sge sges[ACCEL_MLX5_MAX_SGE]; 1279 struct spdk_accel_task *task = &mlx5_task->base; 1280 struct accel_mlx5_qp *qp = mlx5_task->qp; 1281 struct accel_mlx5_dev *dev = qp->dev; 1282 struct accel_mlx5_iov_sgl umr_sgl; 1283 struct accel_mlx5_iov_sgl *umr_sgl_ptr; 1284 struct accel_mlx5_iov_sgl rdma_sgl; 1285 uint64_t umr_offset; 1286 uint32_t rdma_fence = SPDK_MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE; 1287 int sge_count; 1288 uint32_t remaining; 1289 int rc; 1290 uint16_t i; 1291 uint16_t num_ops = spdk_min(mlx5_task->num_reqs - mlx5_task->num_completed_reqs, 1292 mlx5_task->num_ops); 1293 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp); 1294 bool sig_init, sig_check_gen = false; 1295 1296 num_ops = spdk_min(num_ops, qp_slot >> 1); 1297 if (spdk_unlikely(!num_ops)) { 1298 return -EINVAL; 1299 } 1300 /* Init signature on the first UMR */ 1301 sig_init = !mlx5_task->num_submitted_reqs; 1302 1303 /* 1304 * accel_mlx5_crc_task_fill_umr_sge() and accel_mlx5_fill_block_sge() advance an IOV during iteration 1305 * on it. We must copy accel_mlx5_iov_sgl to iterate twice or more on the same IOV. 1306 * 1307 * In the in-place case, we iterate on the source IOV three times. That's why we need two copies of 1308 * the source accel_mlx5_iov_sgl. 1309 * 1310 * In the out-of-place case, we iterate on the source IOV once and on the destination IOV two times. 1311 * So, we need one copy of the destination accel_mlx5_iov_sgl. 1312 */ 1313 if (mlx5_task->inplace) { 1314 accel_mlx5_iov_sgl_init(&umr_sgl, mlx5_task->src.iov, mlx5_task->src.iovcnt); 1315 umr_sgl_ptr = &umr_sgl; 1316 accel_mlx5_iov_sgl_init(&rdma_sgl, mlx5_task->src.iov, mlx5_task->src.iovcnt); 1317 } else { 1318 umr_sgl_ptr = &mlx5_task->src; 1319 accel_mlx5_iov_sgl_init(&rdma_sgl, mlx5_task->dst.iov, mlx5_task->dst.iovcnt); 1320 } 1321 mlx5_task->num_wrs = 0; 1322 for (i = 0; i < num_ops; i++) { 1323 /* 1324 * The last request may have only CRC. Skip UMR in this case because the MKey from 1325 * the previous request is used. 1326 */ 1327 if (umr_sgl_ptr->iovcnt == 0) { 1328 assert((mlx5_task->num_completed_reqs + i + 1) == mlx5_task->num_reqs); 1329 break; 1330 } 1331 sge_count = accel_mlx5_crc_task_fill_umr_sge(qp, sges, umr_sgl_ptr, task->src_domain, 1332 task->src_domain_ctx, &rdma_sgl, &umr_len[i]); 1333 if (spdk_unlikely(sge_count <= 0)) { 1334 rc = (sge_count == 0) ? -EINVAL : sge_count; 1335 SPDK_ERRLOG("failed set UMR sge, rc %d\n", rc); 1336 return rc; 1337 } 1338 if (umr_sgl_ptr->iovcnt == 0) { 1339 /* 1340 * We post RDMA without UMR if the last request has only CRC. We use an MKey from 1341 * the last UMR in this case. Since the last request can be postponed to the next 1342 * call of this function, we must save the MKey to the task structure. 
1343 */ 1344 mlx5_task->last_umr_len = umr_len[i]; 1345 mlx5_task->last_mkey_idx = i; 1346 sig_check_gen = true; 1347 } 1348 rc = accel_mlx5_crc_task_configure_umr(mlx5_task, sges, sge_count, mlx5_task->mkeys[i], 1349 SPDK_MLX5_UMR_SIG_DOMAIN_WIRE, umr_len[i], sig_init, 1350 sig_check_gen); 1351 if (spdk_unlikely(rc)) { 1352 SPDK_ERRLOG("UMR configure failed with %d\n", rc); 1353 return rc; 1354 } 1355 sig_init = false; 1356 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task); 1357 dev->stats.sig_umrs++; 1358 } 1359 1360 if (spdk_unlikely(mlx5_task->psv->bits.error)) { 1361 rc = spdk_mlx5_qp_set_psv(qp->qp, mlx5_task->psv->psv_index, *mlx5_task->base.crc_dst, 0, 0); 1362 if (spdk_unlikely(rc)) { 1363 SPDK_ERRLOG("SET_PSV failed with %d\n", rc); 1364 return rc; 1365 } 1366 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task); 1367 } 1368 1369 for (i = 0; i < num_ops - 1; i++) { 1370 if (mlx5_task->inplace) { 1371 sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->src, umr_len[i], &remaining, 1372 task->src_domain, task->src_domain_ctx); 1373 } else { 1374 sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->dst, umr_len[i], &remaining, 1375 task->dst_domain, task->dst_domain_ctx); 1376 } 1377 if (spdk_unlikely(sge_count <= 0)) { 1378 rc = (sge_count == 0) ? -EINVAL : sge_count; 1379 SPDK_ERRLOG("failed set RDMA sge, rc %d\n", rc); 1380 return rc; 1381 } 1382 rc = spdk_mlx5_qp_rdma_read(qp->qp, sges, sge_count, 0, mlx5_task->mkeys[i]->mkey, 1383 0, rdma_fence); 1384 if (spdk_unlikely(rc)) { 1385 SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc); 1386 return rc; 1387 } 1388 mlx5_task->num_submitted_reqs++; 1389 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task); 1390 dev->stats.rdma_reads++; 1391 rdma_fence = SPDK_MLX5_WQE_CTRL_STRONG_ORDERING; 1392 } 1393 if ((mlx5_task->inplace && mlx5_task->src.iovcnt == 0) || (!mlx5_task->inplace && 1394 mlx5_task->dst.iovcnt == 0)) { 1395 /* 1396 * The last RDMA does not have any data, only CRC. It also does not have a paired Mkey. 1397 * The CRC is handled in the previous MKey in this case. 1398 */ 1399 sge_count = 0; 1400 umr_offset = mlx5_task->last_umr_len; 1401 } else { 1402 umr_offset = 0; 1403 mlx5_task->last_mkey_idx = i; 1404 if (mlx5_task->inplace) { 1405 sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->src, umr_len[i], &remaining, 1406 task->src_domain, task->src_domain_ctx); 1407 } else { 1408 sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->dst, umr_len[i], &remaining, 1409 task->dst_domain, task->dst_domain_ctx); 1410 } 1411 if (spdk_unlikely(sge_count <= 0)) { 1412 rc = (sge_count == 0) ? -EINVAL : sge_count; 1413 SPDK_ERRLOG("failed set RDMA sge, rc %d\n", rc); 1414 return rc; 1415 } 1416 assert(remaining == 0); 1417 } 1418 if ((mlx5_task->num_completed_reqs + i + 1) == mlx5_task->num_reqs) { 1419 /* Ensure that there is a free sge for the CRC destination. */ 1420 assert(sge_count < (int)ACCEL_MLX5_MAX_SGE); 1421 /* Add the crc destination to the end of sges. 
*/ 1422 sges[sge_count].lkey = mlx5_task->psv->crc_lkey; 1423 sges[sge_count].addr = (uintptr_t)&mlx5_task->psv->crc; 1424 sges[sge_count++].length = sizeof(uint32_t); 1425 } 1426 rdma_fence |= SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE; 1427 rc = spdk_mlx5_qp_rdma_read(qp->qp, sges, sge_count, umr_offset, 1428 mlx5_task->mkeys[mlx5_task->last_mkey_idx]->mkey, 1429 (uint64_t)mlx5_task, rdma_fence); 1430 if (spdk_unlikely(rc)) { 1431 SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc); 1432 return rc; 1433 } 1434 mlx5_task->num_submitted_reqs++; 1435 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task); 1436 dev->stats.rdma_reads++; 1437 1438 return 0; 1439 } 1440 1441 static inline int 1442 accel_mlx5_crc_task_process(struct accel_mlx5_task *mlx5_task) 1443 { 1444 int rc; 1445 1446 assert(mlx5_task->mlx5_opcode == ACCEL_MLX5_OPC_CRC32C); 1447 1448 SPDK_DEBUGLOG(accel_mlx5, "begin, crc task, %p, reqs: total %u, submitted %u, completed %u\n", 1449 mlx5_task, mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs); 1450 1451 if (mlx5_task->num_reqs == 1) { 1452 rc = accel_mlx5_crc_task_process_one_req(mlx5_task); 1453 } else { 1454 rc = accel_mlx5_crc_task_process_multi_req(mlx5_task); 1455 } 1456 1457 if (rc == 0) { 1458 STAILQ_INSERT_TAIL(&mlx5_task->qp->in_hw, mlx5_task, link); 1459 SPDK_DEBUGLOG(accel_mlx5, "end, crc task, %p, reqs: total %u, submitted %u, completed %u\n", 1460 mlx5_task, mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, 1461 mlx5_task->num_completed_reqs); 1462 } 1463 1464 return rc; 1465 } 1466 1467 static inline int 1468 accel_mlx5_task_alloc_crc_ctx(struct accel_mlx5_task *task, uint32_t qp_slot) 1469 { 1470 struct accel_mlx5_qp *qp = task->qp; 1471 struct accel_mlx5_dev *dev = qp->dev; 1472 1473 if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(task, dev->sig_mkeys))) { 1474 SPDK_DEBUGLOG(accel_mlx5, "no mkeys in signature mkey pool, dev %s\n", 1475 dev->dev_ctx->context->device->name); 1476 dev->stats.nomem_mkey++; 1477 return -ENOMEM; 1478 } 1479 task->psv = spdk_mempool_get(dev->dev_ctx->psv_pool); 1480 if (spdk_unlikely(!task->psv)) { 1481 SPDK_DEBUGLOG(accel_mlx5, "no reqs in psv pool, dev %s\n", dev->dev_ctx->context->device->name); 1482 spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, task->mkeys, task->num_ops); 1483 task->num_ops = 0; 1484 dev->stats.nomem_mkey++; 1485 return -ENOMEM; 1486 } 1487 /* One extra slot is needed for SET_PSV WQE to reset the error state in PSV. */ 1488 if (spdk_unlikely(task->psv->bits.error)) { 1489 uint32_t n_slots = task->num_ops * 2 + 1; 1490 1491 if (qp_slot < n_slots) { 1492 spdk_mempool_put(dev->dev_ctx->psv_pool, task->psv); 1493 spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, task->mkeys, task->num_ops); 1494 dev->stats.nomem_qdepth++; 1495 task->num_ops = 0; 1496 return -ENOMEM; 1497 } 1498 } 1499 1500 return 0; 1501 } 1502 1503 static inline int 1504 accel_mlx5_crc_task_continue(struct accel_mlx5_task *task) 1505 { 1506 struct accel_mlx5_qp *qp = task->qp; 1507 struct accel_mlx5_dev *dev = qp->dev; 1508 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp); 1509 int rc; 1510 1511 assert(task->num_reqs > task->num_completed_reqs); 1512 if (task->num_ops == 0) { 1513 /* No mkeys allocated, try to allocate now. 
*/ 1514 rc = accel_mlx5_task_alloc_crc_ctx(task, qp_slot); 1515 if (spdk_unlikely(rc)) { 1516 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 1517 return -ENOMEM; 1518 } 1519 } 1520 /* We need to post at least 1 UMR and 1 RDMA operation */ 1521 if (spdk_unlikely(qp_slot < 2)) { 1522 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 1523 dev->stats.nomem_qdepth++; 1524 return -ENOMEM; 1525 } 1526 1527 return accel_mlx5_crc_task_process(task); 1528 } 1529 1530 static inline uint32_t 1531 accel_mlx5_get_crc_task_count(struct iovec *src_iov, uint32_t src_iovcnt, struct iovec *dst_iov, 1532 uint32_t dst_iovcnt) 1533 { 1534 uint32_t src_idx = 0; 1535 uint32_t dst_idx = 0; 1536 uint32_t num_ops = 1; 1537 uint32_t num_src_sge = 1; 1538 uint32_t num_dst_sge = 1; 1539 size_t src_offset = 0; 1540 size_t dst_offset = 0; 1541 uint32_t num_sge; 1542 size_t src_len; 1543 size_t dst_len; 1544 1545 /* One operation is enough if both iovs fit into ACCEL_MLX5_MAX_SGE. One SGE is reserved for CRC on dst_iov. */ 1546 if (src_iovcnt <= ACCEL_MLX5_MAX_SGE && (dst_iovcnt + 1) <= ACCEL_MLX5_MAX_SGE) { 1547 return 1; 1548 } 1549 1550 while (src_idx < src_iovcnt && dst_idx < dst_iovcnt) { 1551 if (num_src_sge > ACCEL_MLX5_MAX_SGE || num_dst_sge > ACCEL_MLX5_MAX_SGE) { 1552 num_ops++; 1553 num_src_sge = 1; 1554 num_dst_sge = 1; 1555 } 1556 src_len = src_iov[src_idx].iov_len - src_offset; 1557 dst_len = dst_iov[dst_idx].iov_len - dst_offset; 1558 1559 if (src_len == dst_len) { 1560 num_src_sge++; 1561 num_dst_sge++; 1562 src_offset = 0; 1563 dst_offset = 0; 1564 src_idx++; 1565 dst_idx++; 1566 continue; 1567 } 1568 if (src_len < dst_len) { 1569 /* Advance src_iov to reach the point that corresponds to the end of the current dst_iov. */ 1570 num_sge = accel_mlx5_advance_iovec(&src_iov[src_idx], 1571 spdk_min(ACCEL_MLX5_MAX_SGE + 1 - num_src_sge, 1572 src_iovcnt - src_idx), 1573 &src_offset, &dst_len); 1574 src_idx += num_sge; 1575 num_src_sge += num_sge; 1576 if (dst_len != 0) { 1577 /* 1578 * ACCEL_MLX5_MAX_SGE is reached on src_iov, and dst_len bytes 1579 * are left on the current dst_iov. 1580 */ 1581 dst_offset = dst_iov[dst_idx].iov_len - dst_len; 1582 } else { 1583 /* The src_iov advance is completed, shift to the next dst_iov. */ 1584 dst_idx++; 1585 num_dst_sge++; 1586 dst_offset = 0; 1587 } 1588 } else { /* src_len > dst_len */ 1589 /* Advance dst_iov to reach the point that corresponds to the end of the current src_iov. */ 1590 num_sge = accel_mlx5_advance_iovec(&dst_iov[dst_idx], 1591 spdk_min(ACCEL_MLX5_MAX_SGE + 1 - num_dst_sge, 1592 dst_iovcnt - dst_idx), 1593 &dst_offset, &src_len); 1594 dst_idx += num_sge; 1595 num_dst_sge += num_sge; 1596 if (src_len != 0) { 1597 /* 1598 * ACCEL_MLX5_MAX_SGE is reached on dst_iov, and src_len bytes 1599 * are left on the current src_iov. 1600 */ 1601 src_offset = src_iov[src_idx].iov_len - src_len; 1602 } else { 1603 /* The dst_iov advance is completed, shift to the next src_iov. */ 1604 src_idx++; 1605 num_src_sge++; 1606 src_offset = 0; 1607 } 1608 } 1609 } 1610 /* An extra operation is needed if no space is left on dst_iov because CRC takes one SGE. */ 1611 if (num_dst_sge > ACCEL_MLX5_MAX_SGE) { 1612 num_ops++; 1613 } 1614 1615 /* The above loop must reach the end of both iovs simultaneously because their size is the same. 
*/ 1616 assert(src_idx == src_iovcnt); 1617 assert(dst_idx == dst_iovcnt); 1618 assert(src_offset == 0); 1619 assert(dst_offset == 0); 1620 1621 return num_ops; 1622 } 1623 1624 static inline int 1625 accel_mlx5_crc_task_init(struct accel_mlx5_task *mlx5_task) 1626 { 1627 struct spdk_accel_task *task = &mlx5_task->base; 1628 struct accel_mlx5_qp *qp = mlx5_task->qp; 1629 uint32_t qp_slot = accel_mlx5_dev_get_available_slots(qp->dev, qp); 1630 int rc; 1631 1632 accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt); 1633 if (mlx5_task->inplace) { 1634 /* One entry is reserved for CRC */ 1635 mlx5_task->num_reqs = SPDK_CEIL_DIV(mlx5_task->src.iovcnt + 1, ACCEL_MLX5_MAX_SGE); 1636 } else { 1637 accel_mlx5_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt); 1638 mlx5_task->num_reqs = accel_mlx5_get_crc_task_count(mlx5_task->src.iov, mlx5_task->src.iovcnt, 1639 mlx5_task->dst.iov, mlx5_task->dst.iovcnt); 1640 } 1641 1642 rc = accel_mlx5_task_alloc_crc_ctx(mlx5_task, qp_slot); 1643 if (spdk_unlikely(rc)) { 1644 return rc; 1645 } 1646 1647 if (spdk_unlikely(qp_slot < 2)) { 1648 /* Queue is full, queue this task */ 1649 SPDK_DEBUGLOG(accel_mlx5, "dev %s qp %p is full\n", qp->dev->dev_ctx->context->device->name, 1650 mlx5_task->qp); 1651 qp->dev->stats.nomem_qdepth++; 1652 return -ENOMEM; 1653 } 1654 return 0; 1655 } 1656 1657 static int 1658 accel_mlx5_task_op_not_implemented(struct accel_mlx5_task *mlx5_task) 1659 { 1660 SPDK_ERRLOG("wrong function called\n"); 1661 SPDK_UNREACHABLE(); 1662 } 1663 1664 static void 1665 accel_mlx5_task_op_not_implemented_v(struct accel_mlx5_task *mlx5_task) 1666 { 1667 SPDK_ERRLOG("wrong function called\n"); 1668 SPDK_UNREACHABLE(); 1669 } 1670 1671 static int 1672 accel_mlx5_task_op_not_supported(struct accel_mlx5_task *mlx5_task) 1673 { 1674 SPDK_ERRLOG("Unsupported opcode %d\n", mlx5_task->base.op_code); 1675 1676 return -ENOTSUP; 1677 } 1678 1679 static struct accel_mlx5_task_operations g_accel_mlx5_tasks_ops[] = { 1680 [ACCEL_MLX5_OPC_COPY] = { 1681 .init = accel_mlx5_copy_task_init, 1682 .process = accel_mlx5_copy_task_process, 1683 .cont = accel_mlx5_copy_task_continue, 1684 .complete = accel_mlx5_copy_task_complete, 1685 }, 1686 [ACCEL_MLX5_OPC_CRYPTO] = { 1687 .init = accel_mlx5_crypto_task_init, 1688 .process = accel_mlx5_crypto_task_process, 1689 .cont = accel_mlx5_crypto_task_continue, 1690 .complete = accel_mlx5_crypto_task_complete, 1691 }, 1692 [ACCEL_MLX5_OPC_CRC32C] = { 1693 .init = accel_mlx5_crc_task_init, 1694 .process = accel_mlx5_crc_task_process, 1695 .cont = accel_mlx5_crc_task_continue, 1696 .complete = accel_mlx5_crc_task_complete, 1697 }, 1698 [ACCEL_MLX5_OPC_LAST] = { 1699 .init = accel_mlx5_task_op_not_supported, 1700 .process = accel_mlx5_task_op_not_implemented, 1701 .cont = accel_mlx5_task_op_not_implemented, 1702 .complete = accel_mlx5_task_op_not_implemented_v 1703 }, 1704 }; 1705 1706 static inline void 1707 accel_mlx5_task_complete(struct accel_mlx5_task *task) 1708 { 1709 assert(task->num_reqs == task->num_completed_reqs); 1710 SPDK_DEBUGLOG(accel_mlx5, "Complete task %p, opc %d\n", task, task->base.op_code); 1711 1712 g_accel_mlx5_tasks_ops[task->mlx5_opcode].complete(task); 1713 } 1714 1715 static inline int 1716 accel_mlx5_task_continue(struct accel_mlx5_task *task) 1717 { 1718 struct accel_mlx5_qp *qp = task->qp; 1719 struct accel_mlx5_dev *dev = qp->dev; 1720 1721 if (spdk_unlikely(qp->recovering)) { 1722 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 1723 return 0; 1724 } 1725 1726 return 
g_accel_mlx5_tasks_ops[task->mlx5_opcode].cont(task); 1727 } 1728 static inline void 1729 accel_mlx5_task_init_opcode(struct accel_mlx5_task *mlx5_task) 1730 { 1731 uint8_t base_opcode = mlx5_task->base.op_code; 1732 1733 switch (base_opcode) { 1734 case SPDK_ACCEL_OPC_COPY: 1735 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_COPY; 1736 break; 1737 case SPDK_ACCEL_OPC_ENCRYPT: 1738 assert(g_accel_mlx5.crypto_supported); 1739 mlx5_task->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_WIRE; 1740 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO; 1741 break; 1742 case SPDK_ACCEL_OPC_DECRYPT: 1743 assert(g_accel_mlx5.crypto_supported); 1744 mlx5_task->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_MEMORY; 1745 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO; 1746 break; 1747 case SPDK_ACCEL_OPC_CRC32C: 1748 mlx5_task->inplace = 1; 1749 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRC32C; 1750 break; 1751 case SPDK_ACCEL_OPC_COPY_CRC32C: 1752 mlx5_task->inplace = 0; 1753 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRC32C; 1754 break; 1755 default: 1756 SPDK_ERRLOG("wrong opcode %d\n", base_opcode); 1757 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_LAST; 1758 } 1759 } 1760 1761 static inline void 1762 accel_mlx5_task_reset(struct accel_mlx5_task *mlx5_task) 1763 { 1764 mlx5_task->num_completed_reqs = 0; 1765 mlx5_task->num_submitted_reqs = 0; 1766 mlx5_task->num_ops = 0; 1767 mlx5_task->num_processed_blocks = 0; 1768 mlx5_task->raw = 0; 1769 } 1770 1771 static int 1772 accel_mlx5_submit_tasks(struct spdk_io_channel *_ch, struct spdk_accel_task *task) 1773 { 1774 struct accel_mlx5_io_channel *ch = spdk_io_channel_get_ctx(_ch); 1775 struct accel_mlx5_task *mlx5_task = SPDK_CONTAINEROF(task, struct accel_mlx5_task, base); 1776 struct accel_mlx5_dev *dev; 1777 int rc; 1778 1779 /* We should not receive any tasks if the module was not enabled */ 1780 assert(g_accel_mlx5.enabled); 1781 1782 dev = &ch->devs[ch->dev_idx]; 1783 ch->dev_idx++; 1784 if (ch->dev_idx == ch->num_devs) { 1785 ch->dev_idx = 0; 1786 } 1787 1788 mlx5_task->qp = &dev->qp; 1789 accel_mlx5_task_reset(mlx5_task); 1790 accel_mlx5_task_init_opcode(mlx5_task); 1791 1792 dev->stats.opcodes[mlx5_task->mlx5_opcode]++; 1793 rc = g_accel_mlx5_tasks_ops[mlx5_task->mlx5_opcode].init(mlx5_task); 1794 if (spdk_unlikely(rc)) { 1795 if (rc == -ENOMEM) { 1796 SPDK_DEBUGLOG(accel_mlx5, "no reqs to handle new task %p (required %u), put to queue\n", mlx5_task, 1797 mlx5_task->num_reqs); 1798 STAILQ_INSERT_TAIL(&dev->nomem, mlx5_task, link); 1799 return 0; 1800 } 1801 SPDK_ERRLOG("Task opc %d init failed, rc %d\n", task->op_code, rc); 1802 return rc; 1803 } 1804 1805 if (spdk_unlikely(mlx5_task->qp->recovering)) { 1806 STAILQ_INSERT_TAIL(&dev->nomem, mlx5_task, link); 1807 return 0; 1808 } 1809 1810 return g_accel_mlx5_tasks_ops[mlx5_task->mlx5_opcode].process(mlx5_task); 1811 } 1812 1813 static void accel_mlx5_recover_qp(struct accel_mlx5_qp *qp); 1814 1815 static int 1816 accel_mlx5_recover_qp_poller(void *arg) 1817 { 1818 struct accel_mlx5_qp *qp = arg; 1819 1820 spdk_poller_unregister(&qp->recover_poller); 1821 accel_mlx5_recover_qp(qp); 1822 return SPDK_POLLER_BUSY; 1823 } 1824 1825 static void 1826 accel_mlx5_recover_qp(struct accel_mlx5_qp *qp) 1827 { 1828 struct accel_mlx5_dev *dev = qp->dev; 1829 struct spdk_mlx5_qp_attr mlx5_qp_attr = {}; 1830 int rc; 1831 1832 SPDK_NOTICELOG("Recovering qp %p, core %u\n", qp, spdk_env_get_current_core()); 1833 if (qp->qp) { 1834 spdk_mlx5_qp_destroy(qp->qp); 1835 qp->qp = NULL; 1836 } 1837 1838 
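	/* Recreate the QP with the same send-queue attributes used in accel_mlx5_create_qp(). No receive
	 * work requests are needed: this QP only initiates UMR and RDMA work requests, and their
	 * completions are reaped from dev->cq by accel_mlx5_poll_cq(). */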
mlx5_qp_attr.cap.max_send_wr = g_accel_mlx5.attr.qp_size; 1839 mlx5_qp_attr.cap.max_recv_wr = 0; 1840 mlx5_qp_attr.cap.max_send_sge = ACCEL_MLX5_MAX_SGE; 1841 mlx5_qp_attr.cap.max_inline_data = sizeof(struct ibv_sge) * ACCEL_MLX5_MAX_SGE; 1842 1843 rc = spdk_mlx5_qp_create(dev->dev_ctx->pd, dev->cq, &mlx5_qp_attr, &qp->qp); 1844 if (rc) { 1845 SPDK_ERRLOG("Failed to create mlx5 dma QP, rc %d. Retry in %d usec\n", 1846 rc, ACCEL_MLX5_RECOVER_POLLER_PERIOD_US); 1847 qp->recover_poller = SPDK_POLLER_REGISTER(accel_mlx5_recover_qp_poller, qp, 1848 ACCEL_MLX5_RECOVER_POLLER_PERIOD_US); 1849 return; 1850 } 1851 1852 qp->recovering = false; 1853 } 1854 1855 static inline void 1856 accel_mlx5_process_error_cpl(struct spdk_mlx5_cq_completion *wc, struct accel_mlx5_task *task) 1857 { 1858 struct accel_mlx5_qp *qp = task->qp; 1859 1860 if (wc->status != IBV_WC_WR_FLUSH_ERR) { 1861 SPDK_WARNLOG("RDMA: qp %p, task %p, WC status %d, core %u\n", 1862 qp, task, wc->status, spdk_env_get_current_core()); 1863 } else { 1864 SPDK_DEBUGLOG(accel_mlx5, 1865 "RDMA: qp %p, task %p, WC status %d, core %u\n", 1866 qp, task, wc->status, spdk_env_get_current_core()); 1867 } 1868 1869 qp->recovering = true; 1870 assert(task->num_completed_reqs <= task->num_submitted_reqs); 1871 if (task->num_completed_reqs == task->num_submitted_reqs) { 1872 STAILQ_REMOVE_HEAD(&qp->in_hw, link); 1873 accel_mlx5_task_fail(task, -EIO); 1874 } 1875 } 1876 1877 static inline int64_t 1878 accel_mlx5_poll_cq(struct accel_mlx5_dev *dev) 1879 { 1880 struct spdk_mlx5_cq_completion wc[ACCEL_MLX5_MAX_WC]; 1881 struct accel_mlx5_task *task; 1882 struct accel_mlx5_qp *qp; 1883 int reaped, i, rc; 1884 uint16_t completed; 1885 1886 dev->stats.polls++; 1887 reaped = spdk_mlx5_cq_poll_completions(dev->cq, wc, ACCEL_MLX5_MAX_WC); 1888 if (spdk_unlikely(reaped < 0)) { 1889 SPDK_ERRLOG("Error polling CQ! 
(%d): %s\n", errno, spdk_strerror(errno)); 1890 return reaped; 1891 } else if (reaped == 0) { 1892 dev->stats.idle_polls++; 1893 return 0; 1894 } 1895 dev->stats.completions += reaped; 1896 1897 SPDK_DEBUGLOG(accel_mlx5, "Reaped %d cpls on dev %s\n", reaped, 1898 dev->dev_ctx->context->device->name); 1899 1900 for (i = 0; i < reaped; i++) { 1901 if (spdk_unlikely(!wc[i].wr_id)) { 1902 /* Unsignaled completion with error, ignore */ 1903 continue; 1904 } 1905 task = (struct accel_mlx5_task *)wc[i].wr_id; 1906 qp = task->qp; 1907 assert(task == STAILQ_FIRST(&qp->in_hw) && "submission mismatch"); 1908 assert(task->num_submitted_reqs > task->num_completed_reqs); 1909 completed = task->num_submitted_reqs - task->num_completed_reqs; 1910 assert((uint32_t)task->num_completed_reqs + completed <= UINT16_MAX); 1911 task->num_completed_reqs += completed; 1912 assert(qp->wrs_submitted >= task->num_wrs); 1913 qp->wrs_submitted -= task->num_wrs; 1914 assert(dev->wrs_in_cq > 0); 1915 dev->wrs_in_cq--; 1916 1917 if (wc[i].status) { 1918 accel_mlx5_process_error_cpl(&wc[i], task); 1919 if (qp->wrs_submitted == 0) { 1920 assert(STAILQ_EMPTY(&qp->in_hw)); 1921 accel_mlx5_recover_qp(qp); 1922 } 1923 continue; 1924 } 1925 1926 SPDK_DEBUGLOG(accel_mlx5, "task %p, remaining %u\n", task, 1927 task->num_reqs - task->num_completed_reqs); 1928 if (task->num_completed_reqs == task->num_reqs) { 1929 STAILQ_REMOVE_HEAD(&qp->in_hw, link); 1930 accel_mlx5_task_complete(task); 1931 } else { 1932 assert(task->num_submitted_reqs < task->num_reqs); 1933 assert(task->num_completed_reqs == task->num_submitted_reqs); 1934 STAILQ_REMOVE_HEAD(&qp->in_hw, link); 1935 rc = accel_mlx5_task_continue(task); 1936 if (spdk_unlikely(rc)) { 1937 if (rc != -ENOMEM) { 1938 accel_mlx5_task_fail(task, rc); 1939 } 1940 } 1941 } 1942 } 1943 1944 return reaped; 1945 } 1946 1947 static inline void 1948 accel_mlx5_resubmit_nomem_tasks(struct accel_mlx5_dev *dev) 1949 { 1950 struct accel_mlx5_task *task, *tmp, *last; 1951 int rc; 1952 1953 last = STAILQ_LAST(&dev->nomem, accel_mlx5_task, link); 1954 STAILQ_FOREACH_SAFE(task, &dev->nomem, link, tmp) { 1955 STAILQ_REMOVE_HEAD(&dev->nomem, link); 1956 rc = accel_mlx5_task_continue(task); 1957 if (spdk_unlikely(rc)) { 1958 if (rc != -ENOMEM) { 1959 accel_mlx5_task_fail(task, rc); 1960 } 1961 break; 1962 } 1963 /* If qpair is recovering, task is added back to the nomem list and 0 is returned. 
In that case we 1964 * need a special condition to iterate the list once and stop this FOREACH loop */ 1965 if (task == last) { 1966 break; 1967 } 1968 } 1969 } 1970 1971 static int 1972 accel_mlx5_poller(void *ctx) 1973 { 1974 struct accel_mlx5_io_channel *ch = ctx; 1975 struct accel_mlx5_dev *dev; 1976 1977 int64_t completions = 0, rc; 1978 uint32_t i; 1979 1980 for (i = 0; i < ch->num_devs; i++) { 1981 dev = &ch->devs[i]; 1982 if (dev->wrs_in_cq) { 1983 rc = accel_mlx5_poll_cq(dev); 1984 if (spdk_unlikely(rc < 0)) { 1985 SPDK_ERRLOG("Error %"PRId64" on CQ, dev %s\n", rc, dev->dev_ctx->context->device->name); 1986 } 1987 completions += rc; 1988 if (dev->qp.wrs_submitted) { 1989 spdk_mlx5_qp_complete_send(dev->qp.qp); 1990 } 1991 } 1992 if (!STAILQ_EMPTY(&dev->nomem)) { 1993 accel_mlx5_resubmit_nomem_tasks(dev); 1994 } 1995 } 1996 1997 return !!completions; 1998 } 1999 2000 static bool 2001 accel_mlx5_supports_opcode(enum spdk_accel_opcode opc) 2002 { 2003 assert(g_accel_mlx5.enabled); 2004 2005 switch (opc) { 2006 case SPDK_ACCEL_OPC_COPY: 2007 return true; 2008 case SPDK_ACCEL_OPC_ENCRYPT: 2009 case SPDK_ACCEL_OPC_DECRYPT: 2010 return g_accel_mlx5.crypto_supported; 2011 case SPDK_ACCEL_OPC_CRC32C: 2012 case SPDK_ACCEL_OPC_COPY_CRC32C: 2013 return g_accel_mlx5.crc32c_supported; 2014 default: 2015 return false; 2016 } 2017 } 2018 2019 static struct spdk_io_channel * 2020 accel_mlx5_get_io_channel(void) 2021 { 2022 assert(g_accel_mlx5.enabled); 2023 return spdk_get_io_channel(&g_accel_mlx5); 2024 } 2025 2026 static int 2027 accel_mlx5_create_qp(struct accel_mlx5_dev *dev, struct accel_mlx5_qp *qp) 2028 { 2029 struct spdk_mlx5_qp_attr mlx5_qp_attr = {}; 2030 int rc; 2031 2032 mlx5_qp_attr.cap.max_send_wr = g_accel_mlx5.attr.qp_size; 2033 mlx5_qp_attr.cap.max_recv_wr = 0; 2034 mlx5_qp_attr.cap.max_send_sge = ACCEL_MLX5_MAX_SGE; 2035 mlx5_qp_attr.cap.max_inline_data = sizeof(struct ibv_sge) * ACCEL_MLX5_MAX_SGE; 2036 2037 rc = spdk_mlx5_qp_create(dev->dev_ctx->pd, dev->cq, &mlx5_qp_attr, &qp->qp); 2038 if (rc) { 2039 return rc; 2040 } 2041 2042 STAILQ_INIT(&qp->in_hw); 2043 qp->dev = dev; 2044 qp->verbs_qp = spdk_mlx5_qp_get_verbs_qp(qp->qp); 2045 assert(qp->verbs_qp); 2046 qp->wrs_max = g_accel_mlx5.attr.qp_size; 2047 2048 return 0; 2049 } 2050 2051 static void 2052 accel_mlx5_add_stats(struct accel_mlx5_stats *stats, const struct accel_mlx5_stats *to_add) 2053 { 2054 int i; 2055 2056 stats->crypto_umrs += to_add->crypto_umrs; 2057 stats->sig_umrs += to_add->sig_umrs; 2058 stats->rdma_reads += to_add->rdma_reads; 2059 stats->rdma_writes += to_add->rdma_writes; 2060 stats->polls += to_add->polls; 2061 stats->idle_polls += to_add->idle_polls; 2062 stats->completions += to_add->completions; 2063 stats->nomem_qdepth += to_add->nomem_qdepth; 2064 stats->nomem_mkey += to_add->nomem_mkey; 2065 for (i = 0; i < ACCEL_MLX5_OPC_LAST; i++) { 2066 stats->opcodes[i] += to_add->opcodes[i]; 2067 } 2068 } 2069 2070 static void 2071 accel_mlx5_destroy_cb(void *io_device, void *ctx_buf) 2072 { 2073 struct accel_mlx5_io_channel *ch = ctx_buf; 2074 struct accel_mlx5_dev *dev; 2075 uint32_t i; 2076 2077 spdk_poller_unregister(&ch->poller); 2078 for (i = 0; i < ch->num_devs; i++) { 2079 dev = &ch->devs[i]; 2080 spdk_mlx5_qp_destroy(dev->qp.qp); 2081 if (dev->cq) { 2082 spdk_mlx5_cq_destroy(dev->cq); 2083 } 2084 spdk_poller_unregister(&dev->qp.recover_poller); 2085 if (dev->crypto_mkeys) { 2086 spdk_mlx5_mkey_pool_put_ref(dev->crypto_mkeys); 2087 } 2088 if (dev->sig_mkeys) { 2089 
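		/* Drop this channel's reference on the signature mkey pool taken in accel_mlx5_create_cb();
		 * the pool itself is destroyed later in accel_mlx5_free_resources(). */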
spdk_mlx5_mkey_pool_put_ref(dev->sig_mkeys); 2090 } 2091 spdk_rdma_utils_free_mem_map(&dev->mmap); 2092 spdk_spin_lock(&g_accel_mlx5.lock); 2093 accel_mlx5_add_stats(&g_accel_mlx5.stats, &dev->stats); 2094 spdk_spin_unlock(&g_accel_mlx5.lock); 2095 } 2096 free(ch->devs); 2097 } 2098 2099 static int 2100 accel_mlx5_create_cb(void *io_device, void *ctx_buf) 2101 { 2102 struct spdk_mlx5_cq_attr cq_attr = {}; 2103 struct accel_mlx5_io_channel *ch = ctx_buf; 2104 struct accel_mlx5_dev_ctx *dev_ctx; 2105 struct accel_mlx5_dev *dev; 2106 uint32_t i; 2107 int rc; 2108 2109 ch->devs = calloc(g_accel_mlx5.num_ctxs, sizeof(*ch->devs)); 2110 if (!ch->devs) { 2111 SPDK_ERRLOG("Memory allocation failed\n"); 2112 return -ENOMEM; 2113 } 2114 2115 for (i = 0; i < g_accel_mlx5.num_ctxs; i++) { 2116 dev_ctx = &g_accel_mlx5.dev_ctxs[i]; 2117 dev = &ch->devs[i]; 2118 dev->dev_ctx = dev_ctx; 2119 2120 if (dev_ctx->crypto_mkeys) { 2121 dev->crypto_mkeys = spdk_mlx5_mkey_pool_get_ref(dev_ctx->pd, SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO); 2122 if (!dev->crypto_mkeys) { 2123 SPDK_ERRLOG("Failed to get crypto mkey pool channel, dev %s\n", dev_ctx->context->device->name); 2124 /* Should not happen since mkey pool is created on accel_mlx5 initialization. 2125 * We should not be here if pool creation failed */ 2126 assert(0); 2127 goto err_out; 2128 } 2129 } 2130 if (dev_ctx->sig_mkeys) { 2131 dev->sig_mkeys = spdk_mlx5_mkey_pool_get_ref(dev_ctx->pd, SPDK_MLX5_MKEY_POOL_FLAG_SIGNATURE); 2132 if (!dev->sig_mkeys) { 2133 SPDK_ERRLOG("Failed to get sig mkey pool channel, dev %s\n", dev_ctx->context->device->name); 2134 /* Should not happen since mkey pool is created on accel_mlx5 initialization. 2135 * We should not be here if pool creation failed */ 2136 assert(0); 2137 goto err_out; 2138 } 2139 } 2140 2141 memset(&cq_attr, 0, sizeof(cq_attr)); 2142 cq_attr.cqe_cnt = g_accel_mlx5.attr.qp_size; 2143 cq_attr.cqe_size = 64; 2144 cq_attr.cq_context = dev; 2145 2146 ch->num_devs++; 2147 rc = spdk_mlx5_cq_create(dev_ctx->pd, &cq_attr, &dev->cq); 2148 if (rc) { 2149 SPDK_ERRLOG("Failed to create mlx5 CQ, rc %d\n", rc); 2150 goto err_out; 2151 } 2152 2153 rc = accel_mlx5_create_qp(dev, &dev->qp); 2154 if (rc) { 2155 SPDK_ERRLOG("Failed to create mlx5 QP, rc %d\n", rc); 2156 goto err_out; 2157 } 2158 2159 dev->mmap = spdk_rdma_utils_create_mem_map(dev_ctx->pd, NULL, 2160 IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE); 2161 if (!dev->mmap) { 2162 SPDK_ERRLOG("Failed to create memory map\n"); 2163 rc = -ENOMEM; 2164 goto err_out; 2165 } 2166 dev->crypto_multi_block = dev_ctx->crypto_multi_block; 2167 dev->crypto_split_blocks = dev_ctx->crypto_multi_block ? 
g_accel_mlx5.attr.crypto_split_blocks : 0; 2168 dev->wrs_in_cq_max = g_accel_mlx5.attr.qp_size; 2169 STAILQ_INIT(&dev->nomem); 2170 } 2171 2172 ch->poller = SPDK_POLLER_REGISTER(accel_mlx5_poller, ch, 0); 2173 2174 return 0; 2175 2176 err_out: 2177 accel_mlx5_destroy_cb(&g_accel_mlx5, ctx_buf); 2178 return rc; 2179 } 2180 2181 void 2182 accel_mlx5_get_default_attr(struct accel_mlx5_attr *attr) 2183 { 2184 assert(attr); 2185 2186 attr->qp_size = ACCEL_MLX5_QP_SIZE; 2187 attr->num_requests = ACCEL_MLX5_NUM_REQUESTS; 2188 attr->allowed_devs = NULL; 2189 attr->crypto_split_blocks = 0; 2190 } 2191 2192 static void 2193 accel_mlx5_allowed_devs_free(void) 2194 { 2195 size_t i; 2196 2197 if (!g_accel_mlx5.allowed_devs) { 2198 return; 2199 } 2200 2201 for (i = 0; i < g_accel_mlx5.allowed_devs_count; i++) { 2202 free(g_accel_mlx5.allowed_devs[i]); 2203 } 2204 free(g_accel_mlx5.attr.allowed_devs); 2205 free(g_accel_mlx5.allowed_devs); 2206 g_accel_mlx5.attr.allowed_devs = NULL; 2207 g_accel_mlx5.allowed_devs = NULL; 2208 g_accel_mlx5.allowed_devs_count = 0; 2209 } 2210 2211 static int 2212 accel_mlx5_allowed_devs_parse(const char *allowed_devs) 2213 { 2214 char *str, *tmp, *tok; 2215 size_t devs_count = 0; 2216 2217 str = strdup(allowed_devs); 2218 if (!str) { 2219 return -ENOMEM; 2220 } 2221 2222 accel_mlx5_allowed_devs_free(); 2223 2224 tmp = str; 2225 while ((tmp = strchr(tmp, ',')) != NULL) { 2226 tmp++; 2227 devs_count++; 2228 } 2229 devs_count++; 2230 2231 g_accel_mlx5.allowed_devs = calloc(devs_count, sizeof(char *)); 2232 if (!g_accel_mlx5.allowed_devs) { 2233 free(str); 2234 return -ENOMEM; 2235 } 2236 2237 devs_count = 0; 2238 tok = strtok(str, ","); 2239 while (tok) { 2240 g_accel_mlx5.allowed_devs[devs_count] = strdup(tok); 2241 if (!g_accel_mlx5.allowed_devs[devs_count]) { 2242 free(str); 2243 accel_mlx5_allowed_devs_free(); 2244 return -ENOMEM; 2245 } 2246 tok = strtok(NULL, ","); 2247 devs_count++; 2248 g_accel_mlx5.allowed_devs_count++; 2249 } 2250 2251 free(str); 2252 2253 return 0; 2254 } 2255 2256 int 2257 accel_mlx5_enable(struct accel_mlx5_attr *attr) 2258 { 2259 int rc; 2260 2261 if (g_accel_mlx5.enabled) { 2262 return -EEXIST; 2263 } 2264 if (attr) { 2265 if (attr->num_requests / spdk_env_get_core_count() < ACCEL_MLX5_MAX_MKEYS_IN_TASK) { 2266 SPDK_ERRLOG("num requests per core must not be less than %u, current value %u\n", 2267 ACCEL_MLX5_MAX_MKEYS_IN_TASK, attr->num_requests / spdk_env_get_core_count()); 2268 return -EINVAL; 2269 } 2270 if (attr->qp_size < 8) { 2271 SPDK_ERRLOG("qp_size must be at least 8\n"); 2272 return -EINVAL; 2273 } 2274 g_accel_mlx5.attr = *attr; 2275 g_accel_mlx5.attr.allowed_devs = NULL; 2276 2277 if (attr->allowed_devs) { 2278 /* Contains a copy of user's string */ 2279 g_accel_mlx5.attr.allowed_devs = strndup(attr->allowed_devs, ACCEL_MLX5_ALLOWED_DEVS_MAX_LEN); 2280 if (!g_accel_mlx5.attr.allowed_devs) { 2281 return -ENOMEM; 2282 } 2283 rc = accel_mlx5_allowed_devs_parse(g_accel_mlx5.attr.allowed_devs); 2284 if (rc) { 2285 return rc; 2286 } 2287 rc = spdk_mlx5_crypto_devs_allow((const char *const *)g_accel_mlx5.allowed_devs, 2288 g_accel_mlx5.allowed_devs_count); 2289 if (rc) { 2290 accel_mlx5_allowed_devs_free(); 2291 return rc; 2292 } 2293 } 2294 } else { 2295 accel_mlx5_get_default_attr(&g_accel_mlx5.attr); 2296 } 2297 2298 g_accel_mlx5.enabled = true; 2299 spdk_accel_module_list_add(&g_accel_mlx5.module); 2300 2301 return 0; 2302 } 2303 2304 static void 2305 accel_mlx5_psvs_release(struct accel_mlx5_dev_ctx *dev_ctx) 2306 { 2307 uint32_t i, 
num_psvs, num_psvs_in_pool; 2308 2309 if (!dev_ctx->psvs) { 2310 return; 2311 } 2312 2313 num_psvs = g_accel_mlx5.attr.num_requests; 2314 2315 for (i = 0; i < num_psvs; i++) { 2316 if (dev_ctx->psvs[i]) { 2317 spdk_mlx5_destroy_psv(dev_ctx->psvs[i]); 2318 dev_ctx->psvs[i] = NULL; 2319 } 2320 } 2321 free(dev_ctx->psvs); 2322 2323 if (!dev_ctx->psv_pool) { 2324 return; 2325 } 2326 num_psvs_in_pool = spdk_mempool_count(dev_ctx->psv_pool); 2327 if (num_psvs_in_pool != num_psvs) { 2328 SPDK_ERRLOG("Expected %u reqs in the pool, but got only %u\n", num_psvs, num_psvs_in_pool); 2329 } 2330 spdk_mempool_free(dev_ctx->psv_pool); 2331 } 2332 2333 static void 2334 accel_mlx5_free_resources(void) 2335 { 2336 struct accel_mlx5_dev_ctx *dev_ctx; 2337 uint32_t i; 2338 2339 for (i = 0; i < g_accel_mlx5.num_ctxs; i++) { 2340 dev_ctx = &g_accel_mlx5.dev_ctxs[i]; 2341 accel_mlx5_psvs_release(dev_ctx); 2342 if (dev_ctx->pd) { 2343 if (dev_ctx->crypto_mkeys) { 2344 spdk_mlx5_mkey_pool_destroy(SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO, dev_ctx->pd); 2345 } 2346 if (dev_ctx->sig_mkeys) { 2347 spdk_mlx5_mkey_pool_destroy(SPDK_MLX5_MKEY_POOL_FLAG_SIGNATURE, dev_ctx->pd); 2348 } 2349 spdk_rdma_utils_put_pd(dev_ctx->pd); 2350 } 2351 if (dev_ctx->domain) { 2352 spdk_rdma_utils_put_memory_domain(dev_ctx->domain); 2353 } 2354 } 2355 2356 free(g_accel_mlx5.dev_ctxs); 2357 g_accel_mlx5.dev_ctxs = NULL; 2358 g_accel_mlx5.initialized = false; 2359 } 2360 2361 static void 2362 accel_mlx5_deinit_cb(void *ctx) 2363 { 2364 accel_mlx5_free_resources(); 2365 spdk_spin_destroy(&g_accel_mlx5.lock); 2366 spdk_accel_module_finish(); 2367 } 2368 2369 static void 2370 accel_mlx5_deinit(void *ctx) 2371 { 2372 if (g_accel_mlx5.allowed_devs) { 2373 accel_mlx5_allowed_devs_free(); 2374 } 2375 spdk_mlx5_crypto_devs_allow(NULL, 0); 2376 if (g_accel_mlx5.initialized) { 2377 spdk_io_device_unregister(&g_accel_mlx5, accel_mlx5_deinit_cb); 2378 } else { 2379 spdk_accel_module_finish(); 2380 } 2381 } 2382 2383 static int 2384 accel_mlx5_mkeys_create(struct ibv_pd *pd, uint32_t num_mkeys, uint32_t flags) 2385 { 2386 struct spdk_mlx5_mkey_pool_param pool_param = {}; 2387 2388 pool_param.mkey_count = num_mkeys; 2389 pool_param.cache_per_thread = num_mkeys * 3 / 4 / spdk_env_get_core_count(); 2390 pool_param.flags = flags; 2391 2392 return spdk_mlx5_mkey_pool_init(&pool_param, pd); 2393 } 2394 2395 static void 2396 accel_mlx5_set_psv_in_pool(struct spdk_mempool *mp, void *cb_arg, void *_psv, unsigned obj_idx) 2397 { 2398 struct spdk_rdma_utils_memory_translation translation = {}; 2399 struct accel_mlx5_psv_pool_iter_cb_args *args = cb_arg; 2400 struct accel_mlx5_psv_wrapper *wrapper = _psv; 2401 struct accel_mlx5_dev_ctx *dev_ctx = args->dev; 2402 int rc; 2403 2404 if (args->rc) { 2405 return; 2406 } 2407 assert(obj_idx < g_accel_mlx5.attr.num_requests); 2408 assert(dev_ctx->psvs[obj_idx] != NULL); 2409 memset(wrapper, 0, sizeof(*wrapper)); 2410 wrapper->psv_index = dev_ctx->psvs[obj_idx]->index; 2411 2412 rc = spdk_rdma_utils_get_translation(args->map, &wrapper->crc, sizeof(uint32_t), &translation); 2413 if (rc) { 2414 SPDK_ERRLOG("Memory translation failed, addr %p, length %zu\n", &wrapper->crc, sizeof(uint32_t)); 2415 args->rc = -EINVAL; 2416 } else { 2417 wrapper->crc_lkey = spdk_rdma_utils_memory_translation_get_lkey(&translation); 2418 } 2419 } 2420 2421 static int 2422 accel_mlx5_psvs_create(struct accel_mlx5_dev_ctx *dev_ctx) 2423 { 2424 struct accel_mlx5_psv_pool_iter_cb_args args = { 2425 .dev = dev_ctx 2426 }; 2427 char pool_name[32]; 2428 uint32_t 
i; 2429 uint32_t num_psvs = g_accel_mlx5.attr.num_requests; 2430 uint32_t cache_size; 2431 int rc; 2432 2433 dev_ctx->psvs = calloc(num_psvs, (sizeof(struct spdk_mlx5_psv *))); 2434 if (!dev_ctx->psvs) { 2435 SPDK_ERRLOG("Failed to alloc PSVs array\n"); 2436 return -ENOMEM; 2437 } 2438 for (i = 0; i < num_psvs; i++) { 2439 dev_ctx->psvs[i] = spdk_mlx5_create_psv(dev_ctx->pd); 2440 if (!dev_ctx->psvs[i]) { 2441 SPDK_ERRLOG("Failed to create PSV on dev %s\n", dev_ctx->context->device->name); 2442 return -EINVAL; 2443 } 2444 } 2445 2446 rc = snprintf(pool_name, sizeof(pool_name), "accel_psv_%s", dev_ctx->context->device->name); 2447 if (rc < 0) { 2448 assert(0); 2449 return -EINVAL; 2450 } 2451 cache_size = num_psvs * 3 / 4 / spdk_env_get_core_count(); 2452 args.map = spdk_rdma_utils_create_mem_map(dev_ctx->pd, NULL, 2453 IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE); 2454 if (!args.map) { 2455 return -ENOMEM; 2456 } 2457 dev_ctx->psv_pool = spdk_mempool_create_ctor(pool_name, num_psvs, 2458 sizeof(struct accel_mlx5_psv_wrapper), 2459 cache_size, SPDK_ENV_SOCKET_ID_ANY, 2460 accel_mlx5_set_psv_in_pool, &args); 2461 spdk_rdma_utils_free_mem_map(&args.map); 2462 if (!dev_ctx->psv_pool) { 2463 SPDK_ERRLOG("Failed to create PSV memory pool\n"); 2464 return -ENOMEM; 2465 } 2466 if (args.rc) { 2467 SPDK_ERRLOG("Failed to init PSV memory pool objects, rc %d\n", args.rc); 2468 return args.rc; 2469 } 2470 2471 return 0; 2472 } 2473 2474 2475 static int 2476 accel_mlx5_dev_ctx_init(struct accel_mlx5_dev_ctx *dev_ctx, struct ibv_context *dev, 2477 struct spdk_mlx5_device_caps *caps) 2478 { 2479 struct ibv_pd *pd; 2480 int rc; 2481 2482 pd = spdk_rdma_utils_get_pd(dev); 2483 if (!pd) { 2484 SPDK_ERRLOG("Failed to get PD for context %p, dev %s\n", dev, dev->device->name); 2485 return -EINVAL; 2486 } 2487 dev_ctx->context = dev; 2488 dev_ctx->pd = pd; 2489 dev_ctx->domain = spdk_rdma_utils_get_memory_domain(pd); 2490 if (!dev_ctx->domain) { 2491 return -ENOMEM; 2492 } 2493 2494 if (g_accel_mlx5.crypto_supported) { 2495 dev_ctx->crypto_multi_block = caps->crypto.multi_block_be_tweak; 2496 if (!dev_ctx->crypto_multi_block && g_accel_mlx5.attr.crypto_split_blocks) { 2497 SPDK_WARNLOG("\"crypto_split_blocks\" is set but dev %s doesn't support multi block crypto\n", 2498 dev->device->name); 2499 } 2500 rc = accel_mlx5_mkeys_create(pd, g_accel_mlx5.attr.num_requests, SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO); 2501 if (rc) { 2502 SPDK_ERRLOG("Failed to create crypto mkeys pool, rc %d, dev %s\n", rc, dev->device->name); 2503 return rc; 2504 } 2505 dev_ctx->crypto_mkeys = true; 2506 } 2507 if (g_accel_mlx5.crc32c_supported) { 2508 rc = accel_mlx5_mkeys_create(pd, g_accel_mlx5.attr.num_requests, 2509 SPDK_MLX5_MKEY_POOL_FLAG_SIGNATURE); 2510 if (rc) { 2511 SPDK_ERRLOG("Failed to create signature mkeys pool, rc %d, dev %s\n", rc, dev->device->name); 2512 return rc; 2513 } 2514 dev_ctx->sig_mkeys = true; 2515 rc = accel_mlx5_psvs_create(dev_ctx); 2516 if (rc) { 2517 SPDK_ERRLOG("Failed to create PSVs pool, rc %d, dev %s\n", rc, dev->device->name); 2518 return rc; 2519 } 2520 } 2521 2522 return 0; 2523 } 2524 2525 static struct ibv_context ** 2526 accel_mlx5_get_devices(int *_num_devs) 2527 { 2528 struct ibv_context **rdma_devs, **rdma_devs_out = NULL, *dev; 2529 struct ibv_device_attr dev_attr; 2530 size_t j; 2531 int num_devs = 0, i, rc; 2532 int num_devs_out = 0; 2533 bool dev_allowed; 2534 2535 rdma_devs = rdma_get_devices(&num_devs); 2536 if (!rdma_devs || !num_devs) { 2537 *_num_devs = 0; 2538 
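		/* Either no RDMA-capable devices are present or enumeration failed;
		 * accel_mlx5_init() treats an empty device list as -ENODEV. */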
return NULL; 2539 } 2540 2541 rdma_devs_out = calloc(num_devs + 1, sizeof(struct ibv_context *)); 2542 if (!rdma_devs_out) { 2543 SPDK_ERRLOG("Memory allocation failed\n"); 2544 rdma_free_devices(rdma_devs); 2545 *_num_devs = 0; 2546 return NULL; 2547 } 2548 2549 for (i = 0; i < num_devs; i++) { 2550 dev = rdma_devs[i]; 2551 rc = ibv_query_device(dev, &dev_attr); 2552 if (rc) { 2553 SPDK_ERRLOG("Failed to query dev %s, skipping\n", dev->device->name); 2554 continue; 2555 } 2556 if (dev_attr.vendor_id != SPDK_MLX5_VENDOR_ID_MELLANOX) { 2557 SPDK_DEBUGLOG(accel_mlx5, "dev %s is not Mellanox device, skipping\n", dev->device->name); 2558 continue; 2559 } 2560 2561 if (g_accel_mlx5.allowed_devs_count) { 2562 dev_allowed = false; 2563 for (j = 0; j < g_accel_mlx5.allowed_devs_count; j++) { 2564 if (strcmp(g_accel_mlx5.allowed_devs[j], dev->device->name) == 0) { 2565 dev_allowed = true; 2566 break; 2567 } 2568 } 2569 if (!dev_allowed) { 2570 continue; 2571 } 2572 } 2573 2574 rdma_devs_out[num_devs_out] = dev; 2575 num_devs_out++; 2576 } 2577 2578 rdma_free_devices(rdma_devs); 2579 *_num_devs = num_devs_out; 2580 2581 return rdma_devs_out; 2582 } 2583 2584 static inline bool 2585 accel_mlx5_dev_supports_crypto(struct spdk_mlx5_device_caps *caps) 2586 { 2587 return caps->crypto_supported && !caps->crypto.wrapped_import_method_aes_xts && 2588 (caps->crypto.single_block_le_tweak || 2589 caps->crypto.multi_block_le_tweak || caps->crypto.multi_block_be_tweak); 2590 } 2591 2592 static int 2593 accel_mlx5_init(void) 2594 { 2595 struct spdk_mlx5_device_caps *caps; 2596 struct ibv_context **rdma_devs, *dev; 2597 int num_devs = 0, rc = 0, i; 2598 int best_dev = -1, first_dev = 0; 2599 int best_dev_stat = 0, dev_stat; 2600 bool supports_crypto; 2601 bool find_best_dev = g_accel_mlx5.allowed_devs_count == 0; 2602 2603 if (!g_accel_mlx5.enabled) { 2604 return -EINVAL; 2605 } 2606 2607 spdk_spin_init(&g_accel_mlx5.lock); 2608 rdma_devs = accel_mlx5_get_devices(&num_devs); 2609 if (!rdma_devs || !num_devs) { 2610 return -ENODEV; 2611 } 2612 caps = calloc(num_devs, sizeof(*caps)); 2613 if (!caps) { 2614 rc = -ENOMEM; 2615 goto cleanup; 2616 } 2617 2618 g_accel_mlx5.crypto_supported = true; 2619 g_accel_mlx5.crc32c_supported = true; 2620 g_accel_mlx5.num_ctxs = 0; 2621 2622 /* Iterate devices. 
We support an offload if all devices support it */ 2623 for (i = 0; i < num_devs; i++) { 2624 dev = rdma_devs[i]; 2625 2626 rc = spdk_mlx5_device_query_caps(dev, &caps[i]); 2627 if (rc) { 2628 SPDK_ERRLOG("Failed to get crypto caps, dev %s\n", dev->device->name); 2629 goto cleanup; 2630 } 2631 supports_crypto = accel_mlx5_dev_supports_crypto(&caps[i]); 2632 if (!supports_crypto) { 2633 SPDK_DEBUGLOG(accel_mlx5, "Disable crypto support because dev %s doesn't support it\n", 2634 rdma_devs[i]->device->name); 2635 g_accel_mlx5.crypto_supported = false; 2636 } 2637 if (!caps[i].crc32c_supported) { 2638 SPDK_DEBUGLOG(accel_mlx5, "Disable crc32c support because dev %s doesn't support it\n", 2639 rdma_devs[i]->device->name); 2640 g_accel_mlx5.crc32c_supported = false; 2641 } 2642 if (find_best_dev) { 2643 /* Find device which supports max number of offloads */ 2644 dev_stat = (int)supports_crypto + (int)caps[i].crc32c_supported; 2645 if (dev_stat > best_dev_stat) { 2646 best_dev_stat = dev_stat; 2647 best_dev = i; 2648 } 2649 } 2650 } 2651 2652 /* User didn't specify devices to use, try to select the best one */ 2653 if (find_best_dev) { 2654 if (best_dev == -1) { 2655 best_dev = 0; 2656 } 2657 g_accel_mlx5.crypto_supported = accel_mlx5_dev_supports_crypto(&caps[best_dev]); 2658 g_accel_mlx5.crc32c_supported = caps[best_dev].crc32c_supported; 2659 SPDK_NOTICELOG("Select dev %s, crypto %d, crc32c %d\n", rdma_devs[best_dev]->device->name, 2660 g_accel_mlx5.crypto_supported, g_accel_mlx5.crc32c_supported); 2661 first_dev = best_dev; 2662 num_devs = 1; 2663 if (g_accel_mlx5.crypto_supported) { 2664 const char *const dev_name[] = { rdma_devs[best_dev]->device->name }; 2665 /* Let mlx5 library know which device to use */ 2666 spdk_mlx5_crypto_devs_allow(dev_name, 1); 2667 } 2668 } else { 2669 SPDK_NOTICELOG("Found %d devices, crypto %d\n", num_devs, g_accel_mlx5.crypto_supported); 2670 } 2671 2672 g_accel_mlx5.dev_ctxs = calloc(num_devs, sizeof(*g_accel_mlx5.dev_ctxs)); 2673 if (!g_accel_mlx5.dev_ctxs) { 2674 SPDK_ERRLOG("Memory allocation failed\n"); 2675 rc = -ENOMEM; 2676 goto cleanup; 2677 } 2678 2679 for (i = first_dev; i < first_dev + num_devs; i++) { 2680 rc = accel_mlx5_dev_ctx_init(&g_accel_mlx5.dev_ctxs[g_accel_mlx5.num_ctxs++], 2681 rdma_devs[i], &caps[i]); 2682 if (rc) { 2683 goto cleanup; 2684 } 2685 } 2686 2687 SPDK_NOTICELOG("Accel framework mlx5 initialized, found %d devices.\n", num_devs); 2688 spdk_io_device_register(&g_accel_mlx5, accel_mlx5_create_cb, accel_mlx5_destroy_cb, 2689 sizeof(struct accel_mlx5_io_channel), "accel_mlx5"); 2690 g_accel_mlx5.initialized = true; 2691 free(rdma_devs); 2692 free(caps); 2693 2694 return 0; 2695 2696 cleanup: 2697 free(rdma_devs); 2698 free(caps); 2699 accel_mlx5_free_resources(); 2700 spdk_spin_destroy(&g_accel_mlx5.lock); 2701 2702 return rc; 2703 } 2704 2705 static void 2706 accel_mlx5_write_config_json(struct spdk_json_write_ctx *w) 2707 { 2708 if (g_accel_mlx5.enabled) { 2709 spdk_json_write_object_begin(w); 2710 spdk_json_write_named_string(w, "method", "mlx5_scan_accel_module"); 2711 spdk_json_write_named_object_begin(w, "params"); 2712 spdk_json_write_named_uint16(w, "qp_size", g_accel_mlx5.attr.qp_size); 2713 spdk_json_write_named_uint32(w, "num_requests", g_accel_mlx5.attr.num_requests); 2714 if (g_accel_mlx5.attr.allowed_devs) { 2715 spdk_json_write_named_string(w, "allowed_devs", g_accel_mlx5.attr.allowed_devs); 2716 } 2717 spdk_json_write_named_uint16(w, "crypto_split_blocks", g_accel_mlx5.attr.crypto_split_blocks); 2718 
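	/* Close "params" and the method object. The generated entry mirrors the
	 * "mlx5_scan_accel_module" RPC, e.g. (field values are illustrative):
	 *   { "method": "mlx5_scan_accel_module",
	 *     "params": { "qp_size": ..., "num_requests": ..., "allowed_devs": "mlx5_0", "crypto_split_blocks": ... } }
	 * "allowed_devs" is emitted only when the user configured it. */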
spdk_json_write_object_end(w); 2719 spdk_json_write_object_end(w); 2720 } 2721 } 2722 2723 static size_t 2724 accel_mlx5_get_ctx_size(void) 2725 { 2726 return sizeof(struct accel_mlx5_task); 2727 } 2728 2729 static int 2730 accel_mlx5_crypto_key_init(struct spdk_accel_crypto_key *key) 2731 { 2732 struct spdk_mlx5_crypto_dek_create_attr attr = {}; 2733 struct spdk_mlx5_crypto_keytag *keytag; 2734 int rc; 2735 2736 if (!key || !key->key || !key->key2 || !key->key_size || !key->key2_size) { 2737 return -EINVAL; 2738 } 2739 2740 attr.dek = calloc(1, key->key_size + key->key2_size); 2741 if (!attr.dek) { 2742 return -ENOMEM; 2743 } 2744 2745 memcpy(attr.dek, key->key, key->key_size); 2746 memcpy(attr.dek + key->key_size, key->key2, key->key2_size); 2747 attr.dek_len = key->key_size + key->key2_size; 2748 2749 rc = spdk_mlx5_crypto_keytag_create(&attr, &keytag); 2750 spdk_memset_s(attr.dek, attr.dek_len, 0, attr.dek_len); 2751 free(attr.dek); 2752 if (rc) { 2753 SPDK_ERRLOG("Failed to create a keytag, rc %d\n", rc); 2754 return rc; 2755 } 2756 2757 key->priv = keytag; 2758 2759 return 0; 2760 } 2761 2762 static void 2763 accel_mlx5_crypto_key_deinit(struct spdk_accel_crypto_key *key) 2764 { 2765 if (!key || key->module_if != &g_accel_mlx5.module || !key->priv) { 2766 return; 2767 } 2768 2769 spdk_mlx5_crypto_keytag_destroy(key->priv); 2770 } 2771 2772 static void 2773 accel_mlx5_dump_stats_json(struct spdk_json_write_ctx *w, const char *header, 2774 const struct accel_mlx5_stats *stats) 2775 { 2776 double idle_polls_percentage = 0; 2777 double cpls_per_poll = 0; 2778 uint64_t total_tasks = 0; 2779 int i; 2780 2781 if (stats->polls) { 2782 idle_polls_percentage = (double) stats->idle_polls * 100 / stats->polls; 2783 } 2784 if (stats->polls > stats->idle_polls) { 2785 cpls_per_poll = (double) stats->completions / (stats->polls - stats->idle_polls); 2786 } 2787 for (i = 0; i < ACCEL_MLX5_OPC_LAST; i++) { 2788 total_tasks += stats->opcodes[i]; 2789 } 2790 2791 spdk_json_write_named_object_begin(w, header); 2792 2793 spdk_json_write_named_object_begin(w, "umrs"); 2794 spdk_json_write_named_uint64(w, "crypto_umrs", stats->crypto_umrs); 2795 spdk_json_write_named_uint64(w, "sig_umrs", stats->sig_umrs); 2796 spdk_json_write_named_uint64(w, "total", stats->crypto_umrs + stats->sig_umrs); 2797 spdk_json_write_object_end(w); 2798 2799 spdk_json_write_named_object_begin(w, "rdma"); 2800 spdk_json_write_named_uint64(w, "read", stats->rdma_reads); 2801 spdk_json_write_named_uint64(w, "write", stats->rdma_writes); 2802 spdk_json_write_named_uint64(w, "total", stats->rdma_reads + stats->rdma_writes); 2803 spdk_json_write_object_end(w); 2804 2805 spdk_json_write_named_object_begin(w, "polling"); 2806 spdk_json_write_named_uint64(w, "polls", stats->polls); 2807 spdk_json_write_named_uint64(w, "idle_polls", stats->idle_polls); 2808 spdk_json_write_named_uint64(w, "completions", stats->completions); 2809 spdk_json_write_named_double(w, "idle_polls_percentage", idle_polls_percentage); 2810 spdk_json_write_named_double(w, "cpls_per_poll", cpls_per_poll); 2811 spdk_json_write_named_uint64(w, "nomem_qdepth", stats->nomem_qdepth); 2812 spdk_json_write_named_uint64(w, "nomem_mkey", stats->nomem_mkey); 2813 spdk_json_write_object_end(w); 2814 2815 spdk_json_write_named_object_begin(w, "tasks"); 2816 spdk_json_write_named_uint64(w, "copy", stats->opcodes[ACCEL_MLX5_OPC_COPY]); 2817 spdk_json_write_named_uint64(w, "crypto", stats->opcodes[ACCEL_MLX5_OPC_CRYPTO]); 2818 spdk_json_write_named_uint64(w, "crc32c", 
stats->opcodes[ACCEL_MLX5_OPC_CRC32C]); 2819 spdk_json_write_named_uint64(w, "total", total_tasks); 2820 spdk_json_write_object_end(w); 2821 2822 spdk_json_write_object_end(w); 2823 } 2824 2825 static void 2826 accel_mlx5_dump_channel_stat(struct spdk_io_channel_iter *i) 2827 { 2828 struct accel_mlx5_stats ch_stat = {}; 2829 struct accel_mlx5_dump_stats_ctx *ctx; 2830 struct spdk_io_channel *_ch; 2831 struct accel_mlx5_io_channel *ch; 2832 struct accel_mlx5_dev *dev; 2833 uint32_t j; 2834 2835 ctx = spdk_io_channel_iter_get_ctx(i); 2836 _ch = spdk_io_channel_iter_get_channel(i); 2837 ch = spdk_io_channel_get_ctx(_ch); 2838 2839 if (ctx->level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) { 2840 spdk_json_write_object_begin(ctx->w); 2841 spdk_json_write_named_object_begin(ctx->w, spdk_thread_get_name(spdk_get_thread())); 2842 } 2843 if (ctx->level == ACCEL_MLX5_DUMP_STAT_LEVEL_DEV) { 2844 spdk_json_write_named_array_begin(ctx->w, "devices"); 2845 } 2846 2847 for (j = 0; j < ch->num_devs; j++) { 2848 dev = &ch->devs[j]; 2849 /* Save grand total and channel stats */ 2850 accel_mlx5_add_stats(&ctx->total, &dev->stats); 2851 accel_mlx5_add_stats(&ch_stat, &dev->stats); 2852 if (ctx->level == ACCEL_MLX5_DUMP_STAT_LEVEL_DEV) { 2853 spdk_json_write_object_begin(ctx->w); 2854 accel_mlx5_dump_stats_json(ctx->w, dev->dev_ctx->context->device->name, &dev->stats); 2855 spdk_json_write_object_end(ctx->w); 2856 } 2857 } 2858 2859 if (ctx->level == ACCEL_MLX5_DUMP_STAT_LEVEL_DEV) { 2860 spdk_json_write_array_end(ctx->w); 2861 } 2862 if (ctx->level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) { 2863 accel_mlx5_dump_stats_json(ctx->w, "channel_total", &ch_stat); 2864 spdk_json_write_object_end(ctx->w); 2865 spdk_json_write_object_end(ctx->w); 2866 } 2867 2868 spdk_for_each_channel_continue(i, 0); 2869 } 2870 2871 static void 2872 accel_mlx5_dump_channel_stat_done(struct spdk_io_channel_iter *i, int status) 2873 { 2874 struct accel_mlx5_dump_stats_ctx *ctx; 2875 2876 ctx = spdk_io_channel_iter_get_ctx(i); 2877 2878 spdk_spin_lock(&g_accel_mlx5.lock); 2879 /* Add statistics from destroyed channels */ 2880 accel_mlx5_add_stats(&ctx->total, &g_accel_mlx5.stats); 2881 spdk_spin_unlock(&g_accel_mlx5.lock); 2882 2883 if (ctx->level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) { 2884 /* channels[] */ 2885 spdk_json_write_array_end(ctx->w); 2886 } 2887 2888 accel_mlx5_dump_stats_json(ctx->w, "total", &ctx->total); 2889 2890 /* Ends the whole response which was begun in accel_mlx5_dump_stats */ 2891 spdk_json_write_object_end(ctx->w); 2892 2893 ctx->cb(ctx->ctx, 0); 2894 free(ctx); 2895 } 2896 2897 int 2898 accel_mlx5_dump_stats(struct spdk_json_write_ctx *w, enum accel_mlx5_dump_state_level level, 2899 accel_mlx5_dump_stat_done_cb cb, void *ctx) 2900 { 2901 struct accel_mlx5_dump_stats_ctx *stat_ctx; 2902 2903 if (!w || !cb) { 2904 return -EINVAL; 2905 } 2906 if (!g_accel_mlx5.initialized) { 2907 return -ENODEV; 2908 } 2909 2910 stat_ctx = calloc(1, sizeof(*stat_ctx)); 2911 if (!stat_ctx) { 2912 return -ENOMEM; 2913 } 2914 stat_ctx->cb = cb; 2915 stat_ctx->ctx = ctx; 2916 stat_ctx->level = level; 2917 stat_ctx->w = w; 2918 2919 spdk_json_write_object_begin(w); 2920 2921 if (level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) { 2922 spdk_json_write_named_array_begin(w, "channels"); 2923 } 2924 2925 spdk_for_each_channel(&g_accel_mlx5, accel_mlx5_dump_channel_stat, stat_ctx, 2926 accel_mlx5_dump_channel_stat_done); 2927 2928 return 0; 2929 } 2930 2931 static bool 2932 accel_mlx5_crypto_supports_cipher(enum spdk_accel_cipher cipher, size_t key_size) 
2933 { 2934 switch (cipher) { 2935 case SPDK_ACCEL_CIPHER_AES_XTS: 2936 return key_size == SPDK_ACCEL_AES_XTS_128_KEY_SIZE || key_size == SPDK_ACCEL_AES_XTS_256_KEY_SIZE; 2937 default: 2938 return false; 2939 } 2940 } 2941 2942 static int 2943 accel_mlx5_get_memory_domains(struct spdk_memory_domain **domains, int array_size) 2944 { 2945 int i, size; 2946 2947 if (!domains || !array_size) { 2948 return (int)g_accel_mlx5.num_ctxs; 2949 } 2950 2951 size = spdk_min(array_size, (int)g_accel_mlx5.num_ctxs); 2952 2953 for (i = 0; i < size; i++) { 2954 domains[i] = g_accel_mlx5.dev_ctxs[i].domain; 2955 } 2956 2957 return (int)g_accel_mlx5.num_ctxs; 2958 } 2959 2960 static struct accel_mlx5_module g_accel_mlx5 = { 2961 .module = { 2962 .module_init = accel_mlx5_init, 2963 .module_fini = accel_mlx5_deinit, 2964 .write_config_json = accel_mlx5_write_config_json, 2965 .get_ctx_size = accel_mlx5_get_ctx_size, 2966 .name = "mlx5", 2967 .supports_opcode = accel_mlx5_supports_opcode, 2968 .get_io_channel = accel_mlx5_get_io_channel, 2969 .submit_tasks = accel_mlx5_submit_tasks, 2970 .crypto_key_init = accel_mlx5_crypto_key_init, 2971 .crypto_key_deinit = accel_mlx5_crypto_key_deinit, 2972 .crypto_supports_cipher = accel_mlx5_crypto_supports_cipher, 2973 .get_memory_domains = accel_mlx5_get_memory_domains, 2974 } 2975 }; 2976 2977 SPDK_LOG_REGISTER_COMPONENT(accel_mlx5) 2978
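/*
 * Usage sketch (illustrative only, not compiled): the module is normally enabled via the
 * "mlx5_scan_accel_module" RPC, which fills struct accel_mlx5_attr and calls accel_mlx5_enable()
 * at startup, before the accel framework is initialized. A minimal caller could look like the
 * snippet below; the device name is a placeholder, and the error codes shown are the ones
 * returned by accel_mlx5_enable() above:
 *
 *	struct accel_mlx5_attr attr;
 *	int rc;
 *
 *	accel_mlx5_get_default_attr(&attr);
 *	attr.allowed_devs = "mlx5_0";	// optional, comma-separated list of IB device names
 *	rc = accel_mlx5_enable(&attr);
 *	if (rc == -EEXIST) {
 *		// the module has already been enabled
 *	} else if (rc != 0) {
 *		// -EINVAL for invalid qp_size/num_requests, -ENOMEM on allocation failure
 *	}
 */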