/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#include "spdk/env.h"
#include "spdk/thread.h"
#include "spdk/queue.h"
#include "spdk/log.h"
#include "spdk/string.h"
#include "spdk/likely.h"
#include "spdk/dma.h"
#include "spdk/json.h"
#include "spdk/util.h"

#include "spdk_internal/mlx5.h"
#include "spdk_internal/rdma_utils.h"
#include "spdk/accel_module.h"
#include "spdk_internal/assert.h"
#include "spdk_internal/sgl.h"
#include "accel_mlx5.h"

#include <infiniband/mlx5dv.h>
#include <rdma/rdma_cma.h>

#define ACCEL_MLX5_QP_SIZE (256u)
#define ACCEL_MLX5_NUM_REQUESTS (2048u - 1)
#define ACCEL_MLX5_RECOVER_POLLER_PERIOD_US (10000)
#define ACCEL_MLX5_MAX_SGE (16u)
#define ACCEL_MLX5_MAX_WC (64u)
#define ACCEL_MLX5_MAX_MKEYS_IN_TASK (16u)

/* Assume we have up to 16 devices */
#define ACCEL_MLX5_ALLOWED_DEVS_MAX_LEN ((SPDK_MLX5_DEV_MAX_NAME_LEN + 1) * 16)

#define ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, task) \
do { \
	assert((qp)->wrs_submitted < (qp)->wrs_max); \
	(qp)->wrs_submitted++; \
	(qp)->ring_db = true; \
	assert((task)->num_wrs < UINT16_MAX); \
	(task)->num_wrs++; \
} while (0)

#define ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, task) \
do { \
	assert((dev)->wrs_in_cq < (dev)->wrs_in_cq_max); \
	(dev)->wrs_in_cq++; \
	assert((qp)->wrs_submitted < (qp)->wrs_max); \
	(qp)->wrs_submitted++; \
	(qp)->ring_db = true; \
	assert((task)->num_wrs < UINT16_MAX); \
	(task)->num_wrs++; \
} while (0)

struct accel_mlx5_io_channel;
struct accel_mlx5_task;

struct accel_mlx5_dev_ctx {
	struct ibv_context *context;
	struct ibv_pd *pd;
	struct spdk_memory_domain *domain;
	struct spdk_mempool *psv_pool;
	TAILQ_ENTRY(accel_mlx5_dev_ctx) link;
	struct spdk_mlx5_psv **psvs;
	bool crypto_mkeys;
	bool sig_mkeys;
	bool crypto_multi_block;
};

enum accel_mlx5_opcode {
	ACCEL_MLX5_OPC_COPY,
	ACCEL_MLX5_OPC_CRYPTO,
	ACCEL_MLX5_OPC_CRC32C,
	ACCEL_MLX5_OPC_CRYPTO_MKEY,
	ACCEL_MLX5_OPC_LAST
};

SPDK_STATIC_ASSERT(ACCEL_MLX5_OPC_LAST <= 0xf,
		   "accel opcode exceeds 4 bits, update accel_mlx5 struct");

struct accel_mlx5_stats {
	uint64_t crypto_umrs;
	uint64_t sig_umrs;
	uint64_t rdma_reads;
	uint64_t rdma_writes;
	uint64_t polls;
	uint64_t idle_polls;
	uint64_t completions;
	uint64_t nomem_qdepth;
	uint64_t nomem_mkey;
	uint64_t opcodes[ACCEL_MLX5_OPC_LAST];
};

struct accel_mlx5_module {
	struct spdk_accel_module_if module;
	struct accel_mlx5_stats stats;
	struct spdk_spinlock lock;
	struct accel_mlx5_dev_ctx *dev_ctxs;
	uint32_t num_ctxs;
	struct accel_mlx5_attr attr;
	char **allowed_devs;
	size_t allowed_devs_count;
	bool initialized;
	bool enabled;
	bool crypto_supported;
	bool crc32c_supported;
};

struct accel_mlx5_sge {
	uint32_t src_sge_count;
	uint32_t dst_sge_count;
	struct ibv_sge src_sge[ACCEL_MLX5_MAX_SGE];
	struct ibv_sge dst_sge[ACCEL_MLX5_MAX_SGE];
};

struct accel_mlx5_iov_sgl {
	struct iovec *iov;
	uint32_t iovcnt;
	uint32_t iov_offset;
};

struct accel_mlx5_psv_wrapper {
	uint32_t psv_index;
	struct {
		uint32_t error : 1;
		uint32_t reserved : 31;
	} bits;
	/* mlx5 engine requires DMAable memory, use this member to copy user's crc value since we don't know which
	 * memory it is in */
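	/* crc_lkey below is the lkey used when &crc is posted as the trailing SGE of the
	 * CRC RDMA_READ (see accel_mlx5_crc_task_process_one_req()) */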
uint32_t crc; 131 uint32_t crc_lkey; 132 }; 133 134 struct accel_mlx5_task { 135 struct spdk_accel_task base; 136 struct accel_mlx5_iov_sgl src; 137 struct accel_mlx5_iov_sgl dst; 138 struct accel_mlx5_qp *qp; 139 STAILQ_ENTRY(accel_mlx5_task) link; 140 uint16_t num_reqs; 141 uint16_t num_completed_reqs; 142 uint16_t num_submitted_reqs; 143 uint16_t num_ops; /* number of allocated mkeys or number of operations */ 144 uint16_t num_wrs; /* Number of outstanding operations which consume qp slot */ 145 union { 146 struct { 147 uint16_t blocks_per_req; 148 uint16_t num_processed_blocks; 149 uint16_t num_blocks; 150 }; 151 struct { 152 struct accel_mlx5_psv_wrapper *psv; 153 uint32_t last_umr_len; 154 uint8_t last_mkey_idx; 155 }; 156 }; 157 union { 158 uint16_t raw; 159 struct { 160 uint16_t inplace : 1; 161 uint16_t driver_seq : 1; 162 uint16_t needs_data_transfer : 1; 163 uint16_t enc_order : 2; 164 uint16_t mlx5_opcode: 4; 165 }; 166 }; 167 /* Keep this array last since not all elements might be accessed, this reduces amount of data to be 168 * cached */ 169 struct spdk_mlx5_mkey_pool_obj *mkeys[ACCEL_MLX5_MAX_MKEYS_IN_TASK]; 170 }; 171 172 SPDK_STATIC_ASSERT(ACCEL_MLX5_MAX_MKEYS_IN_TASK <= UINT8_MAX, "uint8_t is used to iterate mkeys"); 173 174 struct accel_mlx5_qp { 175 struct spdk_mlx5_qp *qp; 176 struct ibv_qp *verbs_qp; 177 struct accel_mlx5_dev *dev; 178 /* tasks submitted to HW. We can't complete a task even in error case until we reap completions for all 179 * submitted requests */ 180 STAILQ_HEAD(, accel_mlx5_task) in_hw; 181 uint16_t wrs_submitted; 182 uint16_t wrs_max; 183 bool ring_db; 184 bool recovering; 185 struct spdk_poller *recover_poller; 186 }; 187 188 struct accel_mlx5_dev { 189 struct accel_mlx5_qp qp; 190 struct spdk_mlx5_cq *cq; 191 struct spdk_mlx5_mkey_pool *crypto_mkeys; 192 struct spdk_mlx5_mkey_pool *sig_mkeys; 193 struct spdk_rdma_utils_mem_map *mmap; 194 struct accel_mlx5_dev_ctx *dev_ctx; 195 struct spdk_io_channel *ch; 196 uint16_t wrs_in_cq; 197 uint16_t wrs_in_cq_max; 198 uint16_t crypto_split_blocks; 199 bool crypto_multi_block; 200 /* Pending tasks waiting for requests resources */ 201 STAILQ_HEAD(, accel_mlx5_task) nomem; 202 TAILQ_ENTRY(accel_mlx5_dev) link; 203 struct accel_mlx5_stats stats; 204 }; 205 206 struct accel_mlx5_io_channel { 207 struct accel_mlx5_dev *devs; 208 struct spdk_poller *poller; 209 uint16_t num_devs; 210 /* Index in \b devs to be used for operations in round-robin way */ 211 uint16_t dev_idx; 212 bool poller_handler_registered; 213 }; 214 215 struct accel_mlx5_task_operations { 216 int (*init)(struct accel_mlx5_task *task); 217 int (*process)(struct accel_mlx5_task *task); 218 int (*cont)(struct accel_mlx5_task *task); 219 void (*complete)(struct accel_mlx5_task *task); 220 }; 221 222 struct accel_mlx5_psv_pool_iter_cb_args { 223 struct accel_mlx5_dev_ctx *dev; 224 struct spdk_rdma_utils_mem_map *map; 225 int rc; 226 }; 227 228 struct accel_mlx5_dump_stats_ctx { 229 struct accel_mlx5_stats total; 230 struct spdk_json_write_ctx *w; 231 enum accel_mlx5_dump_state_level level; 232 accel_mlx5_dump_stat_done_cb cb; 233 void *ctx; 234 }; 235 236 static struct accel_mlx5_module g_accel_mlx5; 237 static struct spdk_accel_driver g_accel_mlx5_driver; 238 239 static inline int accel_mlx5_execute_sequence(struct spdk_io_channel *ch, 240 struct spdk_accel_sequence *seq); 241 static inline void accel_mlx5_task_complete(struct accel_mlx5_task *mlx5_task); 242 243 static inline void 244 accel_mlx5_iov_sgl_init(struct accel_mlx5_iov_sgl *s, 
struct iovec *iov, uint32_t iovcnt) 245 { 246 s->iov = iov; 247 s->iovcnt = iovcnt; 248 s->iov_offset = 0; 249 } 250 251 static inline void 252 accel_mlx5_iov_sgl_advance(struct accel_mlx5_iov_sgl *s, uint32_t step) 253 { 254 s->iov_offset += step; 255 while (s->iovcnt > 0) { 256 assert(s->iov != NULL); 257 if (s->iov_offset < s->iov->iov_len) { 258 break; 259 } 260 261 s->iov_offset -= s->iov->iov_len; 262 s->iov++; 263 s->iovcnt--; 264 } 265 } 266 267 static inline void 268 accel_mlx5_iov_sgl_unwind(struct accel_mlx5_iov_sgl *s, uint32_t max_iovs, uint32_t step) 269 { 270 SPDK_DEBUGLOG(accel_mlx5, "iov %p, iovcnt %u, max %u, offset %u, step %u\n", s->iov, s->iovcnt, 271 max_iovs, s->iov_offset, step); 272 while (s->iovcnt <= max_iovs) { 273 assert(s->iov != NULL); 274 if (s->iov_offset >= step) { 275 s->iov_offset -= step; 276 SPDK_DEBUGLOG(accel_mlx5, "\tEND, iov %p, iovcnt %u, offset %u\n", s->iov, s->iovcnt, 277 s->iov_offset); 278 return; 279 } 280 step -= s->iov_offset; 281 s->iov--; 282 s->iovcnt++; 283 s->iov_offset = s->iov->iov_len; 284 SPDK_DEBUGLOG(accel_mlx5, "\tiov %p, iovcnt %u, offset %u, step %u\n", s->iov, s->iovcnt, 285 s->iov_offset, step); 286 } 287 288 SPDK_ERRLOG("Can't unwind iovs, remaining %u\n", step); 289 assert(0); 290 } 291 292 static inline int 293 accel_mlx5_sge_unwind(struct ibv_sge *sge, uint32_t sge_count, uint32_t step) 294 { 295 int i; 296 297 assert(sge_count > 0); 298 SPDK_DEBUGLOG(accel_mlx5, "sge %p, count %u, step %u\n", sge, sge_count, step); 299 for (i = (int)sge_count - 1; i >= 0; i--) { 300 if (sge[i].length > step) { 301 sge[i].length -= step; 302 SPDK_DEBUGLOG(accel_mlx5, "\tsge[%u] len %u, step %u\n", i, sge[i].length, step); 303 return (int)i + 1; 304 } 305 SPDK_DEBUGLOG(accel_mlx5, "\tsge[%u] len %u, step %u\n", i, sge[i].length, step); 306 step -= sge[i].length; 307 } 308 309 SPDK_ERRLOG("Can't unwind sge, remaining %u\n", step); 310 assert(step == 0); 311 312 return 0; 313 } 314 315 static inline void 316 accel_mlx5_crypto_task_complete(struct accel_mlx5_task *task) 317 { 318 struct accel_mlx5_dev *dev = task->qp->dev; 319 320 assert(task->num_ops); 321 spdk_mlx5_mkey_pool_put_bulk(dev->crypto_mkeys, task->mkeys, task->num_ops); 322 spdk_accel_task_complete(&task->base, 0); 323 } 324 325 static inline void 326 accel_mlx5_task_fail(struct accel_mlx5_task *task, int rc) 327 { 328 struct accel_mlx5_dev *dev = task->qp->dev; 329 struct spdk_accel_task *next; 330 struct spdk_accel_sequence *seq; 331 bool driver_seq; 332 333 assert(task->num_reqs == task->num_completed_reqs); 334 SPDK_DEBUGLOG(accel_mlx5, "Fail task %p, opc %d, rc %d\n", task, task->base.op_code, rc); 335 336 if (task->num_ops) { 337 if (task->mlx5_opcode == ACCEL_MLX5_OPC_CRYPTO || task->mlx5_opcode == ACCEL_MLX5_OPC_CRYPTO_MKEY) { 338 spdk_mlx5_mkey_pool_put_bulk(dev->crypto_mkeys, task->mkeys, task->num_ops); 339 } 340 if (task->mlx5_opcode == ACCEL_MLX5_OPC_CRC32C) { 341 spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, task->mkeys, task->num_ops); 342 spdk_mempool_put(dev->dev_ctx->psv_pool, task->psv); 343 } 344 } 345 next = spdk_accel_sequence_next_task(&task->base); 346 seq = task->base.seq; 347 driver_seq = task->driver_seq; 348 349 assert(task->num_reqs == task->num_completed_reqs); 350 SPDK_DEBUGLOG(accel_mlx5, "Fail task %p, opc %d, rc %d\n", task, task->mlx5_opcode, rc); 351 spdk_accel_task_complete(&task->base, rc); 352 353 if (driver_seq) { 354 struct spdk_io_channel *ch = task->qp->dev->ch; 355 356 assert(seq); 357 if (next) { 358 accel_mlx5_execute_sequence(ch, 
seq); 359 } else { 360 spdk_accel_sequence_continue(seq); 361 } 362 } 363 } 364 365 static int 366 accel_mlx5_translate_addr(void *addr, size_t size, struct spdk_memory_domain *domain, 367 void *domain_ctx, struct accel_mlx5_dev *dev, struct ibv_sge *sge) 368 { 369 struct spdk_rdma_utils_memory_translation map_translation; 370 struct spdk_memory_domain_translation_result domain_translation; 371 struct spdk_memory_domain_translation_ctx local_ctx; 372 int rc; 373 374 if (domain) { 375 domain_translation.size = sizeof(struct spdk_memory_domain_translation_result); 376 local_ctx.size = sizeof(local_ctx); 377 local_ctx.rdma.ibv_qp = dev->qp.verbs_qp; 378 rc = spdk_memory_domain_translate_data(domain, domain_ctx, dev->dev_ctx->domain, 379 &local_ctx, addr, size, &domain_translation); 380 if (spdk_unlikely(rc || domain_translation.iov_count != 1)) { 381 SPDK_ERRLOG("Memory domain translation failed, addr %p, length %zu, iovcnt %u\n", addr, size, 382 domain_translation.iov_count); 383 if (rc == 0) { 384 rc = -EINVAL; 385 } 386 387 return rc; 388 } 389 sge->lkey = domain_translation.rdma.lkey; 390 sge->addr = (uint64_t) domain_translation.iov.iov_base; 391 sge->length = domain_translation.iov.iov_len; 392 } else { 393 rc = spdk_rdma_utils_get_translation(dev->mmap, addr, size, 394 &map_translation); 395 if (spdk_unlikely(rc)) { 396 SPDK_ERRLOG("Memory translation failed, addr %p, length %zu\n", addr, size); 397 return rc; 398 } 399 sge->lkey = spdk_rdma_utils_memory_translation_get_lkey(&map_translation); 400 sge->addr = (uint64_t)addr; 401 sge->length = size; 402 } 403 404 return 0; 405 } 406 407 static inline int 408 accel_mlx5_fill_block_sge(struct accel_mlx5_dev *dev, struct ibv_sge *sge, 409 struct accel_mlx5_iov_sgl *iovs, uint32_t len, uint32_t *_remaining, 410 struct spdk_memory_domain *domain, void *domain_ctx) 411 { 412 void *addr; 413 uint32_t remaining = len; 414 uint32_t size; 415 int i = 0; 416 int rc; 417 418 while (remaining && i < (int)ACCEL_MLX5_MAX_SGE) { 419 size = spdk_min(remaining, iovs->iov->iov_len - iovs->iov_offset); 420 addr = (void *)iovs->iov->iov_base + iovs->iov_offset; 421 rc = accel_mlx5_translate_addr(addr, size, domain, domain_ctx, dev, &sge[i]); 422 if (spdk_unlikely(rc)) { 423 return rc; 424 } 425 SPDK_DEBUGLOG(accel_mlx5, "\t sge[%d]: lkey %u, len %u, addr %"PRIx64"\n", i, sge[i].lkey, 426 sge[i].length, sge[i].addr); 427 accel_mlx5_iov_sgl_advance(iovs, size); 428 i++; 429 assert(remaining >= size); 430 remaining -= size; 431 } 432 *_remaining = remaining; 433 434 return i; 435 } 436 437 static inline bool 438 accel_mlx5_compare_iovs(struct iovec *v1, struct iovec *v2, uint32_t iovcnt) 439 { 440 return memcmp(v1, v2, sizeof(*v1) * iovcnt) == 0; 441 } 442 443 static inline uint16_t 444 accel_mlx5_dev_get_available_slots(struct accel_mlx5_dev *dev, struct accel_mlx5_qp *qp) 445 { 446 assert(qp->wrs_max >= qp->wrs_submitted); 447 assert(dev->wrs_in_cq_max >= dev->wrs_in_cq); 448 449 /* Each time we produce only 1 CQE, so we need 1 CQ slot */ 450 if (spdk_unlikely(dev->wrs_in_cq == dev->wrs_in_cq_max)) { 451 return 0; 452 } 453 454 return qp->wrs_max - qp->wrs_submitted; 455 } 456 457 static inline uint32_t 458 accel_mlx5_task_alloc_mkeys(struct accel_mlx5_task *task, struct spdk_mlx5_mkey_pool *pool) 459 { 460 uint32_t num_ops; 461 int rc; 462 463 assert(task->num_reqs > task->num_completed_reqs); 464 num_ops = task->num_reqs - task->num_completed_reqs; 465 num_ops = spdk_min(num_ops, ACCEL_MLX5_MAX_MKEYS_IN_TASK); 466 if (!num_ops) { 467 return 0; 468 } 469 rc 
= spdk_mlx5_mkey_pool_get_bulk(pool, task->mkeys, num_ops); 470 if (spdk_unlikely(rc)) { 471 return 0; 472 } 473 assert(num_ops <= UINT16_MAX); 474 task->num_ops = num_ops; 475 476 return num_ops; 477 } 478 479 static inline uint8_t 480 bs_to_bs_selector(uint32_t bs) 481 { 482 switch (bs) { 483 case 512: 484 return SPDK_MLX5_BLOCK_SIZE_SELECTOR_512; 485 case 520: 486 return SPDK_MLX5_BLOCK_SIZE_SELECTOR_520; 487 case 4096: 488 return SPDK_MLX5_BLOCK_SIZE_SELECTOR_4096; 489 case 4160: 490 return SPDK_MLX5_BLOCK_SIZE_SELECTOR_4160; 491 default: 492 return SPDK_MLX5_BLOCK_SIZE_SELECTOR_RESERVED; 493 } 494 } 495 496 static inline int 497 accel_mlx5_configure_crypto_umr(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_sge *sge, 498 uint32_t mkey, uint32_t num_blocks, struct spdk_mlx5_crypto_dek_data *dek_data, 499 uint64_t wr_id, uint32_t flags) 500 { 501 struct spdk_mlx5_umr_crypto_attr cattr; 502 struct spdk_mlx5_umr_attr umr_attr; 503 struct accel_mlx5_qp *qp = mlx5_task->qp; 504 struct accel_mlx5_dev *dev = qp->dev; 505 struct spdk_accel_task *task = &mlx5_task->base; 506 uint32_t length, remaining = 0, block_size = task->block_size; 507 int rc; 508 509 length = num_blocks * block_size; 510 SPDK_DEBUGLOG(accel_mlx5, "task %p, domain %p, len %u, blocks %u\n", task, task->src_domain, length, 511 num_blocks); 512 rc = accel_mlx5_fill_block_sge(dev, sge->src_sge, &mlx5_task->src, length, &remaining, 513 task->src_domain, task->src_domain_ctx); 514 if (spdk_unlikely(rc <= 0)) { 515 if (rc == 0) { 516 rc = -EINVAL; 517 } 518 SPDK_ERRLOG("failed set src sge, rc %d\n", rc); 519 return rc; 520 } 521 sge->src_sge_count = rc; 522 if (spdk_unlikely(remaining)) { 523 uint32_t new_len = length - remaining; 524 uint32_t aligned_len, updated_num_blocks; 525 526 SPDK_DEBUGLOG(accel_mlx5, "Incorrect src iovs, handled %u out of %u bytes\n", new_len, length); 527 if (new_len < block_size) { 528 /* We need to process at least 1 block. 
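			 * A UMR must cover a whole number of crypto blocks, so a fragment shorter
			 * than block_size cannot be submitted on its own.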
If buffer is too fragmented, we can't do 529 * anything */ 530 return -ERANGE; 531 } 532 533 /* Regular integer division, we need to round down to prev block size */ 534 updated_num_blocks = new_len / block_size; 535 assert(updated_num_blocks); 536 assert(updated_num_blocks < num_blocks); 537 aligned_len = updated_num_blocks * block_size; 538 539 if (aligned_len < new_len) { 540 uint32_t dt = new_len - aligned_len; 541 542 /* We can't process part of block, need to unwind src iov_sgl and sge to the 543 * prev block boundary */ 544 SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind src sge for %u bytes\n", task, dt); 545 accel_mlx5_iov_sgl_unwind(&mlx5_task->src, task->s.iovcnt, dt); 546 sge->src_sge_count = accel_mlx5_sge_unwind(sge->src_sge, sge->src_sge_count, dt); 547 if (!sge->src_sge_count) { 548 return -ERANGE; 549 } 550 } 551 SPDK_DEBUGLOG(accel_mlx5, "task %p, UMR len %u -> %u\n", task, length, aligned_len); 552 length = aligned_len; 553 num_blocks = updated_num_blocks; 554 } 555 556 cattr.xts_iv = task->iv + mlx5_task->num_processed_blocks; 557 cattr.keytag = 0; 558 cattr.dek_obj_id = dek_data->dek_obj_id; 559 cattr.tweak_mode = dek_data->tweak_mode; 560 cattr.enc_order = mlx5_task->enc_order; 561 cattr.bs_selector = bs_to_bs_selector(mlx5_task->base.block_size); 562 if (spdk_unlikely(cattr.bs_selector == SPDK_MLX5_BLOCK_SIZE_SELECTOR_RESERVED)) { 563 SPDK_ERRLOG("unsupported block size %u\n", mlx5_task->base.block_size); 564 return -EINVAL; 565 } 566 umr_attr.mkey = mkey; 567 umr_attr.sge = sge->src_sge; 568 569 if (!mlx5_task->inplace) { 570 SPDK_DEBUGLOG(accel_mlx5, "task %p, dst sge, domain %p, len %u\n", task, task->dst_domain, length); 571 rc = accel_mlx5_fill_block_sge(dev, sge->dst_sge, &mlx5_task->dst, length, &remaining, 572 task->dst_domain, task->dst_domain_ctx); 573 if (spdk_unlikely(rc <= 0)) { 574 if (rc == 0) { 575 rc = -EINVAL; 576 } 577 SPDK_ERRLOG("failed set dst sge, rc %d\n", rc); 578 return rc; 579 } 580 sge->dst_sge_count = rc; 581 if (spdk_unlikely(remaining)) { 582 uint32_t new_len = length - remaining; 583 uint32_t aligned_len, updated_num_blocks, dt; 584 585 SPDK_DEBUGLOG(accel_mlx5, "Incorrect dst iovs, handled %u out of %u bytes\n", new_len, length); 586 if (new_len < block_size) { 587 /* We need to process at least 1 block. If buffer is too fragmented, we can't do 588 * anything */ 589 return -ERANGE; 590 } 591 592 /* Regular integer division, we need to round down to prev block size */ 593 updated_num_blocks = new_len / block_size; 594 assert(updated_num_blocks); 595 assert(updated_num_blocks < num_blocks); 596 aligned_len = updated_num_blocks * block_size; 597 598 if (aligned_len < new_len) { 599 dt = new_len - aligned_len; 600 assert(dt > 0 && dt < length); 601 /* We can't process part of block, need to unwind src and dst iov_sgl and sge to the 602 * prev block boundary */ 603 SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind dst sge for %u bytes\n", task, dt); 604 accel_mlx5_iov_sgl_unwind(&mlx5_task->dst, task->d.iovcnt, dt); 605 sge->dst_sge_count = accel_mlx5_sge_unwind(sge->dst_sge, sge->dst_sge_count, dt); 606 assert(sge->dst_sge_count > 0 && sge->dst_sge_count <= ACCEL_MLX5_MAX_SGE); 607 if (!sge->dst_sge_count) { 608 return -ERANGE; 609 } 610 } 611 assert(length > aligned_len); 612 dt = length - aligned_len; 613 SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind src sge for %u bytes\n", task, dt); 614 /* The same for src iov_sgl and sge. 
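			 * The dst length was rounded down to a block boundary above, so src must be
			 * shrunk by the same number of bytes to keep both sides equal.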
In worst case we can unwind SRC 2 times */ 615 accel_mlx5_iov_sgl_unwind(&mlx5_task->src, task->s.iovcnt, dt); 616 sge->src_sge_count = accel_mlx5_sge_unwind(sge->src_sge, sge->src_sge_count, dt); 617 assert(sge->src_sge_count > 0 && sge->src_sge_count <= ACCEL_MLX5_MAX_SGE); 618 if (!sge->src_sge_count) { 619 return -ERANGE; 620 } 621 SPDK_DEBUGLOG(accel_mlx5, "task %p, UMR len %u -> %u\n", task, length, aligned_len); 622 length = aligned_len; 623 num_blocks = updated_num_blocks; 624 } 625 } 626 627 SPDK_DEBUGLOG(accel_mlx5, 628 "task %p: bs %u, iv %"PRIu64", enc_on_tx %d, tweak_mode %d, len %u, mkey %x, blocks %u\n", 629 mlx5_task, task->block_size, cattr.xts_iv, mlx5_task->enc_order, cattr.tweak_mode, length, mkey, 630 num_blocks); 631 632 umr_attr.sge_count = sge->src_sge_count; 633 umr_attr.umr_len = length; 634 assert((uint32_t)mlx5_task->num_processed_blocks + num_blocks <= UINT16_MAX); 635 mlx5_task->num_processed_blocks += num_blocks; 636 637 rc = spdk_mlx5_umr_configure_crypto(qp->qp, &umr_attr, &cattr, wr_id, flags); 638 639 return rc; 640 } 641 642 static inline int 643 accel_mlx5_crypto_task_process(struct accel_mlx5_task *mlx5_task) 644 { 645 struct accel_mlx5_sge sges[ACCEL_MLX5_MAX_MKEYS_IN_TASK]; 646 struct spdk_mlx5_crypto_dek_data dek_data; 647 struct accel_mlx5_qp *qp = mlx5_task->qp; 648 struct accel_mlx5_dev *dev = qp->dev; 649 /* First RDMA after UMR must have a SMALL_FENCE */ 650 uint32_t first_rdma_fence = SPDK_MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE; 651 uint16_t num_blocks; 652 uint16_t num_ops = spdk_min(mlx5_task->num_reqs - mlx5_task->num_completed_reqs, 653 mlx5_task->num_ops); 654 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp); 655 uint16_t i; 656 int rc; 657 658 assert(qp_slot > 1); 659 num_ops = spdk_min(num_ops, qp_slot >> 1); 660 if (spdk_unlikely(!num_ops)) { 661 return -EINVAL; 662 } 663 664 rc = spdk_mlx5_crypto_get_dek_data(mlx5_task->base.crypto_key->priv, dev->dev_ctx->pd, &dek_data); 665 if (spdk_unlikely(rc)) { 666 return rc; 667 } 668 669 mlx5_task->num_wrs = 0; 670 SPDK_DEBUGLOG(accel_mlx5, "begin, task, %p, reqs: total %u, submitted %u, completed %u\n", 671 mlx5_task, mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs); 672 for (i = 0; i < num_ops; i++) { 673 if (mlx5_task->num_submitted_reqs + i + 1 == mlx5_task->num_reqs) { 674 /* Last request may consume less than calculated if crypto_multi_block is true */ 675 assert(mlx5_task->num_blocks > mlx5_task->num_submitted_reqs); 676 num_blocks = mlx5_task->num_blocks - mlx5_task->num_processed_blocks; 677 } else { 678 num_blocks = mlx5_task->blocks_per_req; 679 } 680 681 rc = accel_mlx5_configure_crypto_umr(mlx5_task, &sges[i], mlx5_task->mkeys[i]->mkey, num_blocks, 682 &dek_data, 0, 0); 683 if (spdk_unlikely(rc)) { 684 SPDK_ERRLOG("UMR configure failed with %d\n", rc); 685 return rc; 686 } 687 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task); 688 dev->stats.crypto_umrs++; 689 } 690 691 /* Loop `num_ops - 1` for easy flags handling */ 692 for (i = 0; i < num_ops - 1; i++) { 693 /* UMR is used as a destination for RDMA_READ - from UMR to sge */ 694 if (mlx5_task->inplace) { 695 rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].src_sge, sges[i].src_sge_count, 0, 696 mlx5_task->mkeys[i]->mkey, 0, first_rdma_fence); 697 } else { 698 rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].dst_sge, sges[i].dst_sge_count, 0, 699 mlx5_task->mkeys[i]->mkey, 0, first_rdma_fence); 700 } 701 if (spdk_unlikely(rc)) { 702 SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc); 703 return 
rc; 704 } 705 706 first_rdma_fence = 0; 707 assert(mlx5_task->num_submitted_reqs < mlx5_task->num_reqs); 708 assert(mlx5_task->num_submitted_reqs < UINT16_MAX); 709 mlx5_task->num_submitted_reqs++; 710 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task); 711 dev->stats.rdma_reads++; 712 } 713 714 if (mlx5_task->inplace) { 715 rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].src_sge, sges[i].src_sge_count, 0, 716 mlx5_task->mkeys[i]->mkey, (uint64_t)mlx5_task, first_rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE); 717 } else { 718 rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].dst_sge, sges[i].dst_sge_count, 0, 719 mlx5_task->mkeys[i]->mkey, (uint64_t)mlx5_task, first_rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE); 720 } 721 if (spdk_unlikely(rc)) { 722 SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc); 723 return rc; 724 } 725 726 assert(mlx5_task->num_submitted_reqs < mlx5_task->num_reqs); 727 assert(mlx5_task->num_submitted_reqs < UINT16_MAX); 728 mlx5_task->num_submitted_reqs++; 729 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task); 730 dev->stats.rdma_reads++; 731 STAILQ_INSERT_TAIL(&qp->in_hw, mlx5_task, link); 732 733 if (spdk_unlikely(mlx5_task->num_submitted_reqs == mlx5_task->num_reqs && 734 mlx5_task->num_blocks > mlx5_task->num_processed_blocks)) { 735 /* We hit "out of sge 736 * entries" case with highly fragmented payload. In that case 737 * accel_mlx5_configure_crypto_umr function handled fewer data blocks than expected 738 * That means we need at least 1 more request to complete this task, this request will be 739 * executed once all submitted ones are completed */ 740 SPDK_DEBUGLOG(accel_mlx5, "task %p, processed %u/%u blocks, add extra req\n", mlx5_task, 741 mlx5_task->num_processed_blocks, mlx5_task->num_blocks); 742 mlx5_task->num_reqs++; 743 } 744 745 SPDK_DEBUGLOG(accel_mlx5, "end, task, %p, reqs: total %u, submitted %u, completed %u\n", mlx5_task, 746 mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs); 747 748 return 0; 749 } 750 751 static inline int 752 accel_mlx5_crypto_task_continue(struct accel_mlx5_task *task) 753 { 754 struct accel_mlx5_qp *qp = task->qp; 755 struct accel_mlx5_dev *dev = qp->dev; 756 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp); 757 758 assert(task->num_reqs > task->num_completed_reqs); 759 if (task->num_ops == 0) { 760 /* No mkeys allocated, try to allocate now */ 761 if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(task, dev->crypto_mkeys))) { 762 /* Pool is empty, queue this task */ 763 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 764 dev->stats.nomem_mkey++; 765 return -ENOMEM; 766 } 767 } 768 /* We need to post at least 1 UMR and 1 RDMA operation */ 769 if (spdk_unlikely(qp_slot < 2)) { 770 /* QP is full, queue this task */ 771 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 772 task->qp->dev->stats.nomem_qdepth++; 773 return -ENOMEM; 774 } 775 776 return accel_mlx5_crypto_task_process(task); 777 } 778 779 static inline int 780 accel_mlx5_crypto_task_init(struct accel_mlx5_task *mlx5_task) 781 { 782 struct spdk_accel_task *task = &mlx5_task->base; 783 struct accel_mlx5_dev *dev = mlx5_task->qp->dev; 784 uint64_t src_nbytes = task->nbytes; 785 #ifdef DEBUG 786 uint64_t dst_nbytes; 787 uint32_t i; 788 #endif 789 bool crypto_key_ok; 790 791 crypto_key_ok = (task->crypto_key && task->crypto_key->module_if == &g_accel_mlx5.module && 792 task->crypto_key->priv); 793 if (spdk_unlikely((task->nbytes % mlx5_task->base.block_size != 0) || !crypto_key_ok)) { 794 if (crypto_key_ok) { 795 SPDK_ERRLOG("src 
length %"PRIu64" is not a multiple of the block size %u\n", task->nbytes, 796 mlx5_task->base.block_size); 797 } else { 798 SPDK_ERRLOG("Wrong crypto key provided\n"); 799 } 800 return -EINVAL; 801 } 802 803 assert(src_nbytes / mlx5_task->base.block_size <= UINT16_MAX); 804 mlx5_task->num_blocks = src_nbytes / mlx5_task->base.block_size; 805 accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt); 806 if (task->d.iovcnt == 0 || (task->d.iovcnt == task->s.iovcnt && 807 accel_mlx5_compare_iovs(task->d.iovs, task->s.iovs, task->s.iovcnt))) { 808 mlx5_task->inplace = 1; 809 } else { 810 #ifdef DEBUG 811 dst_nbytes = 0; 812 for (i = 0; i < task->d.iovcnt; i++) { 813 dst_nbytes += task->d.iovs[i].iov_len; 814 } 815 816 if (spdk_unlikely(src_nbytes != dst_nbytes)) { 817 return -EINVAL; 818 } 819 #endif 820 mlx5_task->inplace = 0; 821 accel_mlx5_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt); 822 } 823 824 if (dev->crypto_multi_block) { 825 if (dev->crypto_split_blocks) { 826 assert(SPDK_CEIL_DIV(mlx5_task->num_blocks, dev->crypto_split_blocks) <= UINT16_MAX); 827 mlx5_task->num_reqs = SPDK_CEIL_DIV(mlx5_task->num_blocks, dev->crypto_split_blocks); 828 /* Last req may consume less blocks */ 829 mlx5_task->blocks_per_req = spdk_min(mlx5_task->num_blocks, dev->crypto_split_blocks); 830 } else { 831 if (task->s.iovcnt > ACCEL_MLX5_MAX_SGE || task->d.iovcnt > ACCEL_MLX5_MAX_SGE) { 832 uint32_t max_sge_count = spdk_max(task->s.iovcnt, task->d.iovcnt); 833 834 assert(SPDK_CEIL_DIV(max_sge_count, ACCEL_MLX5_MAX_SGE) <= UINT16_MAX); 835 mlx5_task->num_reqs = SPDK_CEIL_DIV(max_sge_count, ACCEL_MLX5_MAX_SGE); 836 mlx5_task->blocks_per_req = SPDK_CEIL_DIV(mlx5_task->num_blocks, mlx5_task->num_reqs); 837 } else { 838 mlx5_task->num_reqs = 1; 839 mlx5_task->blocks_per_req = mlx5_task->num_blocks; 840 } 841 } 842 } else { 843 mlx5_task->num_reqs = mlx5_task->num_blocks; 844 mlx5_task->blocks_per_req = 1; 845 } 846 847 if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(mlx5_task, dev->crypto_mkeys))) { 848 /* Pool is empty, queue this task */ 849 SPDK_DEBUGLOG(accel_mlx5, "no reqs in pool, dev %s\n", dev->dev_ctx->context->device->name); 850 dev->stats.nomem_mkey++; 851 return -ENOMEM; 852 } 853 if (spdk_unlikely(accel_mlx5_dev_get_available_slots(dev, &dev->qp) < 2)) { 854 /* Queue is full, queue this task */ 855 SPDK_DEBUGLOG(accel_mlx5, "dev %s qp %p is full\n", dev->dev_ctx->context->device->name, 856 mlx5_task->qp); 857 dev->stats.nomem_qdepth++; 858 return -ENOMEM; 859 } 860 861 SPDK_DEBUGLOG(accel_mlx5, "task %p, src_iovs %u, dst_iovs %u, num_reqs %u, " 862 "blocks/req %u, blocks %u, inplace %d\n", task, task->s.iovcnt, task->d.iovcnt, 863 mlx5_task->num_reqs, mlx5_task->blocks_per_req, mlx5_task->num_blocks, mlx5_task->inplace); 864 865 return 0; 866 } 867 868 static inline void 869 accel_mlx5_copy_task_complete(struct accel_mlx5_task *mlx5_task) 870 { 871 spdk_accel_task_complete(&mlx5_task->base, 0); 872 } 873 874 static inline int 875 accel_mlx5_copy_task_process_one(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_qp *qp, 876 uint64_t wrid, uint32_t fence) 877 { 878 struct spdk_accel_task *task = &mlx5_task->base; 879 struct accel_mlx5_sge sge; 880 uint32_t remaining = 0; 881 uint32_t dst_len; 882 int rc; 883 884 /* Limit one RDMA_WRITE by length of dst buffer. Not all src buffers may fit into one dst buffer due to 885 * limitation on ACCEL_MLX5_MAX_SGE. 
If this is the case then remaining is not zero */ 886 assert(mlx5_task->dst.iov->iov_len > mlx5_task->dst.iov_offset); 887 dst_len = mlx5_task->dst.iov->iov_len - mlx5_task->dst.iov_offset; 888 rc = accel_mlx5_fill_block_sge(qp->dev, sge.src_sge, &mlx5_task->src, dst_len, &remaining, 889 task->src_domain, task->src_domain_ctx); 890 if (spdk_unlikely(rc <= 0)) { 891 if (rc == 0) { 892 rc = -EINVAL; 893 } 894 SPDK_ERRLOG("failed set src sge, rc %d\n", rc); 895 return rc; 896 } 897 sge.src_sge_count = rc; 898 assert(dst_len > remaining); 899 dst_len -= remaining; 900 901 rc = accel_mlx5_fill_block_sge(qp->dev, sge.dst_sge, &mlx5_task->dst, dst_len, &remaining, 902 task->dst_domain, task->dst_domain_ctx); 903 if (spdk_unlikely(rc != 1)) { 904 /* We use single dst entry, any result other than 1 is an error */ 905 if (rc == 0) { 906 rc = -EINVAL; 907 } 908 SPDK_ERRLOG("failed set dst sge, rc %d\n", rc); 909 return rc; 910 } 911 if (spdk_unlikely(remaining)) { 912 SPDK_ERRLOG("Incorrect dst length, remaining %u\n", remaining); 913 assert(0); 914 return -EINVAL; 915 } 916 917 rc = spdk_mlx5_qp_rdma_write(mlx5_task->qp->qp, sge.src_sge, sge.src_sge_count, 918 sge.dst_sge[0].addr, sge.dst_sge[0].lkey, wrid, fence); 919 if (spdk_unlikely(rc)) { 920 SPDK_ERRLOG("new RDMA WRITE failed with %d\n", rc); 921 return rc; 922 } 923 qp->dev->stats.rdma_writes++; 924 925 return 0; 926 } 927 928 static inline int 929 accel_mlx5_copy_task_process(struct accel_mlx5_task *mlx5_task) 930 { 931 932 struct accel_mlx5_qp *qp = mlx5_task->qp; 933 struct accel_mlx5_dev *dev = qp->dev; 934 uint16_t i; 935 int rc; 936 937 mlx5_task->num_wrs = 0; 938 assert(mlx5_task->num_reqs > 0); 939 assert(mlx5_task->num_ops > 0); 940 941 /* Handle n-1 reqs in order to simplify wrid and fence handling */ 942 for (i = 0; i < mlx5_task->num_ops - 1; i++) { 943 rc = accel_mlx5_copy_task_process_one(mlx5_task, qp, 0, 0); 944 if (spdk_unlikely(rc)) { 945 return rc; 946 } 947 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task); 948 mlx5_task->num_submitted_reqs++; 949 } 950 951 rc = accel_mlx5_copy_task_process_one(mlx5_task, qp, (uint64_t)mlx5_task, 952 SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE); 953 if (spdk_unlikely(rc)) { 954 return rc; 955 } 956 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task); 957 mlx5_task->num_submitted_reqs++; 958 STAILQ_INSERT_TAIL(&qp->in_hw, mlx5_task, link); 959 960 SPDK_DEBUGLOG(accel_mlx5, "end, copy task, %p\n", mlx5_task); 961 962 return 0; 963 } 964 965 static inline int 966 accel_mlx5_copy_task_continue(struct accel_mlx5_task *task) 967 { 968 struct accel_mlx5_qp *qp = task->qp; 969 struct accel_mlx5_dev *dev = qp->dev; 970 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp); 971 972 task->num_ops = spdk_min(qp_slot, task->num_reqs - task->num_completed_reqs); 973 if (spdk_unlikely(task->num_ops == 0)) { 974 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 975 dev->stats.nomem_qdepth++; 976 return -ENOMEM; 977 } 978 return accel_mlx5_copy_task_process(task); 979 } 980 981 static inline uint32_t 982 accel_mlx5_get_copy_task_count(struct iovec *src_iov, uint32_t src_iovcnt, 983 struct iovec *dst_iov, uint32_t dst_iovcnt) 984 { 985 uint32_t src = 0; 986 uint32_t dst = 0; 987 uint64_t src_offset = 0; 988 uint64_t dst_offset = 0; 989 uint32_t num_ops = 0; 990 uint32_t src_sge_count = 0; 991 992 while (src < src_iovcnt && dst < dst_iovcnt) { 993 uint64_t src_len = src_iov[src].iov_len - src_offset; 994 uint64_t dst_len = dst_iov[dst].iov_len - dst_offset; 995 996 if (dst_len < src_len) { 997 dst_offset = 
0; 998 src_offset += dst_len; 999 dst++; 1000 num_ops++; 1001 src_sge_count = 0; 1002 } else if (src_len < dst_len) { 1003 dst_offset += src_len; 1004 src_offset = 0; 1005 src++; 1006 if (++src_sge_count >= ACCEL_MLX5_MAX_SGE) { 1007 num_ops++; 1008 src_sge_count = 0; 1009 } 1010 } else { 1011 dst_offset = 0; 1012 src_offset = 0; 1013 dst++; 1014 src++; 1015 num_ops++; 1016 src_sge_count = 0; 1017 } 1018 } 1019 1020 assert(src == src_iovcnt); 1021 assert(dst == dst_iovcnt); 1022 assert(src_offset == 0); 1023 assert(dst_offset == 0); 1024 return num_ops; 1025 } 1026 1027 static inline int 1028 accel_mlx5_copy_task_init(struct accel_mlx5_task *mlx5_task) 1029 { 1030 struct spdk_accel_task *task = &mlx5_task->base; 1031 struct accel_mlx5_qp *qp = mlx5_task->qp; 1032 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(qp->dev, qp); 1033 1034 if (spdk_likely(task->s.iovcnt <= ACCEL_MLX5_MAX_SGE)) { 1035 mlx5_task->num_reqs = task->d.iovcnt; 1036 } else if (task->d.iovcnt == 1) { 1037 mlx5_task->num_reqs = SPDK_CEIL_DIV(task->s.iovcnt, ACCEL_MLX5_MAX_SGE); 1038 } else { 1039 mlx5_task->num_reqs = accel_mlx5_get_copy_task_count(task->s.iovs, task->s.iovcnt, 1040 task->d.iovs, task->d.iovcnt); 1041 } 1042 mlx5_task->inplace = 0; 1043 accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt); 1044 accel_mlx5_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt); 1045 mlx5_task->num_ops = spdk_min(qp_slot, mlx5_task->num_reqs); 1046 if (spdk_unlikely(!mlx5_task->num_ops)) { 1047 qp->dev->stats.nomem_qdepth++; 1048 return -ENOMEM; 1049 } 1050 SPDK_DEBUGLOG(accel_mlx5, "copy task num_reqs %u, num_ops %u\n", mlx5_task->num_reqs, 1051 mlx5_task->num_ops); 1052 1053 return 0; 1054 } 1055 1056 static inline uint32_t 1057 accel_mlx5_advance_iovec(struct iovec *iov, uint32_t iovcnt, size_t *iov_offset, size_t *len) 1058 { 1059 uint32_t i; 1060 size_t iov_len; 1061 1062 for (i = 0; *len != 0 && i < iovcnt; i++) { 1063 iov_len = iov[i].iov_len - *iov_offset; 1064 1065 if (iov_len < *len) { 1066 *iov_offset = 0; 1067 *len -= iov_len; 1068 continue; 1069 } 1070 if (iov_len == *len) { 1071 *iov_offset = 0; 1072 i++; 1073 } else { /* iov_len > *len */ 1074 *iov_offset += *len; 1075 } 1076 *len = 0; 1077 break; 1078 } 1079 1080 return i; 1081 } 1082 1083 static inline void 1084 accel_mlx5_crc_task_complete(struct accel_mlx5_task *mlx5_task) 1085 { 1086 struct accel_mlx5_dev *dev = mlx5_task->qp->dev; 1087 1088 *mlx5_task->base.crc_dst = mlx5_task->psv->crc ^ UINT32_MAX; 1089 /* Normal task completion without allocated mkeys is not possible */ 1090 assert(mlx5_task->num_ops); 1091 spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, mlx5_task->mkeys, mlx5_task->num_ops); 1092 spdk_mempool_put(dev->dev_ctx->psv_pool, mlx5_task->psv); 1093 spdk_accel_task_complete(&mlx5_task->base, 0); 1094 } 1095 1096 static inline int 1097 accel_mlx5_crc_task_configure_umr(struct accel_mlx5_task *mlx5_task, struct ibv_sge *sge, 1098 uint32_t sge_count, struct spdk_mlx5_mkey_pool_obj *mkey, 1099 enum spdk_mlx5_umr_sig_domain sig_domain, uint32_t umr_len, 1100 bool sig_init, bool sig_check_gen) 1101 { 1102 struct spdk_mlx5_umr_sig_attr sattr = { 1103 .seed = mlx5_task->base.seed ^ UINT32_MAX, 1104 .psv_index = mlx5_task->psv->psv_index, 1105 .domain = sig_domain, 1106 .sigerr_count = mkey->sig.sigerr_count, 1107 .raw_data_size = umr_len, 1108 .init = sig_init, 1109 .check_gen = sig_check_gen, 1110 }; 1111 struct spdk_mlx5_umr_attr umr_attr = { 1112 .mkey = mkey->mkey, 1113 .umr_len = umr_len, 1114 .sge_count = sge_count, 
1115 .sge = sge, 1116 }; 1117 1118 return spdk_mlx5_umr_configure_sig(mlx5_task->qp->qp, &umr_attr, &sattr, 0, 0); 1119 } 1120 1121 static inline int 1122 accel_mlx5_crc_task_fill_sge(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_sge *sge) 1123 { 1124 struct spdk_accel_task *task = &mlx5_task->base; 1125 struct accel_mlx5_qp *qp = mlx5_task->qp; 1126 struct accel_mlx5_dev *dev = qp->dev; 1127 uint32_t remaining; 1128 int rc; 1129 1130 rc = accel_mlx5_fill_block_sge(dev, sge->src_sge, &mlx5_task->src, task->nbytes, &remaining, 1131 task->src_domain, task->src_domain_ctx); 1132 if (spdk_unlikely(rc <= 0)) { 1133 if (rc == 0) { 1134 rc = -EINVAL; 1135 } 1136 SPDK_ERRLOG("failed set src sge, rc %d\n", rc); 1137 return rc; 1138 } 1139 assert(remaining == 0); 1140 sge->src_sge_count = rc; 1141 1142 if (!mlx5_task->inplace) { 1143 rc = accel_mlx5_fill_block_sge(dev, sge->dst_sge, &mlx5_task->dst, task->nbytes, &remaining, 1144 task->dst_domain, task->dst_domain_ctx); 1145 if (spdk_unlikely(rc <= 0)) { 1146 if (rc == 0) { 1147 rc = -EINVAL; 1148 } 1149 SPDK_ERRLOG("failed set dst sge, rc %d\n", rc); 1150 return rc; 1151 } 1152 assert(remaining == 0); 1153 sge->dst_sge_count = rc; 1154 } 1155 1156 return 0; 1157 } 1158 1159 static inline int 1160 accel_mlx5_crc_task_process_one_req(struct accel_mlx5_task *mlx5_task) 1161 { 1162 struct accel_mlx5_sge sges; 1163 struct accel_mlx5_qp *qp = mlx5_task->qp; 1164 struct accel_mlx5_dev *dev = qp->dev; 1165 uint32_t num_ops = spdk_min(mlx5_task->num_reqs - mlx5_task->num_completed_reqs, 1166 mlx5_task->num_ops); 1167 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp); 1168 uint32_t rdma_fence = SPDK_MLX5_WQE_CTRL_STRONG_ORDERING; 1169 struct ibv_sge *sge; 1170 int rc; 1171 uint16_t sge_count; 1172 1173 num_ops = spdk_min(num_ops, qp_slot >> 1); 1174 if (spdk_unlikely(!num_ops)) { 1175 return -EINVAL; 1176 } 1177 1178 mlx5_task->num_wrs = 0; 1179 /* At this moment we have as many requests as can be submitted to a qp */ 1180 rc = accel_mlx5_crc_task_fill_sge(mlx5_task, &sges); 1181 if (spdk_unlikely(rc)) { 1182 return rc; 1183 } 1184 rc = accel_mlx5_crc_task_configure_umr(mlx5_task, sges.src_sge, sges.src_sge_count, 1185 mlx5_task->mkeys[0], SPDK_MLX5_UMR_SIG_DOMAIN_WIRE, mlx5_task->base.nbytes, true, true); 1186 if (spdk_unlikely(rc)) { 1187 SPDK_ERRLOG("UMR configure failed with %d\n", rc); 1188 return rc; 1189 } 1190 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task); 1191 dev->stats.sig_umrs++; 1192 1193 if (mlx5_task->inplace) { 1194 sge = sges.src_sge; 1195 sge_count = sges.src_sge_count; 1196 } else { 1197 sge = sges.dst_sge; 1198 sge_count = sges.dst_sge_count; 1199 } 1200 1201 /* 1202 * Add the crc destination to the end of sges. A free entry must be available for CRC 1203 * because the task init function reserved it. 
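	 * The RDMA_READ posted through the signature MKey transfers the CRC into psv->crc;
	 * accel_mlx5_crc_task_complete() then copies it to base.crc_dst with a final ^ UINT32_MAX.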
1204 */ 1205 assert(sge_count < ACCEL_MLX5_MAX_SGE); 1206 sge[sge_count].lkey = mlx5_task->psv->crc_lkey; 1207 sge[sge_count].addr = (uintptr_t)&mlx5_task->psv->crc; 1208 sge[sge_count++].length = sizeof(uint32_t); 1209 1210 if (spdk_unlikely(mlx5_task->psv->bits.error)) { 1211 rc = spdk_mlx5_qp_set_psv(qp->qp, mlx5_task->psv->psv_index, *mlx5_task->base.crc_dst, 0, 0); 1212 if (spdk_unlikely(rc)) { 1213 SPDK_ERRLOG("SET_PSV failed with %d\n", rc); 1214 return rc; 1215 } 1216 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task); 1217 } 1218 1219 rc = spdk_mlx5_qp_rdma_read(qp->qp, sge, sge_count, 0, mlx5_task->mkeys[0]->mkey, 1220 (uint64_t)mlx5_task, rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE); 1221 if (spdk_unlikely(rc)) { 1222 SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc); 1223 return rc; 1224 } 1225 mlx5_task->num_submitted_reqs++; 1226 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task); 1227 dev->stats.rdma_reads++; 1228 1229 return 0; 1230 } 1231 1232 static inline int 1233 accel_mlx5_crc_task_fill_umr_sge(struct accel_mlx5_qp *qp, struct ibv_sge *sge, 1234 struct accel_mlx5_iov_sgl *umr_iovs, struct spdk_memory_domain *domain, 1235 void *domain_ctx, struct accel_mlx5_iov_sgl *rdma_iovs, size_t *len) 1236 { 1237 int umr_idx = 0; 1238 int rdma_idx = 0; 1239 int umr_iovcnt = spdk_min(umr_iovs->iovcnt, (int)ACCEL_MLX5_MAX_SGE); 1240 int rdma_iovcnt = spdk_min(rdma_iovs->iovcnt, (int)ACCEL_MLX5_MAX_SGE); 1241 size_t umr_iov_offset; 1242 size_t rdma_iov_offset; 1243 size_t umr_len = 0; 1244 void *sge_addr; 1245 size_t sge_len; 1246 size_t umr_sge_len; 1247 size_t rdma_sge_len; 1248 int rc; 1249 1250 umr_iov_offset = umr_iovs->iov_offset; 1251 rdma_iov_offset = rdma_iovs->iov_offset; 1252 1253 while (umr_idx < umr_iovcnt && rdma_idx < rdma_iovcnt) { 1254 umr_sge_len = umr_iovs->iov[umr_idx].iov_len - umr_iov_offset; 1255 rdma_sge_len = rdma_iovs->iov[rdma_idx].iov_len - rdma_iov_offset; 1256 sge_addr = umr_iovs->iov[umr_idx].iov_base + umr_iov_offset; 1257 1258 if (umr_sge_len == rdma_sge_len) { 1259 rdma_idx++; 1260 umr_iov_offset = 0; 1261 rdma_iov_offset = 0; 1262 sge_len = umr_sge_len; 1263 } else if (umr_sge_len < rdma_sge_len) { 1264 umr_iov_offset = 0; 1265 rdma_iov_offset += umr_sge_len; 1266 sge_len = umr_sge_len; 1267 } else { 1268 size_t remaining; 1269 1270 remaining = umr_sge_len - rdma_sge_len; 1271 while (remaining) { 1272 rdma_idx++; 1273 if (rdma_idx == (int)ACCEL_MLX5_MAX_SGE) { 1274 break; 1275 } 1276 rdma_sge_len = rdma_iovs->iov[rdma_idx].iov_len; 1277 if (remaining == rdma_sge_len) { 1278 rdma_idx++; 1279 rdma_iov_offset = 0; 1280 umr_iov_offset = 0; 1281 remaining = 0; 1282 break; 1283 } 1284 if (remaining < rdma_sge_len) { 1285 rdma_iov_offset = remaining; 1286 umr_iov_offset = 0; 1287 remaining = 0; 1288 break; 1289 } 1290 remaining -= rdma_sge_len; 1291 } 1292 sge_len = umr_sge_len - remaining; 1293 } 1294 rc = accel_mlx5_translate_addr(sge_addr, sge_len, domain, domain_ctx, qp->dev, &sge[umr_idx]); 1295 if (spdk_unlikely(rc)) { 1296 return -EINVAL; 1297 } 1298 SPDK_DEBUGLOG(accel_mlx5, "\t sge[%d] lkey %u, addr %p, len %u\n", umr_idx, sge[umr_idx].lkey, 1299 (void *)sge[umr_idx].addr, sge[umr_idx].length); 1300 umr_len += sge_len; 1301 umr_idx++; 1302 } 1303 accel_mlx5_iov_sgl_advance(umr_iovs, umr_len); 1304 accel_mlx5_iov_sgl_advance(rdma_iovs, umr_len); 1305 *len = umr_len; 1306 1307 return umr_idx; 1308 } 1309 1310 static inline int 1311 accel_mlx5_crc_task_process_multi_req(struct accel_mlx5_task *mlx5_task) 1312 { 1313 size_t 
umr_len[ACCEL_MLX5_MAX_MKEYS_IN_TASK]; 1314 struct ibv_sge sges[ACCEL_MLX5_MAX_SGE]; 1315 struct spdk_accel_task *task = &mlx5_task->base; 1316 struct accel_mlx5_qp *qp = mlx5_task->qp; 1317 struct accel_mlx5_dev *dev = qp->dev; 1318 struct accel_mlx5_iov_sgl umr_sgl; 1319 struct accel_mlx5_iov_sgl *umr_sgl_ptr; 1320 struct accel_mlx5_iov_sgl rdma_sgl; 1321 uint64_t umr_offset; 1322 uint32_t rdma_fence = SPDK_MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE; 1323 int sge_count; 1324 uint32_t remaining; 1325 int rc; 1326 uint16_t i; 1327 uint16_t num_ops = spdk_min(mlx5_task->num_reqs - mlx5_task->num_completed_reqs, 1328 mlx5_task->num_ops); 1329 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp); 1330 bool sig_init, sig_check_gen = false; 1331 1332 num_ops = spdk_min(num_ops, qp_slot >> 1); 1333 if (spdk_unlikely(!num_ops)) { 1334 return -EINVAL; 1335 } 1336 /* Init signature on the first UMR */ 1337 sig_init = !mlx5_task->num_submitted_reqs; 1338 1339 /* 1340 * accel_mlx5_crc_task_fill_umr_sge() and accel_mlx5_fill_block_sge() advance an IOV during iteration 1341 * on it. We must copy accel_mlx5_iov_sgl to iterate twice or more on the same IOV. 1342 * 1343 * In the in-place case, we iterate on the source IOV three times. That's why we need two copies of 1344 * the source accel_mlx5_iov_sgl. 1345 * 1346 * In the out-of-place case, we iterate on the source IOV once and on the destination IOV two times. 1347 * So, we need one copy of the destination accel_mlx5_iov_sgl. 1348 */ 1349 if (mlx5_task->inplace) { 1350 accel_mlx5_iov_sgl_init(&umr_sgl, mlx5_task->src.iov, mlx5_task->src.iovcnt); 1351 umr_sgl_ptr = &umr_sgl; 1352 accel_mlx5_iov_sgl_init(&rdma_sgl, mlx5_task->src.iov, mlx5_task->src.iovcnt); 1353 } else { 1354 umr_sgl_ptr = &mlx5_task->src; 1355 accel_mlx5_iov_sgl_init(&rdma_sgl, mlx5_task->dst.iov, mlx5_task->dst.iovcnt); 1356 } 1357 mlx5_task->num_wrs = 0; 1358 for (i = 0; i < num_ops; i++) { 1359 /* 1360 * The last request may have only CRC. Skip UMR in this case because the MKey from 1361 * the previous request is used. 1362 */ 1363 if (umr_sgl_ptr->iovcnt == 0) { 1364 assert((mlx5_task->num_completed_reqs + i + 1) == mlx5_task->num_reqs); 1365 break; 1366 } 1367 sge_count = accel_mlx5_crc_task_fill_umr_sge(qp, sges, umr_sgl_ptr, task->src_domain, 1368 task->src_domain_ctx, &rdma_sgl, &umr_len[i]); 1369 if (spdk_unlikely(sge_count <= 0)) { 1370 rc = (sge_count == 0) ? -EINVAL : sge_count; 1371 SPDK_ERRLOG("failed set UMR sge, rc %d\n", rc); 1372 return rc; 1373 } 1374 if (umr_sgl_ptr->iovcnt == 0) { 1375 /* 1376 * We post RDMA without UMR if the last request has only CRC. We use an MKey from 1377 * the last UMR in this case. Since the last request can be postponed to the next 1378 * call of this function, we must save the MKey to the task structure. 
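		 * last_umr_len is reused below as umr_offset for the final CRC-only RDMA_READ,
		 * which is posted without a new UMR.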
1379 */ 1380 mlx5_task->last_umr_len = umr_len[i]; 1381 mlx5_task->last_mkey_idx = i; 1382 sig_check_gen = true; 1383 } 1384 rc = accel_mlx5_crc_task_configure_umr(mlx5_task, sges, sge_count, mlx5_task->mkeys[i], 1385 SPDK_MLX5_UMR_SIG_DOMAIN_WIRE, umr_len[i], sig_init, 1386 sig_check_gen); 1387 if (spdk_unlikely(rc)) { 1388 SPDK_ERRLOG("UMR configure failed with %d\n", rc); 1389 return rc; 1390 } 1391 sig_init = false; 1392 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task); 1393 dev->stats.sig_umrs++; 1394 } 1395 1396 if (spdk_unlikely(mlx5_task->psv->bits.error)) { 1397 rc = spdk_mlx5_qp_set_psv(qp->qp, mlx5_task->psv->psv_index, *mlx5_task->base.crc_dst, 0, 0); 1398 if (spdk_unlikely(rc)) { 1399 SPDK_ERRLOG("SET_PSV failed with %d\n", rc); 1400 return rc; 1401 } 1402 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task); 1403 } 1404 1405 for (i = 0; i < num_ops - 1; i++) { 1406 if (mlx5_task->inplace) { 1407 sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->src, umr_len[i], &remaining, 1408 task->src_domain, task->src_domain_ctx); 1409 } else { 1410 sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->dst, umr_len[i], &remaining, 1411 task->dst_domain, task->dst_domain_ctx); 1412 } 1413 if (spdk_unlikely(sge_count <= 0)) { 1414 rc = (sge_count == 0) ? -EINVAL : sge_count; 1415 SPDK_ERRLOG("failed set RDMA sge, rc %d\n", rc); 1416 return rc; 1417 } 1418 rc = spdk_mlx5_qp_rdma_read(qp->qp, sges, sge_count, 0, mlx5_task->mkeys[i]->mkey, 1419 0, rdma_fence); 1420 if (spdk_unlikely(rc)) { 1421 SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc); 1422 return rc; 1423 } 1424 mlx5_task->num_submitted_reqs++; 1425 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task); 1426 dev->stats.rdma_reads++; 1427 rdma_fence = SPDK_MLX5_WQE_CTRL_STRONG_ORDERING; 1428 } 1429 if ((mlx5_task->inplace && mlx5_task->src.iovcnt == 0) || (!mlx5_task->inplace && 1430 mlx5_task->dst.iovcnt == 0)) { 1431 /* 1432 * The last RDMA does not have any data, only CRC. It also does not have a paired Mkey. 1433 * The CRC is handled in the previous MKey in this case. 1434 */ 1435 sge_count = 0; 1436 umr_offset = mlx5_task->last_umr_len; 1437 } else { 1438 umr_offset = 0; 1439 mlx5_task->last_mkey_idx = i; 1440 if (mlx5_task->inplace) { 1441 sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->src, umr_len[i], &remaining, 1442 task->src_domain, task->src_domain_ctx); 1443 } else { 1444 sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->dst, umr_len[i], &remaining, 1445 task->dst_domain, task->dst_domain_ctx); 1446 } 1447 if (spdk_unlikely(sge_count <= 0)) { 1448 rc = (sge_count == 0) ? -EINVAL : sge_count; 1449 SPDK_ERRLOG("failed set RDMA sge, rc %d\n", rc); 1450 return rc; 1451 } 1452 assert(remaining == 0); 1453 } 1454 if ((mlx5_task->num_completed_reqs + i + 1) == mlx5_task->num_reqs) { 1455 /* Ensure that there is a free sge for the CRC destination. */ 1456 assert(sge_count < (int)ACCEL_MLX5_MAX_SGE); 1457 /* Add the crc destination to the end of sges. 
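		 * Same layout as in accel_mlx5_crc_task_process_one_req(): the CRC rides as the
		 * last SGE of the data transfer.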
*/ 1458 sges[sge_count].lkey = mlx5_task->psv->crc_lkey; 1459 sges[sge_count].addr = (uintptr_t)&mlx5_task->psv->crc; 1460 sges[sge_count++].length = sizeof(uint32_t); 1461 } 1462 rdma_fence |= SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE; 1463 rc = spdk_mlx5_qp_rdma_read(qp->qp, sges, sge_count, umr_offset, 1464 mlx5_task->mkeys[mlx5_task->last_mkey_idx]->mkey, 1465 (uint64_t)mlx5_task, rdma_fence); 1466 if (spdk_unlikely(rc)) { 1467 SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc); 1468 return rc; 1469 } 1470 mlx5_task->num_submitted_reqs++; 1471 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task); 1472 dev->stats.rdma_reads++; 1473 1474 return 0; 1475 } 1476 1477 static inline int 1478 accel_mlx5_crc_task_process(struct accel_mlx5_task *mlx5_task) 1479 { 1480 int rc; 1481 1482 assert(mlx5_task->mlx5_opcode == ACCEL_MLX5_OPC_CRC32C); 1483 1484 SPDK_DEBUGLOG(accel_mlx5, "begin, crc task, %p, reqs: total %u, submitted %u, completed %u\n", 1485 mlx5_task, mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs); 1486 1487 if (mlx5_task->num_reqs == 1) { 1488 rc = accel_mlx5_crc_task_process_one_req(mlx5_task); 1489 } else { 1490 rc = accel_mlx5_crc_task_process_multi_req(mlx5_task); 1491 } 1492 1493 if (rc == 0) { 1494 STAILQ_INSERT_TAIL(&mlx5_task->qp->in_hw, mlx5_task, link); 1495 SPDK_DEBUGLOG(accel_mlx5, "end, crc task, %p, reqs: total %u, submitted %u, completed %u\n", 1496 mlx5_task, mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, 1497 mlx5_task->num_completed_reqs); 1498 } 1499 1500 return rc; 1501 } 1502 1503 static inline int 1504 accel_mlx5_task_alloc_crc_ctx(struct accel_mlx5_task *task, uint32_t qp_slot) 1505 { 1506 struct accel_mlx5_qp *qp = task->qp; 1507 struct accel_mlx5_dev *dev = qp->dev; 1508 1509 if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(task, dev->sig_mkeys))) { 1510 SPDK_DEBUGLOG(accel_mlx5, "no mkeys in signature mkey pool, dev %s\n", 1511 dev->dev_ctx->context->device->name); 1512 dev->stats.nomem_mkey++; 1513 return -ENOMEM; 1514 } 1515 task->psv = spdk_mempool_get(dev->dev_ctx->psv_pool); 1516 if (spdk_unlikely(!task->psv)) { 1517 SPDK_DEBUGLOG(accel_mlx5, "no reqs in psv pool, dev %s\n", dev->dev_ctx->context->device->name); 1518 spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, task->mkeys, task->num_ops); 1519 task->num_ops = 0; 1520 dev->stats.nomem_mkey++; 1521 return -ENOMEM; 1522 } 1523 /* One extra slot is needed for SET_PSV WQE to reset the error state in PSV. */ 1524 if (spdk_unlikely(task->psv->bits.error)) { 1525 uint32_t n_slots = task->num_ops * 2 + 1; 1526 1527 if (qp_slot < n_slots) { 1528 spdk_mempool_put(dev->dev_ctx->psv_pool, task->psv); 1529 spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, task->mkeys, task->num_ops); 1530 dev->stats.nomem_qdepth++; 1531 task->num_ops = 0; 1532 return -ENOMEM; 1533 } 1534 } 1535 1536 return 0; 1537 } 1538 1539 static inline int 1540 accel_mlx5_crc_task_continue(struct accel_mlx5_task *task) 1541 { 1542 struct accel_mlx5_qp *qp = task->qp; 1543 struct accel_mlx5_dev *dev = qp->dev; 1544 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp); 1545 int rc; 1546 1547 assert(task->num_reqs > task->num_completed_reqs); 1548 if (task->num_ops == 0) { 1549 /* No mkeys allocated, try to allocate now. 
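		 * accel_mlx5_task_alloc_crc_ctx() grabs both the signature MKey bulk and the PSV object.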
*/ 1550 rc = accel_mlx5_task_alloc_crc_ctx(task, qp_slot); 1551 if (spdk_unlikely(rc)) { 1552 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 1553 return -ENOMEM; 1554 } 1555 } 1556 /* We need to post at least 1 UMR and 1 RDMA operation */ 1557 if (spdk_unlikely(qp_slot < 2)) { 1558 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 1559 dev->stats.nomem_qdepth++; 1560 return -ENOMEM; 1561 } 1562 1563 return accel_mlx5_crc_task_process(task); 1564 } 1565 1566 static inline uint32_t 1567 accel_mlx5_get_crc_task_count(struct iovec *src_iov, uint32_t src_iovcnt, struct iovec *dst_iov, 1568 uint32_t dst_iovcnt) 1569 { 1570 uint32_t src_idx = 0; 1571 uint32_t dst_idx = 0; 1572 uint32_t num_ops = 1; 1573 uint32_t num_src_sge = 1; 1574 uint32_t num_dst_sge = 1; 1575 size_t src_offset = 0; 1576 size_t dst_offset = 0; 1577 uint32_t num_sge; 1578 size_t src_len; 1579 size_t dst_len; 1580 1581 /* One operation is enough if both iovs fit into ACCEL_MLX5_MAX_SGE. One SGE is reserved for CRC on dst_iov. */ 1582 if (src_iovcnt <= ACCEL_MLX5_MAX_SGE && (dst_iovcnt + 1) <= ACCEL_MLX5_MAX_SGE) { 1583 return 1; 1584 } 1585 1586 while (src_idx < src_iovcnt && dst_idx < dst_iovcnt) { 1587 if (num_src_sge > ACCEL_MLX5_MAX_SGE || num_dst_sge > ACCEL_MLX5_MAX_SGE) { 1588 num_ops++; 1589 num_src_sge = 1; 1590 num_dst_sge = 1; 1591 } 1592 src_len = src_iov[src_idx].iov_len - src_offset; 1593 dst_len = dst_iov[dst_idx].iov_len - dst_offset; 1594 1595 if (src_len == dst_len) { 1596 num_src_sge++; 1597 num_dst_sge++; 1598 src_offset = 0; 1599 dst_offset = 0; 1600 src_idx++; 1601 dst_idx++; 1602 continue; 1603 } 1604 if (src_len < dst_len) { 1605 /* Advance src_iov to reach the point that corresponds to the end of the current dst_iov. */ 1606 num_sge = accel_mlx5_advance_iovec(&src_iov[src_idx], 1607 spdk_min(ACCEL_MLX5_MAX_SGE + 1 - num_src_sge, 1608 src_iovcnt - src_idx), 1609 &src_offset, &dst_len); 1610 src_idx += num_sge; 1611 num_src_sge += num_sge; 1612 if (dst_len != 0) { 1613 /* 1614 * ACCEL_MLX5_MAX_SGE is reached on src_iov, and dst_len bytes 1615 * are left on the current dst_iov. 1616 */ 1617 dst_offset = dst_iov[dst_idx].iov_len - dst_len; 1618 } else { 1619 /* The src_iov advance is completed, shift to the next dst_iov. */ 1620 dst_idx++; 1621 num_dst_sge++; 1622 dst_offset = 0; 1623 } 1624 } else { /* src_len > dst_len */ 1625 /* Advance dst_iov to reach the point that corresponds to the end of the current src_iov. */ 1626 num_sge = accel_mlx5_advance_iovec(&dst_iov[dst_idx], 1627 spdk_min(ACCEL_MLX5_MAX_SGE + 1 - num_dst_sge, 1628 dst_iovcnt - dst_idx), 1629 &dst_offset, &src_len); 1630 dst_idx += num_sge; 1631 num_dst_sge += num_sge; 1632 if (src_len != 0) { 1633 /* 1634 * ACCEL_MLX5_MAX_SGE is reached on dst_iov, and src_len bytes 1635 * are left on the current src_iov. 1636 */ 1637 src_offset = src_iov[src_idx].iov_len - src_len; 1638 } else { 1639 /* The dst_iov advance is completed, shift to the next src_iov. */ 1640 src_idx++; 1641 num_src_sge++; 1642 src_offset = 0; 1643 } 1644 } 1645 } 1646 /* An extra operation is needed if no space is left on dst_iov because CRC takes one SGE. */ 1647 if (num_dst_sge > ACCEL_MLX5_MAX_SGE) { 1648 num_ops++; 1649 } 1650 1651 /* The above loop must reach the end of both iovs simultaneously because their size is the same. 
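	 * (total src and dst lengths are equal for an out-of-place CRC task), which the
	 * asserts below verify.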
*/ 1652 assert(src_idx == src_iovcnt); 1653 assert(dst_idx == dst_iovcnt); 1654 assert(src_offset == 0); 1655 assert(dst_offset == 0); 1656 1657 return num_ops; 1658 } 1659 1660 static inline int 1661 accel_mlx5_crc_task_init(struct accel_mlx5_task *mlx5_task) 1662 { 1663 struct spdk_accel_task *task = &mlx5_task->base; 1664 struct accel_mlx5_qp *qp = mlx5_task->qp; 1665 uint32_t qp_slot = accel_mlx5_dev_get_available_slots(qp->dev, qp); 1666 int rc; 1667 1668 accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt); 1669 if (mlx5_task->inplace) { 1670 /* One entry is reserved for CRC */ 1671 mlx5_task->num_reqs = SPDK_CEIL_DIV(mlx5_task->src.iovcnt + 1, ACCEL_MLX5_MAX_SGE); 1672 } else { 1673 accel_mlx5_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt); 1674 mlx5_task->num_reqs = accel_mlx5_get_crc_task_count(mlx5_task->src.iov, mlx5_task->src.iovcnt, 1675 mlx5_task->dst.iov, mlx5_task->dst.iovcnt); 1676 } 1677 1678 rc = accel_mlx5_task_alloc_crc_ctx(mlx5_task, qp_slot); 1679 if (spdk_unlikely(rc)) { 1680 return rc; 1681 } 1682 1683 if (spdk_unlikely(qp_slot < 2)) { 1684 /* Queue is full, queue this task */ 1685 SPDK_DEBUGLOG(accel_mlx5, "dev %s qp %p is full\n", qp->dev->dev_ctx->context->device->name, 1686 mlx5_task->qp); 1687 qp->dev->stats.nomem_qdepth++; 1688 return -ENOMEM; 1689 } 1690 return 0; 1691 } 1692 1693 static inline int 1694 accel_mlx5_crypto_mkey_task_init(struct accel_mlx5_task *mlx5_task) 1695 { 1696 struct spdk_accel_task *task = &mlx5_task->base; 1697 struct accel_mlx5_qp *qp = mlx5_task->qp; 1698 struct accel_mlx5_dev *dev = qp->dev; 1699 uint32_t num_blocks; 1700 int rc; 1701 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp); 1702 bool crypto_key_ok; 1703 1704 if (spdk_unlikely(task->s.iovcnt > ACCEL_MLX5_MAX_SGE)) { 1705 /* With `external mkey` we can't split task or register several UMRs */ 1706 SPDK_ERRLOG("src buffer is too fragmented\n"); 1707 return -EINVAL; 1708 } 1709 if (spdk_unlikely(task->src_domain == spdk_accel_get_memory_domain())) { 1710 SPDK_ERRLOG("accel domain is not supported\n"); 1711 return -ENOTSUP; 1712 } 1713 if (spdk_unlikely(spdk_accel_sequence_next_task(task) != NULL)) { 1714 SPDK_ERRLOG("Mkey registration is only supported for single task\n"); 1715 return -ENOTSUP; 1716 } 1717 1718 crypto_key_ok = (task->crypto_key && task->crypto_key->module_if == &g_accel_mlx5.module && 1719 task->crypto_key->priv); 1720 if (spdk_unlikely(!crypto_key_ok)) { 1721 SPDK_ERRLOG("Wrong crypto key provided\n"); 1722 return -EINVAL; 1723 } 1724 if (spdk_unlikely(task->nbytes % mlx5_task->base.block_size != 0)) { 1725 SPDK_ERRLOG("src length %"PRIu64" is not a multiple of the block size %u\n", task->nbytes, 1726 mlx5_task->base.block_size); 1727 return -EINVAL; 1728 } 1729 1730 num_blocks = task->nbytes / mlx5_task->base.block_size; 1731 if (dev->crypto_multi_block) { 1732 if (spdk_unlikely(g_accel_mlx5.attr.crypto_split_blocks && 1733 num_blocks > g_accel_mlx5.attr.crypto_split_blocks)) { 1734 SPDK_ERRLOG("Number of blocks in task %u exceeds split threshold %u, can't handle\n", 1735 num_blocks, g_accel_mlx5.attr.crypto_split_blocks); 1736 return -E2BIG; 1737 } 1738 } else if (num_blocks != 1) { 1739 SPDK_ERRLOG("Task contains more than 1 block, can't handle\n"); 1740 return -E2BIG; 1741 } 1742 1743 accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt); 1744 mlx5_task->num_blocks = num_blocks; 1745 mlx5_task->num_processed_blocks = 0; 1746 mlx5_task->num_reqs = 1; 1747 mlx5_task->blocks_per_req = num_blocks; 
1748 1749 if (spdk_unlikely(qp_slot == 0)) { 1750 mlx5_task->num_ops = 0; 1751 dev->stats.nomem_qdepth++; 1752 return -ENOMEM; 1753 } 1754 rc = spdk_mlx5_mkey_pool_get_bulk(dev->crypto_mkeys, mlx5_task->mkeys, 1); 1755 if (spdk_unlikely(rc)) { 1756 mlx5_task->num_ops = 0; 1757 dev->stats.nomem_mkey++; 1758 return -ENOMEM; 1759 } 1760 mlx5_task->num_ops = 1; 1761 1762 SPDK_DEBUGLOG(accel_mlx5, "crypto_mkey task num_blocks %u, src_len %zu\n", mlx5_task->num_reqs, 1763 task->nbytes); 1764 1765 return 0; 1766 } 1767 1768 static inline int 1769 accel_mlx5_crypto_mkey_task_process(struct accel_mlx5_task *mlx5_task) 1770 { 1771 struct accel_mlx5_sge sge; 1772 struct spdk_accel_task *task = &mlx5_task->base; 1773 struct accel_mlx5_qp *qp = mlx5_task->qp; 1774 struct accel_mlx5_dev *dev = qp->dev; 1775 struct spdk_mlx5_crypto_dek_data dek_data; 1776 int rc; 1777 1778 if (spdk_unlikely(!mlx5_task->num_ops)) { 1779 return -EINVAL; 1780 } 1781 SPDK_DEBUGLOG(accel_mlx5, "begin, task %p, dst_domain_ctx %p\n", mlx5_task, task->dst_domain_ctx); 1782 1783 mlx5_task->num_wrs = 0; 1784 rc = spdk_mlx5_crypto_get_dek_data(task->crypto_key->priv, dev->dev_ctx->pd, &dek_data); 1785 if (spdk_unlikely(rc)) { 1786 return rc; 1787 } 1788 1789 rc = accel_mlx5_configure_crypto_umr(mlx5_task, &sge, mlx5_task->mkeys[0]->mkey, 1790 mlx5_task->num_blocks, &dek_data, (uint64_t)mlx5_task, SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE); 1791 if (spdk_unlikely(rc)) { 1792 SPDK_ERRLOG("UMR configure failed with %d\n", rc); 1793 return rc; 1794 } 1795 dev->stats.crypto_umrs++; 1796 mlx5_task->num_submitted_reqs++; 1797 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task); 1798 STAILQ_INSERT_TAIL(&qp->in_hw, mlx5_task, link); 1799 1800 SPDK_DEBUGLOG(accel_mlx5, "end, task %p, dst_domain_ctx %p\n", mlx5_task, task->dst_domain_ctx); 1801 1802 return 0; 1803 } 1804 1805 static inline int 1806 accel_mlx5_crypto_mkey_task_continue(struct accel_mlx5_task *task) 1807 { 1808 struct accel_mlx5_qp *qp = task->qp; 1809 struct accel_mlx5_dev *dev = qp->dev; 1810 int rc; 1811 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp); 1812 1813 if (task->num_ops == 0) { 1814 rc = spdk_mlx5_mkey_pool_get_bulk(dev->crypto_mkeys, task->mkeys, 1); 1815 if (spdk_unlikely(rc)) { 1816 dev->stats.nomem_mkey++; 1817 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 1818 return -ENOMEM; 1819 } 1820 task->num_ops = 1; 1821 } 1822 if (spdk_unlikely(qp_slot == 0)) { 1823 dev->stats.nomem_qdepth++; 1824 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 1825 return -ENOMEM; 1826 } 1827 return accel_mlx5_crypto_mkey_task_process(task); 1828 } 1829 1830 static inline void 1831 accel_mlx5_crypto_mkey_task_complete(struct accel_mlx5_task *mlx5_task) 1832 { 1833 struct accel_mlx5_dev *dev = mlx5_task->qp->dev; 1834 1835 assert(mlx5_task->num_ops); 1836 assert(mlx5_task->num_processed_blocks == mlx5_task->num_blocks); 1837 assert(mlx5_task->base.seq); 1838 1839 spdk_mlx5_mkey_pool_put_bulk(dev->crypto_mkeys, mlx5_task->mkeys, 1); 1840 spdk_accel_task_complete(&mlx5_task->base, 0); 1841 } 1842 1843 static int 1844 accel_mlx5_task_op_not_implemented(struct accel_mlx5_task *mlx5_task) 1845 { 1846 SPDK_ERRLOG("wrong function called\n"); 1847 SPDK_UNREACHABLE(); 1848 } 1849 1850 static void 1851 accel_mlx5_task_op_not_implemented_v(struct accel_mlx5_task *mlx5_task) 1852 { 1853 SPDK_ERRLOG("wrong function called\n"); 1854 SPDK_UNREACHABLE(); 1855 } 1856 1857 static int 1858 accel_mlx5_task_op_not_supported(struct accel_mlx5_task *mlx5_task) 1859 { 1860 SPDK_ERRLOG("Unsupported 
opcode %d\n", mlx5_task->base.op_code); 1861 1862 return -ENOTSUP; 1863 } 1864 1865 static struct accel_mlx5_task_operations g_accel_mlx5_tasks_ops[] = { 1866 [ACCEL_MLX5_OPC_COPY] = { 1867 .init = accel_mlx5_copy_task_init, 1868 .process = accel_mlx5_copy_task_process, 1869 .cont = accel_mlx5_copy_task_continue, 1870 .complete = accel_mlx5_copy_task_complete, 1871 }, 1872 [ACCEL_MLX5_OPC_CRYPTO] = { 1873 .init = accel_mlx5_crypto_task_init, 1874 .process = accel_mlx5_crypto_task_process, 1875 .cont = accel_mlx5_crypto_task_continue, 1876 .complete = accel_mlx5_crypto_task_complete, 1877 }, 1878 [ACCEL_MLX5_OPC_CRC32C] = { 1879 .init = accel_mlx5_crc_task_init, 1880 .process = accel_mlx5_crc_task_process, 1881 .cont = accel_mlx5_crc_task_continue, 1882 .complete = accel_mlx5_crc_task_complete, 1883 }, 1884 [ACCEL_MLX5_OPC_CRYPTO_MKEY] = { 1885 .init = accel_mlx5_crypto_mkey_task_init, 1886 .process = accel_mlx5_crypto_mkey_task_process, 1887 .cont = accel_mlx5_crypto_mkey_task_continue, 1888 .complete = accel_mlx5_crypto_mkey_task_complete, 1889 }, 1890 [ACCEL_MLX5_OPC_LAST] = { 1891 .init = accel_mlx5_task_op_not_supported, 1892 .process = accel_mlx5_task_op_not_implemented, 1893 .cont = accel_mlx5_task_op_not_implemented, 1894 .complete = accel_mlx5_task_op_not_implemented_v 1895 }, 1896 }; 1897 1898 static void 1899 accel_mlx5_memory_domain_transfer_cpl(void *ctx, int rc) 1900 { 1901 struct accel_mlx5_task *task = ctx; 1902 1903 assert(task->needs_data_transfer); 1904 task->needs_data_transfer = 0; 1905 1906 if (spdk_likely(!rc)) { 1907 SPDK_DEBUGLOG(accel_mlx5, "task %p, data transfer done\n", task); 1908 accel_mlx5_task_complete(task); 1909 } else { 1910 SPDK_ERRLOG("Task %p, data transfer failed, rc %d\n", task, rc); 1911 accel_mlx5_task_fail(task, rc); 1912 } 1913 } 1914 1915 static inline void 1916 accel_mlx5_memory_domain_transfer(struct accel_mlx5_task *task) 1917 { 1918 struct spdk_memory_domain_translation_result translation; 1919 struct spdk_accel_task *base = &task->base; 1920 struct accel_mlx5_dev *dev = task->qp->dev; 1921 int rc; 1922 1923 assert(task->mlx5_opcode == ACCEL_MLX5_OPC_CRYPTO_MKEY); 1924 /* UMR is an offset in the addess space, so the start address is 0 */ 1925 translation.iov.iov_base = NULL; 1926 translation.iov.iov_len = base->nbytes; 1927 translation.iov_count = 1; 1928 translation.size = sizeof(translation); 1929 translation.rdma.rkey = task->mkeys[0]->mkey; 1930 translation.rdma.lkey = task->mkeys[0]->mkey; 1931 1932 SPDK_DEBUGLOG(accel_mlx5, "start transfer, task %p, dst_domain_ctx %p, mkey %u\n", task, 1933 task->base.dst_domain_ctx, task->mkeys[0]->mkey); 1934 rc = spdk_memory_domain_transfer_data(base->dst_domain, base->dst_domain_ctx, &translation.iov, 1, 1935 dev->dev_ctx->domain, task, &translation.iov, 1, &translation, 1936 accel_mlx5_memory_domain_transfer_cpl, task); 1937 if (spdk_unlikely(rc)) { 1938 SPDK_ERRLOG("Failed to start data transfer, task %p rc %d\n", task, rc); 1939 accel_mlx5_task_fail(task, rc); 1940 } 1941 } 1942 1943 static inline void 1944 accel_mlx5_task_complete(struct accel_mlx5_task *task) 1945 { 1946 struct spdk_accel_sequence *seq = task->base.seq; 1947 struct spdk_accel_task *next; 1948 bool driver_seq; 1949 1950 if (task->needs_data_transfer) { 1951 accel_mlx5_memory_domain_transfer(task); 1952 return; 1953 } 1954 1955 next = spdk_accel_sequence_next_task(&task->base); 1956 driver_seq = task->driver_seq; 1957 1958 assert(task->num_reqs == task->num_completed_reqs); 1959 SPDK_DEBUGLOG(accel_mlx5, "Complete task %p, opc 
%d\n", task, task->mlx5_opcode); 1960 1961 g_accel_mlx5_tasks_ops[task->mlx5_opcode].complete(task); 1962 1963 if (driver_seq) { 1964 struct spdk_io_channel *ch = task->qp->dev->ch; 1965 1966 assert(seq); 1967 if (next) { 1968 accel_mlx5_execute_sequence(ch, seq); 1969 } else { 1970 spdk_accel_sequence_continue(seq); 1971 } 1972 } 1973 } 1974 1975 static inline int 1976 accel_mlx5_task_continue(struct accel_mlx5_task *task) 1977 { 1978 struct accel_mlx5_qp *qp = task->qp; 1979 struct accel_mlx5_dev *dev = qp->dev; 1980 1981 if (spdk_unlikely(qp->recovering)) { 1982 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 1983 return 0; 1984 } 1985 1986 return g_accel_mlx5_tasks_ops[task->mlx5_opcode].cont(task); 1987 } 1988 static inline void 1989 accel_mlx5_task_init_opcode(struct accel_mlx5_task *mlx5_task) 1990 { 1991 uint8_t base_opcode = mlx5_task->base.op_code; 1992 1993 switch (base_opcode) { 1994 case SPDK_ACCEL_OPC_COPY: 1995 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_COPY; 1996 break; 1997 case SPDK_ACCEL_OPC_ENCRYPT: 1998 assert(g_accel_mlx5.crypto_supported); 1999 mlx5_task->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_WIRE; 2000 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO; 2001 break; 2002 case SPDK_ACCEL_OPC_DECRYPT: 2003 assert(g_accel_mlx5.crypto_supported); 2004 mlx5_task->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_MEMORY; 2005 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO; 2006 break; 2007 case SPDK_ACCEL_OPC_CRC32C: 2008 mlx5_task->inplace = 1; 2009 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRC32C; 2010 break; 2011 case SPDK_ACCEL_OPC_COPY_CRC32C: 2012 mlx5_task->inplace = 0; 2013 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRC32C; 2014 break; 2015 default: 2016 SPDK_ERRLOG("wrong opcode %d\n", base_opcode); 2017 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_LAST; 2018 } 2019 } 2020 2021 static void 2022 accel_mlx5_post_poller_handler(void *fn_arg) 2023 { 2024 struct accel_mlx5_io_channel *ch = fn_arg; 2025 struct accel_mlx5_dev *dev; 2026 uint32_t i; 2027 2028 for (i = 0; i < ch->num_devs; i++) { 2029 dev = &ch->devs[i]; 2030 2031 if (dev->qp.ring_db) { 2032 spdk_mlx5_qp_complete_send(dev->qp.qp); 2033 dev->qp.ring_db = false; 2034 } 2035 } 2036 2037 ch->poller_handler_registered = false; 2038 } 2039 2040 static inline int 2041 _accel_mlx5_submit_tasks(struct accel_mlx5_io_channel *accel_ch, struct spdk_accel_task *task) 2042 { 2043 struct accel_mlx5_task *mlx5_task = SPDK_CONTAINEROF(task, struct accel_mlx5_task, base); 2044 struct accel_mlx5_dev *dev = mlx5_task->qp->dev; 2045 int rc; 2046 2047 /* We should not receive any tasks if the module was not enabled */ 2048 assert(g_accel_mlx5.enabled); 2049 2050 dev->stats.opcodes[mlx5_task->mlx5_opcode]++; 2051 rc = g_accel_mlx5_tasks_ops[mlx5_task->mlx5_opcode].init(mlx5_task); 2052 if (spdk_unlikely(rc)) { 2053 if (rc == -ENOMEM) { 2054 SPDK_DEBUGLOG(accel_mlx5, "no reqs to handle new task %p (required %u), put to queue\n", mlx5_task, 2055 mlx5_task->num_reqs); 2056 STAILQ_INSERT_TAIL(&dev->nomem, mlx5_task, link); 2057 return 0; 2058 } 2059 SPDK_ERRLOG("Task opc %d init failed, rc %d\n", task->op_code, rc); 2060 return rc; 2061 } 2062 2063 if (spdk_unlikely(mlx5_task->qp->recovering)) { 2064 STAILQ_INSERT_TAIL(&dev->nomem, mlx5_task, link); 2065 return 0; 2066 } 2067 2068 if (!accel_ch->poller_handler_registered) { 2069 spdk_thread_register_post_poller_handler(accel_mlx5_post_poller_handler, accel_ch); 2070 /* Function above may fail to register our handler, in that case we ring doorbells on next polling 2071 * cycle. 
That is less efficient but still works */ 2072 accel_ch->poller_handler_registered = true; 2073 } 2074 2075 return g_accel_mlx5_tasks_ops[mlx5_task->mlx5_opcode].process(mlx5_task); 2076 } 2077 2078 static inline void 2079 accel_mlx5_task_assign_qp(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_io_channel *accel_ch) 2080 { 2081 struct accel_mlx5_dev *dev; 2082 2083 dev = &accel_ch->devs[accel_ch->dev_idx]; 2084 accel_ch->dev_idx++; 2085 if (accel_ch->dev_idx == accel_ch->num_devs) { 2086 accel_ch->dev_idx = 0; 2087 } 2088 2089 mlx5_task->qp = &dev->qp; 2090 } 2091 2092 static inline void 2093 accel_mlx5_task_reset(struct accel_mlx5_task *mlx5_task) 2094 { 2095 mlx5_task->num_completed_reqs = 0; 2096 mlx5_task->num_submitted_reqs = 0; 2097 mlx5_task->num_ops = 0; 2098 mlx5_task->num_processed_blocks = 0; 2099 mlx5_task->raw = 0; 2100 } 2101 2102 static int 2103 accel_mlx5_submit_tasks(struct spdk_io_channel *ch, struct spdk_accel_task *task) 2104 { 2105 struct accel_mlx5_task *mlx5_task = SPDK_CONTAINEROF(task, struct accel_mlx5_task, base); 2106 struct accel_mlx5_io_channel *accel_ch = spdk_io_channel_get_ctx(ch); 2107 2108 accel_mlx5_task_assign_qp(mlx5_task, accel_ch); 2109 accel_mlx5_task_reset(mlx5_task); 2110 accel_mlx5_task_init_opcode(mlx5_task); 2111 2112 return _accel_mlx5_submit_tasks(accel_ch, task); 2113 } 2114 2115 static void accel_mlx5_recover_qp(struct accel_mlx5_qp *qp); 2116 2117 static int 2118 accel_mlx5_recover_qp_poller(void *arg) 2119 { 2120 struct accel_mlx5_qp *qp = arg; 2121 2122 spdk_poller_unregister(&qp->recover_poller); 2123 accel_mlx5_recover_qp(qp); 2124 return SPDK_POLLER_BUSY; 2125 } 2126 2127 static void 2128 accel_mlx5_recover_qp(struct accel_mlx5_qp *qp) 2129 { 2130 struct accel_mlx5_dev *dev = qp->dev; 2131 struct spdk_mlx5_qp_attr mlx5_qp_attr = {}; 2132 int rc; 2133 2134 SPDK_NOTICELOG("Recovering qp %p, core %u\n", qp, spdk_env_get_current_core()); 2135 if (qp->qp) { 2136 spdk_mlx5_qp_destroy(qp->qp); 2137 qp->qp = NULL; 2138 } 2139 2140 mlx5_qp_attr.cap.max_send_wr = g_accel_mlx5.attr.qp_size; 2141 mlx5_qp_attr.cap.max_recv_wr = 0; 2142 mlx5_qp_attr.cap.max_send_sge = ACCEL_MLX5_MAX_SGE; 2143 mlx5_qp_attr.cap.max_inline_data = sizeof(struct ibv_sge) * ACCEL_MLX5_MAX_SGE; 2144 2145 rc = spdk_mlx5_qp_create(dev->dev_ctx->pd, dev->cq, &mlx5_qp_attr, &qp->qp); 2146 if (rc) { 2147 SPDK_ERRLOG("Failed to create mlx5 dma QP, rc %d. 
Retry in %d usec\n", 2148 rc, ACCEL_MLX5_RECOVER_POLLER_PERIOD_US); 2149 qp->recover_poller = SPDK_POLLER_REGISTER(accel_mlx5_recover_qp_poller, qp, 2150 ACCEL_MLX5_RECOVER_POLLER_PERIOD_US); 2151 return; 2152 } 2153 2154 qp->recovering = false; 2155 } 2156 2157 static inline void 2158 accel_mlx5_process_error_cpl(struct spdk_mlx5_cq_completion *wc, struct accel_mlx5_task *task) 2159 { 2160 struct accel_mlx5_qp *qp = task->qp; 2161 2162 if (wc->status != IBV_WC_WR_FLUSH_ERR) { 2163 SPDK_WARNLOG("RDMA: qp %p, task %p, WC status %d, core %u\n", 2164 qp, task, wc->status, spdk_env_get_current_core()); 2165 } else { 2166 SPDK_DEBUGLOG(accel_mlx5, 2167 "RDMA: qp %p, task %p, WC status %d, core %u\n", 2168 qp, task, wc->status, spdk_env_get_current_core()); 2169 } 2170 2171 qp->recovering = true; 2172 assert(task->num_completed_reqs <= task->num_submitted_reqs); 2173 if (task->num_completed_reqs == task->num_submitted_reqs) { 2174 STAILQ_REMOVE_HEAD(&qp->in_hw, link); 2175 accel_mlx5_task_fail(task, -EIO); 2176 } 2177 } 2178 2179 static inline int64_t 2180 accel_mlx5_poll_cq(struct accel_mlx5_dev *dev) 2181 { 2182 struct spdk_mlx5_cq_completion wc[ACCEL_MLX5_MAX_WC]; 2183 struct accel_mlx5_task *task; 2184 struct accel_mlx5_qp *qp; 2185 int reaped, i, rc; 2186 uint16_t completed; 2187 2188 dev->stats.polls++; 2189 reaped = spdk_mlx5_cq_poll_completions(dev->cq, wc, ACCEL_MLX5_MAX_WC); 2190 if (spdk_unlikely(reaped < 0)) { 2191 SPDK_ERRLOG("Error polling CQ! (%d): %s\n", errno, spdk_strerror(errno)); 2192 return reaped; 2193 } else if (reaped == 0) { 2194 dev->stats.idle_polls++; 2195 return 0; 2196 } 2197 dev->stats.completions += reaped; 2198 2199 SPDK_DEBUGLOG(accel_mlx5, "Reaped %d cpls on dev %s\n", reaped, 2200 dev->dev_ctx->context->device->name); 2201 2202 for (i = 0; i < reaped; i++) { 2203 if (spdk_unlikely(!wc[i].wr_id)) { 2204 /* Unsignaled completion with error, ignore */ 2205 continue; 2206 } 2207 task = (struct accel_mlx5_task *)wc[i].wr_id; 2208 qp = task->qp; 2209 assert(task == STAILQ_FIRST(&qp->in_hw) && "submission mismatch"); 2210 assert(task->num_submitted_reqs > task->num_completed_reqs); 2211 completed = task->num_submitted_reqs - task->num_completed_reqs; 2212 assert((uint32_t)task->num_completed_reqs + completed <= UINT16_MAX); 2213 task->num_completed_reqs += completed; 2214 assert(qp->wrs_submitted >= task->num_wrs); 2215 qp->wrs_submitted -= task->num_wrs; 2216 assert(dev->wrs_in_cq > 0); 2217 dev->wrs_in_cq--; 2218 2219 if (spdk_unlikely(wc[i].status)) { 2220 accel_mlx5_process_error_cpl(&wc[i], task); 2221 if (qp->wrs_submitted == 0) { 2222 assert(STAILQ_EMPTY(&qp->in_hw)); 2223 accel_mlx5_recover_qp(qp); 2224 } 2225 continue; 2226 } 2227 2228 SPDK_DEBUGLOG(accel_mlx5, "task %p, remaining %u\n", task, 2229 task->num_reqs - task->num_completed_reqs); 2230 if (task->num_completed_reqs == task->num_reqs) { 2231 STAILQ_REMOVE_HEAD(&qp->in_hw, link); 2232 accel_mlx5_task_complete(task); 2233 } else { 2234 assert(task->num_submitted_reqs < task->num_reqs); 2235 assert(task->num_completed_reqs == task->num_submitted_reqs); 2236 STAILQ_REMOVE_HEAD(&qp->in_hw, link); 2237 rc = accel_mlx5_task_continue(task); 2238 if (spdk_unlikely(rc)) { 2239 if (rc != -ENOMEM) { 2240 accel_mlx5_task_fail(task, rc); 2241 } 2242 } 2243 } 2244 } 2245 2246 return reaped; 2247 } 2248 2249 static inline void 2250 accel_mlx5_resubmit_nomem_tasks(struct accel_mlx5_dev *dev) 2251 { 2252 struct accel_mlx5_task *task, *tmp, *last; 2253 int rc; 2254 2255 last = STAILQ_LAST(&dev->nomem, accel_mlx5_task, 
link); 2256 STAILQ_FOREACH_SAFE(task, &dev->nomem, link, tmp) { 2257 STAILQ_REMOVE_HEAD(&dev->nomem, link); 2258 rc = accel_mlx5_task_continue(task); 2259 if (spdk_unlikely(rc)) { 2260 if (rc != -ENOMEM) { 2261 accel_mlx5_task_fail(task, rc); 2262 } 2263 break; 2264 } 2265 /* If qpair is recovering, task is added back to the nomem list and 0 is returned. In that case we 2266 * need a special condition to iterate the list once and stop this FOREACH loop */ 2267 if (task == last) { 2268 break; 2269 } 2270 } 2271 } 2272 2273 static int 2274 accel_mlx5_poller(void *ctx) 2275 { 2276 struct accel_mlx5_io_channel *ch = ctx; 2277 struct accel_mlx5_dev *dev; 2278 2279 int64_t completions = 0, rc; 2280 uint32_t i; 2281 2282 /* reaped completions may register a post poller handler, that makes no sense in the scope of our own poller */ 2283 ch->poller_handler_registered = true; 2284 for (i = 0; i < ch->num_devs; i++) { 2285 dev = &ch->devs[i]; 2286 if (dev->wrs_in_cq) { 2287 rc = accel_mlx5_poll_cq(dev); 2288 if (spdk_unlikely(rc < 0)) { 2289 SPDK_ERRLOG("Error %"PRId64" on CQ, dev %s\n", rc, dev->dev_ctx->context->device->name); 2290 } 2291 completions += rc; 2292 if (dev->qp.ring_db) { 2293 spdk_mlx5_qp_complete_send(dev->qp.qp); 2294 dev->qp.ring_db = false; 2295 } 2296 } 2297 if (!STAILQ_EMPTY(&dev->nomem)) { 2298 accel_mlx5_resubmit_nomem_tasks(dev); 2299 } 2300 } 2301 ch->poller_handler_registered = false; 2302 2303 return !!completions; 2304 } 2305 2306 static bool 2307 accel_mlx5_supports_opcode(enum spdk_accel_opcode opc) 2308 { 2309 assert(g_accel_mlx5.enabled); 2310 2311 switch (opc) { 2312 case SPDK_ACCEL_OPC_COPY: 2313 return true; 2314 case SPDK_ACCEL_OPC_ENCRYPT: 2315 case SPDK_ACCEL_OPC_DECRYPT: 2316 return g_accel_mlx5.crypto_supported; 2317 case SPDK_ACCEL_OPC_CRC32C: 2318 case SPDK_ACCEL_OPC_COPY_CRC32C: 2319 return g_accel_mlx5.crc32c_supported; 2320 default: 2321 return false; 2322 } 2323 } 2324 2325 static struct spdk_io_channel * 2326 accel_mlx5_get_io_channel(void) 2327 { 2328 assert(g_accel_mlx5.enabled); 2329 return spdk_get_io_channel(&g_accel_mlx5); 2330 } 2331 2332 static int 2333 accel_mlx5_create_qp(struct accel_mlx5_dev *dev, struct accel_mlx5_qp *qp) 2334 { 2335 struct spdk_mlx5_qp_attr mlx5_qp_attr = {}; 2336 int rc; 2337 2338 mlx5_qp_attr.cap.max_send_wr = g_accel_mlx5.attr.qp_size; 2339 mlx5_qp_attr.cap.max_recv_wr = 0; 2340 mlx5_qp_attr.cap.max_send_sge = ACCEL_MLX5_MAX_SGE; 2341 mlx5_qp_attr.cap.max_inline_data = sizeof(struct ibv_sge) * ACCEL_MLX5_MAX_SGE; 2342 2343 rc = spdk_mlx5_qp_create(dev->dev_ctx->pd, dev->cq, &mlx5_qp_attr, &qp->qp); 2344 if (rc) { 2345 return rc; 2346 } 2347 2348 STAILQ_INIT(&qp->in_hw); 2349 qp->dev = dev; 2350 qp->verbs_qp = spdk_mlx5_qp_get_verbs_qp(qp->qp); 2351 assert(qp->verbs_qp); 2352 qp->wrs_max = g_accel_mlx5.attr.qp_size; 2353 2354 return 0; 2355 } 2356 2357 static void 2358 accel_mlx5_add_stats(struct accel_mlx5_stats *stats, const struct accel_mlx5_stats *to_add) 2359 { 2360 int i; 2361 2362 stats->crypto_umrs += to_add->crypto_umrs; 2363 stats->sig_umrs += to_add->sig_umrs; 2364 stats->rdma_reads += to_add->rdma_reads; 2365 stats->rdma_writes += to_add->rdma_writes; 2366 stats->polls += to_add->polls; 2367 stats->idle_polls += to_add->idle_polls; 2368 stats->completions += to_add->completions; 2369 stats->nomem_qdepth += to_add->nomem_qdepth; 2370 stats->nomem_mkey += to_add->nomem_mkey; 2371 for (i = 0; i < ACCEL_MLX5_OPC_LAST; i++) { 2372 stats->opcodes[i] += to_add->opcodes[i]; 2373 } 2374 } 2375 2376 static void 2377 
accel_mlx5_destroy_cb(void *io_device, void *ctx_buf) 2378 { 2379 struct accel_mlx5_io_channel *ch = ctx_buf; 2380 struct accel_mlx5_dev *dev; 2381 uint32_t i; 2382 2383 spdk_poller_unregister(&ch->poller); 2384 for (i = 0; i < ch->num_devs; i++) { 2385 dev = &ch->devs[i]; 2386 spdk_mlx5_qp_destroy(dev->qp.qp); 2387 if (dev->cq) { 2388 spdk_mlx5_cq_destroy(dev->cq); 2389 } 2390 spdk_poller_unregister(&dev->qp.recover_poller); 2391 if (dev->crypto_mkeys) { 2392 spdk_mlx5_mkey_pool_put_ref(dev->crypto_mkeys); 2393 } 2394 if (dev->sig_mkeys) { 2395 spdk_mlx5_mkey_pool_put_ref(dev->sig_mkeys); 2396 } 2397 spdk_rdma_utils_free_mem_map(&dev->mmap); 2398 spdk_spin_lock(&g_accel_mlx5.lock); 2399 accel_mlx5_add_stats(&g_accel_mlx5.stats, &dev->stats); 2400 spdk_spin_unlock(&g_accel_mlx5.lock); 2401 } 2402 free(ch->devs); 2403 } 2404 2405 static int 2406 accel_mlx5_create_cb(void *io_device, void *ctx_buf) 2407 { 2408 struct spdk_mlx5_cq_attr cq_attr = {}; 2409 struct accel_mlx5_io_channel *ch = ctx_buf; 2410 struct accel_mlx5_dev_ctx *dev_ctx; 2411 struct accel_mlx5_dev *dev; 2412 uint32_t i; 2413 int rc; 2414 2415 ch->devs = calloc(g_accel_mlx5.num_ctxs, sizeof(*ch->devs)); 2416 if (!ch->devs) { 2417 SPDK_ERRLOG("Memory allocation failed\n"); 2418 return -ENOMEM; 2419 } 2420 2421 for (i = 0; i < g_accel_mlx5.num_ctxs; i++) { 2422 dev_ctx = &g_accel_mlx5.dev_ctxs[i]; 2423 dev = &ch->devs[i]; 2424 dev->dev_ctx = dev_ctx; 2425 2426 if (dev_ctx->crypto_mkeys) { 2427 dev->crypto_mkeys = spdk_mlx5_mkey_pool_get_ref(dev_ctx->pd, SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO); 2428 if (!dev->crypto_mkeys) { 2429 SPDK_ERRLOG("Failed to get crypto mkey pool channel, dev %s\n", dev_ctx->context->device->name); 2430 /* Should not happen since mkey pool is created on accel_mlx5 initialization. 2431 * We should not be here if pool creation failed */ 2432 assert(0); 2433 goto err_out; 2434 } 2435 } 2436 if (dev_ctx->sig_mkeys) { 2437 dev->sig_mkeys = spdk_mlx5_mkey_pool_get_ref(dev_ctx->pd, SPDK_MLX5_MKEY_POOL_FLAG_SIGNATURE); 2438 if (!dev->sig_mkeys) { 2439 SPDK_ERRLOG("Failed to get sig mkey pool channel, dev %s\n", dev_ctx->context->device->name); 2440 /* Should not happen since mkey pool is created on accel_mlx5 initialization. 2441 * We should not be here if pool creation failed */ 2442 assert(0); 2443 goto err_out; 2444 } 2445 } 2446 2447 memset(&cq_attr, 0, sizeof(cq_attr)); 2448 cq_attr.cqe_cnt = g_accel_mlx5.attr.qp_size; 2449 cq_attr.cqe_size = 64; 2450 cq_attr.cq_context = dev; 2451 2452 ch->num_devs++; 2453 rc = spdk_mlx5_cq_create(dev_ctx->pd, &cq_attr, &dev->cq); 2454 if (rc) { 2455 SPDK_ERRLOG("Failed to create mlx5 CQ, rc %d\n", rc); 2456 goto err_out; 2457 } 2458 2459 rc = accel_mlx5_create_qp(dev, &dev->qp); 2460 if (rc) { 2461 SPDK_ERRLOG("Failed to create mlx5 QP, rc %d\n", rc); 2462 goto err_out; 2463 } 2464 2465 dev->mmap = spdk_rdma_utils_create_mem_map(dev_ctx->pd, NULL, 2466 IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE); 2467 if (!dev->mmap) { 2468 SPDK_ERRLOG("Failed to create memory map\n"); 2469 rc = -ENOMEM; 2470 goto err_out; 2471 } 2472 dev->crypto_multi_block = dev_ctx->crypto_multi_block; 2473 dev->crypto_split_blocks = dev_ctx->crypto_multi_block ? 
g_accel_mlx5.attr.crypto_split_blocks : 0; 2474 dev->wrs_in_cq_max = g_accel_mlx5.attr.qp_size; 2475 dev->ch = spdk_io_channel_from_ctx(ctx_buf); 2476 STAILQ_INIT(&dev->nomem); 2477 } 2478 2479 ch->poller = SPDK_POLLER_REGISTER(accel_mlx5_poller, ch, 0); 2480 2481 return 0; 2482 2483 err_out: 2484 accel_mlx5_destroy_cb(&g_accel_mlx5, ctx_buf); 2485 return rc; 2486 } 2487 2488 void 2489 accel_mlx5_get_default_attr(struct accel_mlx5_attr *attr) 2490 { 2491 assert(attr); 2492 2493 attr->qp_size = ACCEL_MLX5_QP_SIZE; 2494 attr->num_requests = ACCEL_MLX5_NUM_REQUESTS; 2495 attr->allowed_devs = NULL; 2496 attr->crypto_split_blocks = 0; 2497 attr->enable_driver = false; 2498 } 2499 2500 static void 2501 accel_mlx5_allowed_devs_free(void) 2502 { 2503 size_t i; 2504 2505 if (!g_accel_mlx5.allowed_devs) { 2506 return; 2507 } 2508 2509 for (i = 0; i < g_accel_mlx5.allowed_devs_count; i++) { 2510 free(g_accel_mlx5.allowed_devs[i]); 2511 } 2512 free(g_accel_mlx5.attr.allowed_devs); 2513 free(g_accel_mlx5.allowed_devs); 2514 g_accel_mlx5.attr.allowed_devs = NULL; 2515 g_accel_mlx5.allowed_devs = NULL; 2516 g_accel_mlx5.allowed_devs_count = 0; 2517 } 2518 2519 static int 2520 accel_mlx5_allowed_devs_parse(const char *allowed_devs) 2521 { 2522 char *str, *tmp, *tok, *sp = NULL; 2523 size_t devs_count = 0; 2524 2525 str = strdup(allowed_devs); 2526 if (!str) { 2527 return -ENOMEM; 2528 } 2529 2530 accel_mlx5_allowed_devs_free(); 2531 2532 tmp = str; 2533 while ((tmp = strchr(tmp, ',')) != NULL) { 2534 tmp++; 2535 devs_count++; 2536 } 2537 devs_count++; 2538 2539 g_accel_mlx5.allowed_devs = calloc(devs_count, sizeof(char *)); 2540 if (!g_accel_mlx5.allowed_devs) { 2541 free(str); 2542 return -ENOMEM; 2543 } 2544 2545 devs_count = 0; 2546 tok = strtok_r(str, ",", &sp); 2547 while (tok) { 2548 g_accel_mlx5.allowed_devs[devs_count] = strdup(tok); 2549 if (!g_accel_mlx5.allowed_devs[devs_count]) { 2550 free(str); 2551 accel_mlx5_allowed_devs_free(); 2552 return -ENOMEM; 2553 } 2554 tok = strtok_r(NULL, ",", &sp); 2555 devs_count++; 2556 g_accel_mlx5.allowed_devs_count++; 2557 } 2558 2559 free(str); 2560 2561 return 0; 2562 } 2563 2564 int 2565 accel_mlx5_enable(struct accel_mlx5_attr *attr) 2566 { 2567 int rc; 2568 2569 if (g_accel_mlx5.enabled) { 2570 return -EEXIST; 2571 } 2572 if (attr) { 2573 if (attr->num_requests / spdk_env_get_core_count() < ACCEL_MLX5_MAX_MKEYS_IN_TASK) { 2574 SPDK_ERRLOG("num requests per core must not be less than %u, current value %u\n", 2575 ACCEL_MLX5_MAX_MKEYS_IN_TASK, attr->num_requests / spdk_env_get_core_count()); 2576 return -EINVAL; 2577 } 2578 if (attr->qp_size < 8) { 2579 SPDK_ERRLOG("qp_size must be at least 8\n"); 2580 return -EINVAL; 2581 } 2582 g_accel_mlx5.attr = *attr; 2583 g_accel_mlx5.attr.allowed_devs = NULL; 2584 2585 if (attr->allowed_devs) { 2586 /* Contains a copy of user's string */ 2587 g_accel_mlx5.attr.allowed_devs = strndup(attr->allowed_devs, ACCEL_MLX5_ALLOWED_DEVS_MAX_LEN); 2588 if (!g_accel_mlx5.attr.allowed_devs) { 2589 return -ENOMEM; 2590 } 2591 rc = accel_mlx5_allowed_devs_parse(g_accel_mlx5.attr.allowed_devs); 2592 if (rc) { 2593 return rc; 2594 } 2595 rc = spdk_mlx5_crypto_devs_allow((const char *const *)g_accel_mlx5.allowed_devs, 2596 g_accel_mlx5.allowed_devs_count); 2597 if (rc) { 2598 accel_mlx5_allowed_devs_free(); 2599 return rc; 2600 } 2601 } 2602 } else { 2603 accel_mlx5_get_default_attr(&g_accel_mlx5.attr); 2604 } 2605 2606 g_accel_mlx5.enabled = true; 2607 spdk_accel_module_list_add(&g_accel_mlx5.module); 2608 2609 return 0; 2610 } 2611 
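/*
 * Illustrative usage sketch (not part of this module): roughly how a caller such as the
 * "mlx5_scan_accel_module" RPC handler is expected to drive accel_mlx5_enable(). The
 * function name rpc_mlx5_scan_accel_module_example() and the attribute values below are
 * assumptions for illustration only.
 *
 *	static int
 *	rpc_mlx5_scan_accel_module_example(void)
 *	{
 *		struct accel_mlx5_attr attr;
 *
 *		accel_mlx5_get_default_attr(&attr);
 *		attr.qp_size = 256;			// must be at least 8
 *		attr.allowed_devs = "mlx5_0,mlx5_1";	// comma-separated allow-list, copied internally
 *		attr.enable_driver = true;		// also registers the platform driver in accel_mlx5_init()
 *
 *		// Returns -EEXIST if the module is already enabled, -EINVAL on bad
 *		// qp_size/num_requests, -ENOMEM on allocation failure.
 *		return accel_mlx5_enable(&attr);
 *	}
 */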
2612 static void 2613 accel_mlx5_psvs_release(struct accel_mlx5_dev_ctx *dev_ctx) 2614 { 2615 uint32_t i, num_psvs, num_psvs_in_pool; 2616 2617 if (!dev_ctx->psvs) { 2618 return; 2619 } 2620 2621 num_psvs = g_accel_mlx5.attr.num_requests; 2622 2623 for (i = 0; i < num_psvs; i++) { 2624 if (dev_ctx->psvs[i]) { 2625 spdk_mlx5_destroy_psv(dev_ctx->psvs[i]); 2626 dev_ctx->psvs[i] = NULL; 2627 } 2628 } 2629 free(dev_ctx->psvs); 2630 2631 if (!dev_ctx->psv_pool) { 2632 return; 2633 } 2634 num_psvs_in_pool = spdk_mempool_count(dev_ctx->psv_pool); 2635 if (num_psvs_in_pool != num_psvs) { 2636 SPDK_ERRLOG("Expected %u reqs in the pool, but got only %u\n", num_psvs, num_psvs_in_pool); 2637 } 2638 spdk_mempool_free(dev_ctx->psv_pool); 2639 } 2640 2641 static void 2642 accel_mlx5_free_resources(void) 2643 { 2644 struct accel_mlx5_dev_ctx *dev_ctx; 2645 uint32_t i; 2646 2647 for (i = 0; i < g_accel_mlx5.num_ctxs; i++) { 2648 dev_ctx = &g_accel_mlx5.dev_ctxs[i]; 2649 accel_mlx5_psvs_release(dev_ctx); 2650 if (dev_ctx->pd) { 2651 if (dev_ctx->crypto_mkeys) { 2652 spdk_mlx5_mkey_pool_destroy(SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO, dev_ctx->pd); 2653 } 2654 if (dev_ctx->sig_mkeys) { 2655 spdk_mlx5_mkey_pool_destroy(SPDK_MLX5_MKEY_POOL_FLAG_SIGNATURE, dev_ctx->pd); 2656 } 2657 spdk_rdma_utils_put_pd(dev_ctx->pd); 2658 } 2659 if (dev_ctx->domain) { 2660 spdk_rdma_utils_put_memory_domain(dev_ctx->domain); 2661 } 2662 } 2663 2664 free(g_accel_mlx5.dev_ctxs); 2665 g_accel_mlx5.dev_ctxs = NULL; 2666 g_accel_mlx5.initialized = false; 2667 } 2668 2669 static void 2670 accel_mlx5_deinit_cb(void *ctx) 2671 { 2672 accel_mlx5_free_resources(); 2673 spdk_spin_destroy(&g_accel_mlx5.lock); 2674 spdk_mlx5_umr_implementer_register(false); 2675 spdk_accel_module_finish(); 2676 } 2677 2678 static void 2679 accel_mlx5_deinit(void *ctx) 2680 { 2681 if (g_accel_mlx5.allowed_devs) { 2682 accel_mlx5_allowed_devs_free(); 2683 } 2684 spdk_mlx5_crypto_devs_allow(NULL, 0); 2685 if (g_accel_mlx5.initialized) { 2686 spdk_io_device_unregister(&g_accel_mlx5, accel_mlx5_deinit_cb); 2687 } else { 2688 spdk_accel_module_finish(); 2689 } 2690 } 2691 2692 static int 2693 accel_mlx5_mkeys_create(struct ibv_pd *pd, uint32_t num_mkeys, uint32_t flags) 2694 { 2695 struct spdk_mlx5_mkey_pool_param pool_param = {}; 2696 2697 pool_param.mkey_count = num_mkeys; 2698 pool_param.cache_per_thread = num_mkeys * 3 / 4 / spdk_env_get_core_count(); 2699 pool_param.flags = flags; 2700 2701 return spdk_mlx5_mkey_pool_init(&pool_param, pd); 2702 } 2703 2704 static void 2705 accel_mlx5_set_psv_in_pool(struct spdk_mempool *mp, void *cb_arg, void *_psv, unsigned obj_idx) 2706 { 2707 struct spdk_rdma_utils_memory_translation translation = {}; 2708 struct accel_mlx5_psv_pool_iter_cb_args *args = cb_arg; 2709 struct accel_mlx5_psv_wrapper *wrapper = _psv; 2710 struct accel_mlx5_dev_ctx *dev_ctx = args->dev; 2711 int rc; 2712 2713 if (args->rc) { 2714 return; 2715 } 2716 assert(obj_idx < g_accel_mlx5.attr.num_requests); 2717 assert(dev_ctx->psvs[obj_idx] != NULL); 2718 memset(wrapper, 0, sizeof(*wrapper)); 2719 wrapper->psv_index = dev_ctx->psvs[obj_idx]->index; 2720 2721 rc = spdk_rdma_utils_get_translation(args->map, &wrapper->crc, sizeof(uint32_t), &translation); 2722 if (rc) { 2723 SPDK_ERRLOG("Memory translation failed, addr %p, length %zu\n", &wrapper->crc, sizeof(uint32_t)); 2724 args->rc = -EINVAL; 2725 } else { 2726 wrapper->crc_lkey = spdk_rdma_utils_memory_translation_get_lkey(&translation); 2727 } 2728 } 2729 2730 static int 2731 accel_mlx5_psvs_create(struct 
accel_mlx5_dev_ctx *dev_ctx) 2732 { 2733 struct accel_mlx5_psv_pool_iter_cb_args args = { 2734 .dev = dev_ctx 2735 }; 2736 char pool_name[32]; 2737 uint32_t i; 2738 uint32_t num_psvs = g_accel_mlx5.attr.num_requests; 2739 uint32_t cache_size; 2740 int rc; 2741 2742 dev_ctx->psvs = calloc(num_psvs, (sizeof(struct spdk_mlx5_psv *))); 2743 if (!dev_ctx->psvs) { 2744 SPDK_ERRLOG("Failed to alloc PSVs array\n"); 2745 return -ENOMEM; 2746 } 2747 for (i = 0; i < num_psvs; i++) { 2748 dev_ctx->psvs[i] = spdk_mlx5_create_psv(dev_ctx->pd); 2749 if (!dev_ctx->psvs[i]) { 2750 SPDK_ERRLOG("Failed to create PSV on dev %s\n", dev_ctx->context->device->name); 2751 return -EINVAL; 2752 } 2753 } 2754 2755 rc = snprintf(pool_name, sizeof(pool_name), "accel_psv_%s", dev_ctx->context->device->name); 2756 if (rc < 0) { 2757 assert(0); 2758 return -EINVAL; 2759 } 2760 cache_size = num_psvs * 3 / 4 / spdk_env_get_core_count(); 2761 args.map = spdk_rdma_utils_create_mem_map(dev_ctx->pd, NULL, 2762 IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE); 2763 if (!args.map) { 2764 return -ENOMEM; 2765 } 2766 dev_ctx->psv_pool = spdk_mempool_create_ctor(pool_name, num_psvs, 2767 sizeof(struct accel_mlx5_psv_wrapper), 2768 cache_size, SPDK_ENV_SOCKET_ID_ANY, 2769 accel_mlx5_set_psv_in_pool, &args); 2770 spdk_rdma_utils_free_mem_map(&args.map); 2771 if (!dev_ctx->psv_pool) { 2772 SPDK_ERRLOG("Failed to create PSV memory pool\n"); 2773 return -ENOMEM; 2774 } 2775 if (args.rc) { 2776 SPDK_ERRLOG("Failed to init PSV memory pool objects, rc %d\n", args.rc); 2777 return args.rc; 2778 } 2779 2780 return 0; 2781 } 2782 2783 2784 static int 2785 accel_mlx5_dev_ctx_init(struct accel_mlx5_dev_ctx *dev_ctx, struct ibv_context *dev, 2786 struct spdk_mlx5_device_caps *caps) 2787 { 2788 struct ibv_pd *pd; 2789 int rc; 2790 2791 pd = spdk_rdma_utils_get_pd(dev); 2792 if (!pd) { 2793 SPDK_ERRLOG("Failed to get PD for context %p, dev %s\n", dev, dev->device->name); 2794 return -EINVAL; 2795 } 2796 dev_ctx->context = dev; 2797 dev_ctx->pd = pd; 2798 dev_ctx->domain = spdk_rdma_utils_get_memory_domain(pd); 2799 if (!dev_ctx->domain) { 2800 return -ENOMEM; 2801 } 2802 2803 if (g_accel_mlx5.crypto_supported) { 2804 dev_ctx->crypto_multi_block = caps->crypto.multi_block_be_tweak; 2805 if (!dev_ctx->crypto_multi_block && g_accel_mlx5.attr.crypto_split_blocks) { 2806 SPDK_WARNLOG("\"crypto_split_blocks\" is set but dev %s doesn't support multi block crypto\n", 2807 dev->device->name); 2808 } 2809 rc = accel_mlx5_mkeys_create(pd, g_accel_mlx5.attr.num_requests, SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO); 2810 if (rc) { 2811 SPDK_ERRLOG("Failed to create crypto mkeys pool, rc %d, dev %s\n", rc, dev->device->name); 2812 return rc; 2813 } 2814 dev_ctx->crypto_mkeys = true; 2815 } 2816 if (g_accel_mlx5.crc32c_supported) { 2817 rc = accel_mlx5_mkeys_create(pd, g_accel_mlx5.attr.num_requests, 2818 SPDK_MLX5_MKEY_POOL_FLAG_SIGNATURE); 2819 if (rc) { 2820 SPDK_ERRLOG("Failed to create signature mkeys pool, rc %d, dev %s\n", rc, dev->device->name); 2821 return rc; 2822 } 2823 dev_ctx->sig_mkeys = true; 2824 rc = accel_mlx5_psvs_create(dev_ctx); 2825 if (rc) { 2826 SPDK_ERRLOG("Failed to create PSVs pool, rc %d, dev %s\n", rc, dev->device->name); 2827 return rc; 2828 } 2829 } 2830 2831 return 0; 2832 } 2833 2834 static struct ibv_context ** 2835 accel_mlx5_get_devices(int *_num_devs) 2836 { 2837 struct ibv_context **rdma_devs, **rdma_devs_out = NULL, *dev; 2838 struct ibv_device_attr dev_attr; 2839 size_t j; 2840 int num_devs = 0, i, rc; 2841 
int num_devs_out = 0; 2842 bool dev_allowed; 2843 2844 rdma_devs = rdma_get_devices(&num_devs); 2845 if (!rdma_devs || !num_devs) { 2846 *_num_devs = 0; 2847 return NULL; 2848 } 2849 2850 rdma_devs_out = calloc(num_devs + 1, sizeof(struct ibv_context *)); 2851 if (!rdma_devs_out) { 2852 SPDK_ERRLOG("Memory allocation failed\n"); 2853 rdma_free_devices(rdma_devs); 2854 *_num_devs = 0; 2855 return NULL; 2856 } 2857 2858 for (i = 0; i < num_devs; i++) { 2859 dev = rdma_devs[i]; 2860 rc = ibv_query_device(dev, &dev_attr); 2861 if (rc) { 2862 SPDK_ERRLOG("Failed to query dev %s, skipping\n", dev->device->name); 2863 continue; 2864 } 2865 if (dev_attr.vendor_id != SPDK_MLX5_VENDOR_ID_MELLANOX) { 2866 SPDK_DEBUGLOG(accel_mlx5, "dev %s is not Mellanox device, skipping\n", dev->device->name); 2867 continue; 2868 } 2869 2870 if (g_accel_mlx5.allowed_devs_count) { 2871 dev_allowed = false; 2872 for (j = 0; j < g_accel_mlx5.allowed_devs_count; j++) { 2873 if (strcmp(g_accel_mlx5.allowed_devs[j], dev->device->name) == 0) { 2874 dev_allowed = true; 2875 break; 2876 } 2877 } 2878 if (!dev_allowed) { 2879 continue; 2880 } 2881 } 2882 2883 rdma_devs_out[num_devs_out] = dev; 2884 num_devs_out++; 2885 } 2886 2887 rdma_free_devices(rdma_devs); 2888 *_num_devs = num_devs_out; 2889 2890 return rdma_devs_out; 2891 } 2892 2893 static inline bool 2894 accel_mlx5_dev_supports_crypto(struct spdk_mlx5_device_caps *caps) 2895 { 2896 return caps->crypto_supported && !caps->crypto.wrapped_import_method_aes_xts && 2897 (caps->crypto.single_block_le_tweak || 2898 caps->crypto.multi_block_le_tweak || caps->crypto.multi_block_be_tweak); 2899 } 2900 2901 static int 2902 accel_mlx5_init(void) 2903 { 2904 struct spdk_mlx5_device_caps *caps; 2905 struct ibv_context **rdma_devs, *dev; 2906 int num_devs = 0, rc = 0, i; 2907 int best_dev = -1, first_dev = 0; 2908 int best_dev_stat = 0, dev_stat; 2909 bool supports_crypto; 2910 bool find_best_dev = g_accel_mlx5.allowed_devs_count == 0; 2911 2912 if (!g_accel_mlx5.enabled) { 2913 return -EINVAL; 2914 } 2915 2916 spdk_spin_init(&g_accel_mlx5.lock); 2917 rdma_devs = accel_mlx5_get_devices(&num_devs); 2918 if (!rdma_devs || !num_devs) { 2919 return -ENODEV; 2920 } 2921 caps = calloc(num_devs, sizeof(*caps)); 2922 if (!caps) { 2923 rc = -ENOMEM; 2924 goto cleanup; 2925 } 2926 2927 g_accel_mlx5.crypto_supported = true; 2928 g_accel_mlx5.crc32c_supported = true; 2929 g_accel_mlx5.num_ctxs = 0; 2930 2931 /* Iterate devices. 
We support an offload if all devices support it */ 2932 for (i = 0; i < num_devs; i++) { 2933 dev = rdma_devs[i]; 2934 2935 rc = spdk_mlx5_device_query_caps(dev, &caps[i]); 2936 if (rc) { 2937 SPDK_ERRLOG("Failed to get crypto caps, dev %s\n", dev->device->name); 2938 goto cleanup; 2939 } 2940 supports_crypto = accel_mlx5_dev_supports_crypto(&caps[i]); 2941 if (!supports_crypto) { 2942 SPDK_DEBUGLOG(accel_mlx5, "Disable crypto support because dev %s doesn't support it\n", 2943 rdma_devs[i]->device->name); 2944 g_accel_mlx5.crypto_supported = false; 2945 } 2946 if (!caps[i].crc32c_supported) { 2947 SPDK_DEBUGLOG(accel_mlx5, "Disable crc32c support because dev %s doesn't support it\n", 2948 rdma_devs[i]->device->name); 2949 g_accel_mlx5.crc32c_supported = false; 2950 } 2951 if (find_best_dev) { 2952 /* Find device which supports max number of offloads */ 2953 dev_stat = (int)supports_crypto + (int)caps[i].crc32c_supported; 2954 if (dev_stat > best_dev_stat) { 2955 best_dev_stat = dev_stat; 2956 best_dev = i; 2957 } 2958 } 2959 } 2960 2961 /* User didn't specify devices to use, try to select the best one */ 2962 if (find_best_dev) { 2963 if (best_dev == -1) { 2964 best_dev = 0; 2965 } 2966 g_accel_mlx5.crypto_supported = accel_mlx5_dev_supports_crypto(&caps[best_dev]); 2967 g_accel_mlx5.crc32c_supported = caps[best_dev].crc32c_supported; 2968 SPDK_NOTICELOG("Select dev %s, crypto %d, crc32c %d\n", rdma_devs[best_dev]->device->name, 2969 g_accel_mlx5.crypto_supported, g_accel_mlx5.crc32c_supported); 2970 first_dev = best_dev; 2971 num_devs = 1; 2972 if (g_accel_mlx5.crypto_supported) { 2973 const char *const dev_name[] = { rdma_devs[best_dev]->device->name }; 2974 /* Let mlx5 library know which device to use */ 2975 spdk_mlx5_crypto_devs_allow(dev_name, 1); 2976 } 2977 } else { 2978 SPDK_NOTICELOG("Found %d devices, crypto %d\n", num_devs, g_accel_mlx5.crypto_supported); 2979 } 2980 2981 g_accel_mlx5.dev_ctxs = calloc(num_devs, sizeof(*g_accel_mlx5.dev_ctxs)); 2982 if (!g_accel_mlx5.dev_ctxs) { 2983 SPDK_ERRLOG("Memory allocation failed\n"); 2984 rc = -ENOMEM; 2985 goto cleanup; 2986 } 2987 2988 for (i = first_dev; i < first_dev + num_devs; i++) { 2989 rc = accel_mlx5_dev_ctx_init(&g_accel_mlx5.dev_ctxs[g_accel_mlx5.num_ctxs++], 2990 rdma_devs[i], &caps[i]); 2991 if (rc) { 2992 goto cleanup; 2993 } 2994 } 2995 2996 SPDK_NOTICELOG("Accel framework mlx5 initialized, found %d devices.\n", num_devs); 2997 spdk_io_device_register(&g_accel_mlx5, accel_mlx5_create_cb, accel_mlx5_destroy_cb, 2998 sizeof(struct accel_mlx5_io_channel), "accel_mlx5"); 2999 g_accel_mlx5.initialized = true; 3000 free(rdma_devs); 3001 free(caps); 3002 3003 if (g_accel_mlx5.attr.enable_driver) { 3004 SPDK_NOTICELOG("Enabling mlx5 platform driver\n"); 3005 spdk_accel_driver_register(&g_accel_mlx5_driver); 3006 spdk_accel_set_driver(g_accel_mlx5_driver.name); 3007 spdk_mlx5_umr_implementer_register(true); 3008 } 3009 3010 return 0; 3011 3012 cleanup: 3013 free(rdma_devs); 3014 free(caps); 3015 accel_mlx5_free_resources(); 3016 spdk_spin_destroy(&g_accel_mlx5.lock); 3017 3018 return rc; 3019 } 3020 3021 static void 3022 accel_mlx5_write_config_json(struct spdk_json_write_ctx *w) 3023 { 3024 if (g_accel_mlx5.enabled) { 3025 spdk_json_write_object_begin(w); 3026 spdk_json_write_named_string(w, "method", "mlx5_scan_accel_module"); 3027 spdk_json_write_named_object_begin(w, "params"); 3028 spdk_json_write_named_uint16(w, "qp_size", g_accel_mlx5.attr.qp_size); 3029 spdk_json_write_named_uint32(w, "num_requests", 
g_accel_mlx5.attr.num_requests); 3030 if (g_accel_mlx5.attr.allowed_devs) { 3031 spdk_json_write_named_string(w, "allowed_devs", g_accel_mlx5.attr.allowed_devs); 3032 } 3033 spdk_json_write_named_uint16(w, "crypto_split_blocks", g_accel_mlx5.attr.crypto_split_blocks); 3034 spdk_json_write_named_bool(w, "enable_driver", g_accel_mlx5.attr.enable_driver); 3035 spdk_json_write_object_end(w); 3036 spdk_json_write_object_end(w); 3037 } 3038 } 3039 3040 static size_t 3041 accel_mlx5_get_ctx_size(void) 3042 { 3043 return sizeof(struct accel_mlx5_task); 3044 } 3045 3046 static int 3047 accel_mlx5_crypto_key_init(struct spdk_accel_crypto_key *key) 3048 { 3049 struct spdk_mlx5_crypto_dek_create_attr attr = {}; 3050 struct spdk_mlx5_crypto_keytag *keytag; 3051 int rc; 3052 3053 if (!key || !key->key || !key->key2 || !key->key_size || !key->key2_size) { 3054 return -EINVAL; 3055 } 3056 3057 attr.dek = calloc(1, key->key_size + key->key2_size); 3058 if (!attr.dek) { 3059 return -ENOMEM; 3060 } 3061 3062 memcpy(attr.dek, key->key, key->key_size); 3063 memcpy(attr.dek + key->key_size, key->key2, key->key2_size); 3064 attr.dek_len = key->key_size + key->key2_size; 3065 3066 rc = spdk_mlx5_crypto_keytag_create(&attr, &keytag); 3067 spdk_memset_s(attr.dek, attr.dek_len, 0, attr.dek_len); 3068 free(attr.dek); 3069 if (rc) { 3070 SPDK_ERRLOG("Failed to create a keytag, rc %d\n", rc); 3071 return rc; 3072 } 3073 3074 key->priv = keytag; 3075 3076 return 0; 3077 } 3078 3079 static void 3080 accel_mlx5_crypto_key_deinit(struct spdk_accel_crypto_key *key) 3081 { 3082 if (!key || key->module_if != &g_accel_mlx5.module || !key->priv) { 3083 return; 3084 } 3085 3086 spdk_mlx5_crypto_keytag_destroy(key->priv); 3087 } 3088 3089 static void 3090 accel_mlx5_dump_stats_json(struct spdk_json_write_ctx *w, const char *header, 3091 const struct accel_mlx5_stats *stats) 3092 { 3093 double idle_polls_percentage = 0; 3094 double cpls_per_poll = 0; 3095 uint64_t total_tasks = 0; 3096 int i; 3097 3098 if (stats->polls) { 3099 idle_polls_percentage = (double) stats->idle_polls * 100 / stats->polls; 3100 } 3101 if (stats->polls > stats->idle_polls) { 3102 cpls_per_poll = (double) stats->completions / (stats->polls - stats->idle_polls); 3103 } 3104 for (i = 0; i < ACCEL_MLX5_OPC_LAST; i++) { 3105 total_tasks += stats->opcodes[i]; 3106 } 3107 3108 spdk_json_write_named_object_begin(w, header); 3109 3110 spdk_json_write_named_object_begin(w, "umrs"); 3111 spdk_json_write_named_uint64(w, "crypto_umrs", stats->crypto_umrs); 3112 spdk_json_write_named_uint64(w, "sig_umrs", stats->sig_umrs); 3113 spdk_json_write_named_uint64(w, "total", stats->crypto_umrs + stats->sig_umrs); 3114 spdk_json_write_object_end(w); 3115 3116 spdk_json_write_named_object_begin(w, "rdma"); 3117 spdk_json_write_named_uint64(w, "read", stats->rdma_reads); 3118 spdk_json_write_named_uint64(w, "write", stats->rdma_writes); 3119 spdk_json_write_named_uint64(w, "total", stats->rdma_reads + stats->rdma_writes); 3120 spdk_json_write_object_end(w); 3121 3122 spdk_json_write_named_object_begin(w, "polling"); 3123 spdk_json_write_named_uint64(w, "polls", stats->polls); 3124 spdk_json_write_named_uint64(w, "idle_polls", stats->idle_polls); 3125 spdk_json_write_named_uint64(w, "completions", stats->completions); 3126 spdk_json_write_named_double(w, "idle_polls_percentage", idle_polls_percentage); 3127 spdk_json_write_named_double(w, "cpls_per_poll", cpls_per_poll); 3128 spdk_json_write_named_uint64(w, "nomem_qdepth", stats->nomem_qdepth); 3129 spdk_json_write_named_uint64(w, 
"nomem_mkey", stats->nomem_mkey); 3130 spdk_json_write_object_end(w); 3131 3132 spdk_json_write_named_object_begin(w, "tasks"); 3133 spdk_json_write_named_uint64(w, "copy", stats->opcodes[ACCEL_MLX5_OPC_COPY]); 3134 spdk_json_write_named_uint64(w, "crypto", stats->opcodes[ACCEL_MLX5_OPC_CRYPTO]); 3135 spdk_json_write_named_uint64(w, "crypto_mkey", stats->opcodes[ACCEL_MLX5_OPC_CRYPTO_MKEY]); 3136 spdk_json_write_named_uint64(w, "crc32c", stats->opcodes[ACCEL_MLX5_OPC_CRC32C]); 3137 spdk_json_write_named_uint64(w, "total", total_tasks); 3138 spdk_json_write_object_end(w); 3139 3140 spdk_json_write_object_end(w); 3141 } 3142 3143 static void 3144 accel_mlx5_dump_channel_stat(struct spdk_io_channel_iter *i) 3145 { 3146 struct accel_mlx5_stats ch_stat = {}; 3147 struct accel_mlx5_dump_stats_ctx *ctx; 3148 struct spdk_io_channel *_ch; 3149 struct accel_mlx5_io_channel *ch; 3150 struct accel_mlx5_dev *dev; 3151 uint32_t j; 3152 3153 ctx = spdk_io_channel_iter_get_ctx(i); 3154 _ch = spdk_io_channel_iter_get_channel(i); 3155 ch = spdk_io_channel_get_ctx(_ch); 3156 3157 if (ctx->level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) { 3158 spdk_json_write_object_begin(ctx->w); 3159 spdk_json_write_named_object_begin(ctx->w, spdk_thread_get_name(spdk_get_thread())); 3160 } 3161 if (ctx->level == ACCEL_MLX5_DUMP_STAT_LEVEL_DEV) { 3162 spdk_json_write_named_array_begin(ctx->w, "devices"); 3163 } 3164 3165 for (j = 0; j < ch->num_devs; j++) { 3166 dev = &ch->devs[j]; 3167 /* Save grand total and channel stats */ 3168 accel_mlx5_add_stats(&ctx->total, &dev->stats); 3169 accel_mlx5_add_stats(&ch_stat, &dev->stats); 3170 if (ctx->level == ACCEL_MLX5_DUMP_STAT_LEVEL_DEV) { 3171 spdk_json_write_object_begin(ctx->w); 3172 accel_mlx5_dump_stats_json(ctx->w, dev->dev_ctx->context->device->name, &dev->stats); 3173 spdk_json_write_object_end(ctx->w); 3174 } 3175 } 3176 3177 if (ctx->level == ACCEL_MLX5_DUMP_STAT_LEVEL_DEV) { 3178 spdk_json_write_array_end(ctx->w); 3179 } 3180 if (ctx->level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) { 3181 accel_mlx5_dump_stats_json(ctx->w, "channel_total", &ch_stat); 3182 spdk_json_write_object_end(ctx->w); 3183 spdk_json_write_object_end(ctx->w); 3184 } 3185 3186 spdk_for_each_channel_continue(i, 0); 3187 } 3188 3189 static void 3190 accel_mlx5_dump_channel_stat_done(struct spdk_io_channel_iter *i, int status) 3191 { 3192 struct accel_mlx5_dump_stats_ctx *ctx; 3193 3194 ctx = spdk_io_channel_iter_get_ctx(i); 3195 3196 spdk_spin_lock(&g_accel_mlx5.lock); 3197 /* Add statistics from destroyed channels */ 3198 accel_mlx5_add_stats(&ctx->total, &g_accel_mlx5.stats); 3199 spdk_spin_unlock(&g_accel_mlx5.lock); 3200 3201 if (ctx->level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) { 3202 /* channels[] */ 3203 spdk_json_write_array_end(ctx->w); 3204 } 3205 3206 accel_mlx5_dump_stats_json(ctx->w, "total", &ctx->total); 3207 3208 /* Ends the whole response which was begun in accel_mlx5_dump_stats */ 3209 spdk_json_write_object_end(ctx->w); 3210 3211 ctx->cb(ctx->ctx, 0); 3212 free(ctx); 3213 } 3214 3215 int 3216 accel_mlx5_dump_stats(struct spdk_json_write_ctx *w, enum accel_mlx5_dump_state_level level, 3217 accel_mlx5_dump_stat_done_cb cb, void *ctx) 3218 { 3219 struct accel_mlx5_dump_stats_ctx *stat_ctx; 3220 3221 if (!w || !cb) { 3222 return -EINVAL; 3223 } 3224 if (!g_accel_mlx5.initialized) { 3225 return -ENODEV; 3226 } 3227 3228 stat_ctx = calloc(1, sizeof(*stat_ctx)); 3229 if (!stat_ctx) { 3230 return -ENOMEM; 3231 } 3232 stat_ctx->cb = cb; 3233 stat_ctx->ctx = ctx; 3234 stat_ctx->level = level; 3235 
stat_ctx->w = w;
3236
3237 spdk_json_write_object_begin(w);
3238
3239 if (level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) {
3240 spdk_json_write_named_array_begin(w, "channels");
3241 }
3242
3243 spdk_for_each_channel(&g_accel_mlx5, accel_mlx5_dump_channel_stat, stat_ctx,
3244 accel_mlx5_dump_channel_stat_done);
3245
3246 return 0;
3247 }
3248
3249 static bool
3250 accel_mlx5_crypto_supports_cipher(enum spdk_accel_cipher cipher, size_t key_size)
3251 {
3252 switch (cipher) {
3253 case SPDK_ACCEL_CIPHER_AES_XTS:
3254 return key_size == SPDK_ACCEL_AES_XTS_128_KEY_SIZE || key_size == SPDK_ACCEL_AES_XTS_256_KEY_SIZE;
3255 default:
3256 return false;
3257 }
3258 }
3259
3260 static int
3261 accel_mlx5_get_memory_domains(struct spdk_memory_domain **domains, int array_size)
3262 {
3263 int i, size;
3264
3265 if (!domains || !array_size) {
3266 return (int)g_accel_mlx5.num_ctxs;
3267 }
3268
3269 size = spdk_min(array_size, (int)g_accel_mlx5.num_ctxs);
3270
3271 for (i = 0; i < size; i++) {
3272 domains[i] = g_accel_mlx5.dev_ctxs[i].domain;
3273 }
3274
3275 return (int)g_accel_mlx5.num_ctxs;
3276 }
3277
3278 static inline struct accel_mlx5_dev *
3279 accel_mlx5_ch_get_dev_by_pd(struct accel_mlx5_io_channel *accel_ch, struct ibv_pd *pd)
3280 {
3281 uint32_t i;
3282
3283 for (i = 0; i < accel_ch->num_devs; i++) {
3284 if (accel_ch->devs[i].dev_ctx->pd == pd) {
3285 return &accel_ch->devs[i];
3286 }
3287 }
3288
3289 return NULL;
3290 }
3291
3292 static inline int
3293 accel_mlx5_task_assign_qp_by_domain_pd(struct accel_mlx5_task *task,
3294 struct accel_mlx5_io_channel *accel_ch, struct spdk_memory_domain *domain)
3295 {
3296 struct spdk_memory_domain_rdma_ctx *domain_ctx;
3297 struct accel_mlx5_dev *dev;
3298 struct ibv_pd *domain_pd;
3299 size_t ctx_size;
3300
3301 domain_ctx = spdk_memory_domain_get_user_context(domain, &ctx_size);
3302 if (spdk_unlikely(!domain_ctx || domain_ctx->size != ctx_size)) {
3303 SPDK_ERRLOG("no domain context or wrong size, ctx ptr %p, size %zu\n", domain_ctx, ctx_size);
3304 return -ENOTSUP;
3305 }
3306 domain_pd = domain_ctx->ibv_pd;
3307 if (spdk_unlikely(!domain_pd)) {
3308 SPDK_ERRLOG("no destination domain PD, task %p\n", task);
3309 return -ENOTSUP;
3310 }
3311 dev = accel_mlx5_ch_get_dev_by_pd(accel_ch, domain_pd);
3312 if (spdk_unlikely(!dev)) {
3313 SPDK_ERRLOG("No dev for PD %p dev %s\n", domain_pd, domain_pd->context->device->name);
3314 return -ENODEV;
3315 }
3316
3320 task->qp = &dev->qp;
3321
3322 return 0;
3323 }
3324
3325 static inline int
3326 accel_mlx5_driver_examine_sequence(struct spdk_accel_sequence *seq,
3327 struct accel_mlx5_io_channel *accel_ch)
3328 {
3329 struct spdk_accel_task *first_base = spdk_accel_sequence_first_task(seq);
3330 struct accel_mlx5_task *first = SPDK_CONTAINEROF(first_base, struct accel_mlx5_task, base);
3331 struct spdk_accel_task *next_base = TAILQ_NEXT(first_base, seq_link);
3332 struct accel_mlx5_task *next;
3333 int rc;
3334
3335 accel_mlx5_task_reset(first);
3336 SPDK_DEBUGLOG(accel_mlx5, "first %p, opc %d; next %p, opc %d\n", first_base, first_base->op_code,
3337 next_base, next_base ?
next_base->op_code : -1);
3338 if (next_base) {
3339 switch (first_base->op_code) {
3340 case SPDK_ACCEL_OPC_COPY:
3341 if (next_base->op_code == SPDK_ACCEL_OPC_DECRYPT &&
3342 first_base->dst_domain && spdk_memory_domain_get_dma_device_type(first_base->dst_domain) ==
3343 SPDK_DMA_DEVICE_TYPE_RDMA && TAILQ_NEXT(next_base, seq_link) == NULL) {
3344 next = SPDK_CONTAINEROF(next_base, struct accel_mlx5_task, base);
3345 rc = accel_mlx5_task_assign_qp_by_domain_pd(next, accel_ch, first_base->dst_domain);
3346 if (spdk_unlikely(rc)) {
3347 return rc;
3348 }
3349 /* Update decrypt task memory domain, complete copy task */
3350 SPDK_DEBUGLOG(accel_mlx5, "Merge copy task (%p) and decrypt (%p)\n", first, next);
3351 next_base->dst_domain = first_base->dst_domain;
3352 next_base->dst_domain_ctx = first_base->dst_domain_ctx;
3353 accel_mlx5_task_reset(next);
3354 next->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO_MKEY;
3355 next->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_WIRE;
3356 next->needs_data_transfer = 1;
3357 next->inplace = 1;
3358 spdk_accel_task_complete(first_base, 0);
3359 return 0;
3360 }
3361 break;
3362 case SPDK_ACCEL_OPC_ENCRYPT:
3363 if (next_base->op_code == SPDK_ACCEL_OPC_COPY &&
3364 next_base->dst_domain && spdk_memory_domain_get_dma_device_type(next_base->dst_domain) ==
3365 SPDK_DMA_DEVICE_TYPE_RDMA && TAILQ_NEXT(next_base, seq_link) == NULL) {
3366 rc = accel_mlx5_task_assign_qp_by_domain_pd(first, accel_ch, next_base->dst_domain);
3367 if (spdk_unlikely(rc)) {
3368 return rc;
3369 }
3370
3371 /* Update encrypt task memory domain, complete copy task */
3372 SPDK_DEBUGLOG(accel_mlx5, "Merge copy task (%p) and encrypt (%p)\n",
3373 SPDK_CONTAINEROF(next_base,
3374 struct accel_mlx5_task, base), first);
3375 first_base->dst_domain = next_base->dst_domain;
3376 first_base->dst_domain_ctx = next_base->dst_domain_ctx;
3377 first->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO_MKEY;
3378 first->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_WIRE;
3379 first->needs_data_transfer = 1;
3380 first->inplace = 1;
3381 spdk_accel_task_complete(next_base, 0);
3382 return 0;
3383 }
3384 break;
3385
3386 default:
3387 break;
3388 }
3389 }
3390
3391 SPDK_DEBUGLOG(accel_mlx5, "seq %p, task %p nothing to merge\n", seq, first_base);
3392 /* Nothing to merge, execute tasks one by one */
3393 accel_mlx5_task_assign_qp(first, accel_ch);
3394 accel_mlx5_task_init_opcode(first);
3395
3396 return 0;
3397 }
3398
3399 static inline int
3400 accel_mlx5_execute_sequence(struct spdk_io_channel *ch, struct spdk_accel_sequence *seq)
3401 {
3402 struct accel_mlx5_io_channel *accel_ch = spdk_io_channel_get_ctx(ch);
3403 struct spdk_accel_task *task;
3404 struct accel_mlx5_task *mlx5_task;
3405 int rc;
3406
3407 rc = accel_mlx5_driver_examine_sequence(seq, accel_ch);
3408 if (spdk_unlikely(rc)) {
3409 return rc;
3410 }
3411 task = spdk_accel_sequence_first_task(seq);
3412 assert(task);
3413 mlx5_task = SPDK_CONTAINEROF(task, struct accel_mlx5_task, base);
3414 mlx5_task->driver_seq = 1;
3415
3416 SPDK_DEBUGLOG(accel_mlx5, "driver starts seq %p, ch %p, task %p\n", seq, accel_ch, task);
3417
3418 return _accel_mlx5_submit_tasks(accel_ch, task);
3419 }
3420
3421 static struct accel_mlx5_module g_accel_mlx5 = {
3422 .module = {
3423 .module_init = accel_mlx5_init,
3424 .module_fini = accel_mlx5_deinit,
3425 .write_config_json = accel_mlx5_write_config_json,
3426 .get_ctx_size = accel_mlx5_get_ctx_size,
3427 .name = "mlx5",
3428 .supports_opcode = accel_mlx5_supports_opcode,
3429 .get_io_channel =
accel_mlx5_get_io_channel, 3430 .submit_tasks = accel_mlx5_submit_tasks, 3431 .crypto_key_init = accel_mlx5_crypto_key_init, 3432 .crypto_key_deinit = accel_mlx5_crypto_key_deinit, 3433 .crypto_supports_cipher = accel_mlx5_crypto_supports_cipher, 3434 .get_memory_domains = accel_mlx5_get_memory_domains, 3435 } 3436 }; 3437 3438 static struct spdk_accel_driver g_accel_mlx5_driver = { 3439 .name = "mlx5", 3440 .execute_sequence = accel_mlx5_execute_sequence, 3441 .get_io_channel = accel_mlx5_get_io_channel 3442 }; 3443 3444 SPDK_LOG_REGISTER_COMPONENT(accel_mlx5) 3445
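/*
 * Illustrative example (assumption: default attributes and a single allowed device named
 * "mlx5_0"): accel_mlx5_write_config_json() above emits a config entry of roughly this
 * shape, which recreates the same accel_mlx5_enable() configuration when the config is
 * loaded. "allowed_devs" is written only when it was set.
 *
 *	{
 *		"method": "mlx5_scan_accel_module",
 *		"params": {
 *			"qp_size": 256,
 *			"num_requests": 2047,
 *			"allowed_devs": "mlx5_0",
 *			"crypto_split_blocks": 0,
 *			"enable_driver": false
 *		}
 *	}
 */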