1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 */ 4 5 #include "spdk/env.h" 6 #include "spdk/thread.h" 7 #include "spdk/queue.h" 8 #include "spdk/log.h" 9 #include "spdk/string.h" 10 #include "spdk/likely.h" 11 #include "spdk/dma.h" 12 #include "spdk/json.h" 13 #include "spdk/util.h" 14 15 #include "spdk_internal/mlx5.h" 16 #include "spdk_internal/rdma_utils.h" 17 #include "spdk/accel_module.h" 18 #include "spdk_internal/assert.h" 19 #include "spdk_internal/sgl.h" 20 #include "accel_mlx5.h" 21 22 #include <infiniband/mlx5dv.h> 23 #include <rdma/rdma_cma.h> 24 25 #define ACCEL_MLX5_QP_SIZE (256u) 26 #define ACCEL_MLX5_NUM_REQUESTS (2048u - 1) 27 #define ACCEL_MLX5_RECOVER_POLLER_PERIOD_US (10000) 28 #define ACCEL_MLX5_MAX_SGE (16u) 29 #define ACCEL_MLX5_MAX_WC (64u) 30 #define ACCEL_MLX5_MAX_MKEYS_IN_TASK (16u) 31 32 /* Assume we have up to 16 devices */ 33 #define ACCEL_MLX5_ALLOWED_DEVS_MAX_LEN ((SPDK_MLX5_DEV_MAX_NAME_LEN + 1) * 16) 34 35 #define ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, task) \ 36 do { \ 37 assert((qp)->wrs_submitted < (qp)->wrs_max); \ 38 (qp)->wrs_submitted++; \ 39 (qp)->ring_db = true; \ 40 assert((task)->num_wrs < UINT16_MAX); \ 41 (task)->num_wrs++; \ 42 } while (0) 43 44 #define ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, task) \ 45 do { \ 46 assert((dev)->wrs_in_cq < (dev)->wrs_in_cq_max); \ 47 (dev)->wrs_in_cq++; \ 48 assert((qp)->wrs_submitted < (qp)->wrs_max); \ 49 (qp)->wrs_submitted++; \ 50 (qp)->ring_db = true; \ 51 assert((task)->num_wrs < UINT16_MAX); \ 52 (task)->num_wrs++; \ 53 } while (0) 54 55 struct accel_mlx5_io_channel; 56 struct accel_mlx5_task; 57 58 struct accel_mlx5_dev_ctx { 59 struct ibv_context *context; 60 struct ibv_pd *pd; 61 struct spdk_memory_domain *domain; 62 struct spdk_mempool *psv_pool; 63 TAILQ_ENTRY(accel_mlx5_dev_ctx) link; 64 struct spdk_mlx5_psv **psvs; 65 bool mkeys; 66 bool crypto_mkeys; 67 bool sig_mkeys; 68 bool crypto_multi_block; 69 }; 70 71 enum accel_mlx5_opcode { 72 ACCEL_MLX5_OPC_COPY, 73 ACCEL_MLX5_OPC_CRYPTO, 74 ACCEL_MLX5_OPC_CRC32C, 75 ACCEL_MLX5_OPC_CRYPTO_MKEY, 76 ACCEL_MLX5_OPC_LAST 77 }; 78 79 SPDK_STATIC_ASSERT(ACCEL_MLX5_OPC_LAST <= 0xf, 80 "accel opcode exceeds 4 bits, update accel_mlx5 struct"); 81 82 struct accel_mlx5_stats { 83 uint64_t crypto_umrs; 84 uint64_t sig_umrs; 85 uint64_t rdma_reads; 86 uint64_t rdma_writes; 87 uint64_t polls; 88 uint64_t idle_polls; 89 uint64_t completions; 90 uint64_t nomem_qdepth; 91 uint64_t nomem_mkey; 92 uint64_t opcodes[ACCEL_MLX5_OPC_LAST]; 93 }; 94 95 struct accel_mlx5_module { 96 struct spdk_accel_module_if module; 97 struct accel_mlx5_stats stats; 98 struct spdk_spinlock lock; 99 struct accel_mlx5_dev_ctx *dev_ctxs; 100 uint32_t num_ctxs; 101 struct accel_mlx5_attr attr; 102 char **allowed_devs; 103 size_t allowed_devs_count; 104 bool initialized; 105 bool enabled; 106 bool crypto_supported; 107 bool crc32c_supported; 108 }; 109 110 struct accel_mlx5_sge { 111 uint32_t src_sge_count; 112 uint32_t dst_sge_count; 113 struct ibv_sge src_sge[ACCEL_MLX5_MAX_SGE]; 114 struct ibv_sge dst_sge[ACCEL_MLX5_MAX_SGE]; 115 }; 116 117 struct accel_mlx5_iov_sgl { 118 struct iovec *iov; 119 uint32_t iovcnt; 120 uint32_t iov_offset; 121 }; 122 123 struct accel_mlx5_psv_wrapper { 124 uint32_t psv_index; 125 struct { 126 uint32_t error : 1; 127 uint32_t reserved : 31; 128 } bits; 129 /* mlx5 engine requires DMAable memory, use this member to copy user's crc value since we don't know which 130 * memory 
it is in */ 131 uint32_t crc; 132 uint32_t crc_lkey; 133 }; 134 135 struct accel_mlx5_task { 136 struct spdk_accel_task base; 137 struct accel_mlx5_iov_sgl src; 138 struct accel_mlx5_iov_sgl dst; 139 struct accel_mlx5_qp *qp; 140 STAILQ_ENTRY(accel_mlx5_task) link; 141 uint16_t num_reqs; 142 uint16_t num_completed_reqs; 143 uint16_t num_submitted_reqs; 144 uint16_t num_ops; /* number of allocated mkeys or number of operations */ 145 uint16_t num_wrs; /* Number of outstanding operations which consume qp slot */ 146 union { 147 struct { 148 uint16_t blocks_per_req; 149 uint16_t num_processed_blocks; 150 uint16_t num_blocks; 151 }; 152 struct { 153 struct accel_mlx5_psv_wrapper *psv; 154 uint32_t last_umr_len; 155 uint8_t last_mkey_idx; 156 }; 157 }; 158 union { 159 uint16_t raw; 160 struct { 161 uint16_t inplace : 1; 162 uint16_t driver_seq : 1; 163 uint16_t needs_data_transfer : 1; 164 uint16_t enc_order : 2; 165 uint16_t mlx5_opcode: 4; 166 }; 167 }; 168 /* Keep this array last since not all elements might be accessed, this reduces amount of data to be 169 * cached */ 170 struct spdk_mlx5_mkey_pool_obj *mkeys[ACCEL_MLX5_MAX_MKEYS_IN_TASK]; 171 }; 172 173 SPDK_STATIC_ASSERT(ACCEL_MLX5_MAX_MKEYS_IN_TASK <= UINT8_MAX, "uint8_t is used to iterate mkeys"); 174 175 struct accel_mlx5_qp { 176 struct spdk_mlx5_qp *qp; 177 struct ibv_qp *verbs_qp; 178 struct accel_mlx5_dev *dev; 179 /* tasks submitted to HW. We can't complete a task even in error case until we reap completions for all 180 * submitted requests */ 181 STAILQ_HEAD(, accel_mlx5_task) in_hw; 182 uint16_t wrs_submitted; 183 uint16_t wrs_max; 184 bool ring_db; 185 bool recovering; 186 struct spdk_poller *recover_poller; 187 }; 188 189 struct accel_mlx5_dev { 190 struct accel_mlx5_qp qp; 191 struct spdk_mlx5_cq *cq; 192 struct spdk_mlx5_mkey_pool *mkeys; 193 struct spdk_mlx5_mkey_pool *crypto_mkeys; 194 struct spdk_mlx5_mkey_pool *sig_mkeys; 195 struct spdk_rdma_utils_mem_map *mmap; 196 struct accel_mlx5_dev_ctx *dev_ctx; 197 struct spdk_io_channel *ch; 198 uint16_t wrs_in_cq; 199 uint16_t wrs_in_cq_max; 200 uint16_t crypto_split_blocks; 201 bool crypto_multi_block; 202 /* Pending tasks waiting for requests resources */ 203 STAILQ_HEAD(, accel_mlx5_task) nomem; 204 TAILQ_ENTRY(accel_mlx5_dev) link; 205 struct accel_mlx5_stats stats; 206 }; 207 208 struct accel_mlx5_io_channel { 209 struct accel_mlx5_dev *devs; 210 struct spdk_poller *poller; 211 uint16_t num_devs; 212 /* Index in \b devs to be used for operations in round-robin way */ 213 uint16_t dev_idx; 214 bool poller_handler_registered; 215 }; 216 217 struct accel_mlx5_task_operations { 218 int (*init)(struct accel_mlx5_task *task); 219 int (*process)(struct accel_mlx5_task *task); 220 int (*cont)(struct accel_mlx5_task *task); 221 void (*complete)(struct accel_mlx5_task *task); 222 }; 223 224 struct accel_mlx5_psv_pool_iter_cb_args { 225 struct accel_mlx5_dev_ctx *dev; 226 struct spdk_rdma_utils_mem_map *map; 227 int rc; 228 }; 229 230 struct accel_mlx5_dump_stats_ctx { 231 struct accel_mlx5_stats total; 232 struct spdk_json_write_ctx *w; 233 enum accel_mlx5_dump_state_level level; 234 accel_mlx5_dump_stat_done_cb cb; 235 void *ctx; 236 }; 237 238 static struct accel_mlx5_module g_accel_mlx5; 239 static struct spdk_accel_driver g_accel_mlx5_driver; 240 241 static inline int accel_mlx5_execute_sequence(struct spdk_io_channel *ch, 242 struct spdk_accel_sequence *seq); 243 static inline void accel_mlx5_task_complete(struct accel_mlx5_task *mlx5_task); 244 245 static inline void 246 
accel_mlx5_iov_sgl_init(struct accel_mlx5_iov_sgl *s, struct iovec *iov, uint32_t iovcnt) 247 { 248 s->iov = iov; 249 s->iovcnt = iovcnt; 250 s->iov_offset = 0; 251 } 252 253 static inline void 254 accel_mlx5_iov_sgl_advance(struct accel_mlx5_iov_sgl *s, uint32_t step) 255 { 256 s->iov_offset += step; 257 while (s->iovcnt > 0) { 258 assert(s->iov != NULL); 259 if (s->iov_offset < s->iov->iov_len) { 260 break; 261 } 262 263 s->iov_offset -= s->iov->iov_len; 264 s->iov++; 265 s->iovcnt--; 266 } 267 } 268 269 static inline void 270 accel_mlx5_iov_sgl_unwind(struct accel_mlx5_iov_sgl *s, uint32_t max_iovs, uint32_t step) 271 { 272 SPDK_DEBUGLOG(accel_mlx5, "iov %p, iovcnt %u, max %u, offset %u, step %u\n", s->iov, s->iovcnt, 273 max_iovs, s->iov_offset, step); 274 while (s->iovcnt <= max_iovs) { 275 assert(s->iov != NULL); 276 if (s->iov_offset >= step) { 277 s->iov_offset -= step; 278 SPDK_DEBUGLOG(accel_mlx5, "\tEND, iov %p, iovcnt %u, offset %u\n", s->iov, s->iovcnt, 279 s->iov_offset); 280 return; 281 } 282 step -= s->iov_offset; 283 s->iov--; 284 s->iovcnt++; 285 s->iov_offset = s->iov->iov_len; 286 SPDK_DEBUGLOG(accel_mlx5, "\tiov %p, iovcnt %u, offset %u, step %u\n", s->iov, s->iovcnt, 287 s->iov_offset, step); 288 } 289 290 SPDK_ERRLOG("Can't unwind iovs, remaining %u\n", step); 291 assert(0); 292 } 293 294 static inline int 295 accel_mlx5_sge_unwind(struct ibv_sge *sge, uint32_t sge_count, uint32_t step) 296 { 297 int i; 298 299 assert(sge_count > 0); 300 SPDK_DEBUGLOG(accel_mlx5, "sge %p, count %u, step %u\n", sge, sge_count, step); 301 for (i = (int)sge_count - 1; i >= 0; i--) { 302 if (sge[i].length > step) { 303 sge[i].length -= step; 304 SPDK_DEBUGLOG(accel_mlx5, "\tsge[%u] len %u, step %u\n", i, sge[i].length, step); 305 return (int)i + 1; 306 } 307 SPDK_DEBUGLOG(accel_mlx5, "\tsge[%u] len %u, step %u\n", i, sge[i].length, step); 308 step -= sge[i].length; 309 } 310 311 SPDK_ERRLOG("Can't unwind sge, remaining %u\n", step); 312 assert(step == 0); 313 314 return 0; 315 } 316 317 static inline void 318 accel_mlx5_crypto_task_complete(struct accel_mlx5_task *task) 319 { 320 struct accel_mlx5_dev *dev = task->qp->dev; 321 322 assert(task->num_ops); 323 spdk_mlx5_mkey_pool_put_bulk(dev->crypto_mkeys, task->mkeys, task->num_ops); 324 spdk_accel_task_complete(&task->base, 0); 325 } 326 327 static inline void 328 accel_mlx5_task_fail(struct accel_mlx5_task *task, int rc) 329 { 330 struct accel_mlx5_dev *dev = task->qp->dev; 331 struct spdk_accel_task *next; 332 struct spdk_accel_sequence *seq; 333 bool driver_seq; 334 335 assert(task->num_reqs == task->num_completed_reqs); 336 SPDK_DEBUGLOG(accel_mlx5, "Fail task %p, opc %d, rc %d\n", task, task->base.op_code, rc); 337 338 if (task->num_ops) { 339 if (task->mlx5_opcode == ACCEL_MLX5_OPC_CRYPTO || task->mlx5_opcode == ACCEL_MLX5_OPC_CRYPTO_MKEY) { 340 spdk_mlx5_mkey_pool_put_bulk(dev->crypto_mkeys, task->mkeys, task->num_ops); 341 } 342 if (task->mlx5_opcode == ACCEL_MLX5_OPC_CRC32C) { 343 spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, task->mkeys, task->num_ops); 344 spdk_mempool_put(dev->dev_ctx->psv_pool, task->psv); 345 } 346 } 347 next = spdk_accel_sequence_next_task(&task->base); 348 seq = task->base.seq; 349 driver_seq = task->driver_seq; 350 351 assert(task->num_reqs == task->num_completed_reqs); 352 SPDK_DEBUGLOG(accel_mlx5, "Fail task %p, opc %d, rc %d\n", task, task->mlx5_opcode, rc); 353 spdk_accel_task_complete(&task->base, rc); 354 355 if (driver_seq) { 356 struct spdk_io_channel *ch = task->qp->dev->ch; 357 358 assert(seq); 
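		/* Driver-owned sequence: if another task remains in the sequence, keep executing it
		 * on this channel; otherwise hand control of the sequence back to the accel framework. */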
359 if (next) { 360 accel_mlx5_execute_sequence(ch, seq); 361 } else { 362 spdk_accel_sequence_continue(seq); 363 } 364 } 365 } 366 367 static int 368 accel_mlx5_translate_addr(void *addr, size_t size, struct spdk_memory_domain *domain, 369 void *domain_ctx, struct accel_mlx5_dev *dev, struct ibv_sge *sge) 370 { 371 struct spdk_rdma_utils_memory_translation map_translation; 372 struct spdk_memory_domain_translation_result domain_translation; 373 struct spdk_memory_domain_translation_ctx local_ctx; 374 int rc; 375 376 if (domain) { 377 domain_translation.size = sizeof(struct spdk_memory_domain_translation_result); 378 local_ctx.size = sizeof(local_ctx); 379 local_ctx.rdma.ibv_qp = dev->qp.verbs_qp; 380 rc = spdk_memory_domain_translate_data(domain, domain_ctx, dev->dev_ctx->domain, 381 &local_ctx, addr, size, &domain_translation); 382 if (spdk_unlikely(rc || domain_translation.iov_count != 1)) { 383 SPDK_ERRLOG("Memory domain translation failed, addr %p, length %zu, iovcnt %u\n", addr, size, 384 domain_translation.iov_count); 385 if (rc == 0) { 386 rc = -EINVAL; 387 } 388 389 return rc; 390 } 391 sge->lkey = domain_translation.rdma.lkey; 392 sge->addr = (uint64_t) domain_translation.iov.iov_base; 393 sge->length = domain_translation.iov.iov_len; 394 } else { 395 rc = spdk_rdma_utils_get_translation(dev->mmap, addr, size, 396 &map_translation); 397 if (spdk_unlikely(rc)) { 398 SPDK_ERRLOG("Memory translation failed, addr %p, length %zu\n", addr, size); 399 return rc; 400 } 401 sge->lkey = spdk_rdma_utils_memory_translation_get_lkey(&map_translation); 402 sge->addr = (uint64_t)addr; 403 sge->length = size; 404 } 405 406 return 0; 407 } 408 409 static inline int 410 accel_mlx5_fill_block_sge(struct accel_mlx5_dev *dev, struct ibv_sge *sge, 411 struct accel_mlx5_iov_sgl *iovs, uint32_t len, uint32_t *_remaining, 412 struct spdk_memory_domain *domain, void *domain_ctx) 413 { 414 void *addr; 415 uint32_t remaining = len; 416 uint32_t size; 417 int i = 0; 418 int rc; 419 420 while (remaining && i < (int)ACCEL_MLX5_MAX_SGE) { 421 size = spdk_min(remaining, iovs->iov->iov_len - iovs->iov_offset); 422 addr = (void *)iovs->iov->iov_base + iovs->iov_offset; 423 rc = accel_mlx5_translate_addr(addr, size, domain, domain_ctx, dev, &sge[i]); 424 if (spdk_unlikely(rc)) { 425 return rc; 426 } 427 SPDK_DEBUGLOG(accel_mlx5, "\t sge[%d]: lkey %u, len %u, addr %"PRIx64"\n", i, sge[i].lkey, 428 sge[i].length, sge[i].addr); 429 accel_mlx5_iov_sgl_advance(iovs, size); 430 i++; 431 assert(remaining >= size); 432 remaining -= size; 433 } 434 *_remaining = remaining; 435 436 return i; 437 } 438 439 static inline bool 440 accel_mlx5_compare_iovs(struct iovec *v1, struct iovec *v2, uint32_t iovcnt) 441 { 442 return memcmp(v1, v2, sizeof(*v1) * iovcnt) == 0; 443 } 444 445 static inline uint16_t 446 accel_mlx5_dev_get_available_slots(struct accel_mlx5_dev *dev, struct accel_mlx5_qp *qp) 447 { 448 assert(qp->wrs_max >= qp->wrs_submitted); 449 assert(dev->wrs_in_cq_max >= dev->wrs_in_cq); 450 451 /* Each time we produce only 1 CQE, so we need 1 CQ slot */ 452 if (spdk_unlikely(dev->wrs_in_cq == dev->wrs_in_cq_max)) { 453 return 0; 454 } 455 456 return qp->wrs_max - qp->wrs_submitted; 457 } 458 459 static inline uint32_t 460 accel_mlx5_task_alloc_mkeys(struct accel_mlx5_task *task, struct spdk_mlx5_mkey_pool *pool) 461 { 462 uint32_t num_ops; 463 int rc; 464 465 assert(task->num_reqs > task->num_completed_reqs); 466 num_ops = task->num_reqs - task->num_completed_reqs; 467 num_ops = spdk_min(num_ops, 
ACCEL_MLX5_MAX_MKEYS_IN_TASK); 468 if (!num_ops) { 469 return 0; 470 } 471 rc = spdk_mlx5_mkey_pool_get_bulk(pool, task->mkeys, num_ops); 472 if (spdk_unlikely(rc)) { 473 return 0; 474 } 475 assert(num_ops <= UINT16_MAX); 476 task->num_ops = num_ops; 477 478 return num_ops; 479 } 480 481 static inline uint8_t 482 bs_to_bs_selector(uint32_t bs) 483 { 484 switch (bs) { 485 case 512: 486 return SPDK_MLX5_BLOCK_SIZE_SELECTOR_512; 487 case 520: 488 return SPDK_MLX5_BLOCK_SIZE_SELECTOR_520; 489 case 4096: 490 return SPDK_MLX5_BLOCK_SIZE_SELECTOR_4096; 491 case 4160: 492 return SPDK_MLX5_BLOCK_SIZE_SELECTOR_4160; 493 default: 494 return SPDK_MLX5_BLOCK_SIZE_SELECTOR_RESERVED; 495 } 496 } 497 498 static inline int 499 accel_mlx5_configure_crypto_umr(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_sge *sge, 500 uint32_t mkey, uint32_t num_blocks, struct spdk_mlx5_crypto_dek_data *dek_data, 501 uint64_t wr_id, uint32_t flags) 502 { 503 struct spdk_mlx5_umr_crypto_attr cattr; 504 struct spdk_mlx5_umr_attr umr_attr; 505 struct accel_mlx5_qp *qp = mlx5_task->qp; 506 struct accel_mlx5_dev *dev = qp->dev; 507 struct spdk_accel_task *task = &mlx5_task->base; 508 uint32_t length, remaining = 0, block_size = task->block_size; 509 int rc; 510 511 length = num_blocks * block_size; 512 SPDK_DEBUGLOG(accel_mlx5, "task %p, domain %p, len %u, blocks %u\n", task, task->src_domain, length, 513 num_blocks); 514 rc = accel_mlx5_fill_block_sge(dev, sge->src_sge, &mlx5_task->src, length, &remaining, 515 task->src_domain, task->src_domain_ctx); 516 if (spdk_unlikely(rc <= 0)) { 517 if (rc == 0) { 518 rc = -EINVAL; 519 } 520 SPDK_ERRLOG("failed set src sge, rc %d\n", rc); 521 return rc; 522 } 523 sge->src_sge_count = rc; 524 if (spdk_unlikely(remaining)) { 525 uint32_t new_len = length - remaining; 526 uint32_t aligned_len, updated_num_blocks; 527 528 SPDK_DEBUGLOG(accel_mlx5, "Incorrect src iovs, handled %u out of %u bytes\n", new_len, length); 529 if (new_len < block_size) { 530 /* We need to process at least 1 block. 
If buffer is too fragmented, we can't do 531 * anything */ 532 return -ERANGE; 533 } 534 535 /* Regular integer division, we need to round down to prev block size */ 536 updated_num_blocks = new_len / block_size; 537 assert(updated_num_blocks); 538 assert(updated_num_blocks < num_blocks); 539 aligned_len = updated_num_blocks * block_size; 540 541 if (aligned_len < new_len) { 542 uint32_t dt = new_len - aligned_len; 543 544 /* We can't process part of block, need to unwind src iov_sgl and sge to the 545 * prev block boundary */ 546 SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind src sge for %u bytes\n", task, dt); 547 accel_mlx5_iov_sgl_unwind(&mlx5_task->src, task->s.iovcnt, dt); 548 sge->src_sge_count = accel_mlx5_sge_unwind(sge->src_sge, sge->src_sge_count, dt); 549 if (!sge->src_sge_count) { 550 return -ERANGE; 551 } 552 } 553 SPDK_DEBUGLOG(accel_mlx5, "task %p, UMR len %u -> %u\n", task, length, aligned_len); 554 length = aligned_len; 555 num_blocks = updated_num_blocks; 556 } 557 558 cattr.xts_iv = task->iv + mlx5_task->num_processed_blocks; 559 cattr.keytag = 0; 560 cattr.dek_obj_id = dek_data->dek_obj_id; 561 cattr.tweak_mode = dek_data->tweak_mode; 562 cattr.enc_order = mlx5_task->enc_order; 563 cattr.bs_selector = bs_to_bs_selector(mlx5_task->base.block_size); 564 if (spdk_unlikely(cattr.bs_selector == SPDK_MLX5_BLOCK_SIZE_SELECTOR_RESERVED)) { 565 SPDK_ERRLOG("unsupported block size %u\n", mlx5_task->base.block_size); 566 return -EINVAL; 567 } 568 umr_attr.mkey = mkey; 569 umr_attr.sge = sge->src_sge; 570 571 if (!mlx5_task->inplace) { 572 SPDK_DEBUGLOG(accel_mlx5, "task %p, dst sge, domain %p, len %u\n", task, task->dst_domain, length); 573 rc = accel_mlx5_fill_block_sge(dev, sge->dst_sge, &mlx5_task->dst, length, &remaining, 574 task->dst_domain, task->dst_domain_ctx); 575 if (spdk_unlikely(rc <= 0)) { 576 if (rc == 0) { 577 rc = -EINVAL; 578 } 579 SPDK_ERRLOG("failed set dst sge, rc %d\n", rc); 580 return rc; 581 } 582 sge->dst_sge_count = rc; 583 if (spdk_unlikely(remaining)) { 584 uint32_t new_len = length - remaining; 585 uint32_t aligned_len, updated_num_blocks, dt; 586 587 SPDK_DEBUGLOG(accel_mlx5, "Incorrect dst iovs, handled %u out of %u bytes\n", new_len, length); 588 if (new_len < block_size) { 589 /* We need to process at least 1 block. If buffer is too fragmented, we can't do 590 * anything */ 591 return -ERANGE; 592 } 593 594 /* Regular integer division, we need to round down to prev block size */ 595 updated_num_blocks = new_len / block_size; 596 assert(updated_num_blocks); 597 assert(updated_num_blocks < num_blocks); 598 aligned_len = updated_num_blocks * block_size; 599 600 if (aligned_len < new_len) { 601 dt = new_len - aligned_len; 602 assert(dt > 0 && dt < length); 603 /* We can't process part of block, need to unwind src and dst iov_sgl and sge to the 604 * prev block boundary */ 605 SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind dst sge for %u bytes\n", task, dt); 606 accel_mlx5_iov_sgl_unwind(&mlx5_task->dst, task->d.iovcnt, dt); 607 sge->dst_sge_count = accel_mlx5_sge_unwind(sge->dst_sge, sge->dst_sge_count, dt); 608 assert(sge->dst_sge_count > 0 && sge->dst_sge_count <= ACCEL_MLX5_MAX_SGE); 609 if (!sge->dst_sge_count) { 610 return -ERANGE; 611 } 612 } 613 assert(length > aligned_len); 614 dt = length - aligned_len; 615 SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind src sge for %u bytes\n", task, dt); 616 /* The same for src iov_sgl and sge. 
In worst case we can unwind SRC 2 times */ 617 accel_mlx5_iov_sgl_unwind(&mlx5_task->src, task->s.iovcnt, dt); 618 sge->src_sge_count = accel_mlx5_sge_unwind(sge->src_sge, sge->src_sge_count, dt); 619 assert(sge->src_sge_count > 0 && sge->src_sge_count <= ACCEL_MLX5_MAX_SGE); 620 if (!sge->src_sge_count) { 621 return -ERANGE; 622 } 623 SPDK_DEBUGLOG(accel_mlx5, "task %p, UMR len %u -> %u\n", task, length, aligned_len); 624 length = aligned_len; 625 num_blocks = updated_num_blocks; 626 } 627 } 628 629 SPDK_DEBUGLOG(accel_mlx5, 630 "task %p: bs %u, iv %"PRIu64", enc_on_tx %d, tweak_mode %d, len %u, mkey %x, blocks %u\n", 631 mlx5_task, task->block_size, cattr.xts_iv, mlx5_task->enc_order, cattr.tweak_mode, length, mkey, 632 num_blocks); 633 634 umr_attr.sge_count = sge->src_sge_count; 635 umr_attr.umr_len = length; 636 assert((uint32_t)mlx5_task->num_processed_blocks + num_blocks <= UINT16_MAX); 637 mlx5_task->num_processed_blocks += num_blocks; 638 639 rc = spdk_mlx5_umr_configure_crypto(qp->qp, &umr_attr, &cattr, wr_id, flags); 640 641 return rc; 642 } 643 644 static inline int 645 accel_mlx5_crypto_task_process(struct accel_mlx5_task *mlx5_task) 646 { 647 struct accel_mlx5_sge sges[ACCEL_MLX5_MAX_MKEYS_IN_TASK]; 648 struct spdk_mlx5_crypto_dek_data dek_data; 649 struct accel_mlx5_qp *qp = mlx5_task->qp; 650 struct accel_mlx5_dev *dev = qp->dev; 651 /* First RDMA after UMR must have a SMALL_FENCE */ 652 uint32_t first_rdma_fence = SPDK_MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE; 653 uint16_t num_blocks; 654 uint16_t num_ops = spdk_min(mlx5_task->num_reqs - mlx5_task->num_completed_reqs, 655 mlx5_task->num_ops); 656 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp); 657 uint16_t i; 658 int rc; 659 660 assert(qp_slot > 1); 661 num_ops = spdk_min(num_ops, qp_slot >> 1); 662 if (spdk_unlikely(!num_ops)) { 663 return -EINVAL; 664 } 665 666 rc = spdk_mlx5_crypto_get_dek_data(mlx5_task->base.crypto_key->priv, dev->dev_ctx->pd, &dek_data); 667 if (spdk_unlikely(rc)) { 668 return rc; 669 } 670 671 mlx5_task->num_wrs = 0; 672 SPDK_DEBUGLOG(accel_mlx5, "begin, task, %p, reqs: total %u, submitted %u, completed %u\n", 673 mlx5_task, mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs); 674 for (i = 0; i < num_ops; i++) { 675 if (mlx5_task->num_submitted_reqs + i + 1 == mlx5_task->num_reqs) { 676 /* Last request may consume less than calculated if crypto_multi_block is true */ 677 assert(mlx5_task->num_blocks > mlx5_task->num_submitted_reqs); 678 num_blocks = mlx5_task->num_blocks - mlx5_task->num_processed_blocks; 679 } else { 680 num_blocks = mlx5_task->blocks_per_req; 681 } 682 683 rc = accel_mlx5_configure_crypto_umr(mlx5_task, &sges[i], mlx5_task->mkeys[i]->mkey, num_blocks, 684 &dek_data, 0, 0); 685 if (spdk_unlikely(rc)) { 686 SPDK_ERRLOG("UMR configure failed with %d\n", rc); 687 return rc; 688 } 689 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task); 690 dev->stats.crypto_umrs++; 691 } 692 693 /* Loop `num_ops - 1` for easy flags handling */ 694 for (i = 0; i < num_ops - 1; i++) { 695 /* UMR is used as a destination for RDMA_READ - from UMR to sge */ 696 if (mlx5_task->inplace) { 697 rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].src_sge, sges[i].src_sge_count, 0, 698 mlx5_task->mkeys[i]->mkey, 0, first_rdma_fence); 699 } else { 700 rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].dst_sge, sges[i].dst_sge_count, 0, 701 mlx5_task->mkeys[i]->mkey, 0, first_rdma_fence); 702 } 703 if (spdk_unlikely(rc)) { 704 SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc); 705 return 
rc; 706 } 707 708 first_rdma_fence = 0; 709 assert(mlx5_task->num_submitted_reqs < mlx5_task->num_reqs); 710 assert(mlx5_task->num_submitted_reqs < UINT16_MAX); 711 mlx5_task->num_submitted_reqs++; 712 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task); 713 dev->stats.rdma_reads++; 714 } 715 716 if (mlx5_task->inplace) { 717 rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].src_sge, sges[i].src_sge_count, 0, 718 mlx5_task->mkeys[i]->mkey, (uint64_t)mlx5_task, first_rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE); 719 } else { 720 rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].dst_sge, sges[i].dst_sge_count, 0, 721 mlx5_task->mkeys[i]->mkey, (uint64_t)mlx5_task, first_rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE); 722 } 723 if (spdk_unlikely(rc)) { 724 SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc); 725 return rc; 726 } 727 728 assert(mlx5_task->num_submitted_reqs < mlx5_task->num_reqs); 729 assert(mlx5_task->num_submitted_reqs < UINT16_MAX); 730 mlx5_task->num_submitted_reqs++; 731 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task); 732 dev->stats.rdma_reads++; 733 STAILQ_INSERT_TAIL(&qp->in_hw, mlx5_task, link); 734 735 if (spdk_unlikely(mlx5_task->num_submitted_reqs == mlx5_task->num_reqs && 736 mlx5_task->num_blocks > mlx5_task->num_processed_blocks)) { 737 /* We hit "out of sge 738 * entries" case with highly fragmented payload. In that case 739 * accel_mlx5_configure_crypto_umr function handled fewer data blocks than expected 740 * That means we need at least 1 more request to complete this task, this request will be 741 * executed once all submitted ones are completed */ 742 SPDK_DEBUGLOG(accel_mlx5, "task %p, processed %u/%u blocks, add extra req\n", mlx5_task, 743 mlx5_task->num_processed_blocks, mlx5_task->num_blocks); 744 mlx5_task->num_reqs++; 745 } 746 747 SPDK_DEBUGLOG(accel_mlx5, "end, task, %p, reqs: total %u, submitted %u, completed %u\n", mlx5_task, 748 mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs); 749 750 return 0; 751 } 752 753 static inline int 754 accel_mlx5_crypto_task_continue(struct accel_mlx5_task *task) 755 { 756 struct accel_mlx5_qp *qp = task->qp; 757 struct accel_mlx5_dev *dev = qp->dev; 758 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp); 759 760 assert(task->num_reqs > task->num_completed_reqs); 761 if (task->num_ops == 0) { 762 /* No mkeys allocated, try to allocate now */ 763 if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(task, dev->crypto_mkeys))) { 764 /* Pool is empty, queue this task */ 765 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 766 dev->stats.nomem_mkey++; 767 return -ENOMEM; 768 } 769 } 770 /* We need to post at least 1 UMR and 1 RDMA operation */ 771 if (spdk_unlikely(qp_slot < 2)) { 772 /* QP is full, queue this task */ 773 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 774 task->qp->dev->stats.nomem_qdepth++; 775 return -ENOMEM; 776 } 777 778 return accel_mlx5_crypto_task_process(task); 779 } 780 781 static inline int 782 accel_mlx5_crypto_task_init(struct accel_mlx5_task *mlx5_task) 783 { 784 struct spdk_accel_task *task = &mlx5_task->base; 785 struct accel_mlx5_dev *dev = mlx5_task->qp->dev; 786 uint64_t src_nbytes = task->nbytes; 787 #ifdef DEBUG 788 uint64_t dst_nbytes; 789 uint32_t i; 790 #endif 791 bool crypto_key_ok; 792 793 crypto_key_ok = (task->crypto_key && task->crypto_key->module_if == &g_accel_mlx5.module && 794 task->crypto_key->priv); 795 if (spdk_unlikely((task->nbytes % mlx5_task->base.block_size != 0) || !crypto_key_ok)) { 796 if (crypto_key_ok) { 797 SPDK_ERRLOG("src 
length %"PRIu64" is not a multiple of the block size %u\n", task->nbytes, 798 mlx5_task->base.block_size); 799 } else { 800 SPDK_ERRLOG("Wrong crypto key provided\n"); 801 } 802 return -EINVAL; 803 } 804 805 assert(src_nbytes / mlx5_task->base.block_size <= UINT16_MAX); 806 mlx5_task->num_blocks = src_nbytes / mlx5_task->base.block_size; 807 accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt); 808 if (task->d.iovcnt == 0 || (task->d.iovcnt == task->s.iovcnt && 809 accel_mlx5_compare_iovs(task->d.iovs, task->s.iovs, task->s.iovcnt))) { 810 mlx5_task->inplace = 1; 811 } else { 812 #ifdef DEBUG 813 dst_nbytes = 0; 814 for (i = 0; i < task->d.iovcnt; i++) { 815 dst_nbytes += task->d.iovs[i].iov_len; 816 } 817 818 if (spdk_unlikely(src_nbytes != dst_nbytes)) { 819 return -EINVAL; 820 } 821 #endif 822 mlx5_task->inplace = 0; 823 accel_mlx5_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt); 824 } 825 826 if (dev->crypto_multi_block) { 827 if (dev->crypto_split_blocks) { 828 assert(SPDK_CEIL_DIV(mlx5_task->num_blocks, dev->crypto_split_blocks) <= UINT16_MAX); 829 mlx5_task->num_reqs = SPDK_CEIL_DIV(mlx5_task->num_blocks, dev->crypto_split_blocks); 830 /* Last req may consume less blocks */ 831 mlx5_task->blocks_per_req = spdk_min(mlx5_task->num_blocks, dev->crypto_split_blocks); 832 } else { 833 if (task->s.iovcnt > ACCEL_MLX5_MAX_SGE || task->d.iovcnt > ACCEL_MLX5_MAX_SGE) { 834 uint32_t max_sge_count = spdk_max(task->s.iovcnt, task->d.iovcnt); 835 836 assert(SPDK_CEIL_DIV(max_sge_count, ACCEL_MLX5_MAX_SGE) <= UINT16_MAX); 837 mlx5_task->num_reqs = SPDK_CEIL_DIV(max_sge_count, ACCEL_MLX5_MAX_SGE); 838 mlx5_task->blocks_per_req = SPDK_CEIL_DIV(mlx5_task->num_blocks, mlx5_task->num_reqs); 839 } else { 840 mlx5_task->num_reqs = 1; 841 mlx5_task->blocks_per_req = mlx5_task->num_blocks; 842 } 843 } 844 } else { 845 mlx5_task->num_reqs = mlx5_task->num_blocks; 846 mlx5_task->blocks_per_req = 1; 847 } 848 849 if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(mlx5_task, dev->crypto_mkeys))) { 850 /* Pool is empty, queue this task */ 851 SPDK_DEBUGLOG(accel_mlx5, "no reqs in pool, dev %s\n", dev->dev_ctx->context->device->name); 852 dev->stats.nomem_mkey++; 853 return -ENOMEM; 854 } 855 if (spdk_unlikely(accel_mlx5_dev_get_available_slots(dev, &dev->qp) < 2)) { 856 /* Queue is full, queue this task */ 857 SPDK_DEBUGLOG(accel_mlx5, "dev %s qp %p is full\n", dev->dev_ctx->context->device->name, 858 mlx5_task->qp); 859 dev->stats.nomem_qdepth++; 860 return -ENOMEM; 861 } 862 863 SPDK_DEBUGLOG(accel_mlx5, "task %p, src_iovs %u, dst_iovs %u, num_reqs %u, " 864 "blocks/req %u, blocks %u, inplace %d\n", task, task->s.iovcnt, task->d.iovcnt, 865 mlx5_task->num_reqs, mlx5_task->blocks_per_req, mlx5_task->num_blocks, mlx5_task->inplace); 866 867 return 0; 868 } 869 870 static inline void 871 accel_mlx5_copy_task_complete(struct accel_mlx5_task *mlx5_task) 872 { 873 spdk_accel_task_complete(&mlx5_task->base, 0); 874 } 875 876 static inline int 877 accel_mlx5_copy_task_process_one(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_qp *qp, 878 uint64_t wrid, uint32_t fence) 879 { 880 struct spdk_accel_task *task = &mlx5_task->base; 881 struct accel_mlx5_sge sge; 882 uint32_t remaining = 0; 883 uint32_t dst_len; 884 int rc; 885 886 /* Limit one RDMA_WRITE by length of dst buffer. Not all src buffers may fit into one dst buffer due to 887 * limitation on ACCEL_MLX5_MAX_SGE. 
If this is the case then remaining is not zero */ 888 assert(mlx5_task->dst.iov->iov_len > mlx5_task->dst.iov_offset); 889 dst_len = mlx5_task->dst.iov->iov_len - mlx5_task->dst.iov_offset; 890 rc = accel_mlx5_fill_block_sge(qp->dev, sge.src_sge, &mlx5_task->src, dst_len, &remaining, 891 task->src_domain, task->src_domain_ctx); 892 if (spdk_unlikely(rc <= 0)) { 893 if (rc == 0) { 894 rc = -EINVAL; 895 } 896 SPDK_ERRLOG("failed set src sge, rc %d\n", rc); 897 return rc; 898 } 899 sge.src_sge_count = rc; 900 assert(dst_len > remaining); 901 dst_len -= remaining; 902 903 rc = accel_mlx5_fill_block_sge(qp->dev, sge.dst_sge, &mlx5_task->dst, dst_len, &remaining, 904 task->dst_domain, task->dst_domain_ctx); 905 if (spdk_unlikely(rc != 1)) { 906 /* We use single dst entry, any result other than 1 is an error */ 907 if (rc == 0) { 908 rc = -EINVAL; 909 } 910 SPDK_ERRLOG("failed set dst sge, rc %d\n", rc); 911 return rc; 912 } 913 if (spdk_unlikely(remaining)) { 914 SPDK_ERRLOG("Incorrect dst length, remaining %u\n", remaining); 915 assert(0); 916 return -EINVAL; 917 } 918 919 rc = spdk_mlx5_qp_rdma_write(mlx5_task->qp->qp, sge.src_sge, sge.src_sge_count, 920 sge.dst_sge[0].addr, sge.dst_sge[0].lkey, wrid, fence); 921 if (spdk_unlikely(rc)) { 922 SPDK_ERRLOG("new RDMA WRITE failed with %d\n", rc); 923 return rc; 924 } 925 qp->dev->stats.rdma_writes++; 926 927 return 0; 928 } 929 930 static inline int 931 accel_mlx5_copy_task_process(struct accel_mlx5_task *mlx5_task) 932 { 933 934 struct accel_mlx5_qp *qp = mlx5_task->qp; 935 struct accel_mlx5_dev *dev = qp->dev; 936 uint16_t i; 937 int rc; 938 939 mlx5_task->num_wrs = 0; 940 assert(mlx5_task->num_reqs > 0); 941 assert(mlx5_task->num_ops > 0); 942 943 /* Handle n-1 reqs in order to simplify wrid and fence handling */ 944 for (i = 0; i < mlx5_task->num_ops - 1; i++) { 945 rc = accel_mlx5_copy_task_process_one(mlx5_task, qp, 0, 0); 946 if (spdk_unlikely(rc)) { 947 return rc; 948 } 949 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task); 950 mlx5_task->num_submitted_reqs++; 951 } 952 953 rc = accel_mlx5_copy_task_process_one(mlx5_task, qp, (uint64_t)mlx5_task, 954 SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE); 955 if (spdk_unlikely(rc)) { 956 return rc; 957 } 958 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task); 959 mlx5_task->num_submitted_reqs++; 960 STAILQ_INSERT_TAIL(&qp->in_hw, mlx5_task, link); 961 962 SPDK_DEBUGLOG(accel_mlx5, "end, copy task, %p\n", mlx5_task); 963 964 return 0; 965 } 966 967 static inline int 968 accel_mlx5_copy_task_continue(struct accel_mlx5_task *task) 969 { 970 struct accel_mlx5_qp *qp = task->qp; 971 struct accel_mlx5_dev *dev = qp->dev; 972 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp); 973 974 task->num_ops = spdk_min(qp_slot, task->num_reqs - task->num_completed_reqs); 975 if (spdk_unlikely(task->num_ops == 0)) { 976 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 977 dev->stats.nomem_qdepth++; 978 return -ENOMEM; 979 } 980 return accel_mlx5_copy_task_process(task); 981 } 982 983 static inline uint32_t 984 accel_mlx5_get_copy_task_count(struct iovec *src_iov, uint32_t src_iovcnt, 985 struct iovec *dst_iov, uint32_t dst_iovcnt) 986 { 987 uint32_t src = 0; 988 uint32_t dst = 0; 989 uint64_t src_offset = 0; 990 uint64_t dst_offset = 0; 991 uint32_t num_ops = 0; 992 uint32_t src_sge_count = 0; 993 994 while (src < src_iovcnt && dst < dst_iovcnt) { 995 uint64_t src_len = src_iov[src].iov_len - src_offset; 996 uint64_t dst_len = dst_iov[dst].iov_len - dst_offset; 997 998 if (dst_len < src_len) { 999 dst_offset = 
0; 1000 src_offset += dst_len; 1001 dst++; 1002 num_ops++; 1003 src_sge_count = 0; 1004 } else if (src_len < dst_len) { 1005 dst_offset += src_len; 1006 src_offset = 0; 1007 src++; 1008 if (++src_sge_count >= ACCEL_MLX5_MAX_SGE) { 1009 num_ops++; 1010 src_sge_count = 0; 1011 } 1012 } else { 1013 dst_offset = 0; 1014 src_offset = 0; 1015 dst++; 1016 src++; 1017 num_ops++; 1018 src_sge_count = 0; 1019 } 1020 } 1021 1022 assert(src == src_iovcnt); 1023 assert(dst == dst_iovcnt); 1024 assert(src_offset == 0); 1025 assert(dst_offset == 0); 1026 return num_ops; 1027 } 1028 1029 static inline int 1030 accel_mlx5_copy_task_init(struct accel_mlx5_task *mlx5_task) 1031 { 1032 struct spdk_accel_task *task = &mlx5_task->base; 1033 struct accel_mlx5_qp *qp = mlx5_task->qp; 1034 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(qp->dev, qp); 1035 1036 if (spdk_likely(task->s.iovcnt <= ACCEL_MLX5_MAX_SGE)) { 1037 mlx5_task->num_reqs = task->d.iovcnt; 1038 } else if (task->d.iovcnt == 1) { 1039 mlx5_task->num_reqs = SPDK_CEIL_DIV(task->s.iovcnt, ACCEL_MLX5_MAX_SGE); 1040 } else { 1041 mlx5_task->num_reqs = accel_mlx5_get_copy_task_count(task->s.iovs, task->s.iovcnt, 1042 task->d.iovs, task->d.iovcnt); 1043 } 1044 mlx5_task->inplace = 0; 1045 accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt); 1046 accel_mlx5_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt); 1047 mlx5_task->num_ops = spdk_min(qp_slot, mlx5_task->num_reqs); 1048 if (spdk_unlikely(!mlx5_task->num_ops)) { 1049 qp->dev->stats.nomem_qdepth++; 1050 return -ENOMEM; 1051 } 1052 SPDK_DEBUGLOG(accel_mlx5, "copy task num_reqs %u, num_ops %u\n", mlx5_task->num_reqs, 1053 mlx5_task->num_ops); 1054 1055 return 0; 1056 } 1057 1058 static inline uint32_t 1059 accel_mlx5_advance_iovec(struct iovec *iov, uint32_t iovcnt, size_t *iov_offset, size_t *len) 1060 { 1061 uint32_t i; 1062 size_t iov_len; 1063 1064 for (i = 0; *len != 0 && i < iovcnt; i++) { 1065 iov_len = iov[i].iov_len - *iov_offset; 1066 1067 if (iov_len < *len) { 1068 *iov_offset = 0; 1069 *len -= iov_len; 1070 continue; 1071 } 1072 if (iov_len == *len) { 1073 *iov_offset = 0; 1074 i++; 1075 } else { /* iov_len > *len */ 1076 *iov_offset += *len; 1077 } 1078 *len = 0; 1079 break; 1080 } 1081 1082 return i; 1083 } 1084 1085 static inline void 1086 accel_mlx5_crc_task_complete(struct accel_mlx5_task *mlx5_task) 1087 { 1088 struct accel_mlx5_dev *dev = mlx5_task->qp->dev; 1089 1090 *mlx5_task->base.crc_dst = mlx5_task->psv->crc ^ UINT32_MAX; 1091 /* Normal task completion without allocated mkeys is not possible */ 1092 assert(mlx5_task->num_ops); 1093 spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, mlx5_task->mkeys, mlx5_task->num_ops); 1094 spdk_mempool_put(dev->dev_ctx->psv_pool, mlx5_task->psv); 1095 spdk_accel_task_complete(&mlx5_task->base, 0); 1096 } 1097 1098 static inline int 1099 accel_mlx5_crc_task_configure_umr(struct accel_mlx5_task *mlx5_task, struct ibv_sge *sge, 1100 uint32_t sge_count, struct spdk_mlx5_mkey_pool_obj *mkey, 1101 enum spdk_mlx5_umr_sig_domain sig_domain, uint32_t umr_len, 1102 bool sig_init, bool sig_check_gen) 1103 { 1104 struct spdk_mlx5_umr_sig_attr sattr = { 1105 .seed = mlx5_task->base.seed ^ UINT32_MAX, 1106 .psv_index = mlx5_task->psv->psv_index, 1107 .domain = sig_domain, 1108 .sigerr_count = mkey->sig.sigerr_count, 1109 .raw_data_size = umr_len, 1110 .init = sig_init, 1111 .check_gen = sig_check_gen, 1112 }; 1113 struct spdk_mlx5_umr_attr umr_attr = { 1114 .mkey = mkey->mkey, 1115 .umr_len = umr_len, 1116 .sge_count = 
sge_count, 1117 .sge = sge, 1118 }; 1119 1120 return spdk_mlx5_umr_configure_sig(mlx5_task->qp->qp, &umr_attr, &sattr, 0, 0); 1121 } 1122 1123 static inline int 1124 accel_mlx5_crc_task_fill_sge(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_sge *sge) 1125 { 1126 struct spdk_accel_task *task = &mlx5_task->base; 1127 struct accel_mlx5_qp *qp = mlx5_task->qp; 1128 struct accel_mlx5_dev *dev = qp->dev; 1129 uint32_t remaining; 1130 int rc; 1131 1132 rc = accel_mlx5_fill_block_sge(dev, sge->src_sge, &mlx5_task->src, task->nbytes, &remaining, 1133 task->src_domain, task->src_domain_ctx); 1134 if (spdk_unlikely(rc <= 0)) { 1135 if (rc == 0) { 1136 rc = -EINVAL; 1137 } 1138 SPDK_ERRLOG("failed set src sge, rc %d\n", rc); 1139 return rc; 1140 } 1141 assert(remaining == 0); 1142 sge->src_sge_count = rc; 1143 1144 if (!mlx5_task->inplace) { 1145 rc = accel_mlx5_fill_block_sge(dev, sge->dst_sge, &mlx5_task->dst, task->nbytes, &remaining, 1146 task->dst_domain, task->dst_domain_ctx); 1147 if (spdk_unlikely(rc <= 0)) { 1148 if (rc == 0) { 1149 rc = -EINVAL; 1150 } 1151 SPDK_ERRLOG("failed set dst sge, rc %d\n", rc); 1152 return rc; 1153 } 1154 assert(remaining == 0); 1155 sge->dst_sge_count = rc; 1156 } 1157 1158 return 0; 1159 } 1160 1161 static inline int 1162 accel_mlx5_crc_task_process_one_req(struct accel_mlx5_task *mlx5_task) 1163 { 1164 struct accel_mlx5_sge sges; 1165 struct accel_mlx5_qp *qp = mlx5_task->qp; 1166 struct accel_mlx5_dev *dev = qp->dev; 1167 uint32_t num_ops = spdk_min(mlx5_task->num_reqs - mlx5_task->num_completed_reqs, 1168 mlx5_task->num_ops); 1169 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp); 1170 uint32_t rdma_fence = SPDK_MLX5_WQE_CTRL_STRONG_ORDERING; 1171 struct ibv_sge *sge; 1172 int rc; 1173 uint16_t sge_count; 1174 1175 num_ops = spdk_min(num_ops, qp_slot >> 1); 1176 if (spdk_unlikely(!num_ops)) { 1177 return -EINVAL; 1178 } 1179 1180 mlx5_task->num_wrs = 0; 1181 /* At this moment we have as many requests as can be submitted to a qp */ 1182 rc = accel_mlx5_crc_task_fill_sge(mlx5_task, &sges); 1183 if (spdk_unlikely(rc)) { 1184 return rc; 1185 } 1186 rc = accel_mlx5_crc_task_configure_umr(mlx5_task, sges.src_sge, sges.src_sge_count, 1187 mlx5_task->mkeys[0], SPDK_MLX5_UMR_SIG_DOMAIN_WIRE, mlx5_task->base.nbytes, true, true); 1188 if (spdk_unlikely(rc)) { 1189 SPDK_ERRLOG("UMR configure failed with %d\n", rc); 1190 return rc; 1191 } 1192 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task); 1193 dev->stats.sig_umrs++; 1194 1195 if (mlx5_task->inplace) { 1196 sge = sges.src_sge; 1197 sge_count = sges.src_sge_count; 1198 } else { 1199 sge = sges.dst_sge; 1200 sge_count = sges.dst_sge_count; 1201 } 1202 1203 /* 1204 * Add the crc destination to the end of sges. A free entry must be available for CRC 1205 * because the task init function reserved it. 
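	 * The calculated CRC is delivered into the DMA-able psv->crc buffer and copied
	 * (xor'ed with UINT32_MAX) into the user's crc_dst when the task completes.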
1206 */ 1207 assert(sge_count < ACCEL_MLX5_MAX_SGE); 1208 sge[sge_count].lkey = mlx5_task->psv->crc_lkey; 1209 sge[sge_count].addr = (uintptr_t)&mlx5_task->psv->crc; 1210 sge[sge_count++].length = sizeof(uint32_t); 1211 1212 if (spdk_unlikely(mlx5_task->psv->bits.error)) { 1213 rc = spdk_mlx5_qp_set_psv(qp->qp, mlx5_task->psv->psv_index, *mlx5_task->base.crc_dst, 0, 0); 1214 if (spdk_unlikely(rc)) { 1215 SPDK_ERRLOG("SET_PSV failed with %d\n", rc); 1216 return rc; 1217 } 1218 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task); 1219 } 1220 1221 rc = spdk_mlx5_qp_rdma_read(qp->qp, sge, sge_count, 0, mlx5_task->mkeys[0]->mkey, 1222 (uint64_t)mlx5_task, rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE); 1223 if (spdk_unlikely(rc)) { 1224 SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc); 1225 return rc; 1226 } 1227 mlx5_task->num_submitted_reqs++; 1228 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task); 1229 dev->stats.rdma_reads++; 1230 1231 return 0; 1232 } 1233 1234 static inline int 1235 accel_mlx5_crc_task_fill_umr_sge(struct accel_mlx5_qp *qp, struct ibv_sge *sge, 1236 struct accel_mlx5_iov_sgl *umr_iovs, struct spdk_memory_domain *domain, 1237 void *domain_ctx, struct accel_mlx5_iov_sgl *rdma_iovs, size_t *len) 1238 { 1239 int umr_idx = 0; 1240 int rdma_idx = 0; 1241 int umr_iovcnt = spdk_min(umr_iovs->iovcnt, (int)ACCEL_MLX5_MAX_SGE); 1242 int rdma_iovcnt = spdk_min(rdma_iovs->iovcnt, (int)ACCEL_MLX5_MAX_SGE); 1243 size_t umr_iov_offset; 1244 size_t rdma_iov_offset; 1245 size_t umr_len = 0; 1246 void *sge_addr; 1247 size_t sge_len; 1248 size_t umr_sge_len; 1249 size_t rdma_sge_len; 1250 int rc; 1251 1252 umr_iov_offset = umr_iovs->iov_offset; 1253 rdma_iov_offset = rdma_iovs->iov_offset; 1254 1255 while (umr_idx < umr_iovcnt && rdma_idx < rdma_iovcnt) { 1256 umr_sge_len = umr_iovs->iov[umr_idx].iov_len - umr_iov_offset; 1257 rdma_sge_len = rdma_iovs->iov[rdma_idx].iov_len - rdma_iov_offset; 1258 sge_addr = umr_iovs->iov[umr_idx].iov_base + umr_iov_offset; 1259 1260 if (umr_sge_len == rdma_sge_len) { 1261 rdma_idx++; 1262 umr_iov_offset = 0; 1263 rdma_iov_offset = 0; 1264 sge_len = umr_sge_len; 1265 } else if (umr_sge_len < rdma_sge_len) { 1266 umr_iov_offset = 0; 1267 rdma_iov_offset += umr_sge_len; 1268 sge_len = umr_sge_len; 1269 } else { 1270 size_t remaining; 1271 1272 remaining = umr_sge_len - rdma_sge_len; 1273 while (remaining) { 1274 rdma_idx++; 1275 if (rdma_idx == (int)ACCEL_MLX5_MAX_SGE) { 1276 break; 1277 } 1278 rdma_sge_len = rdma_iovs->iov[rdma_idx].iov_len; 1279 if (remaining == rdma_sge_len) { 1280 rdma_idx++; 1281 rdma_iov_offset = 0; 1282 umr_iov_offset = 0; 1283 remaining = 0; 1284 break; 1285 } 1286 if (remaining < rdma_sge_len) { 1287 rdma_iov_offset = remaining; 1288 umr_iov_offset = 0; 1289 remaining = 0; 1290 break; 1291 } 1292 remaining -= rdma_sge_len; 1293 } 1294 sge_len = umr_sge_len - remaining; 1295 } 1296 rc = accel_mlx5_translate_addr(sge_addr, sge_len, domain, domain_ctx, qp->dev, &sge[umr_idx]); 1297 if (spdk_unlikely(rc)) { 1298 return -EINVAL; 1299 } 1300 SPDK_DEBUGLOG(accel_mlx5, "\t sge[%d] lkey %u, addr %p, len %u\n", umr_idx, sge[umr_idx].lkey, 1301 (void *)sge[umr_idx].addr, sge[umr_idx].length); 1302 umr_len += sge_len; 1303 umr_idx++; 1304 } 1305 accel_mlx5_iov_sgl_advance(umr_iovs, umr_len); 1306 accel_mlx5_iov_sgl_advance(rdma_iovs, umr_len); 1307 *len = umr_len; 1308 1309 return umr_idx; 1310 } 1311 1312 static inline int 1313 accel_mlx5_crc_task_process_multi_req(struct accel_mlx5_task *mlx5_task) 1314 { 1315 size_t 
umr_len[ACCEL_MLX5_MAX_MKEYS_IN_TASK]; 1316 struct ibv_sge sges[ACCEL_MLX5_MAX_SGE]; 1317 struct spdk_accel_task *task = &mlx5_task->base; 1318 struct accel_mlx5_qp *qp = mlx5_task->qp; 1319 struct accel_mlx5_dev *dev = qp->dev; 1320 struct accel_mlx5_iov_sgl umr_sgl; 1321 struct accel_mlx5_iov_sgl *umr_sgl_ptr; 1322 struct accel_mlx5_iov_sgl rdma_sgl; 1323 uint64_t umr_offset; 1324 uint32_t rdma_fence = SPDK_MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE; 1325 int sge_count; 1326 uint32_t remaining; 1327 int rc; 1328 uint16_t i; 1329 uint16_t num_ops = spdk_min(mlx5_task->num_reqs - mlx5_task->num_completed_reqs, 1330 mlx5_task->num_ops); 1331 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp); 1332 bool sig_init, sig_check_gen = false; 1333 1334 num_ops = spdk_min(num_ops, qp_slot >> 1); 1335 if (spdk_unlikely(!num_ops)) { 1336 return -EINVAL; 1337 } 1338 /* Init signature on the first UMR */ 1339 sig_init = !mlx5_task->num_submitted_reqs; 1340 1341 /* 1342 * accel_mlx5_crc_task_fill_umr_sge() and accel_mlx5_fill_block_sge() advance an IOV during iteration 1343 * on it. We must copy accel_mlx5_iov_sgl to iterate twice or more on the same IOV. 1344 * 1345 * In the in-place case, we iterate on the source IOV three times. That's why we need two copies of 1346 * the source accel_mlx5_iov_sgl. 1347 * 1348 * In the out-of-place case, we iterate on the source IOV once and on the destination IOV two times. 1349 * So, we need one copy of the destination accel_mlx5_iov_sgl. 1350 */ 1351 if (mlx5_task->inplace) { 1352 accel_mlx5_iov_sgl_init(&umr_sgl, mlx5_task->src.iov, mlx5_task->src.iovcnt); 1353 umr_sgl_ptr = &umr_sgl; 1354 accel_mlx5_iov_sgl_init(&rdma_sgl, mlx5_task->src.iov, mlx5_task->src.iovcnt); 1355 } else { 1356 umr_sgl_ptr = &mlx5_task->src; 1357 accel_mlx5_iov_sgl_init(&rdma_sgl, mlx5_task->dst.iov, mlx5_task->dst.iovcnt); 1358 } 1359 mlx5_task->num_wrs = 0; 1360 for (i = 0; i < num_ops; i++) { 1361 /* 1362 * The last request may have only CRC. Skip UMR in this case because the MKey from 1363 * the previous request is used. 1364 */ 1365 if (umr_sgl_ptr->iovcnt == 0) { 1366 assert((mlx5_task->num_completed_reqs + i + 1) == mlx5_task->num_reqs); 1367 break; 1368 } 1369 sge_count = accel_mlx5_crc_task_fill_umr_sge(qp, sges, umr_sgl_ptr, task->src_domain, 1370 task->src_domain_ctx, &rdma_sgl, &umr_len[i]); 1371 if (spdk_unlikely(sge_count <= 0)) { 1372 rc = (sge_count == 0) ? -EINVAL : sge_count; 1373 SPDK_ERRLOG("failed set UMR sge, rc %d\n", rc); 1374 return rc; 1375 } 1376 if (umr_sgl_ptr->iovcnt == 0) { 1377 /* 1378 * We post RDMA without UMR if the last request has only CRC. We use an MKey from 1379 * the last UMR in this case. Since the last request can be postponed to the next 1380 * call of this function, we must save the MKey to the task structure. 
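	 * last_mkey_idx selects the MKey and last_umr_len becomes the offset of the final
	 * RDMA_READ that carries only the CRC.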
1381 */ 1382 mlx5_task->last_umr_len = umr_len[i]; 1383 mlx5_task->last_mkey_idx = i; 1384 sig_check_gen = true; 1385 } 1386 rc = accel_mlx5_crc_task_configure_umr(mlx5_task, sges, sge_count, mlx5_task->mkeys[i], 1387 SPDK_MLX5_UMR_SIG_DOMAIN_WIRE, umr_len[i], sig_init, 1388 sig_check_gen); 1389 if (spdk_unlikely(rc)) { 1390 SPDK_ERRLOG("UMR configure failed with %d\n", rc); 1391 return rc; 1392 } 1393 sig_init = false; 1394 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task); 1395 dev->stats.sig_umrs++; 1396 } 1397 1398 if (spdk_unlikely(mlx5_task->psv->bits.error)) { 1399 rc = spdk_mlx5_qp_set_psv(qp->qp, mlx5_task->psv->psv_index, *mlx5_task->base.crc_dst, 0, 0); 1400 if (spdk_unlikely(rc)) { 1401 SPDK_ERRLOG("SET_PSV failed with %d\n", rc); 1402 return rc; 1403 } 1404 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task); 1405 } 1406 1407 for (i = 0; i < num_ops - 1; i++) { 1408 if (mlx5_task->inplace) { 1409 sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->src, umr_len[i], &remaining, 1410 task->src_domain, task->src_domain_ctx); 1411 } else { 1412 sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->dst, umr_len[i], &remaining, 1413 task->dst_domain, task->dst_domain_ctx); 1414 } 1415 if (spdk_unlikely(sge_count <= 0)) { 1416 rc = (sge_count == 0) ? -EINVAL : sge_count; 1417 SPDK_ERRLOG("failed set RDMA sge, rc %d\n", rc); 1418 return rc; 1419 } 1420 rc = spdk_mlx5_qp_rdma_read(qp->qp, sges, sge_count, 0, mlx5_task->mkeys[i]->mkey, 1421 0, rdma_fence); 1422 if (spdk_unlikely(rc)) { 1423 SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc); 1424 return rc; 1425 } 1426 mlx5_task->num_submitted_reqs++; 1427 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task); 1428 dev->stats.rdma_reads++; 1429 rdma_fence = SPDK_MLX5_WQE_CTRL_STRONG_ORDERING; 1430 } 1431 if ((mlx5_task->inplace && mlx5_task->src.iovcnt == 0) || (!mlx5_task->inplace && 1432 mlx5_task->dst.iovcnt == 0)) { 1433 /* 1434 * The last RDMA does not have any data, only CRC. It also does not have a paired Mkey. 1435 * The CRC is handled in the previous MKey in this case. 1436 */ 1437 sge_count = 0; 1438 umr_offset = mlx5_task->last_umr_len; 1439 } else { 1440 umr_offset = 0; 1441 mlx5_task->last_mkey_idx = i; 1442 if (mlx5_task->inplace) { 1443 sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->src, umr_len[i], &remaining, 1444 task->src_domain, task->src_domain_ctx); 1445 } else { 1446 sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->dst, umr_len[i], &remaining, 1447 task->dst_domain, task->dst_domain_ctx); 1448 } 1449 if (spdk_unlikely(sge_count <= 0)) { 1450 rc = (sge_count == 0) ? -EINVAL : sge_count; 1451 SPDK_ERRLOG("failed set RDMA sge, rc %d\n", rc); 1452 return rc; 1453 } 1454 assert(remaining == 0); 1455 } 1456 if ((mlx5_task->num_completed_reqs + i + 1) == mlx5_task->num_reqs) { 1457 /* Ensure that there is a free sge for the CRC destination. */ 1458 assert(sge_count < (int)ACCEL_MLX5_MAX_SGE); 1459 /* Add the crc destination to the end of sges. 
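	 * As in the single-request path, the CRC lands in the DMA-able psv->crc member.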
*/ 1460 sges[sge_count].lkey = mlx5_task->psv->crc_lkey; 1461 sges[sge_count].addr = (uintptr_t)&mlx5_task->psv->crc; 1462 sges[sge_count++].length = sizeof(uint32_t); 1463 } 1464 rdma_fence |= SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE; 1465 rc = spdk_mlx5_qp_rdma_read(qp->qp, sges, sge_count, umr_offset, 1466 mlx5_task->mkeys[mlx5_task->last_mkey_idx]->mkey, 1467 (uint64_t)mlx5_task, rdma_fence); 1468 if (spdk_unlikely(rc)) { 1469 SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc); 1470 return rc; 1471 } 1472 mlx5_task->num_submitted_reqs++; 1473 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task); 1474 dev->stats.rdma_reads++; 1475 1476 return 0; 1477 } 1478 1479 static inline int 1480 accel_mlx5_crc_task_process(struct accel_mlx5_task *mlx5_task) 1481 { 1482 int rc; 1483 1484 assert(mlx5_task->mlx5_opcode == ACCEL_MLX5_OPC_CRC32C); 1485 1486 SPDK_DEBUGLOG(accel_mlx5, "begin, crc task, %p, reqs: total %u, submitted %u, completed %u\n", 1487 mlx5_task, mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs); 1488 1489 if (mlx5_task->num_reqs == 1) { 1490 rc = accel_mlx5_crc_task_process_one_req(mlx5_task); 1491 } else { 1492 rc = accel_mlx5_crc_task_process_multi_req(mlx5_task); 1493 } 1494 1495 if (rc == 0) { 1496 STAILQ_INSERT_TAIL(&mlx5_task->qp->in_hw, mlx5_task, link); 1497 SPDK_DEBUGLOG(accel_mlx5, "end, crc task, %p, reqs: total %u, submitted %u, completed %u\n", 1498 mlx5_task, mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, 1499 mlx5_task->num_completed_reqs); 1500 } 1501 1502 return rc; 1503 } 1504 1505 static inline int 1506 accel_mlx5_task_alloc_crc_ctx(struct accel_mlx5_task *task, uint32_t qp_slot) 1507 { 1508 struct accel_mlx5_qp *qp = task->qp; 1509 struct accel_mlx5_dev *dev = qp->dev; 1510 1511 if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(task, dev->sig_mkeys))) { 1512 SPDK_DEBUGLOG(accel_mlx5, "no mkeys in signature mkey pool, dev %s\n", 1513 dev->dev_ctx->context->device->name); 1514 dev->stats.nomem_mkey++; 1515 return -ENOMEM; 1516 } 1517 task->psv = spdk_mempool_get(dev->dev_ctx->psv_pool); 1518 if (spdk_unlikely(!task->psv)) { 1519 SPDK_DEBUGLOG(accel_mlx5, "no reqs in psv pool, dev %s\n", dev->dev_ctx->context->device->name); 1520 spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, task->mkeys, task->num_ops); 1521 task->num_ops = 0; 1522 dev->stats.nomem_mkey++; 1523 return -ENOMEM; 1524 } 1525 /* One extra slot is needed for SET_PSV WQE to reset the error state in PSV. */ 1526 if (spdk_unlikely(task->psv->bits.error)) { 1527 uint32_t n_slots = task->num_ops * 2 + 1; 1528 1529 if (qp_slot < n_slots) { 1530 spdk_mempool_put(dev->dev_ctx->psv_pool, task->psv); 1531 spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, task->mkeys, task->num_ops); 1532 dev->stats.nomem_qdepth++; 1533 task->num_ops = 0; 1534 return -ENOMEM; 1535 } 1536 } 1537 1538 return 0; 1539 } 1540 1541 static inline int 1542 accel_mlx5_crc_task_continue(struct accel_mlx5_task *task) 1543 { 1544 struct accel_mlx5_qp *qp = task->qp; 1545 struct accel_mlx5_dev *dev = qp->dev; 1546 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp); 1547 int rc; 1548 1549 assert(task->num_reqs > task->num_completed_reqs); 1550 if (task->num_ops == 0) { 1551 /* No mkeys allocated, try to allocate now. 
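		 * accel_mlx5_task_alloc_crc_ctx() allocates both the signature mkeys and a PSV object;
		 * on failure the task is parked on dev->nomem until resources become available.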
*/ 1552 rc = accel_mlx5_task_alloc_crc_ctx(task, qp_slot); 1553 if (spdk_unlikely(rc)) { 1554 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 1555 return -ENOMEM; 1556 } 1557 } 1558 /* We need to post at least 1 UMR and 1 RDMA operation */ 1559 if (spdk_unlikely(qp_slot < 2)) { 1560 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 1561 dev->stats.nomem_qdepth++; 1562 return -ENOMEM; 1563 } 1564 1565 return accel_mlx5_crc_task_process(task); 1566 } 1567 1568 static inline uint32_t 1569 accel_mlx5_get_crc_task_count(struct iovec *src_iov, uint32_t src_iovcnt, struct iovec *dst_iov, 1570 uint32_t dst_iovcnt) 1571 { 1572 uint32_t src_idx = 0; 1573 uint32_t dst_idx = 0; 1574 uint32_t num_ops = 1; 1575 uint32_t num_src_sge = 1; 1576 uint32_t num_dst_sge = 1; 1577 size_t src_offset = 0; 1578 size_t dst_offset = 0; 1579 uint32_t num_sge; 1580 size_t src_len; 1581 size_t dst_len; 1582 1583 /* One operation is enough if both iovs fit into ACCEL_MLX5_MAX_SGE. One SGE is reserved for CRC on dst_iov. */ 1584 if (src_iovcnt <= ACCEL_MLX5_MAX_SGE && (dst_iovcnt + 1) <= ACCEL_MLX5_MAX_SGE) { 1585 return 1; 1586 } 1587 1588 while (src_idx < src_iovcnt && dst_idx < dst_iovcnt) { 1589 if (num_src_sge > ACCEL_MLX5_MAX_SGE || num_dst_sge > ACCEL_MLX5_MAX_SGE) { 1590 num_ops++; 1591 num_src_sge = 1; 1592 num_dst_sge = 1; 1593 } 1594 src_len = src_iov[src_idx].iov_len - src_offset; 1595 dst_len = dst_iov[dst_idx].iov_len - dst_offset; 1596 1597 if (src_len == dst_len) { 1598 num_src_sge++; 1599 num_dst_sge++; 1600 src_offset = 0; 1601 dst_offset = 0; 1602 src_idx++; 1603 dst_idx++; 1604 continue; 1605 } 1606 if (src_len < dst_len) { 1607 /* Advance src_iov to reach the point that corresponds to the end of the current dst_iov. */ 1608 num_sge = accel_mlx5_advance_iovec(&src_iov[src_idx], 1609 spdk_min(ACCEL_MLX5_MAX_SGE + 1 - num_src_sge, 1610 src_iovcnt - src_idx), 1611 &src_offset, &dst_len); 1612 src_idx += num_sge; 1613 num_src_sge += num_sge; 1614 if (dst_len != 0) { 1615 /* 1616 * ACCEL_MLX5_MAX_SGE is reached on src_iov, and dst_len bytes 1617 * are left on the current dst_iov. 1618 */ 1619 dst_offset = dst_iov[dst_idx].iov_len - dst_len; 1620 } else { 1621 /* The src_iov advance is completed, shift to the next dst_iov. */ 1622 dst_idx++; 1623 num_dst_sge++; 1624 dst_offset = 0; 1625 } 1626 } else { /* src_len > dst_len */ 1627 /* Advance dst_iov to reach the point that corresponds to the end of the current src_iov. */ 1628 num_sge = accel_mlx5_advance_iovec(&dst_iov[dst_idx], 1629 spdk_min(ACCEL_MLX5_MAX_SGE + 1 - num_dst_sge, 1630 dst_iovcnt - dst_idx), 1631 &dst_offset, &src_len); 1632 dst_idx += num_sge; 1633 num_dst_sge += num_sge; 1634 if (src_len != 0) { 1635 /* 1636 * ACCEL_MLX5_MAX_SGE is reached on dst_iov, and src_len bytes 1637 * are left on the current src_iov. 1638 */ 1639 src_offset = src_iov[src_idx].iov_len - src_len; 1640 } else { 1641 /* The dst_iov advance is completed, shift to the next src_iov. */ 1642 src_idx++; 1643 num_src_sge++; 1644 src_offset = 0; 1645 } 1646 } 1647 } 1648 /* An extra operation is needed if no space is left on dst_iov because CRC takes one SGE. */ 1649 if (num_dst_sge > ACCEL_MLX5_MAX_SGE) { 1650 num_ops++; 1651 } 1652 1653 /* The above loop must reach the end of both iovs simultaneously because their size is the same. 
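	 * If it exits early, the task was built with mismatched src and dst lengths; the asserts
	 * below catch that in debug builds.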
*/ 1654 assert(src_idx == src_iovcnt); 1655 assert(dst_idx == dst_iovcnt); 1656 assert(src_offset == 0); 1657 assert(dst_offset == 0); 1658 1659 return num_ops; 1660 } 1661 1662 static inline int 1663 accel_mlx5_crc_task_init(struct accel_mlx5_task *mlx5_task) 1664 { 1665 struct spdk_accel_task *task = &mlx5_task->base; 1666 struct accel_mlx5_qp *qp = mlx5_task->qp; 1667 uint32_t qp_slot = accel_mlx5_dev_get_available_slots(qp->dev, qp); 1668 int rc; 1669 1670 accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt); 1671 if (mlx5_task->inplace) { 1672 /* One entry is reserved for CRC */ 1673 mlx5_task->num_reqs = SPDK_CEIL_DIV(mlx5_task->src.iovcnt + 1, ACCEL_MLX5_MAX_SGE); 1674 } else { 1675 accel_mlx5_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt); 1676 mlx5_task->num_reqs = accel_mlx5_get_crc_task_count(mlx5_task->src.iov, mlx5_task->src.iovcnt, 1677 mlx5_task->dst.iov, mlx5_task->dst.iovcnt); 1678 } 1679 1680 rc = accel_mlx5_task_alloc_crc_ctx(mlx5_task, qp_slot); 1681 if (spdk_unlikely(rc)) { 1682 return rc; 1683 } 1684 1685 if (spdk_unlikely(qp_slot < 2)) { 1686 /* Queue is full, queue this task */ 1687 SPDK_DEBUGLOG(accel_mlx5, "dev %s qp %p is full\n", qp->dev->dev_ctx->context->device->name, 1688 mlx5_task->qp); 1689 qp->dev->stats.nomem_qdepth++; 1690 return -ENOMEM; 1691 } 1692 return 0; 1693 } 1694 1695 static inline int 1696 accel_mlx5_crypto_mkey_task_init(struct accel_mlx5_task *mlx5_task) 1697 { 1698 struct spdk_accel_task *task = &mlx5_task->base; 1699 struct accel_mlx5_qp *qp = mlx5_task->qp; 1700 struct accel_mlx5_dev *dev = qp->dev; 1701 uint32_t num_blocks; 1702 int rc; 1703 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp); 1704 bool crypto_key_ok; 1705 1706 if (spdk_unlikely(task->s.iovcnt > ACCEL_MLX5_MAX_SGE)) { 1707 /* With `external mkey` we can't split task or register several UMRs */ 1708 SPDK_ERRLOG("src buffer is too fragmented\n"); 1709 return -EINVAL; 1710 } 1711 if (spdk_unlikely(task->src_domain == spdk_accel_get_memory_domain())) { 1712 SPDK_ERRLOG("accel domain is not supported\n"); 1713 return -ENOTSUP; 1714 } 1715 if (spdk_unlikely(spdk_accel_sequence_next_task(task) != NULL)) { 1716 SPDK_ERRLOG("Mkey registration is only supported for single task\n"); 1717 return -ENOTSUP; 1718 } 1719 1720 crypto_key_ok = (task->crypto_key && task->crypto_key->module_if == &g_accel_mlx5.module && 1721 task->crypto_key->priv); 1722 if (spdk_unlikely(!crypto_key_ok)) { 1723 SPDK_ERRLOG("Wrong crypto key provided\n"); 1724 return -EINVAL; 1725 } 1726 if (spdk_unlikely(task->nbytes % mlx5_task->base.block_size != 0)) { 1727 SPDK_ERRLOG("src length %"PRIu64" is not a multiple of the block size %u\n", task->nbytes, 1728 mlx5_task->base.block_size); 1729 return -EINVAL; 1730 } 1731 1732 num_blocks = task->nbytes / mlx5_task->base.block_size; 1733 if (dev->crypto_multi_block) { 1734 if (spdk_unlikely(g_accel_mlx5.attr.crypto_split_blocks && 1735 num_blocks > g_accel_mlx5.attr.crypto_split_blocks)) { 1736 SPDK_ERRLOG("Number of blocks in task %u exceeds split threshold %u, can't handle\n", 1737 num_blocks, g_accel_mlx5.attr.crypto_split_blocks); 1738 return -E2BIG; 1739 } 1740 } else if (num_blocks != 1) { 1741 SPDK_ERRLOG("Task contains more than 1 block, can't handle\n"); 1742 return -E2BIG; 1743 } 1744 1745 accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt); 1746 mlx5_task->num_blocks = num_blocks; 1747 mlx5_task->num_processed_blocks = 0; 1748 mlx5_task->num_reqs = 1; 1749 mlx5_task->blocks_per_req = num_blocks; 
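	/* The external-mkey path posts exactly one UMR, so one qp slot and one crypto mkey
	 * must be reserved before the task can be processed. */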
1750 1751 if (spdk_unlikely(qp_slot == 0)) { 1752 mlx5_task->num_ops = 0; 1753 dev->stats.nomem_qdepth++; 1754 return -ENOMEM; 1755 } 1756 rc = spdk_mlx5_mkey_pool_get_bulk(dev->crypto_mkeys, mlx5_task->mkeys, 1); 1757 if (spdk_unlikely(rc)) { 1758 mlx5_task->num_ops = 0; 1759 dev->stats.nomem_mkey++; 1760 return -ENOMEM; 1761 } 1762 mlx5_task->num_ops = 1; 1763 1764 SPDK_DEBUGLOG(accel_mlx5, "crypto_mkey task num_blocks %u, src_len %zu\n", mlx5_task->num_blocks, 1765 task->nbytes); 1766 1767 return 0; 1768 } 1769 1770 static inline int 1771 accel_mlx5_crypto_mkey_task_process(struct accel_mlx5_task *mlx5_task) 1772 { 1773 struct accel_mlx5_sge sge; 1774 struct spdk_accel_task *task = &mlx5_task->base; 1775 struct accel_mlx5_qp *qp = mlx5_task->qp; 1776 struct accel_mlx5_dev *dev = qp->dev; 1777 struct spdk_mlx5_crypto_dek_data dek_data; 1778 int rc; 1779 1780 if (spdk_unlikely(!mlx5_task->num_ops)) { 1781 return -EINVAL; 1782 } 1783 SPDK_DEBUGLOG(accel_mlx5, "begin, task %p, dst_domain_ctx %p\n", mlx5_task, task->dst_domain_ctx); 1784 1785 mlx5_task->num_wrs = 0; 1786 rc = spdk_mlx5_crypto_get_dek_data(task->crypto_key->priv, dev->dev_ctx->pd, &dek_data); 1787 if (spdk_unlikely(rc)) { 1788 return rc; 1789 } 1790 1791 rc = accel_mlx5_configure_crypto_umr(mlx5_task, &sge, mlx5_task->mkeys[0]->mkey, 1792 mlx5_task->num_blocks, &dek_data, (uint64_t)mlx5_task, SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE); 1793 if (spdk_unlikely(rc)) { 1794 SPDK_ERRLOG("UMR configure failed with %d\n", rc); 1795 return rc; 1796 } 1797 dev->stats.crypto_umrs++; 1798 mlx5_task->num_submitted_reqs++; 1799 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task); 1800 STAILQ_INSERT_TAIL(&qp->in_hw, mlx5_task, link); 1801 1802 SPDK_DEBUGLOG(accel_mlx5, "end, task %p, dst_domain_ctx %p\n", mlx5_task, task->dst_domain_ctx); 1803 1804 return 0; 1805 } 1806 1807 static inline int 1808 accel_mlx5_crypto_mkey_task_continue(struct accel_mlx5_task *task) 1809 { 1810 struct accel_mlx5_qp *qp = task->qp; 1811 struct accel_mlx5_dev *dev = qp->dev; 1812 int rc; 1813 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp); 1814 1815 if (task->num_ops == 0) { 1816 rc = spdk_mlx5_mkey_pool_get_bulk(dev->crypto_mkeys, task->mkeys, 1); 1817 if (spdk_unlikely(rc)) { 1818 dev->stats.nomem_mkey++; 1819 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 1820 return -ENOMEM; 1821 } 1822 task->num_ops = 1; 1823 } 1824 if (spdk_unlikely(qp_slot == 0)) { 1825 dev->stats.nomem_qdepth++; 1826 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 1827 return -ENOMEM; 1828 } 1829 return accel_mlx5_crypto_mkey_task_process(task); 1830 } 1831 1832 static inline void 1833 accel_mlx5_crypto_mkey_task_complete(struct accel_mlx5_task *mlx5_task) 1834 { 1835 struct accel_mlx5_dev *dev = mlx5_task->qp->dev; 1836 1837 assert(mlx5_task->num_ops); 1838 assert(mlx5_task->num_processed_blocks == mlx5_task->num_blocks); 1839 assert(mlx5_task->base.seq); 1840 1841 spdk_mlx5_mkey_pool_put_bulk(dev->crypto_mkeys, mlx5_task->mkeys, 1); 1842 spdk_accel_task_complete(&mlx5_task->base, 0); 1843 } 1844 1845 static int 1846 accel_mlx5_task_op_not_implemented(struct accel_mlx5_task *mlx5_task) 1847 { 1848 SPDK_ERRLOG("wrong function called\n"); 1849 SPDK_UNREACHABLE(); 1850 } 1851 1852 static void 1853 accel_mlx5_task_op_not_implemented_v(struct accel_mlx5_task *mlx5_task) 1854 { 1855 SPDK_ERRLOG("wrong function called\n"); 1856 SPDK_UNREACHABLE(); 1857 } 1858 1859 static int 1860 accel_mlx5_task_op_not_supported(struct accel_mlx5_task *mlx5_task) 1861 { 1862 SPDK_ERRLOG("Unsupported
opcode %d\n", mlx5_task->base.op_code); 1863 1864 return -ENOTSUP; 1865 } 1866 1867 static struct accel_mlx5_task_operations g_accel_mlx5_tasks_ops[] = { 1868 [ACCEL_MLX5_OPC_COPY] = { 1869 .init = accel_mlx5_copy_task_init, 1870 .process = accel_mlx5_copy_task_process, 1871 .cont = accel_mlx5_copy_task_continue, 1872 .complete = accel_mlx5_copy_task_complete, 1873 }, 1874 [ACCEL_MLX5_OPC_CRYPTO] = { 1875 .init = accel_mlx5_crypto_task_init, 1876 .process = accel_mlx5_crypto_task_process, 1877 .cont = accel_mlx5_crypto_task_continue, 1878 .complete = accel_mlx5_crypto_task_complete, 1879 }, 1880 [ACCEL_MLX5_OPC_CRC32C] = { 1881 .init = accel_mlx5_crc_task_init, 1882 .process = accel_mlx5_crc_task_process, 1883 .cont = accel_mlx5_crc_task_continue, 1884 .complete = accel_mlx5_crc_task_complete, 1885 }, 1886 [ACCEL_MLX5_OPC_CRYPTO_MKEY] = { 1887 .init = accel_mlx5_crypto_mkey_task_init, 1888 .process = accel_mlx5_crypto_mkey_task_process, 1889 .cont = accel_mlx5_crypto_mkey_task_continue, 1890 .complete = accel_mlx5_crypto_mkey_task_complete, 1891 }, 1892 [ACCEL_MLX5_OPC_LAST] = { 1893 .init = accel_mlx5_task_op_not_supported, 1894 .process = accel_mlx5_task_op_not_implemented, 1895 .cont = accel_mlx5_task_op_not_implemented, 1896 .complete = accel_mlx5_task_op_not_implemented_v 1897 }, 1898 }; 1899 1900 static void 1901 accel_mlx5_memory_domain_transfer_cpl(void *ctx, int rc) 1902 { 1903 struct accel_mlx5_task *task = ctx; 1904 1905 assert(task->needs_data_transfer); 1906 task->needs_data_transfer = 0; 1907 1908 if (spdk_likely(!rc)) { 1909 SPDK_DEBUGLOG(accel_mlx5, "task %p, data transfer done\n", task); 1910 accel_mlx5_task_complete(task); 1911 } else { 1912 SPDK_ERRLOG("Task %p, data transfer failed, rc %d\n", task, rc); 1913 accel_mlx5_task_fail(task, rc); 1914 } 1915 } 1916 1917 static inline void 1918 accel_mlx5_memory_domain_transfer(struct accel_mlx5_task *task) 1919 { 1920 struct spdk_memory_domain_translation_result translation; 1921 struct spdk_accel_task *base = &task->base; 1922 struct accel_mlx5_dev *dev = task->qp->dev; 1923 int rc; 1924 1925 assert(task->mlx5_opcode == ACCEL_MLX5_OPC_CRYPTO_MKEY); 1926 /* UMR is an offset in the addess space, so the start address is 0 */ 1927 translation.iov.iov_base = NULL; 1928 translation.iov.iov_len = base->nbytes; 1929 translation.iov_count = 1; 1930 translation.size = sizeof(translation); 1931 translation.rdma.rkey = task->mkeys[0]->mkey; 1932 translation.rdma.lkey = task->mkeys[0]->mkey; 1933 1934 SPDK_DEBUGLOG(accel_mlx5, "start transfer, task %p, dst_domain_ctx %p, mkey %u\n", task, 1935 task->base.dst_domain_ctx, task->mkeys[0]->mkey); 1936 rc = spdk_memory_domain_transfer_data(base->dst_domain, base->dst_domain_ctx, &translation.iov, 1, 1937 dev->dev_ctx->domain, task, &translation.iov, 1, &translation, 1938 accel_mlx5_memory_domain_transfer_cpl, task); 1939 if (spdk_unlikely(rc)) { 1940 SPDK_ERRLOG("Failed to start data transfer, task %p rc %d\n", task, rc); 1941 accel_mlx5_task_fail(task, rc); 1942 } 1943 } 1944 1945 static inline void 1946 accel_mlx5_task_complete(struct accel_mlx5_task *task) 1947 { 1948 struct spdk_accel_sequence *seq = task->base.seq; 1949 struct spdk_accel_task *next; 1950 bool driver_seq; 1951 1952 if (task->needs_data_transfer) { 1953 accel_mlx5_memory_domain_transfer(task); 1954 return; 1955 } 1956 1957 next = spdk_accel_sequence_next_task(&task->base); 1958 driver_seq = task->driver_seq; 1959 1960 assert(task->num_reqs == task->num_completed_reqs); 1961 SPDK_DEBUGLOG(accel_mlx5, "Complete task %p, opc 
%d\n", task, task->mlx5_opcode); 1962 1963 g_accel_mlx5_tasks_ops[task->mlx5_opcode].complete(task); 1964 1965 if (driver_seq) { 1966 struct spdk_io_channel *ch = task->qp->dev->ch; 1967 1968 assert(seq); 1969 if (next) { 1970 accel_mlx5_execute_sequence(ch, seq); 1971 } else { 1972 spdk_accel_sequence_continue(seq); 1973 } 1974 } 1975 } 1976 1977 static inline int 1978 accel_mlx5_task_continue(struct accel_mlx5_task *task) 1979 { 1980 struct accel_mlx5_qp *qp = task->qp; 1981 struct accel_mlx5_dev *dev = qp->dev; 1982 1983 if (spdk_unlikely(qp->recovering)) { 1984 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 1985 return 0; 1986 } 1987 1988 return g_accel_mlx5_tasks_ops[task->mlx5_opcode].cont(task); 1989 } 1990 static inline void 1991 accel_mlx5_task_init_opcode(struct accel_mlx5_task *mlx5_task) 1992 { 1993 uint8_t base_opcode = mlx5_task->base.op_code; 1994 1995 switch (base_opcode) { 1996 case SPDK_ACCEL_OPC_COPY: 1997 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_COPY; 1998 break; 1999 case SPDK_ACCEL_OPC_ENCRYPT: 2000 assert(g_accel_mlx5.crypto_supported); 2001 mlx5_task->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_WIRE; 2002 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO; 2003 break; 2004 case SPDK_ACCEL_OPC_DECRYPT: 2005 assert(g_accel_mlx5.crypto_supported); 2006 mlx5_task->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_MEMORY; 2007 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO; 2008 break; 2009 case SPDK_ACCEL_OPC_CRC32C: 2010 mlx5_task->inplace = 1; 2011 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRC32C; 2012 break; 2013 case SPDK_ACCEL_OPC_COPY_CRC32C: 2014 mlx5_task->inplace = 0; 2015 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRC32C; 2016 break; 2017 default: 2018 SPDK_ERRLOG("wrong opcode %d\n", base_opcode); 2019 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_LAST; 2020 } 2021 } 2022 2023 static void 2024 accel_mlx5_post_poller_handler(void *fn_arg) 2025 { 2026 struct accel_mlx5_io_channel *ch = fn_arg; 2027 struct accel_mlx5_dev *dev; 2028 uint32_t i; 2029 2030 for (i = 0; i < ch->num_devs; i++) { 2031 dev = &ch->devs[i]; 2032 2033 if (dev->qp.ring_db) { 2034 spdk_mlx5_qp_complete_send(dev->qp.qp); 2035 dev->qp.ring_db = false; 2036 } 2037 } 2038 2039 ch->poller_handler_registered = false; 2040 } 2041 2042 static inline int 2043 _accel_mlx5_submit_tasks(struct accel_mlx5_io_channel *accel_ch, struct spdk_accel_task *task) 2044 { 2045 struct accel_mlx5_task *mlx5_task = SPDK_CONTAINEROF(task, struct accel_mlx5_task, base); 2046 struct accel_mlx5_dev *dev = mlx5_task->qp->dev; 2047 int rc; 2048 2049 /* We should not receive any tasks if the module was not enabled */ 2050 assert(g_accel_mlx5.enabled); 2051 2052 dev->stats.opcodes[mlx5_task->mlx5_opcode]++; 2053 rc = g_accel_mlx5_tasks_ops[mlx5_task->mlx5_opcode].init(mlx5_task); 2054 if (spdk_unlikely(rc)) { 2055 if (rc == -ENOMEM) { 2056 SPDK_DEBUGLOG(accel_mlx5, "no reqs to handle new task %p (required %u), put to queue\n", mlx5_task, 2057 mlx5_task->num_reqs); 2058 STAILQ_INSERT_TAIL(&dev->nomem, mlx5_task, link); 2059 return 0; 2060 } 2061 SPDK_ERRLOG("Task opc %d init failed, rc %d\n", task->op_code, rc); 2062 return rc; 2063 } 2064 2065 if (spdk_unlikely(mlx5_task->qp->recovering)) { 2066 STAILQ_INSERT_TAIL(&dev->nomem, mlx5_task, link); 2067 return 0; 2068 } 2069 2070 if (!accel_ch->poller_handler_registered) { 2071 spdk_thread_register_post_poller_handler(accel_mlx5_post_poller_handler, accel_ch); 2072 /* Function above may fail to register our handler, in that case we ring doorbells on next polling 2073 * cycle. 
That is less efficient but still works */ 2074 accel_ch->poller_handler_registered = true; 2075 } 2076 2077 return g_accel_mlx5_tasks_ops[mlx5_task->mlx5_opcode].process(mlx5_task); 2078 } 2079 2080 static inline void 2081 accel_mlx5_task_assign_qp(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_io_channel *accel_ch) 2082 { 2083 struct accel_mlx5_dev *dev; 2084 2085 dev = &accel_ch->devs[accel_ch->dev_idx]; 2086 accel_ch->dev_idx++; 2087 if (accel_ch->dev_idx == accel_ch->num_devs) { 2088 accel_ch->dev_idx = 0; 2089 } 2090 2091 mlx5_task->qp = &dev->qp; 2092 } 2093 2094 static inline void 2095 accel_mlx5_task_reset(struct accel_mlx5_task *mlx5_task) 2096 { 2097 mlx5_task->num_completed_reqs = 0; 2098 mlx5_task->num_submitted_reqs = 0; 2099 mlx5_task->num_ops = 0; 2100 mlx5_task->num_processed_blocks = 0; 2101 mlx5_task->raw = 0; 2102 } 2103 2104 static int 2105 accel_mlx5_submit_tasks(struct spdk_io_channel *ch, struct spdk_accel_task *task) 2106 { 2107 struct accel_mlx5_task *mlx5_task = SPDK_CONTAINEROF(task, struct accel_mlx5_task, base); 2108 struct accel_mlx5_io_channel *accel_ch = spdk_io_channel_get_ctx(ch); 2109 2110 accel_mlx5_task_assign_qp(mlx5_task, accel_ch); 2111 accel_mlx5_task_reset(mlx5_task); 2112 accel_mlx5_task_init_opcode(mlx5_task); 2113 2114 return _accel_mlx5_submit_tasks(accel_ch, task); 2115 } 2116 2117 static void accel_mlx5_recover_qp(struct accel_mlx5_qp *qp); 2118 2119 static int 2120 accel_mlx5_recover_qp_poller(void *arg) 2121 { 2122 struct accel_mlx5_qp *qp = arg; 2123 2124 spdk_poller_unregister(&qp->recover_poller); 2125 accel_mlx5_recover_qp(qp); 2126 return SPDK_POLLER_BUSY; 2127 } 2128 2129 static void 2130 accel_mlx5_recover_qp(struct accel_mlx5_qp *qp) 2131 { 2132 struct accel_mlx5_dev *dev = qp->dev; 2133 struct spdk_mlx5_qp_attr mlx5_qp_attr = {}; 2134 int rc; 2135 2136 SPDK_NOTICELOG("Recovering qp %p, core %u\n", qp, spdk_env_get_current_core()); 2137 if (qp->qp) { 2138 spdk_mlx5_qp_destroy(qp->qp); 2139 qp->qp = NULL; 2140 } 2141 2142 mlx5_qp_attr.cap.max_send_wr = g_accel_mlx5.attr.qp_size; 2143 mlx5_qp_attr.cap.max_recv_wr = 0; 2144 mlx5_qp_attr.cap.max_send_sge = ACCEL_MLX5_MAX_SGE; 2145 mlx5_qp_attr.cap.max_inline_data = sizeof(struct ibv_sge) * ACCEL_MLX5_MAX_SGE; 2146 2147 rc = spdk_mlx5_qp_create(dev->dev_ctx->pd, dev->cq, &mlx5_qp_attr, &qp->qp); 2148 if (rc) { 2149 SPDK_ERRLOG("Failed to create mlx5 dma QP, rc %d. 
Retry in %d usec\n", 2150 rc, ACCEL_MLX5_RECOVER_POLLER_PERIOD_US); 2151 qp->recover_poller = SPDK_POLLER_REGISTER(accel_mlx5_recover_qp_poller, qp, 2152 ACCEL_MLX5_RECOVER_POLLER_PERIOD_US); 2153 return; 2154 } 2155 2156 qp->recovering = false; 2157 } 2158 2159 static inline void 2160 accel_mlx5_process_error_cpl(struct spdk_mlx5_cq_completion *wc, struct accel_mlx5_task *task) 2161 { 2162 struct accel_mlx5_qp *qp = task->qp; 2163 2164 if (wc->status != IBV_WC_WR_FLUSH_ERR) { 2165 SPDK_WARNLOG("RDMA: qp %p, task %p, WC status %d, core %u\n", 2166 qp, task, wc->status, spdk_env_get_current_core()); 2167 } else { 2168 SPDK_DEBUGLOG(accel_mlx5, 2169 "RDMA: qp %p, task %p, WC status %d, core %u\n", 2170 qp, task, wc->status, spdk_env_get_current_core()); 2171 } 2172 2173 qp->recovering = true; 2174 assert(task->num_completed_reqs <= task->num_submitted_reqs); 2175 if (task->num_completed_reqs == task->num_submitted_reqs) { 2176 STAILQ_REMOVE_HEAD(&qp->in_hw, link); 2177 accel_mlx5_task_fail(task, -EIO); 2178 } 2179 } 2180 2181 static inline int64_t 2182 accel_mlx5_poll_cq(struct accel_mlx5_dev *dev) 2183 { 2184 struct spdk_mlx5_cq_completion wc[ACCEL_MLX5_MAX_WC]; 2185 struct accel_mlx5_task *task; 2186 struct accel_mlx5_qp *qp; 2187 int reaped, i, rc; 2188 uint16_t completed; 2189 2190 dev->stats.polls++; 2191 reaped = spdk_mlx5_cq_poll_completions(dev->cq, wc, ACCEL_MLX5_MAX_WC); 2192 if (spdk_unlikely(reaped < 0)) { 2193 SPDK_ERRLOG("Error polling CQ! (%d): %s\n", errno, spdk_strerror(errno)); 2194 return reaped; 2195 } else if (reaped == 0) { 2196 dev->stats.idle_polls++; 2197 return 0; 2198 } 2199 dev->stats.completions += reaped; 2200 2201 SPDK_DEBUGLOG(accel_mlx5, "Reaped %d cpls on dev %s\n", reaped, 2202 dev->dev_ctx->context->device->name); 2203 2204 for (i = 0; i < reaped; i++) { 2205 if (spdk_unlikely(!wc[i].wr_id)) { 2206 /* Unsignaled completion with error, ignore */ 2207 continue; 2208 } 2209 task = (struct accel_mlx5_task *)wc[i].wr_id; 2210 qp = task->qp; 2211 assert(task == STAILQ_FIRST(&qp->in_hw) && "submission mismatch"); 2212 assert(task->num_submitted_reqs > task->num_completed_reqs); 2213 completed = task->num_submitted_reqs - task->num_completed_reqs; 2214 assert((uint32_t)task->num_completed_reqs + completed <= UINT16_MAX); 2215 task->num_completed_reqs += completed; 2216 assert(qp->wrs_submitted >= task->num_wrs); 2217 qp->wrs_submitted -= task->num_wrs; 2218 assert(dev->wrs_in_cq > 0); 2219 dev->wrs_in_cq--; 2220 2221 if (spdk_unlikely(wc[i].status)) { 2222 accel_mlx5_process_error_cpl(&wc[i], task); 2223 if (qp->wrs_submitted == 0) { 2224 assert(STAILQ_EMPTY(&qp->in_hw)); 2225 accel_mlx5_recover_qp(qp); 2226 } 2227 continue; 2228 } 2229 2230 SPDK_DEBUGLOG(accel_mlx5, "task %p, remaining %u\n", task, 2231 task->num_reqs - task->num_completed_reqs); 2232 if (task->num_completed_reqs == task->num_reqs) { 2233 STAILQ_REMOVE_HEAD(&qp->in_hw, link); 2234 accel_mlx5_task_complete(task); 2235 } else { 2236 assert(task->num_submitted_reqs < task->num_reqs); 2237 assert(task->num_completed_reqs == task->num_submitted_reqs); 2238 STAILQ_REMOVE_HEAD(&qp->in_hw, link); 2239 rc = accel_mlx5_task_continue(task); 2240 if (spdk_unlikely(rc)) { 2241 if (rc != -ENOMEM) { 2242 accel_mlx5_task_fail(task, rc); 2243 } 2244 } 2245 } 2246 } 2247 2248 return reaped; 2249 } 2250 2251 static inline void 2252 accel_mlx5_resubmit_nomem_tasks(struct accel_mlx5_dev *dev) 2253 { 2254 struct accel_mlx5_task *task, *tmp, *last; 2255 int rc; 2256 2257 last = STAILQ_LAST(&dev->nomem, accel_mlx5_task, 
link); 2258 STAILQ_FOREACH_SAFE(task, &dev->nomem, link, tmp) { 2259 STAILQ_REMOVE_HEAD(&dev->nomem, link); 2260 rc = accel_mlx5_task_continue(task); 2261 if (spdk_unlikely(rc)) { 2262 if (rc != -ENOMEM) { 2263 accel_mlx5_task_fail(task, rc); 2264 } 2265 break; 2266 } 2267 /* If qpair is recovering, task is added back to the nomem list and 0 is returned. In that case we 2268 * need a special condition to iterate the list once and stop this FOREACH loop */ 2269 if (task == last) { 2270 break; 2271 } 2272 } 2273 } 2274 2275 static int 2276 accel_mlx5_poller(void *ctx) 2277 { 2278 struct accel_mlx5_io_channel *ch = ctx; 2279 struct accel_mlx5_dev *dev; 2280 2281 int64_t completions = 0, rc; 2282 uint32_t i; 2283 2284 /* reaped completions may register a post poller handler, that makes no sense in the scope of our own poller */ 2285 ch->poller_handler_registered = true; 2286 for (i = 0; i < ch->num_devs; i++) { 2287 dev = &ch->devs[i]; 2288 if (dev->wrs_in_cq) { 2289 rc = accel_mlx5_poll_cq(dev); 2290 if (spdk_unlikely(rc < 0)) { 2291 SPDK_ERRLOG("Error %"PRId64" on CQ, dev %s\n", rc, dev->dev_ctx->context->device->name); 2292 } 2293 completions += rc; 2294 if (dev->qp.ring_db) { 2295 spdk_mlx5_qp_complete_send(dev->qp.qp); 2296 dev->qp.ring_db = false; 2297 } 2298 } 2299 if (!STAILQ_EMPTY(&dev->nomem)) { 2300 accel_mlx5_resubmit_nomem_tasks(dev); 2301 } 2302 } 2303 ch->poller_handler_registered = false; 2304 2305 return !!completions; 2306 } 2307 2308 static bool 2309 accel_mlx5_supports_opcode(enum spdk_accel_opcode opc) 2310 { 2311 assert(g_accel_mlx5.enabled); 2312 2313 switch (opc) { 2314 case SPDK_ACCEL_OPC_COPY: 2315 return true; 2316 case SPDK_ACCEL_OPC_ENCRYPT: 2317 case SPDK_ACCEL_OPC_DECRYPT: 2318 return g_accel_mlx5.crypto_supported; 2319 case SPDK_ACCEL_OPC_CRC32C: 2320 case SPDK_ACCEL_OPC_COPY_CRC32C: 2321 return g_accel_mlx5.crc32c_supported; 2322 default: 2323 return false; 2324 } 2325 } 2326 2327 static struct spdk_io_channel * 2328 accel_mlx5_get_io_channel(void) 2329 { 2330 assert(g_accel_mlx5.enabled); 2331 return spdk_get_io_channel(&g_accel_mlx5); 2332 } 2333 2334 static int 2335 accel_mlx5_create_qp(struct accel_mlx5_dev *dev, struct accel_mlx5_qp *qp) 2336 { 2337 struct spdk_mlx5_qp_attr mlx5_qp_attr = {}; 2338 int rc; 2339 2340 mlx5_qp_attr.cap.max_send_wr = g_accel_mlx5.attr.qp_size; 2341 mlx5_qp_attr.cap.max_recv_wr = 0; 2342 mlx5_qp_attr.cap.max_send_sge = ACCEL_MLX5_MAX_SGE; 2343 mlx5_qp_attr.cap.max_inline_data = sizeof(struct ibv_sge) * ACCEL_MLX5_MAX_SGE; 2344 2345 rc = spdk_mlx5_qp_create(dev->dev_ctx->pd, dev->cq, &mlx5_qp_attr, &qp->qp); 2346 if (rc) { 2347 return rc; 2348 } 2349 2350 STAILQ_INIT(&qp->in_hw); 2351 qp->dev = dev; 2352 qp->verbs_qp = spdk_mlx5_qp_get_verbs_qp(qp->qp); 2353 assert(qp->verbs_qp); 2354 qp->wrs_max = g_accel_mlx5.attr.qp_size; 2355 2356 return 0; 2357 } 2358 2359 static void 2360 accel_mlx5_add_stats(struct accel_mlx5_stats *stats, const struct accel_mlx5_stats *to_add) 2361 { 2362 int i; 2363 2364 stats->crypto_umrs += to_add->crypto_umrs; 2365 stats->sig_umrs += to_add->sig_umrs; 2366 stats->rdma_reads += to_add->rdma_reads; 2367 stats->rdma_writes += to_add->rdma_writes; 2368 stats->polls += to_add->polls; 2369 stats->idle_polls += to_add->idle_polls; 2370 stats->completions += to_add->completions; 2371 stats->nomem_qdepth += to_add->nomem_qdepth; 2372 stats->nomem_mkey += to_add->nomem_mkey; 2373 for (i = 0; i < ACCEL_MLX5_OPC_LAST; i++) { 2374 stats->opcodes[i] += to_add->opcodes[i]; 2375 } 2376 } 2377 2378 static void 2379 
accel_mlx5_destroy_cb(void *io_device, void *ctx_buf) 2380 { 2381 struct accel_mlx5_io_channel *ch = ctx_buf; 2382 struct accel_mlx5_dev *dev; 2383 uint32_t i; 2384 2385 spdk_poller_unregister(&ch->poller); 2386 for (i = 0; i < ch->num_devs; i++) { 2387 dev = &ch->devs[i]; 2388 spdk_mlx5_qp_destroy(dev->qp.qp); 2389 if (dev->cq) { 2390 spdk_mlx5_cq_destroy(dev->cq); 2391 } 2392 spdk_poller_unregister(&dev->qp.recover_poller); 2393 if (dev->mkeys) { 2394 spdk_mlx5_mkey_pool_put_ref(dev->mkeys); 2395 } 2396 if (dev->crypto_mkeys) { 2397 spdk_mlx5_mkey_pool_put_ref(dev->crypto_mkeys); 2398 } 2399 if (dev->sig_mkeys) { 2400 spdk_mlx5_mkey_pool_put_ref(dev->sig_mkeys); 2401 } 2402 spdk_rdma_utils_free_mem_map(&dev->mmap); 2403 spdk_spin_lock(&g_accel_mlx5.lock); 2404 accel_mlx5_add_stats(&g_accel_mlx5.stats, &dev->stats); 2405 spdk_spin_unlock(&g_accel_mlx5.lock); 2406 } 2407 free(ch->devs); 2408 } 2409 2410 static int 2411 accel_mlx5_create_cb(void *io_device, void *ctx_buf) 2412 { 2413 struct spdk_mlx5_cq_attr cq_attr = {}; 2414 struct accel_mlx5_io_channel *ch = ctx_buf; 2415 struct accel_mlx5_dev_ctx *dev_ctx; 2416 struct accel_mlx5_dev *dev; 2417 uint32_t i; 2418 int rc; 2419 2420 ch->devs = calloc(g_accel_mlx5.num_ctxs, sizeof(*ch->devs)); 2421 if (!ch->devs) { 2422 SPDK_ERRLOG("Memory allocation failed\n"); 2423 return -ENOMEM; 2424 } 2425 2426 for (i = 0; i < g_accel_mlx5.num_ctxs; i++) { 2427 dev_ctx = &g_accel_mlx5.dev_ctxs[i]; 2428 dev = &ch->devs[i]; 2429 dev->dev_ctx = dev_ctx; 2430 2431 assert(dev_ctx->mkeys); 2432 dev->mkeys = spdk_mlx5_mkey_pool_get_ref(dev_ctx->pd, 0); 2433 if (!dev->mkeys) { 2434 SPDK_ERRLOG("Failed to get mkey pool channel, dev %s\n", dev_ctx->context->device->name); 2435 /* Should not happen since mkey pool is created on accel_mlx5 initialization. 2436 * We should not be here if pool creation failed */ 2437 assert(0); 2438 goto err_out; 2439 } 2440 2441 if (dev_ctx->crypto_mkeys) { 2442 dev->crypto_mkeys = spdk_mlx5_mkey_pool_get_ref(dev_ctx->pd, SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO); 2443 if (!dev->crypto_mkeys) { 2444 SPDK_ERRLOG("Failed to get crypto mkey pool channel, dev %s\n", dev_ctx->context->device->name); 2445 /* Should not happen since mkey pool is created on accel_mlx5 initialization. 2446 * We should not be here if pool creation failed */ 2447 assert(0); 2448 goto err_out; 2449 } 2450 } 2451 if (dev_ctx->sig_mkeys) { 2452 dev->sig_mkeys = spdk_mlx5_mkey_pool_get_ref(dev_ctx->pd, SPDK_MLX5_MKEY_POOL_FLAG_SIGNATURE); 2453 if (!dev->sig_mkeys) { 2454 SPDK_ERRLOG("Failed to get sig mkey pool channel, dev %s\n", dev_ctx->context->device->name); 2455 /* Should not happen since mkey pool is created on accel_mlx5 initialization. 
2456 * We should not be here if pool creation failed */ 2457 assert(0); 2458 goto err_out; 2459 } 2460 } 2461 2462 memset(&cq_attr, 0, sizeof(cq_attr)); 2463 cq_attr.cqe_cnt = g_accel_mlx5.attr.qp_size; 2464 cq_attr.cqe_size = 64; 2465 cq_attr.cq_context = dev; 2466 2467 ch->num_devs++; 2468 rc = spdk_mlx5_cq_create(dev_ctx->pd, &cq_attr, &dev->cq); 2469 if (rc) { 2470 SPDK_ERRLOG("Failed to create mlx5 CQ, rc %d\n", rc); 2471 goto err_out; 2472 } 2473 2474 rc = accel_mlx5_create_qp(dev, &dev->qp); 2475 if (rc) { 2476 SPDK_ERRLOG("Failed to create mlx5 QP, rc %d\n", rc); 2477 goto err_out; 2478 } 2479 2480 dev->mmap = spdk_rdma_utils_create_mem_map(dev_ctx->pd, NULL, 2481 IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE); 2482 if (!dev->mmap) { 2483 SPDK_ERRLOG("Failed to create memory map\n"); 2484 rc = -ENOMEM; 2485 goto err_out; 2486 } 2487 dev->crypto_multi_block = dev_ctx->crypto_multi_block; 2488 dev->crypto_split_blocks = dev_ctx->crypto_multi_block ? g_accel_mlx5.attr.crypto_split_blocks : 0; 2489 dev->wrs_in_cq_max = g_accel_mlx5.attr.qp_size; 2490 dev->ch = spdk_io_channel_from_ctx(ctx_buf); 2491 STAILQ_INIT(&dev->nomem); 2492 } 2493 2494 ch->poller = SPDK_POLLER_REGISTER(accel_mlx5_poller, ch, 0); 2495 2496 return 0; 2497 2498 err_out: 2499 accel_mlx5_destroy_cb(&g_accel_mlx5, ctx_buf); 2500 return rc; 2501 } 2502 2503 void 2504 accel_mlx5_get_default_attr(struct accel_mlx5_attr *attr) 2505 { 2506 assert(attr); 2507 2508 attr->qp_size = ACCEL_MLX5_QP_SIZE; 2509 attr->num_requests = ACCEL_MLX5_NUM_REQUESTS; 2510 attr->allowed_devs = NULL; 2511 attr->crypto_split_blocks = 0; 2512 attr->enable_driver = false; 2513 } 2514 2515 static void 2516 accel_mlx5_allowed_devs_free(void) 2517 { 2518 size_t i; 2519 2520 if (!g_accel_mlx5.allowed_devs) { 2521 return; 2522 } 2523 2524 for (i = 0; i < g_accel_mlx5.allowed_devs_count; i++) { 2525 free(g_accel_mlx5.allowed_devs[i]); 2526 } 2527 free(g_accel_mlx5.attr.allowed_devs); 2528 free(g_accel_mlx5.allowed_devs); 2529 g_accel_mlx5.attr.allowed_devs = NULL; 2530 g_accel_mlx5.allowed_devs = NULL; 2531 g_accel_mlx5.allowed_devs_count = 0; 2532 } 2533 2534 static int 2535 accel_mlx5_allowed_devs_parse(const char *allowed_devs) 2536 { 2537 char *str, *tmp, *tok, *sp = NULL; 2538 size_t devs_count = 0; 2539 2540 str = strdup(allowed_devs); 2541 if (!str) { 2542 return -ENOMEM; 2543 } 2544 2545 accel_mlx5_allowed_devs_free(); 2546 2547 tmp = str; 2548 while ((tmp = strchr(tmp, ',')) != NULL) { 2549 tmp++; 2550 devs_count++; 2551 } 2552 devs_count++; 2553 2554 g_accel_mlx5.allowed_devs = calloc(devs_count, sizeof(char *)); 2555 if (!g_accel_mlx5.allowed_devs) { 2556 free(str); 2557 return -ENOMEM; 2558 } 2559 2560 devs_count = 0; 2561 tok = strtok_r(str, ",", &sp); 2562 while (tok) { 2563 g_accel_mlx5.allowed_devs[devs_count] = strdup(tok); 2564 if (!g_accel_mlx5.allowed_devs[devs_count]) { 2565 free(str); 2566 accel_mlx5_allowed_devs_free(); 2567 return -ENOMEM; 2568 } 2569 tok = strtok_r(NULL, ",", &sp); 2570 devs_count++; 2571 g_accel_mlx5.allowed_devs_count++; 2572 } 2573 2574 free(str); 2575 2576 return 0; 2577 } 2578 2579 int 2580 accel_mlx5_enable(struct accel_mlx5_attr *attr) 2581 { 2582 int rc; 2583 2584 if (g_accel_mlx5.enabled) { 2585 return -EEXIST; 2586 } 2587 if (attr) { 2588 if (attr->num_requests / spdk_env_get_core_count() < ACCEL_MLX5_MAX_MKEYS_IN_TASK) { 2589 SPDK_ERRLOG("num requests per core must not be less than %u, current value %u\n", 2590 ACCEL_MLX5_MAX_MKEYS_IN_TASK, attr->num_requests / 
spdk_env_get_core_count()); 2591 return -EINVAL; 2592 } 2593 if (attr->qp_size < 8) { 2594 SPDK_ERRLOG("qp_size must be at least 8\n"); 2595 return -EINVAL; 2596 } 2597 g_accel_mlx5.attr = *attr; 2598 g_accel_mlx5.attr.allowed_devs = NULL; 2599 2600 if (attr->allowed_devs) { 2601 /* Contains a copy of user's string */ 2602 g_accel_mlx5.attr.allowed_devs = strndup(attr->allowed_devs, ACCEL_MLX5_ALLOWED_DEVS_MAX_LEN); 2603 if (!g_accel_mlx5.attr.allowed_devs) { 2604 return -ENOMEM; 2605 } 2606 rc = accel_mlx5_allowed_devs_parse(g_accel_mlx5.attr.allowed_devs); 2607 if (rc) { 2608 return rc; 2609 } 2610 rc = spdk_mlx5_crypto_devs_allow((const char *const *)g_accel_mlx5.allowed_devs, 2611 g_accel_mlx5.allowed_devs_count); 2612 if (rc) { 2613 accel_mlx5_allowed_devs_free(); 2614 return rc; 2615 } 2616 } 2617 } else { 2618 accel_mlx5_get_default_attr(&g_accel_mlx5.attr); 2619 } 2620 2621 g_accel_mlx5.enabled = true; 2622 spdk_accel_module_list_add(&g_accel_mlx5.module); 2623 2624 return 0; 2625 } 2626 2627 static void 2628 accel_mlx5_psvs_release(struct accel_mlx5_dev_ctx *dev_ctx) 2629 { 2630 uint32_t i, num_psvs, num_psvs_in_pool; 2631 2632 if (!dev_ctx->psvs) { 2633 return; 2634 } 2635 2636 num_psvs = g_accel_mlx5.attr.num_requests; 2637 2638 for (i = 0; i < num_psvs; i++) { 2639 if (dev_ctx->psvs[i]) { 2640 spdk_mlx5_destroy_psv(dev_ctx->psvs[i]); 2641 dev_ctx->psvs[i] = NULL; 2642 } 2643 } 2644 free(dev_ctx->psvs); 2645 2646 if (!dev_ctx->psv_pool) { 2647 return; 2648 } 2649 num_psvs_in_pool = spdk_mempool_count(dev_ctx->psv_pool); 2650 if (num_psvs_in_pool != num_psvs) { 2651 SPDK_ERRLOG("Expected %u reqs in the pool, but got only %u\n", num_psvs, num_psvs_in_pool); 2652 } 2653 spdk_mempool_free(dev_ctx->psv_pool); 2654 } 2655 2656 static void 2657 accel_mlx5_free_resources(void) 2658 { 2659 struct accel_mlx5_dev_ctx *dev_ctx; 2660 uint32_t i; 2661 2662 for (i = 0; i < g_accel_mlx5.num_ctxs; i++) { 2663 dev_ctx = &g_accel_mlx5.dev_ctxs[i]; 2664 accel_mlx5_psvs_release(dev_ctx); 2665 if (dev_ctx->pd) { 2666 if (dev_ctx->mkeys) { 2667 spdk_mlx5_mkey_pool_destroy(0, dev_ctx->pd); 2668 } 2669 if (dev_ctx->crypto_mkeys) { 2670 spdk_mlx5_mkey_pool_destroy(SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO, dev_ctx->pd); 2671 } 2672 if (dev_ctx->sig_mkeys) { 2673 spdk_mlx5_mkey_pool_destroy(SPDK_MLX5_MKEY_POOL_FLAG_SIGNATURE, dev_ctx->pd); 2674 } 2675 spdk_rdma_utils_put_pd(dev_ctx->pd); 2676 } 2677 if (dev_ctx->domain) { 2678 spdk_rdma_utils_put_memory_domain(dev_ctx->domain); 2679 } 2680 } 2681 2682 free(g_accel_mlx5.dev_ctxs); 2683 g_accel_mlx5.dev_ctxs = NULL; 2684 g_accel_mlx5.initialized = false; 2685 } 2686 2687 static void 2688 accel_mlx5_deinit_cb(void *ctx) 2689 { 2690 accel_mlx5_free_resources(); 2691 spdk_spin_destroy(&g_accel_mlx5.lock); 2692 spdk_mlx5_umr_implementer_register(false); 2693 spdk_accel_module_finish(); 2694 } 2695 2696 static void 2697 accel_mlx5_deinit(void *ctx) 2698 { 2699 if (g_accel_mlx5.allowed_devs) { 2700 accel_mlx5_allowed_devs_free(); 2701 } 2702 spdk_mlx5_crypto_devs_allow(NULL, 0); 2703 if (g_accel_mlx5.initialized) { 2704 spdk_io_device_unregister(&g_accel_mlx5, accel_mlx5_deinit_cb); 2705 } else { 2706 spdk_accel_module_finish(); 2707 } 2708 } 2709 2710 static int 2711 accel_mlx5_mkeys_create(struct ibv_pd *pd, uint32_t num_mkeys, uint32_t flags) 2712 { 2713 struct spdk_mlx5_mkey_pool_param pool_param = {}; 2714 2715 pool_param.mkey_count = num_mkeys; 2716 pool_param.cache_per_thread = num_mkeys * 3 / 4 / spdk_env_get_core_count(); 2717 pool_param.flags = flags; 2718 2719 
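/* Illustrative sizing of the per-thread mkey cache above (not from a real run): with the default
 * num_requests of 2047 mkeys and, say, 4 cores, cache_per_thread = 2047 * 3 / 4 / 4 = 383 mkeys
 * cached per thread, leaving roughly a quarter of the pool shared between threads. */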
return spdk_mlx5_mkey_pool_init(&pool_param, pd); 2720 } 2721 2722 static void 2723 accel_mlx5_set_psv_in_pool(struct spdk_mempool *mp, void *cb_arg, void *_psv, unsigned obj_idx) 2724 { 2725 struct spdk_rdma_utils_memory_translation translation = {}; 2726 struct accel_mlx5_psv_pool_iter_cb_args *args = cb_arg; 2727 struct accel_mlx5_psv_wrapper *wrapper = _psv; 2728 struct accel_mlx5_dev_ctx *dev_ctx = args->dev; 2729 int rc; 2730 2731 if (args->rc) { 2732 return; 2733 } 2734 assert(obj_idx < g_accel_mlx5.attr.num_requests); 2735 assert(dev_ctx->psvs[obj_idx] != NULL); 2736 memset(wrapper, 0, sizeof(*wrapper)); 2737 wrapper->psv_index = dev_ctx->psvs[obj_idx]->index; 2738 2739 rc = spdk_rdma_utils_get_translation(args->map, &wrapper->crc, sizeof(uint32_t), &translation); 2740 if (rc) { 2741 SPDK_ERRLOG("Memory translation failed, addr %p, length %zu\n", &wrapper->crc, sizeof(uint32_t)); 2742 args->rc = -EINVAL; 2743 } else { 2744 wrapper->crc_lkey = spdk_rdma_utils_memory_translation_get_lkey(&translation); 2745 } 2746 } 2747 2748 static int 2749 accel_mlx5_psvs_create(struct accel_mlx5_dev_ctx *dev_ctx) 2750 { 2751 struct accel_mlx5_psv_pool_iter_cb_args args = { 2752 .dev = dev_ctx 2753 }; 2754 char pool_name[32]; 2755 uint32_t i; 2756 uint32_t num_psvs = g_accel_mlx5.attr.num_requests; 2757 uint32_t cache_size; 2758 int rc; 2759 2760 dev_ctx->psvs = calloc(num_psvs, (sizeof(struct spdk_mlx5_psv *))); 2761 if (!dev_ctx->psvs) { 2762 SPDK_ERRLOG("Failed to alloc PSVs array\n"); 2763 return -ENOMEM; 2764 } 2765 for (i = 0; i < num_psvs; i++) { 2766 dev_ctx->psvs[i] = spdk_mlx5_create_psv(dev_ctx->pd); 2767 if (!dev_ctx->psvs[i]) { 2768 SPDK_ERRLOG("Failed to create PSV on dev %s\n", dev_ctx->context->device->name); 2769 return -EINVAL; 2770 } 2771 } 2772 2773 rc = snprintf(pool_name, sizeof(pool_name), "accel_psv_%s", dev_ctx->context->device->name); 2774 if (rc < 0) { 2775 assert(0); 2776 return -EINVAL; 2777 } 2778 cache_size = num_psvs * 3 / 4 / spdk_env_get_core_count(); 2779 args.map = spdk_rdma_utils_create_mem_map(dev_ctx->pd, NULL, 2780 IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE); 2781 if (!args.map) { 2782 return -ENOMEM; 2783 } 2784 dev_ctx->psv_pool = spdk_mempool_create_ctor(pool_name, num_psvs, 2785 sizeof(struct accel_mlx5_psv_wrapper), 2786 cache_size, SPDK_ENV_SOCKET_ID_ANY, 2787 accel_mlx5_set_psv_in_pool, &args); 2788 spdk_rdma_utils_free_mem_map(&args.map); 2789 if (!dev_ctx->psv_pool) { 2790 SPDK_ERRLOG("Failed to create PSV memory pool\n"); 2791 return -ENOMEM; 2792 } 2793 if (args.rc) { 2794 SPDK_ERRLOG("Failed to init PSV memory pool objects, rc %d\n", args.rc); 2795 return args.rc; 2796 } 2797 2798 return 0; 2799 } 2800 2801 2802 static int 2803 accel_mlx5_dev_ctx_init(struct accel_mlx5_dev_ctx *dev_ctx, struct ibv_context *dev, 2804 struct spdk_mlx5_device_caps *caps) 2805 { 2806 struct ibv_pd *pd; 2807 int rc; 2808 2809 pd = spdk_rdma_utils_get_pd(dev); 2810 if (!pd) { 2811 SPDK_ERRLOG("Failed to get PD for context %p, dev %s\n", dev, dev->device->name); 2812 return -EINVAL; 2813 } 2814 dev_ctx->context = dev; 2815 dev_ctx->pd = pd; 2816 dev_ctx->domain = spdk_rdma_utils_get_memory_domain(pd); 2817 if (!dev_ctx->domain) { 2818 return -ENOMEM; 2819 } 2820 2821 rc = accel_mlx5_mkeys_create(pd, g_accel_mlx5.attr.num_requests, 0); 2822 if (rc) { 2823 SPDK_ERRLOG("Failed to create mkeys pool, rc %d, dev %s\n", rc, dev->device->name); 2824 return rc; 2825 } 2826 dev_ctx->mkeys = true; 2827 2828 if (g_accel_mlx5.crypto_supported) { 2829 
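/* Descriptive note (added for clarity): only the multi_block_be_tweak capability enables
 * multi-block crypto here; a device that reports only LE-tweak support still passes
 * accel_mlx5_dev_supports_crypto() but keeps crypto_multi_block false, and crypto_split_blocks
 * is then ignored for it (see the warning just below and accel_mlx5_create_cb). */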
dev_ctx->crypto_multi_block = caps->crypto.multi_block_be_tweak; 2830 if (!dev_ctx->crypto_multi_block && g_accel_mlx5.attr.crypto_split_blocks) { 2831 SPDK_WARNLOG("\"crypto_split_blocks\" is set but dev %s doesn't support multi block crypto\n", 2832 dev->device->name); 2833 } 2834 rc = accel_mlx5_mkeys_create(pd, g_accel_mlx5.attr.num_requests, SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO); 2835 if (rc) { 2836 SPDK_ERRLOG("Failed to create crypto mkeys pool, rc %d, dev %s\n", rc, dev->device->name); 2837 return rc; 2838 } 2839 dev_ctx->crypto_mkeys = true; 2840 } 2841 if (g_accel_mlx5.crc32c_supported) { 2842 rc = accel_mlx5_mkeys_create(pd, g_accel_mlx5.attr.num_requests, 2843 SPDK_MLX5_MKEY_POOL_FLAG_SIGNATURE); 2844 if (rc) { 2845 SPDK_ERRLOG("Failed to create signature mkeys pool, rc %d, dev %s\n", rc, dev->device->name); 2846 return rc; 2847 } 2848 dev_ctx->sig_mkeys = true; 2849 rc = accel_mlx5_psvs_create(dev_ctx); 2850 if (rc) { 2851 SPDK_ERRLOG("Failed to create PSVs pool, rc %d, dev %s\n", rc, dev->device->name); 2852 return rc; 2853 } 2854 } 2855 2856 return 0; 2857 } 2858 2859 static struct ibv_context ** 2860 accel_mlx5_get_devices(int *_num_devs) 2861 { 2862 struct ibv_context **rdma_devs, **rdma_devs_out = NULL, *dev; 2863 struct ibv_device_attr dev_attr; 2864 size_t j; 2865 int num_devs = 0, i, rc; 2866 int num_devs_out = 0; 2867 bool dev_allowed; 2868 2869 rdma_devs = rdma_get_devices(&num_devs); 2870 if (!rdma_devs || !num_devs) { 2871 *_num_devs = 0; 2872 return NULL; 2873 } 2874 2875 rdma_devs_out = calloc(num_devs + 1, sizeof(struct ibv_context *)); 2876 if (!rdma_devs_out) { 2877 SPDK_ERRLOG("Memory allocation failed\n"); 2878 rdma_free_devices(rdma_devs); 2879 *_num_devs = 0; 2880 return NULL; 2881 } 2882 2883 for (i = 0; i < num_devs; i++) { 2884 dev = rdma_devs[i]; 2885 rc = ibv_query_device(dev, &dev_attr); 2886 if (rc) { 2887 SPDK_ERRLOG("Failed to query dev %s, skipping\n", dev->device->name); 2888 continue; 2889 } 2890 if (dev_attr.vendor_id != SPDK_MLX5_VENDOR_ID_MELLANOX) { 2891 SPDK_DEBUGLOG(accel_mlx5, "dev %s is not Mellanox device, skipping\n", dev->device->name); 2892 continue; 2893 } 2894 2895 if (g_accel_mlx5.allowed_devs_count) { 2896 dev_allowed = false; 2897 for (j = 0; j < g_accel_mlx5.allowed_devs_count; j++) { 2898 if (strcmp(g_accel_mlx5.allowed_devs[j], dev->device->name) == 0) { 2899 dev_allowed = true; 2900 break; 2901 } 2902 } 2903 if (!dev_allowed) { 2904 continue; 2905 } 2906 } 2907 2908 rdma_devs_out[num_devs_out] = dev; 2909 num_devs_out++; 2910 } 2911 2912 rdma_free_devices(rdma_devs); 2913 *_num_devs = num_devs_out; 2914 2915 return rdma_devs_out; 2916 } 2917 2918 static inline bool 2919 accel_mlx5_dev_supports_crypto(struct spdk_mlx5_device_caps *caps) 2920 { 2921 return caps->crypto_supported && !caps->crypto.wrapped_import_method_aes_xts && 2922 (caps->crypto.single_block_le_tweak || 2923 caps->crypto.multi_block_le_tweak || caps->crypto.multi_block_be_tweak); 2924 } 2925 2926 static int 2927 accel_mlx5_init(void) 2928 { 2929 struct spdk_mlx5_device_caps *caps; 2930 struct ibv_context **rdma_devs, *dev; 2931 int num_devs = 0, rc = 0, i; 2932 int best_dev = -1, first_dev = 0; 2933 int best_dev_stat = 0, dev_stat; 2934 bool supports_crypto; 2935 bool find_best_dev = g_accel_mlx5.allowed_devs_count == 0; 2936 2937 if (!g_accel_mlx5.enabled) { 2938 return -EINVAL; 2939 } 2940 2941 spdk_spin_init(&g_accel_mlx5.lock); 2942 rdma_devs = accel_mlx5_get_devices(&num_devs); 2943 if (!rdma_devs || !num_devs) { 2944 return -ENODEV; 2945 } 2946 caps = 
calloc(num_devs, sizeof(*caps)); 2947 if (!caps) { 2948 rc = -ENOMEM; 2949 goto cleanup; 2950 } 2951 2952 g_accel_mlx5.crypto_supported = true; 2953 g_accel_mlx5.crc32c_supported = true; 2954 g_accel_mlx5.num_ctxs = 0; 2955 2956 /* Iterate devices. We support an offload if all devices support it */ 2957 for (i = 0; i < num_devs; i++) { 2958 dev = rdma_devs[i]; 2959 2960 rc = spdk_mlx5_device_query_caps(dev, &caps[i]); 2961 if (rc) { 2962 SPDK_ERRLOG("Failed to get crypto caps, dev %s\n", dev->device->name); 2963 goto cleanup; 2964 } 2965 supports_crypto = accel_mlx5_dev_supports_crypto(&caps[i]); 2966 if (!supports_crypto) { 2967 SPDK_DEBUGLOG(accel_mlx5, "Disable crypto support because dev %s doesn't support it\n", 2968 rdma_devs[i]->device->name); 2969 g_accel_mlx5.crypto_supported = false; 2970 } 2971 if (!caps[i].crc32c_supported) { 2972 SPDK_DEBUGLOG(accel_mlx5, "Disable crc32c support because dev %s doesn't support it\n", 2973 rdma_devs[i]->device->name); 2974 g_accel_mlx5.crc32c_supported = false; 2975 } 2976 if (find_best_dev) { 2977 /* Find device which supports max number of offloads */ 2978 dev_stat = (int)supports_crypto + (int)caps[i].crc32c_supported; 2979 if (dev_stat > best_dev_stat) { 2980 best_dev_stat = dev_stat; 2981 best_dev = i; 2982 } 2983 } 2984 } 2985 2986 /* User didn't specify devices to use, try to select the best one */ 2987 if (find_best_dev) { 2988 if (best_dev == -1) { 2989 best_dev = 0; 2990 } 2991 g_accel_mlx5.crypto_supported = accel_mlx5_dev_supports_crypto(&caps[best_dev]); 2992 g_accel_mlx5.crc32c_supported = caps[best_dev].crc32c_supported; 2993 SPDK_NOTICELOG("Select dev %s, crypto %d, crc32c %d\n", rdma_devs[best_dev]->device->name, 2994 g_accel_mlx5.crypto_supported, g_accel_mlx5.crc32c_supported); 2995 first_dev = best_dev; 2996 num_devs = 1; 2997 if (g_accel_mlx5.crypto_supported) { 2998 const char *const dev_name[] = { rdma_devs[best_dev]->device->name }; 2999 /* Let mlx5 library know which device to use */ 3000 spdk_mlx5_crypto_devs_allow(dev_name, 1); 3001 } 3002 } else { 3003 SPDK_NOTICELOG("Found %d devices, crypto %d\n", num_devs, g_accel_mlx5.crypto_supported); 3004 } 3005 3006 g_accel_mlx5.dev_ctxs = calloc(num_devs, sizeof(*g_accel_mlx5.dev_ctxs)); 3007 if (!g_accel_mlx5.dev_ctxs) { 3008 SPDK_ERRLOG("Memory allocation failed\n"); 3009 rc = -ENOMEM; 3010 goto cleanup; 3011 } 3012 3013 for (i = first_dev; i < first_dev + num_devs; i++) { 3014 rc = accel_mlx5_dev_ctx_init(&g_accel_mlx5.dev_ctxs[g_accel_mlx5.num_ctxs++], 3015 rdma_devs[i], &caps[i]); 3016 if (rc) { 3017 goto cleanup; 3018 } 3019 } 3020 3021 SPDK_NOTICELOG("Accel framework mlx5 initialized, found %d devices.\n", num_devs); 3022 spdk_io_device_register(&g_accel_mlx5, accel_mlx5_create_cb, accel_mlx5_destroy_cb, 3023 sizeof(struct accel_mlx5_io_channel), "accel_mlx5"); 3024 g_accel_mlx5.initialized = true; 3025 free(rdma_devs); 3026 free(caps); 3027 3028 if (g_accel_mlx5.attr.enable_driver) { 3029 SPDK_NOTICELOG("Enabling mlx5 platform driver\n"); 3030 spdk_accel_driver_register(&g_accel_mlx5_driver); 3031 spdk_accel_set_driver(g_accel_mlx5_driver.name); 3032 spdk_mlx5_umr_implementer_register(true); 3033 } 3034 3035 return 0; 3036 3037 cleanup: 3038 free(rdma_devs); 3039 free(caps); 3040 accel_mlx5_free_resources(); 3041 spdk_spin_destroy(&g_accel_mlx5.lock); 3042 3043 return rc; 3044 } 3045 3046 static void 3047 accel_mlx5_write_config_json(struct spdk_json_write_ctx *w) 3048 { 3049 if (g_accel_mlx5.enabled) { 3050 spdk_json_write_object_begin(w); 3051 
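/* Illustrative output of this function with default attributes (the device name is made up for
 * the example; "allowed_devs" is emitted only when the user restricted the device list):
 * { "method": "mlx5_scan_accel_module", "params": { "qp_size": 256, "num_requests": 2047,
 *   "allowed_devs": "mlx5_0", "crypto_split_blocks": 0, "enable_driver": false } } */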
spdk_json_write_named_string(w, "method", "mlx5_scan_accel_module"); 3052 spdk_json_write_named_object_begin(w, "params"); 3053 spdk_json_write_named_uint16(w, "qp_size", g_accel_mlx5.attr.qp_size); 3054 spdk_json_write_named_uint32(w, "num_requests", g_accel_mlx5.attr.num_requests); 3055 if (g_accel_mlx5.attr.allowed_devs) { 3056 spdk_json_write_named_string(w, "allowed_devs", g_accel_mlx5.attr.allowed_devs); 3057 } 3058 spdk_json_write_named_uint16(w, "crypto_split_blocks", g_accel_mlx5.attr.crypto_split_blocks); 3059 spdk_json_write_named_bool(w, "enable_driver", g_accel_mlx5.attr.enable_driver); 3060 spdk_json_write_object_end(w); 3061 spdk_json_write_object_end(w); 3062 } 3063 } 3064 3065 static size_t 3066 accel_mlx5_get_ctx_size(void) 3067 { 3068 return sizeof(struct accel_mlx5_task); 3069 } 3070 3071 static int 3072 accel_mlx5_crypto_key_init(struct spdk_accel_crypto_key *key) 3073 { 3074 struct spdk_mlx5_crypto_dek_create_attr attr = {}; 3075 struct spdk_mlx5_crypto_keytag *keytag; 3076 int rc; 3077 3078 if (!key || !key->key || !key->key2 || !key->key_size || !key->key2_size) { 3079 return -EINVAL; 3080 } 3081 3082 attr.dek = calloc(1, key->key_size + key->key2_size); 3083 if (!attr.dek) { 3084 return -ENOMEM; 3085 } 3086 3087 memcpy(attr.dek, key->key, key->key_size); 3088 memcpy(attr.dek + key->key_size, key->key2, key->key2_size); 3089 attr.dek_len = key->key_size + key->key2_size; 3090 3091 rc = spdk_mlx5_crypto_keytag_create(&attr, &keytag); 3092 spdk_memset_s(attr.dek, attr.dek_len, 0, attr.dek_len); 3093 free(attr.dek); 3094 if (rc) { 3095 SPDK_ERRLOG("Failed to create a keytag, rc %d\n", rc); 3096 return rc; 3097 } 3098 3099 key->priv = keytag; 3100 3101 return 0; 3102 } 3103 3104 static void 3105 accel_mlx5_crypto_key_deinit(struct spdk_accel_crypto_key *key) 3106 { 3107 if (!key || key->module_if != &g_accel_mlx5.module || !key->priv) { 3108 return; 3109 } 3110 3111 spdk_mlx5_crypto_keytag_destroy(key->priv); 3112 } 3113 3114 static void 3115 accel_mlx5_dump_stats_json(struct spdk_json_write_ctx *w, const char *header, 3116 const struct accel_mlx5_stats *stats) 3117 { 3118 double idle_polls_percentage = 0; 3119 double cpls_per_poll = 0; 3120 uint64_t total_tasks = 0; 3121 int i; 3122 3123 if (stats->polls) { 3124 idle_polls_percentage = (double) stats->idle_polls * 100 / stats->polls; 3125 } 3126 if (stats->polls > stats->idle_polls) { 3127 cpls_per_poll = (double) stats->completions / (stats->polls - stats->idle_polls); 3128 } 3129 for (i = 0; i < ACCEL_MLX5_OPC_LAST; i++) { 3130 total_tasks += stats->opcodes[i]; 3131 } 3132 3133 spdk_json_write_named_object_begin(w, header); 3134 3135 spdk_json_write_named_object_begin(w, "umrs"); 3136 spdk_json_write_named_uint64(w, "crypto_umrs", stats->crypto_umrs); 3137 spdk_json_write_named_uint64(w, "sig_umrs", stats->sig_umrs); 3138 spdk_json_write_named_uint64(w, "total", stats->crypto_umrs + stats->sig_umrs); 3139 spdk_json_write_object_end(w); 3140 3141 spdk_json_write_named_object_begin(w, "rdma"); 3142 spdk_json_write_named_uint64(w, "read", stats->rdma_reads); 3143 spdk_json_write_named_uint64(w, "write", stats->rdma_writes); 3144 spdk_json_write_named_uint64(w, "total", stats->rdma_reads + stats->rdma_writes); 3145 spdk_json_write_object_end(w); 3146 3147 spdk_json_write_named_object_begin(w, "polling"); 3148 spdk_json_write_named_uint64(w, "polls", stats->polls); 3149 spdk_json_write_named_uint64(w, "idle_polls", stats->idle_polls); 3150 spdk_json_write_named_uint64(w, "completions", stats->completions); 3151 
spdk_json_write_named_double(w, "idle_polls_percentage", idle_polls_percentage); 3152 spdk_json_write_named_double(w, "cpls_per_poll", cpls_per_poll); 3153 spdk_json_write_named_uint64(w, "nomem_qdepth", stats->nomem_qdepth); 3154 spdk_json_write_named_uint64(w, "nomem_mkey", stats->nomem_mkey); 3155 spdk_json_write_object_end(w); 3156 3157 spdk_json_write_named_object_begin(w, "tasks"); 3158 spdk_json_write_named_uint64(w, "copy", stats->opcodes[ACCEL_MLX5_OPC_COPY]); 3159 spdk_json_write_named_uint64(w, "crypto", stats->opcodes[ACCEL_MLX5_OPC_CRYPTO]); 3160 spdk_json_write_named_uint64(w, "crypto_mkey", stats->opcodes[ACCEL_MLX5_OPC_CRYPTO_MKEY]); 3161 spdk_json_write_named_uint64(w, "crc32c", stats->opcodes[ACCEL_MLX5_OPC_CRC32C]); 3162 spdk_json_write_named_uint64(w, "total", total_tasks); 3163 spdk_json_write_object_end(w); 3164 3165 spdk_json_write_object_end(w); 3166 } 3167 3168 static void 3169 accel_mlx5_dump_channel_stat(struct spdk_io_channel_iter *i) 3170 { 3171 struct accel_mlx5_stats ch_stat = {}; 3172 struct accel_mlx5_dump_stats_ctx *ctx; 3173 struct spdk_io_channel *_ch; 3174 struct accel_mlx5_io_channel *ch; 3175 struct accel_mlx5_dev *dev; 3176 uint32_t j; 3177 3178 ctx = spdk_io_channel_iter_get_ctx(i); 3179 _ch = spdk_io_channel_iter_get_channel(i); 3180 ch = spdk_io_channel_get_ctx(_ch); 3181 3182 if (ctx->level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) { 3183 spdk_json_write_object_begin(ctx->w); 3184 spdk_json_write_named_object_begin(ctx->w, spdk_thread_get_name(spdk_get_thread())); 3185 } 3186 if (ctx->level == ACCEL_MLX5_DUMP_STAT_LEVEL_DEV) { 3187 spdk_json_write_named_array_begin(ctx->w, "devices"); 3188 } 3189 3190 for (j = 0; j < ch->num_devs; j++) { 3191 dev = &ch->devs[j]; 3192 /* Save grand total and channel stats */ 3193 accel_mlx5_add_stats(&ctx->total, &dev->stats); 3194 accel_mlx5_add_stats(&ch_stat, &dev->stats); 3195 if (ctx->level == ACCEL_MLX5_DUMP_STAT_LEVEL_DEV) { 3196 spdk_json_write_object_begin(ctx->w); 3197 accel_mlx5_dump_stats_json(ctx->w, dev->dev_ctx->context->device->name, &dev->stats); 3198 spdk_json_write_object_end(ctx->w); 3199 } 3200 } 3201 3202 if (ctx->level == ACCEL_MLX5_DUMP_STAT_LEVEL_DEV) { 3203 spdk_json_write_array_end(ctx->w); 3204 } 3205 if (ctx->level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) { 3206 accel_mlx5_dump_stats_json(ctx->w, "channel_total", &ch_stat); 3207 spdk_json_write_object_end(ctx->w); 3208 spdk_json_write_object_end(ctx->w); 3209 } 3210 3211 spdk_for_each_channel_continue(i, 0); 3212 } 3213 3214 static void 3215 accel_mlx5_dump_channel_stat_done(struct spdk_io_channel_iter *i, int status) 3216 { 3217 struct accel_mlx5_dump_stats_ctx *ctx; 3218 3219 ctx = spdk_io_channel_iter_get_ctx(i); 3220 3221 spdk_spin_lock(&g_accel_mlx5.lock); 3222 /* Add statistics from destroyed channels */ 3223 accel_mlx5_add_stats(&ctx->total, &g_accel_mlx5.stats); 3224 spdk_spin_unlock(&g_accel_mlx5.lock); 3225 3226 if (ctx->level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) { 3227 /* channels[] */ 3228 spdk_json_write_array_end(ctx->w); 3229 } 3230 3231 accel_mlx5_dump_stats_json(ctx->w, "total", &ctx->total); 3232 3233 /* Ends the whole response which was begun in accel_mlx5_dump_stats */ 3234 spdk_json_write_object_end(ctx->w); 3235 3236 ctx->cb(ctx->ctx, 0); 3237 free(ctx); 3238 } 3239 3240 int 3241 accel_mlx5_dump_stats(struct spdk_json_write_ctx *w, enum accel_mlx5_dump_state_level level, 3242 accel_mlx5_dump_stat_done_cb cb, void *ctx) 3243 { 3244 struct accel_mlx5_dump_stats_ctx *stat_ctx; 3245 3246 if (!w || !cb) { 3247 return -EINVAL; 
3248 } 3249 if (!g_accel_mlx5.initialized) { 3250 return -ENODEV; 3251 } 3252 3253 stat_ctx = calloc(1, sizeof(*stat_ctx)); 3254 if (!stat_ctx) { 3255 return -ENOMEM; 3256 } 3257 stat_ctx->cb = cb; 3258 stat_ctx->ctx = ctx; 3259 stat_ctx->level = level; 3260 stat_ctx->w = w; 3261 3262 spdk_json_write_object_begin(w); 3263 3264 if (level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) { 3265 spdk_json_write_named_array_begin(w, "channels"); 3266 } 3267 3268 spdk_for_each_channel(&g_accel_mlx5, accel_mlx5_dump_channel_stat, stat_ctx, 3269 accel_mlx5_dump_channel_stat_done); 3270 3271 return 0; 3272 } 3273 3274 static bool 3275 accel_mlx5_crypto_supports_cipher(enum spdk_accel_cipher cipher, size_t key_size) 3276 { 3277 switch (cipher) { 3278 case SPDK_ACCEL_CIPHER_AES_XTS: 3279 return key_size == SPDK_ACCEL_AES_XTS_128_KEY_SIZE || key_size == SPDK_ACCEL_AES_XTS_256_KEY_SIZE; 3280 default: 3281 return false; 3282 } 3283 } 3284 3285 static int 3286 accel_mlx5_get_memory_domains(struct spdk_memory_domain **domains, int array_size) 3287 { 3288 int i, size; 3289 3290 if (!domains || !array_size) { 3291 return (int)g_accel_mlx5.num_ctxs; 3292 } 3293 3294 size = spdk_min(array_size, (int)g_accel_mlx5.num_ctxs); 3295 3296 for (i = 0; i < size; i++) { 3297 domains[i] = g_accel_mlx5.dev_ctxs[i].domain; 3298 } 3299 3300 return (int)g_accel_mlx5.num_ctxs; 3301 } 3302 3303 static inline struct accel_mlx5_dev * 3304 accel_mlx5_ch_get_dev_by_pd(struct accel_mlx5_io_channel *accel_ch, struct ibv_pd *pd) 3305 { 3306 uint32_t i; 3307 3308 for (i = 0; i < accel_ch->num_devs; i++) { 3309 if (accel_ch->devs[i].dev_ctx->pd == pd) { 3310 return &accel_ch->devs[i]; 3311 } 3312 } 3313 3314 return NULL; 3315 } 3316 3317 static inline int 3318 accel_mlx5_task_assign_qp_by_domain_pd(struct accel_mlx5_task *task, 3319 struct accel_mlx5_io_channel *accel_ch, struct spdk_memory_domain *domain) 3320 { 3321 struct spdk_memory_domain_rdma_ctx *domain_ctx; 3322 struct accel_mlx5_dev *dev; 3323 struct ibv_pd *domain_pd; 3324 size_t ctx_size; 3325 3326 domain_ctx = spdk_memory_domain_get_user_context(domain, &ctx_size); 3327 if (spdk_unlikely(!domain_ctx || domain_ctx->size != ctx_size)) { 3328 SPDK_ERRLOG("no domain context or wrong size, ctx ptr %p, size %zu\n", domain_ctx, ctx_size); 3329 return -ENOTSUP; 3330 } 3331 domain_pd = domain_ctx->ibv_pd; 3332 if (spdk_unlikely(!domain_pd)) { 3333 SPDK_ERRLOG("no destination domain PD, task %p\n", task); 3334 return -ENOTSUP; 3335 } 3336 dev = accel_mlx5_ch_get_dev_by_pd(accel_ch, domain_pd); 3337 if (spdk_unlikely(!dev)) { 3338 SPDK_ERRLOG("No dev for PD %p dev %s\n", domain_pd, domain_pd->context->device->name); 3339 return -ENODEV; 3340 } 3341 3342 3343 3344 3345 task->qp = &dev->qp; 3346 3347 return 0; 3348 } 3349 3350 static inline int 3351 accel_mlx5_driver_examine_sequence(struct spdk_accel_sequence *seq, 3352 struct accel_mlx5_io_channel *accel_ch) 3353 { 3354 struct spdk_accel_task *first_base = spdk_accel_sequence_first_task(seq); 3355 struct accel_mlx5_task *first = SPDK_CONTAINEROF(first_base, struct accel_mlx5_task, base); 3356 struct spdk_accel_task *next_base = TAILQ_NEXT(first_base, seq_link); 3357 struct accel_mlx5_task *next; 3358 int rc; 3359 3360 accel_mlx5_task_reset(first); 3361 SPDK_DEBUGLOG(accel_mlx5, "first %p, opc %d; next %p, opc %d\n", first_base, first_base->op_code, 3362 next_base, next_base ?
next_base->op_code : -1); 3363 if (next_base) { 3364 switch (first_base->op_code) { 3365 case SPDK_ACCEL_OPC_COPY: 3366 if (next_base->op_code == SPDK_ACCEL_OPC_DECRYPT && 3367 first_base->dst_domain && spdk_memory_domain_get_dma_device_type(first_base->dst_domain) == 3368 SPDK_DMA_DEVICE_TYPE_RDMA && TAILQ_NEXT(next_base, seq_link) == NULL) { 3369 next = SPDK_CONTAINEROF(next_base, struct accel_mlx5_task, base); 3370 rc = accel_mlx5_task_assign_qp_by_domain_pd(next, accel_ch, first_base->dst_domain); 3371 if (spdk_unlikely(rc)) { 3372 return rc; 3373 } 3374 /* Update decrypt task memory domain, complete copy task */ 3375 SPDK_DEBUGLOG(accel_mlx5, "Merge copy task (%p) and decrypt (%p)\n", first, next); 3376 next_base->dst_domain = first_base->dst_domain; 3377 next_base->dst_domain_ctx = first_base->dst_domain_ctx; 3378 accel_mlx5_task_reset(next); 3379 next->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO_MKEY; 3380 next->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_WIRE; 3381 next->needs_data_transfer = 1; 3382 next->inplace = 1; 3383 spdk_accel_task_complete(first_base, 0); 3384 return 0; 3385 } 3386 break; 3387 case SPDK_ACCEL_OPC_ENCRYPT: 3388 if (next_base->op_code == SPDK_ACCEL_OPC_COPY && 3389 next_base->dst_domain && spdk_memory_domain_get_dma_device_type(next_base->dst_domain) == 3390 SPDK_DMA_DEVICE_TYPE_RDMA && TAILQ_NEXT(next_base, seq_link) == NULL) { 3391 rc = accel_mlx5_task_assign_qp_by_domain_pd(first, accel_ch, next_base->dst_domain); 3392 if (spdk_unlikely(rc)) { 3393 return rc; 3394 } 3395 3396 /* Update encrypt task memory domain, complete copy task */ 3397 SPDK_DEBUGLOG(accel_mlx5, "Merge copy task (%p) and encrypt (%p)\n", 3398 SPDK_CONTAINEROF(next_base, 3399 struct accel_mlx5_task, base), first); 3400 first_base->dst_domain = next_base->dst_domain; 3401 first_base->dst_domain_ctx = next_base->dst_domain_ctx; 3402 first->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO_MKEY; 3403 first->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_WIRE; 3404 first->needs_data_transfer = 1; 3405 first->inplace = 1; 3406 spdk_accel_task_complete(next_base, 0); 3407 return 0; 3408 } 3409 break; 3410 3411 default: 3412 break; 3413 } 3414 } 3415 3416 SPDK_DEBUGLOG(accel_mlx5, "seq %p, task %p nothing to merge\n", seq, first_base); 3417 /* Nothing to merge, execute tasks one by one */ 3418 accel_mlx5_task_assign_qp(first, accel_ch); 3419 accel_mlx5_task_init_opcode(first); 3420 3421 return 0; 3422 } 3423 3424 static inline int 3425 accel_mlx5_execute_sequence(struct spdk_io_channel *ch, struct spdk_accel_sequence *seq) 3426 { 3427 struct accel_mlx5_io_channel *accel_ch = spdk_io_channel_get_ctx(ch); 3428 struct spdk_accel_task *task; 3429 struct accel_mlx5_task *mlx5_task; 3430 int rc; 3431 3432 rc = accel_mlx5_driver_examine_sequence(seq, accel_ch); 3433 if (spdk_unlikely(rc)) { 3434 return rc; 3435 } 3436 task = spdk_accel_sequence_first_task(seq); 3437 assert(task); 3438 mlx5_task = SPDK_CONTAINEROF(task, struct accel_mlx5_task, base); 3439 mlx5_task->driver_seq = 1; 3440 3441 SPDK_DEBUGLOG(accel_mlx5, "driver starts seq %p, ch %p, task %p\n", seq, accel_ch, task); 3442 3443 return _accel_mlx5_submit_tasks(accel_ch, task); 3444 } 3445 3446 static struct accel_mlx5_module g_accel_mlx5 = { 3447 .module = { 3448 .module_init = accel_mlx5_init, 3449 .module_fini = accel_mlx5_deinit, 3450 .write_config_json = accel_mlx5_write_config_json, 3451 .get_ctx_size = accel_mlx5_get_ctx_size, 3452 .name = "mlx5", 3453 .supports_opcode = accel_mlx5_supports_opcode, 3454 .get_io_channel =
accel_mlx5_get_io_channel, 3455 .submit_tasks = accel_mlx5_submit_tasks, 3456 .crypto_key_init = accel_mlx5_crypto_key_init, 3457 .crypto_key_deinit = accel_mlx5_crypto_key_deinit, 3458 .crypto_supports_cipher = accel_mlx5_crypto_supports_cipher, 3459 .get_memory_domains = accel_mlx5_get_memory_domains, 3460 } 3461 }; 3462 3463 static struct spdk_accel_driver g_accel_mlx5_driver = { 3464 .name = "mlx5", 3465 .execute_sequence = accel_mlx5_execute_sequence, 3466 .get_io_channel = accel_mlx5_get_io_channel 3467 }; 3468 3469 SPDK_LOG_REGISTER_COMPONENT(accel_mlx5) 3470
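/*
 * For reference, accel_mlx5_dump_stats() produces JSON of roughly the following shape (sketch only,
 * keys taken from accel_mlx5_dump_stats_json()/accel_mlx5_dump_channel_stat(); "channels" appears
 * only for the per-channel and per-device levels, "devices" only for the per-device level):
 *
 * {
 *   "channels": [
 *     { "<thread name>": {
 *         "devices": [ { "<ibv device name>": { "umrs": {...}, "rdma": {...}, "polling": {...}, "tasks": {...} } } ],
 *         "channel_total": { "umrs": {...}, "rdma": {...}, "polling": {...}, "tasks": {...} }
 *     } }
 *   ],
 *   "total": { "umrs": {...}, "rdma": {...}, "polling": {...}, "tasks": {...} }
 * }
 */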