/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#include "spdk/env.h"
#include "spdk/thread.h"
#include "spdk/queue.h"
#include "spdk/log.h"
#include "spdk/string.h"
#include "spdk/likely.h"
#include "spdk/dma.h"
#include "spdk/json.h"
#include "spdk/util.h"

#include "spdk_internal/mlx5.h"
#include "spdk_internal/rdma_utils.h"
#include "spdk/accel_module.h"
#include "spdk_internal/assert.h"
#include "spdk_internal/sgl.h"
#include "accel_mlx5.h"

#include <infiniband/mlx5dv.h>
#include <rdma/rdma_cma.h>

#define ACCEL_MLX5_QP_SIZE (256u)
#define ACCEL_MLX5_NUM_REQUESTS (2048u - 1)
#define ACCEL_MLX5_RECOVER_POLLER_PERIOD_US (10000)
#define ACCEL_MLX5_MAX_SGE (16u)
#define ACCEL_MLX5_MAX_WC (64u)
#define ACCEL_MLX5_MAX_MKEYS_IN_TASK (16u)

/* Assume we have up to 16 devices */
#define ACCEL_MLX5_ALLOWED_DEVS_MAX_LEN ((SPDK_MLX5_DEV_MAX_NAME_LEN + 1) * 16)

#define ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, task)	\
do {							\
	assert((qp)->wrs_submitted < (qp)->wrs_max);	\
	(qp)->wrs_submitted++;				\
	assert((task)->num_wrs < UINT16_MAX);		\
	(task)->num_wrs++;				\
} while (0)

#define ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, task)	\
do {									\
	assert((dev)->wrs_in_cq < (dev)->wrs_in_cq_max);		\
	(dev)->wrs_in_cq++;						\
	assert((qp)->wrs_submitted < (qp)->wrs_max);			\
	(qp)->wrs_submitted++;						\
	assert((task)->num_wrs < UINT16_MAX);				\
	(task)->num_wrs++;						\
} while (0)

struct accel_mlx5_io_channel;
struct accel_mlx5_task;

struct accel_mlx5_dev_ctx {
	struct ibv_context *context;
	struct ibv_pd *pd;
	struct spdk_memory_domain *domain;
	struct spdk_mempool *psv_pool;
	TAILQ_ENTRY(accel_mlx5_dev_ctx) link;
	struct spdk_mlx5_psv **psvs;
	bool crypto_mkeys;
	bool sig_mkeys;
	bool crypto_multi_block;
};

enum accel_mlx5_opcode {
	ACCEL_MLX5_OPC_COPY,
	ACCEL_MLX5_OPC_CRYPTO,
	ACCEL_MLX5_OPC_CRC32C,
	ACCEL_MLX5_OPC_CRYPTO_MKEY,
	ACCEL_MLX5_OPC_LAST
};

SPDK_STATIC_ASSERT(ACCEL_MLX5_OPC_LAST <= 0xf,
		   "accel opcode exceeds 4 bits, update accel_mlx5 struct");

struct accel_mlx5_stats {
	uint64_t crypto_umrs;
	uint64_t sig_umrs;
	uint64_t rdma_reads;
	uint64_t rdma_writes;
	uint64_t polls;
	uint64_t idle_polls;
	uint64_t completions;
	uint64_t nomem_qdepth;
	uint64_t nomem_mkey;
	uint64_t opcodes[ACCEL_MLX5_OPC_LAST];
};

struct accel_mlx5_module {
	struct spdk_accel_module_if module;
	struct accel_mlx5_stats stats;
	struct spdk_spinlock lock;
	struct accel_mlx5_dev_ctx *dev_ctxs;
	uint32_t num_ctxs;
	struct accel_mlx5_attr attr;
	char **allowed_devs;
	size_t allowed_devs_count;
	bool initialized;
	bool enabled;
	bool crypto_supported;
	bool crc32c_supported;
};

struct accel_mlx5_sge {
	uint32_t src_sge_count;
	uint32_t dst_sge_count;
	struct ibv_sge src_sge[ACCEL_MLX5_MAX_SGE];
	struct ibv_sge dst_sge[ACCEL_MLX5_MAX_SGE];
};

struct accel_mlx5_iov_sgl {
	struct iovec *iov;
	uint32_t iovcnt;
	uint32_t iov_offset;
};

struct accel_mlx5_psv_wrapper {
	uint32_t psv_index;
	struct {
		uint32_t error : 1;
		uint32_t reserved : 31;
	} bits;
	/* mlx5 engine requires DMAable memory, use this member to copy user's crc value since we don't know which
	 * memory it is in */
	uint32_t crc;
	uint32_t crc_lkey;
};

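/*
 * Per-task state tracked by the module. A task may be split into several requests
 * (num_reqs); for crypto and CRC32C tasks each request consumes one mkey from the
 * corresponding pool and one or more WQEs on the qp. num_submitted_reqs and
 * num_completed_reqs track submission versus completion progress, while num_ops is
 * the number of mkeys (or operations) currently allocated for the task. The first
 * union keeps either crypto block accounting or CRC32C PSV/last-mkey state; which
 * member is valid depends on mlx5_opcode.
 */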
struct accel_mlx5_task {
	struct spdk_accel_task base;
	struct accel_mlx5_iov_sgl src;
	struct accel_mlx5_iov_sgl dst;
	struct accel_mlx5_qp *qp;
	STAILQ_ENTRY(accel_mlx5_task) link;
	uint16_t num_reqs;
	uint16_t num_completed_reqs;
	uint16_t num_submitted_reqs;
	uint16_t num_ops; /* number of allocated mkeys or number of operations */
	uint16_t num_wrs; /* Number of outstanding operations which consume qp slot */
	union {
		struct {
			uint16_t blocks_per_req;
			uint16_t num_processed_blocks;
			uint16_t num_blocks;
		};
		struct {
			struct accel_mlx5_psv_wrapper *psv;
			uint32_t last_umr_len;
			uint8_t last_mkey_idx;
		};
	};
	union {
		uint16_t raw;
		struct {
			uint16_t inplace : 1;
			uint16_t driver_seq : 1;
			uint16_t needs_data_transfer : 1;
			uint16_t enc_order : 2;
			uint16_t mlx5_opcode: 4;
		};
	};
	/* Keep this array last since not all elements might be accessed, this reduces amount of data to be
	 * cached */
	struct spdk_mlx5_mkey_pool_obj *mkeys[ACCEL_MLX5_MAX_MKEYS_IN_TASK];
};

SPDK_STATIC_ASSERT(ACCEL_MLX5_MAX_MKEYS_IN_TASK <= UINT8_MAX, "uint8_t is used to iterate mkeys");

struct accel_mlx5_qp {
	struct spdk_mlx5_qp *qp;
	struct ibv_qp *verbs_qp;
	struct accel_mlx5_dev *dev;
	/* tasks submitted to HW. We can't complete a task even in error case until we reap completions for all
	 * submitted requests */
	STAILQ_HEAD(, accel_mlx5_task) in_hw;
	uint16_t wrs_submitted;
	uint16_t wrs_max;
	bool recovering;
	struct spdk_poller *recover_poller;
};

struct accel_mlx5_dev {
	struct accel_mlx5_qp qp;
	struct spdk_mlx5_cq *cq;
	struct spdk_mlx5_mkey_pool *crypto_mkeys;
	struct spdk_mlx5_mkey_pool *sig_mkeys;
	struct spdk_rdma_utils_mem_map *mmap;
	struct accel_mlx5_dev_ctx *dev_ctx;
	struct spdk_io_channel *ch;
	uint16_t wrs_in_cq;
	uint16_t wrs_in_cq_max;
	uint16_t crypto_split_blocks;
	bool crypto_multi_block;
	/* Pending tasks waiting for requests resources */
	STAILQ_HEAD(, accel_mlx5_task) nomem;
	TAILQ_ENTRY(accel_mlx5_dev) link;
	struct accel_mlx5_stats stats;
};

struct accel_mlx5_io_channel {
	struct accel_mlx5_dev *devs;
	struct spdk_poller *poller;
	uint32_t num_devs;
	/* Index in \b devs to be used for operations in round-robin way */
	uint32_t dev_idx;
};

struct accel_mlx5_task_operations {
	int (*init)(struct accel_mlx5_task *task);
	int (*process)(struct accel_mlx5_task *task);
	int (*cont)(struct accel_mlx5_task *task);
	void (*complete)(struct accel_mlx5_task *task);
};

struct accel_mlx5_psv_pool_iter_cb_args {
	struct accel_mlx5_dev_ctx *dev;
	struct spdk_rdma_utils_mem_map *map;
	int rc;
};

struct accel_mlx5_dump_stats_ctx {
	struct accel_mlx5_stats total;
	struct spdk_json_write_ctx *w;
	enum accel_mlx5_dump_state_level level;
	accel_mlx5_dump_stat_done_cb cb;
	void *ctx;
};

static struct accel_mlx5_module g_accel_mlx5;
static struct spdk_accel_driver g_accel_mlx5_driver;

static inline int accel_mlx5_execute_sequence(struct spdk_io_channel *ch,
		struct spdk_accel_sequence *seq);
static inline void accel_mlx5_task_complete(struct accel_mlx5_task *mlx5_task);

static inline void
accel_mlx5_iov_sgl_init(struct accel_mlx5_iov_sgl *s, struct iovec *iov, uint32_t iovcnt)
{
	s->iov = iov;
	s->iovcnt = iovcnt;
	s->iov_offset = 0;
}

static inline void
accel_mlx5_iov_sgl_advance(struct accel_mlx5_iov_sgl *s, uint32_t step)
{
	s->iov_offset += step;
	while (s->iovcnt > 0) {
		assert(s->iov != NULL);
		if (s->iov_offset < s->iov->iov_len) {
			break;
		}

		s->iov_offset -= s->iov->iov_len;
		s->iov++;
		s->iovcnt--;
	}
}

static inline void
accel_mlx5_iov_sgl_unwind(struct accel_mlx5_iov_sgl *s, uint32_t max_iovs, uint32_t step)
{
	SPDK_DEBUGLOG(accel_mlx5, "iov %p, iovcnt %u, max %u, offset %u, step %u\n", s->iov, s->iovcnt,
		      max_iovs, s->iov_offset, step);
	while (s->iovcnt <= max_iovs) {
		assert(s->iov != NULL);
		if (s->iov_offset >= step) {
			s->iov_offset -= step;
			SPDK_DEBUGLOG(accel_mlx5, "\tEND, iov %p, iovcnt %u, offset %u\n", s->iov, s->iovcnt,
				      s->iov_offset);
			return;
		}
		step -= s->iov_offset;
		s->iov--;
		s->iovcnt++;
		s->iov_offset = s->iov->iov_len;
		SPDK_DEBUGLOG(accel_mlx5, "\tiov %p, iovcnt %u, offset %u, step %u\n", s->iov, s->iovcnt,
			      s->iov_offset, step);
	}

	SPDK_ERRLOG("Can't unwind iovs, remaining %u\n", step);
	assert(0);
}

static inline int
accel_mlx5_sge_unwind(struct ibv_sge *sge, uint32_t sge_count, uint32_t step)
{
	int i;

	assert(sge_count > 0);
	SPDK_DEBUGLOG(accel_mlx5, "sge %p, count %u, step %u\n", sge, sge_count, step);
	for (i = (int)sge_count - 1; i >= 0; i--) {
		if (sge[i].length > step) {
			sge[i].length -= step;
			SPDK_DEBUGLOG(accel_mlx5, "\tsge[%u] len %u, step %u\n", i, sge[i].length, step);
			return (int)i + 1;
		}
		SPDK_DEBUGLOG(accel_mlx5, "\tsge[%u] len %u, step %u\n", i, sge[i].length, step);
		step -= sge[i].length;
	}

	SPDK_ERRLOG("Can't unwind sge, remaining %u\n", step);
	assert(step == 0);

	return 0;
}

static inline void
accel_mlx5_crypto_task_complete(struct accel_mlx5_task *task)
{
	struct accel_mlx5_dev *dev = task->qp->dev;

	assert(task->num_ops);
	spdk_mlx5_mkey_pool_put_bulk(dev->crypto_mkeys, task->mkeys, task->num_ops);
	spdk_accel_task_complete(&task->base, 0);
}

static inline void
accel_mlx5_task_fail(struct accel_mlx5_task *task, int rc)
{
	struct accel_mlx5_dev *dev = task->qp->dev;
	struct spdk_accel_task *next;
	struct spdk_accel_sequence *seq;
	bool driver_seq;

	assert(task->num_reqs == task->num_completed_reqs);
	SPDK_DEBUGLOG(accel_mlx5, "Fail task %p, opc %d, rc %d\n", task, task->base.op_code, rc);

	if (task->num_ops) {
		if (task->mlx5_opcode == ACCEL_MLX5_OPC_CRYPTO || task->mlx5_opcode == ACCEL_MLX5_OPC_CRYPTO_MKEY) {
			spdk_mlx5_mkey_pool_put_bulk(dev->crypto_mkeys, task->mkeys, task->num_ops);
		}
		if (task->mlx5_opcode == ACCEL_MLX5_OPC_CRC32C) {
			spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, task->mkeys, task->num_ops);
			spdk_mempool_put(dev->dev_ctx->psv_pool, task->psv);
		}
	}
	next = spdk_accel_sequence_next_task(&task->base);
	seq = task->base.seq;
	driver_seq = task->driver_seq;

	assert(task->num_reqs == task->num_completed_reqs);
	SPDK_DEBUGLOG(accel_mlx5, "Fail task %p, opc %d, rc %d\n", task, task->mlx5_opcode, rc);
	spdk_accel_task_complete(&task->base, rc);

	if (driver_seq) {
		struct spdk_io_channel *ch = task->qp->dev->ch;

		assert(seq);
		if (next) {
			accel_mlx5_execute_sequence(ch, seq);
		} else {
			spdk_accel_sequence_continue(seq);
		}
	}
}

static int
accel_mlx5_translate_addr(void *addr, size_t size, struct spdk_memory_domain *domain,
			  void *domain_ctx, struct accel_mlx5_dev *dev, struct ibv_sge *sge)
{
	struct spdk_rdma_utils_memory_translation map_translation;
	struct spdk_memory_domain_translation_result domain_translation;
	struct spdk_memory_domain_translation_ctx local_ctx;
	int rc;

	if (domain) {
		domain_translation.size = sizeof(struct spdk_memory_domain_translation_result);
		local_ctx.size = sizeof(local_ctx);
		local_ctx.rdma.ibv_qp = dev->qp.verbs_qp;
		rc = spdk_memory_domain_translate_data(domain, domain_ctx, dev->dev_ctx->domain,
						       &local_ctx, addr, size, &domain_translation);
		if (spdk_unlikely(rc || domain_translation.iov_count != 1)) {
			SPDK_ERRLOG("Memory domain translation failed, addr %p, length %zu, iovcnt %u\n", addr, size,
				    domain_translation.iov_count);
			if (rc == 0) {
				rc = -EINVAL;
			}

			return rc;
		}
		sge->lkey = domain_translation.rdma.lkey;
		sge->addr = (uint64_t) domain_translation.iov.iov_base;
		sge->length = domain_translation.iov.iov_len;
	} else {
		rc = spdk_rdma_utils_get_translation(dev->mmap, addr, size,
						     &map_translation);
		if (spdk_unlikely(rc)) {
			SPDK_ERRLOG("Memory translation failed, addr %p, length %zu\n", addr, size);
			return rc;
		}
		sge->lkey = spdk_rdma_utils_memory_translation_get_lkey(&map_translation);
		sge->addr = (uint64_t)addr;
		sge->length = size;
	}

	return 0;
}

static inline int
accel_mlx5_fill_block_sge(struct accel_mlx5_dev *dev, struct ibv_sge *sge,
			  struct accel_mlx5_iov_sgl *iovs, uint32_t len, uint32_t *_remaining,
			  struct spdk_memory_domain *domain, void *domain_ctx)
{
	void *addr;
	uint32_t remaining = len;
	uint32_t size;
	int i = 0;
	int rc;

	while (remaining && i < (int)ACCEL_MLX5_MAX_SGE) {
		size = spdk_min(remaining, iovs->iov->iov_len - iovs->iov_offset);
		addr = (void *)iovs->iov->iov_base + iovs->iov_offset;
		rc = accel_mlx5_translate_addr(addr, size, domain, domain_ctx, dev, &sge[i]);
		if (spdk_unlikely(rc)) {
			return rc;
		}
		SPDK_DEBUGLOG(accel_mlx5, "\t sge[%d]: lkey %u, len %u, addr %"PRIx64"\n", i, sge[i].lkey,
			      sge[i].length, sge[i].addr);
		accel_mlx5_iov_sgl_advance(iovs, size);
		i++;
		assert(remaining >= size);
		remaining -= size;
	}
	*_remaining = remaining;

	return i;
}

static inline bool
accel_mlx5_compare_iovs(struct iovec *v1, struct iovec *v2, uint32_t iovcnt)
{
	return memcmp(v1, v2, sizeof(*v1) * iovcnt) == 0;
}

static inline uint16_t
accel_mlx5_dev_get_available_slots(struct accel_mlx5_dev *dev, struct accel_mlx5_qp *qp)
{
	assert(qp->wrs_max >= qp->wrs_submitted);
	assert(dev->wrs_in_cq_max >= dev->wrs_in_cq);

	/* Each time we produce only 1 CQE, so we need 1 CQ slot */
	if (spdk_unlikely(dev->wrs_in_cq == dev->wrs_in_cq_max)) {
		return 0;
	}

	return qp->wrs_max - qp->wrs_submitted;
}

static inline uint32_t
accel_mlx5_task_alloc_mkeys(struct accel_mlx5_task *task, struct spdk_mlx5_mkey_pool *pool)
{
	uint32_t num_ops;
	int rc;

	assert(task->num_reqs > task->num_completed_reqs);
	num_ops = task->num_reqs - task->num_completed_reqs;
	num_ops = spdk_min(num_ops, ACCEL_MLX5_MAX_MKEYS_IN_TASK);
	if (!num_ops) {
		return 0;
	}
	rc = spdk_mlx5_mkey_pool_get_bulk(pool, task->mkeys, num_ops);
	if (spdk_unlikely(rc)) {
		return 0;
	}
	assert(num_ops <= UINT16_MAX);
	task->num_ops = num_ops;

	return num_ops;
}

static inline uint8_t
bs_to_bs_selector(uint32_t bs)
{
	switch (bs) {
	case 512:
		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_512;
	case 520:
		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_520;
	case 4096:
		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_4096;
	case 4160:
		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_4160;
	default:
		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_RESERVED;
	}
}

static inline int
accel_mlx5_configure_crypto_umr(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_sge *sge,
				uint32_t mkey, uint32_t num_blocks, struct spdk_mlx5_crypto_dek_data *dek_data,
				uint64_t wr_id, uint32_t flags)
{
	struct spdk_mlx5_umr_crypto_attr cattr;
	struct spdk_mlx5_umr_attr umr_attr;
	struct accel_mlx5_qp *qp = mlx5_task->qp;
	struct accel_mlx5_dev *dev = qp->dev;
	struct spdk_accel_task *task = &mlx5_task->base;
	uint32_t length, remaining = 0, block_size = task->block_size;
	int rc;

	length = num_blocks * block_size;
	SPDK_DEBUGLOG(accel_mlx5, "task %p, domain %p, len %u, blocks %u\n", task, task->src_domain, length,
		      num_blocks);
	rc = accel_mlx5_fill_block_sge(dev, sge->src_sge, &mlx5_task->src, length, &remaining,
				       task->src_domain, task->src_domain_ctx);
	if (spdk_unlikely(rc <= 0)) {
		if (rc == 0) {
			rc = -EINVAL;
		}
		SPDK_ERRLOG("failed set src sge, rc %d\n", rc);
		return rc;
	}
	sge->src_sge_count = rc;
	if (spdk_unlikely(remaining)) {
		uint32_t new_len = length - remaining;
		uint32_t aligned_len, updated_num_blocks;

		SPDK_DEBUGLOG(accel_mlx5, "Incorrect src iovs, handled %u out of %u bytes\n", new_len, length);
		if (new_len < block_size) {
			/* We need to process at least 1 block.
			 * If buffer is too fragmented, we can't do anything */
			return -ERANGE;
		}

		/* Regular integer division, we need to round down to prev block size */
		updated_num_blocks = new_len / block_size;
		assert(updated_num_blocks);
		assert(updated_num_blocks < num_blocks);
		aligned_len = updated_num_blocks * block_size;

		if (aligned_len < new_len) {
			uint32_t dt = new_len - aligned_len;

			/* We can't process part of block, need to unwind src iov_sgl and sge to the
			 * prev block boundary */
			SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind src sge for %u bytes\n", task, dt);
			accel_mlx5_iov_sgl_unwind(&mlx5_task->src, task->s.iovcnt, dt);
			sge->src_sge_count = accel_mlx5_sge_unwind(sge->src_sge, sge->src_sge_count, dt);
			if (!sge->src_sge_count) {
				return -ERANGE;
			}
		}
		SPDK_DEBUGLOG(accel_mlx5, "task %p, UMR len %u -> %u\n", task, length, aligned_len);
		length = aligned_len;
		num_blocks = updated_num_blocks;
	}

	cattr.xts_iv = task->iv + mlx5_task->num_processed_blocks;
	cattr.keytag = 0;
	cattr.dek_obj_id = dek_data->dek_obj_id;
	cattr.tweak_mode = dek_data->tweak_mode;
	cattr.enc_order = mlx5_task->enc_order;
	cattr.bs_selector = bs_to_bs_selector(mlx5_task->base.block_size);
	if (spdk_unlikely(cattr.bs_selector == SPDK_MLX5_BLOCK_SIZE_SELECTOR_RESERVED)) {
		SPDK_ERRLOG("unsupported block size %u\n", mlx5_task->base.block_size);
		return -EINVAL;
	}
	umr_attr.mkey = mkey;
	umr_attr.sge = sge->src_sge;

	if (!mlx5_task->inplace) {
		SPDK_DEBUGLOG(accel_mlx5, "task %p, dst sge, domain %p, len %u\n", task, task->dst_domain, length);
		rc = accel_mlx5_fill_block_sge(dev, sge->dst_sge, &mlx5_task->dst, length, &remaining,
					       task->dst_domain, task->dst_domain_ctx);
		if (spdk_unlikely(rc <= 0)) {
			if (rc == 0) {
				rc = -EINVAL;
			}
			SPDK_ERRLOG("failed set dst sge, rc %d\n", rc);
			return rc;
		}
		sge->dst_sge_count = rc;
		if (spdk_unlikely(remaining)) {
			uint32_t new_len = length - remaining;
			uint32_t aligned_len, updated_num_blocks, dt;

			SPDK_DEBUGLOG(accel_mlx5, "Incorrect dst iovs, handled %u out of %u bytes\n", new_len, length);
			if (new_len < block_size) {
				/* We need to process at least 1 block. If buffer is too fragmented, we can't do
				 * anything */
				return -ERANGE;
			}

			/* Regular integer division, we need to round down to prev block size */
			updated_num_blocks = new_len / block_size;
			assert(updated_num_blocks);
			assert(updated_num_blocks < num_blocks);
			aligned_len = updated_num_blocks * block_size;

			if (aligned_len < new_len) {
				dt = new_len - aligned_len;
				assert(dt > 0 && dt < length);
				/* We can't process part of block, need to unwind src and dst iov_sgl and sge to the
				 * prev block boundary */
				SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind dst sge for %u bytes\n", task, dt);
				accel_mlx5_iov_sgl_unwind(&mlx5_task->dst, task->d.iovcnt, dt);
				sge->dst_sge_count = accel_mlx5_sge_unwind(sge->dst_sge, sge->dst_sge_count, dt);
				assert(sge->dst_sge_count > 0 && sge->dst_sge_count <= ACCEL_MLX5_MAX_SGE);
				if (!sge->dst_sge_count) {
					return -ERANGE;
				}
			}
			assert(length > aligned_len);
			dt = length - aligned_len;
			SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind src sge for %u bytes\n", task, dt);
			/* The same for src iov_sgl and sge.
			 * In worst case we can unwind SRC 2 times */
			accel_mlx5_iov_sgl_unwind(&mlx5_task->src, task->s.iovcnt, dt);
			sge->src_sge_count = accel_mlx5_sge_unwind(sge->src_sge, sge->src_sge_count, dt);
			assert(sge->src_sge_count > 0 && sge->src_sge_count <= ACCEL_MLX5_MAX_SGE);
			if (!sge->src_sge_count) {
				return -ERANGE;
			}
			SPDK_DEBUGLOG(accel_mlx5, "task %p, UMR len %u -> %u\n", task, length, aligned_len);
			length = aligned_len;
			num_blocks = updated_num_blocks;
		}
	}

	SPDK_DEBUGLOG(accel_mlx5,
		      "task %p: bs %u, iv %"PRIu64", enc_on_tx %d, tweak_mode %d, len %u, mkey %x, blocks %u\n",
		      mlx5_task, task->block_size, cattr.xts_iv, mlx5_task->enc_order, cattr.tweak_mode, length, mkey,
		      num_blocks);

	umr_attr.sge_count = sge->src_sge_count;
	umr_attr.umr_len = length;
	assert((uint32_t)mlx5_task->num_processed_blocks + num_blocks <= UINT16_MAX);
	mlx5_task->num_processed_blocks += num_blocks;

	rc = spdk_mlx5_umr_configure_crypto(qp->qp, &umr_attr, &cattr, wr_id, flags);

	return rc;
}

static inline int
accel_mlx5_crypto_task_process(struct accel_mlx5_task *mlx5_task)
{
	struct accel_mlx5_sge sges[ACCEL_MLX5_MAX_MKEYS_IN_TASK];
	struct spdk_mlx5_crypto_dek_data dek_data;
	struct accel_mlx5_qp *qp = mlx5_task->qp;
	struct accel_mlx5_dev *dev = qp->dev;
	/* First RDMA after UMR must have a SMALL_FENCE */
	uint32_t first_rdma_fence = SPDK_MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE;
	uint16_t num_blocks;
	uint16_t num_ops = spdk_min(mlx5_task->num_reqs - mlx5_task->num_completed_reqs,
				    mlx5_task->num_ops);
	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
	uint16_t i;
	int rc;

	assert(qp_slot > 1);
	num_ops = spdk_min(num_ops, qp_slot >> 1);
	if (spdk_unlikely(!num_ops)) {
		return -EINVAL;
	}

	rc = spdk_mlx5_crypto_get_dek_data(mlx5_task->base.crypto_key->priv, dev->dev_ctx->pd, &dek_data);
	if (spdk_unlikely(rc)) {
		return rc;
	}

	mlx5_task->num_wrs = 0;
	SPDK_DEBUGLOG(accel_mlx5, "begin, task, %p, reqs: total %u, submitted %u, completed %u\n",
		      mlx5_task, mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs);
	for (i = 0; i < num_ops; i++) {
		if (mlx5_task->num_submitted_reqs + i + 1 == mlx5_task->num_reqs) {
			/* Last request may consume less than calculated if crypto_multi_block is true */
			assert(mlx5_task->num_blocks > mlx5_task->num_submitted_reqs);
			num_blocks = mlx5_task->num_blocks - mlx5_task->num_processed_blocks;
		} else {
			num_blocks = mlx5_task->blocks_per_req;
		}

		rc = accel_mlx5_configure_crypto_umr(mlx5_task, &sges[i], mlx5_task->mkeys[i]->mkey, num_blocks,
						     &dek_data, 0, 0);
		if (spdk_unlikely(rc)) {
			SPDK_ERRLOG("UMR configure failed with %d\n", rc);
			return rc;
		}
		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
		dev->stats.crypto_umrs++;
	}

	/* Loop `num_ops - 1` for easy flags handling */
	for (i = 0; i < num_ops - 1; i++) {
		/* UMR is used as a destination for RDMA_READ - from UMR to sge */
		if (mlx5_task->inplace) {
			rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].src_sge, sges[i].src_sge_count, 0,
						    mlx5_task->mkeys[i]->mkey, 0, first_rdma_fence);
		} else {
			rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].dst_sge, sges[i].dst_sge_count, 0,
						    mlx5_task->mkeys[i]->mkey, 0, first_rdma_fence);
		}
		if (spdk_unlikely(rc)) {
			SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc);
			return rc;
		}

		first_rdma_fence = 0;
		assert(mlx5_task->num_submitted_reqs < mlx5_task->num_reqs);
		assert(mlx5_task->num_submitted_reqs < UINT16_MAX);
		mlx5_task->num_submitted_reqs++;
		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
		dev->stats.rdma_reads++;
	}

	if (mlx5_task->inplace) {
		rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].src_sge, sges[i].src_sge_count, 0,
					    mlx5_task->mkeys[i]->mkey, (uint64_t)mlx5_task,
					    first_rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
	} else {
		rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].dst_sge, sges[i].dst_sge_count, 0,
					    mlx5_task->mkeys[i]->mkey, (uint64_t)mlx5_task,
					    first_rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
	}
	if (spdk_unlikely(rc)) {
		SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc);
		return rc;
	}

	assert(mlx5_task->num_submitted_reqs < mlx5_task->num_reqs);
	assert(mlx5_task->num_submitted_reqs < UINT16_MAX);
	mlx5_task->num_submitted_reqs++;
	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task);
	dev->stats.rdma_reads++;
	STAILQ_INSERT_TAIL(&qp->in_hw, mlx5_task, link);

	if (spdk_unlikely(mlx5_task->num_submitted_reqs == mlx5_task->num_reqs &&
			  mlx5_task->num_blocks > mlx5_task->num_processed_blocks)) {
		/* We hit "out of sge entries" case with highly fragmented payload. In that case
		 * accel_mlx5_configure_crypto_umr function handled fewer data blocks than expected.
		 * That means we need at least 1 more request to complete this task, this request will be
		 * executed once all submitted ones are completed */
		SPDK_DEBUGLOG(accel_mlx5, "task %p, processed %u/%u blocks, add extra req\n", mlx5_task,
			      mlx5_task->num_processed_blocks, mlx5_task->num_blocks);
		mlx5_task->num_reqs++;
	}

	SPDK_DEBUGLOG(accel_mlx5, "end, task, %p, reqs: total %u, submitted %u, completed %u\n", mlx5_task,
		      mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs);

	return 0;
}

static inline int
accel_mlx5_crypto_task_continue(struct accel_mlx5_task *task)
{
	struct accel_mlx5_qp *qp = task->qp;
	struct accel_mlx5_dev *dev = qp->dev;
	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);

	assert(task->num_reqs > task->num_completed_reqs);
	if (task->num_ops == 0) {
		/* No mkeys allocated, try to allocate now */
		if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(task, dev->crypto_mkeys))) {
			/* Pool is empty, queue this task */
			STAILQ_INSERT_TAIL(&dev->nomem, task, link);
			dev->stats.nomem_mkey++;
			return -ENOMEM;
		}
	}
	/* We need to post at least 1 UMR and 1 RDMA operation */
	if (spdk_unlikely(qp_slot < 2)) {
		/* QP is full, queue this task */
		STAILQ_INSERT_TAIL(&dev->nomem, task, link);
		task->qp->dev->stats.nomem_qdepth++;
		return -ENOMEM;
	}

	return accel_mlx5_crypto_task_process(task);
}

static inline int
accel_mlx5_crypto_task_init(struct accel_mlx5_task *mlx5_task)
{
	struct spdk_accel_task *task = &mlx5_task->base;
	struct accel_mlx5_dev *dev = mlx5_task->qp->dev;
	uint64_t src_nbytes = task->nbytes;
#ifdef DEBUG
	uint64_t dst_nbytes;
	uint32_t i;
#endif
	bool crypto_key_ok;

	crypto_key_ok = (task->crypto_key && task->crypto_key->module_if == &g_accel_mlx5.module &&
			 task->crypto_key->priv);
	if (spdk_unlikely((task->nbytes % mlx5_task->base.block_size != 0) || !crypto_key_ok)) {
		if (crypto_key_ok) {
			SPDK_ERRLOG("src length %"PRIu64" is not a multiple of the block size %u\n", task->nbytes,
				    mlx5_task->base.block_size);
		} else {
			SPDK_ERRLOG("Wrong crypto key provided\n");
		}
		return -EINVAL;
	}

	assert(src_nbytes / mlx5_task->base.block_size <= UINT16_MAX);
	mlx5_task->num_blocks = src_nbytes / mlx5_task->base.block_size;
	accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt);
	if (task->d.iovcnt == 0 || (task->d.iovcnt == task->s.iovcnt &&
				    accel_mlx5_compare_iovs(task->d.iovs, task->s.iovs, task->s.iovcnt))) {
		mlx5_task->inplace = 1;
	} else {
#ifdef DEBUG
		dst_nbytes = 0;
		for (i = 0; i < task->d.iovcnt; i++) {
			dst_nbytes += task->d.iovs[i].iov_len;
		}

		if (spdk_unlikely(src_nbytes != dst_nbytes)) {
			return -EINVAL;
		}
#endif
		mlx5_task->inplace = 0;
		accel_mlx5_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt);
	}

	if (dev->crypto_multi_block) {
		if (dev->crypto_split_blocks) {
			assert(SPDK_CEIL_DIV(mlx5_task->num_blocks, dev->crypto_split_blocks) <= UINT16_MAX);
			mlx5_task->num_reqs = SPDK_CEIL_DIV(mlx5_task->num_blocks, dev->crypto_split_blocks);
			/* Last req may consume less blocks */
			mlx5_task->blocks_per_req = spdk_min(mlx5_task->num_blocks, dev->crypto_split_blocks);
		} else {
			if (task->s.iovcnt > ACCEL_MLX5_MAX_SGE || task->d.iovcnt > ACCEL_MLX5_MAX_SGE) {
				uint32_t max_sge_count = spdk_max(task->s.iovcnt, task->d.iovcnt);

				assert(SPDK_CEIL_DIV(max_sge_count, ACCEL_MLX5_MAX_SGE) <= UINT16_MAX);
				mlx5_task->num_reqs = SPDK_CEIL_DIV(max_sge_count, ACCEL_MLX5_MAX_SGE);
				mlx5_task->blocks_per_req = SPDK_CEIL_DIV(mlx5_task->num_blocks, mlx5_task->num_reqs);
			} else {
				mlx5_task->num_reqs = 1;
				mlx5_task->blocks_per_req = mlx5_task->num_blocks;
			}
		}
	} else {
		mlx5_task->num_reqs = mlx5_task->num_blocks;
		mlx5_task->blocks_per_req = 1;
	}

	if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(mlx5_task, dev->crypto_mkeys))) {
		/* Pool is empty, queue this task */
		SPDK_DEBUGLOG(accel_mlx5, "no reqs in pool, dev %s\n", dev->dev_ctx->context->device->name);
		dev->stats.nomem_mkey++;
		return -ENOMEM;
	}
	if (spdk_unlikely(accel_mlx5_dev_get_available_slots(dev, &dev->qp) < 2)) {
		/* Queue is full, queue this task */
		SPDK_DEBUGLOG(accel_mlx5, "dev %s qp %p is full\n", dev->dev_ctx->context->device->name,
			      mlx5_task->qp);
		dev->stats.nomem_qdepth++;
		return -ENOMEM;
	}

	SPDK_DEBUGLOG(accel_mlx5, "task %p, src_iovs %u, dst_iovs %u, num_reqs %u, "
		      "blocks/req %u, blocks %u, inplace %d\n", task, task->s.iovcnt, task->d.iovcnt,
		      mlx5_task->num_reqs, mlx5_task->blocks_per_req, mlx5_task->num_blocks, mlx5_task->inplace);

	return 0;
}

static inline void
accel_mlx5_copy_task_complete(struct accel_mlx5_task *mlx5_task)
{
	spdk_accel_task_complete(&mlx5_task->base, 0);
}

static inline int
accel_mlx5_copy_task_process_one(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_qp *qp,
				 uint64_t wrid, uint32_t fence)
{
	struct spdk_accel_task *task = &mlx5_task->base;
	struct accel_mlx5_sge sge;
	uint32_t remaining = 0;
	uint32_t dst_len;
	int rc;

	/* Limit one RDMA_WRITE by length of dst buffer. Not all src buffers may fit into one dst buffer due to
	 * limitation on ACCEL_MLX5_MAX_SGE.
	 * If this is the case then remaining is not zero */
	assert(mlx5_task->dst.iov->iov_len > mlx5_task->dst.iov_offset);
	dst_len = mlx5_task->dst.iov->iov_len - mlx5_task->dst.iov_offset;
	rc = accel_mlx5_fill_block_sge(qp->dev, sge.src_sge, &mlx5_task->src, dst_len, &remaining,
				       task->src_domain, task->src_domain_ctx);
	if (spdk_unlikely(rc <= 0)) {
		if (rc == 0) {
			rc = -EINVAL;
		}
		SPDK_ERRLOG("failed set src sge, rc %d\n", rc);
		return rc;
	}
	sge.src_sge_count = rc;
	assert(dst_len > remaining);
	dst_len -= remaining;

	rc = accel_mlx5_fill_block_sge(qp->dev, sge.dst_sge, &mlx5_task->dst, dst_len, &remaining,
				       task->dst_domain, task->dst_domain_ctx);
	if (spdk_unlikely(rc != 1)) {
		/* We use single dst entry, any result other than 1 is an error */
		if (rc == 0) {
			rc = -EINVAL;
		}
		SPDK_ERRLOG("failed set dst sge, rc %d\n", rc);
		return rc;
	}
	if (spdk_unlikely(remaining)) {
		SPDK_ERRLOG("Incorrect dst length, remaining %u\n", remaining);
		assert(0);
		return -EINVAL;
	}

	rc = spdk_mlx5_qp_rdma_write(mlx5_task->qp->qp, sge.src_sge, sge.src_sge_count,
				     sge.dst_sge[0].addr, sge.dst_sge[0].lkey, wrid, fence);
	if (spdk_unlikely(rc)) {
		SPDK_ERRLOG("new RDMA WRITE failed with %d\n", rc);
		return rc;
	}
	qp->dev->stats.rdma_writes++;

	return 0;
}

static inline int
accel_mlx5_copy_task_process(struct accel_mlx5_task *mlx5_task)
{

	struct accel_mlx5_qp *qp = mlx5_task->qp;
	struct accel_mlx5_dev *dev = qp->dev;
	uint16_t i;
	int rc;

	mlx5_task->num_wrs = 0;
	assert(mlx5_task->num_reqs > 0);
	assert(mlx5_task->num_ops > 0);

	/* Handle n-1 reqs in order to simplify wrid and fence handling */
	for (i = 0; i < mlx5_task->num_ops - 1; i++) {
		rc = accel_mlx5_copy_task_process_one(mlx5_task, qp, 0, 0);
		if (spdk_unlikely(rc)) {
			return rc;
		}
		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
		mlx5_task->num_submitted_reqs++;
	}

	rc = accel_mlx5_copy_task_process_one(mlx5_task, qp, (uint64_t)mlx5_task,
					      SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
	if (spdk_unlikely(rc)) {
		return rc;
	}
	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task);
	mlx5_task->num_submitted_reqs++;
	STAILQ_INSERT_TAIL(&qp->in_hw, mlx5_task, link);

	SPDK_DEBUGLOG(accel_mlx5, "end, copy task, %p\n", mlx5_task);

	return 0;
}

static inline int
accel_mlx5_copy_task_continue(struct accel_mlx5_task *task)
{
	struct accel_mlx5_qp *qp = task->qp;
	struct accel_mlx5_dev *dev = qp->dev;
	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);

	task->num_ops = spdk_min(qp_slot, task->num_reqs - task->num_completed_reqs);
	if (spdk_unlikely(task->num_ops == 0)) {
		STAILQ_INSERT_TAIL(&dev->nomem, task, link);
		dev->stats.nomem_qdepth++;
		return -ENOMEM;
	}
	return accel_mlx5_copy_task_process(task);
}

static inline uint32_t
accel_mlx5_get_copy_task_count(struct iovec *src_iov, uint32_t src_iovcnt,
			       struct iovec *dst_iov, uint32_t dst_iovcnt)
{
	uint32_t src = 0;
	uint32_t dst = 0;
	uint64_t src_offset = 0;
	uint64_t dst_offset = 0;
	uint32_t num_ops = 0;
	uint32_t src_sge_count = 0;

	while (src < src_iovcnt && dst < dst_iovcnt) {
		uint64_t src_len = src_iov[src].iov_len - src_offset;
		uint64_t dst_len = dst_iov[dst].iov_len - dst_offset;

		if (dst_len < src_len) {
			dst_offset = 0;
			src_offset += dst_len;
			dst++;
			num_ops++;
			src_sge_count = 0;
		} else if (src_len < dst_len) {
			dst_offset += src_len;
			src_offset = 0;
			src++;
			if (++src_sge_count >= ACCEL_MLX5_MAX_SGE) {
				num_ops++;
				src_sge_count = 0;
			}
		} else {
			dst_offset = 0;
			src_offset = 0;
			dst++;
			src++;
			num_ops++;
			src_sge_count = 0;
		}
	}

	assert(src == src_iovcnt);
	assert(dst == dst_iovcnt);
	assert(src_offset == 0);
	assert(dst_offset == 0);
	return num_ops;
}

static inline int
accel_mlx5_copy_task_init(struct accel_mlx5_task *mlx5_task)
{
	struct spdk_accel_task *task = &mlx5_task->base;
	struct accel_mlx5_qp *qp = mlx5_task->qp;
	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(qp->dev, qp);

	if (spdk_likely(task->s.iovcnt <= ACCEL_MLX5_MAX_SGE)) {
		mlx5_task->num_reqs = task->d.iovcnt;
	} else if (task->d.iovcnt == 1) {
		mlx5_task->num_reqs = SPDK_CEIL_DIV(task->s.iovcnt, ACCEL_MLX5_MAX_SGE);
	} else {
		mlx5_task->num_reqs = accel_mlx5_get_copy_task_count(task->s.iovs, task->s.iovcnt,
				      task->d.iovs, task->d.iovcnt);
	}
	mlx5_task->inplace = 0;
	accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt);
	accel_mlx5_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt);
	mlx5_task->num_ops = spdk_min(qp_slot, mlx5_task->num_reqs);
	if (spdk_unlikely(!mlx5_task->num_ops)) {
		qp->dev->stats.nomem_qdepth++;
		return -ENOMEM;
	}
	SPDK_DEBUGLOG(accel_mlx5, "copy task num_reqs %u, num_ops %u\n", mlx5_task->num_reqs,
		      mlx5_task->num_ops);

	return 0;
}

static inline uint32_t
accel_mlx5_advance_iovec(struct iovec *iov, uint32_t iovcnt, size_t *iov_offset, size_t *len)
{
	uint32_t i;
	size_t iov_len;

	for (i = 0; *len != 0 && i < iovcnt; i++) {
		iov_len = iov[i].iov_len - *iov_offset;

		if (iov_len < *len) {
			*iov_offset = 0;
			*len -= iov_len;
			continue;
		}
		if (iov_len == *len) {
			*iov_offset = 0;
			i++;
		} else { /* iov_len > *len */
			*iov_offset += *len;
		}
		*len = 0;
		break;
	}

	return i;
}

static inline void
accel_mlx5_crc_task_complete(struct accel_mlx5_task *mlx5_task)
{
	struct accel_mlx5_dev *dev = mlx5_task->qp->dev;

	*mlx5_task->base.crc_dst = mlx5_task->psv->crc ^ UINT32_MAX;
	/* Normal task completion without allocated mkeys is not possible */
	assert(mlx5_task->num_ops);
	spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, mlx5_task->mkeys, mlx5_task->num_ops);
	spdk_mempool_put(dev->dev_ctx->psv_pool, mlx5_task->psv);
	spdk_accel_task_complete(&mlx5_task->base, 0);
}

static inline int
accel_mlx5_crc_task_configure_umr(struct accel_mlx5_task *mlx5_task, struct ibv_sge *sge,
				  uint32_t sge_count, struct spdk_mlx5_mkey_pool_obj *mkey,
				  enum spdk_mlx5_umr_sig_domain sig_domain, uint32_t umr_len,
				  bool sig_init, bool sig_check_gen)
{
	struct spdk_mlx5_umr_sig_attr sattr = {
		.seed = mlx5_task->base.seed ^ UINT32_MAX,
		.psv_index = mlx5_task->psv->psv_index,
		.domain = sig_domain,
		.sigerr_count = mkey->sig.sigerr_count,
		.raw_data_size = umr_len,
		.init = sig_init,
		.check_gen = sig_check_gen,
	};
	struct spdk_mlx5_umr_attr umr_attr = {
		.mkey = mkey->mkey,
		.umr_len = umr_len,
		.sge_count = sge_count,
		.sge = sge,
	};

	return spdk_mlx5_umr_configure_sig(mlx5_task->qp->qp, &umr_attr, &sattr, 0, 0);
}

static inline int
accel_mlx5_crc_task_fill_sge(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_sge *sge)
{
	struct spdk_accel_task *task = &mlx5_task->base;
	struct accel_mlx5_qp *qp = mlx5_task->qp;
	struct accel_mlx5_dev *dev = qp->dev;
	uint32_t remaining;
	int rc;

	rc = accel_mlx5_fill_block_sge(dev, sge->src_sge, &mlx5_task->src, task->nbytes, &remaining,
				       task->src_domain, task->src_domain_ctx);
	if (spdk_unlikely(rc <= 0)) {
		if (rc == 0) {
			rc = -EINVAL;
		}
		SPDK_ERRLOG("failed set src sge, rc %d\n", rc);
		return rc;
	}
	assert(remaining == 0);
	sge->src_sge_count = rc;

	if (!mlx5_task->inplace) {
		rc = accel_mlx5_fill_block_sge(dev, sge->dst_sge, &mlx5_task->dst, task->nbytes, &remaining,
					       task->dst_domain, task->dst_domain_ctx);
		if (spdk_unlikely(rc <= 0)) {
			if (rc == 0) {
				rc = -EINVAL;
			}
			SPDK_ERRLOG("failed set dst sge, rc %d\n", rc);
			return rc;
		}
		assert(remaining == 0);
		sge->dst_sge_count = rc;
	}

	return 0;
}

static inline int
accel_mlx5_crc_task_process_one_req(struct accel_mlx5_task *mlx5_task)
{
	struct accel_mlx5_sge sges;
	struct accel_mlx5_qp *qp = mlx5_task->qp;
	struct accel_mlx5_dev *dev = qp->dev;
	uint32_t num_ops = spdk_min(mlx5_task->num_reqs - mlx5_task->num_completed_reqs,
				    mlx5_task->num_ops);
	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
	uint32_t rdma_fence = SPDK_MLX5_WQE_CTRL_STRONG_ORDERING;
	struct ibv_sge *sge;
	int rc;
	uint16_t sge_count;

	num_ops = spdk_min(num_ops, qp_slot >> 1);
	if (spdk_unlikely(!num_ops)) {
		return -EINVAL;
	}

	mlx5_task->num_wrs = 0;
	/* At this moment we have as many requests as can be submitted to a qp */
	rc = accel_mlx5_crc_task_fill_sge(mlx5_task, &sges);
	if (spdk_unlikely(rc)) {
		return rc;
	}
	rc = accel_mlx5_crc_task_configure_umr(mlx5_task, sges.src_sge, sges.src_sge_count,
					       mlx5_task->mkeys[0], SPDK_MLX5_UMR_SIG_DOMAIN_WIRE,
					       mlx5_task->base.nbytes, true, true);
	if (spdk_unlikely(rc)) {
		SPDK_ERRLOG("UMR configure failed with %d\n", rc);
		return rc;
	}
	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
	dev->stats.sig_umrs++;

	if (mlx5_task->inplace) {
		sge = sges.src_sge;
		sge_count = sges.src_sge_count;
	} else {
		sge = sges.dst_sge;
		sge_count = sges.dst_sge_count;
	}

	/*
	 * Add the crc destination to the end of sges. A free entry must be available for CRC
	 * because the task init function reserved it.
	 */
	assert(sge_count < ACCEL_MLX5_MAX_SGE);
	sge[sge_count].lkey = mlx5_task->psv->crc_lkey;
	sge[sge_count].addr = (uintptr_t)&mlx5_task->psv->crc;
	sge[sge_count++].length = sizeof(uint32_t);

	if (spdk_unlikely(mlx5_task->psv->bits.error)) {
		rc = spdk_mlx5_qp_set_psv(qp->qp, mlx5_task->psv->psv_index, *mlx5_task->base.crc_dst, 0, 0);
		if (spdk_unlikely(rc)) {
			SPDK_ERRLOG("SET_PSV failed with %d\n", rc);
			return rc;
		}
		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
	}

	rc = spdk_mlx5_qp_rdma_read(qp->qp, sge, sge_count, 0, mlx5_task->mkeys[0]->mkey,
				    (uint64_t)mlx5_task, rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
	if (spdk_unlikely(rc)) {
		SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc);
		return rc;
	}
	mlx5_task->num_submitted_reqs++;
	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task);
	dev->stats.rdma_reads++;

	return 0;
}

static inline int
accel_mlx5_crc_task_fill_umr_sge(struct accel_mlx5_qp *qp, struct ibv_sge *sge,
				 struct accel_mlx5_iov_sgl *umr_iovs, struct spdk_memory_domain *domain,
				 void *domain_ctx, struct accel_mlx5_iov_sgl *rdma_iovs, size_t *len)
{
	int umr_idx = 0;
	int rdma_idx = 0;
	int umr_iovcnt = spdk_min(umr_iovs->iovcnt, (int)ACCEL_MLX5_MAX_SGE);
	int rdma_iovcnt = spdk_min(rdma_iovs->iovcnt, (int)ACCEL_MLX5_MAX_SGE);
	size_t umr_iov_offset;
	size_t rdma_iov_offset;
	size_t umr_len = 0;
	void *sge_addr;
	size_t sge_len;
	size_t umr_sge_len;
	size_t rdma_sge_len;
	int rc;

	umr_iov_offset = umr_iovs->iov_offset;
	rdma_iov_offset = rdma_iovs->iov_offset;

	while (umr_idx < umr_iovcnt && rdma_idx < rdma_iovcnt) {
		umr_sge_len = umr_iovs->iov[umr_idx].iov_len - umr_iov_offset;
		rdma_sge_len = rdma_iovs->iov[rdma_idx].iov_len - rdma_iov_offset;
		sge_addr = umr_iovs->iov[umr_idx].iov_base + umr_iov_offset;

		if (umr_sge_len == rdma_sge_len) {
			rdma_idx++;
			umr_iov_offset = 0;
			rdma_iov_offset = 0;
			sge_len = umr_sge_len;
		} else if (umr_sge_len < rdma_sge_len) {
			umr_iov_offset = 0;
			rdma_iov_offset += umr_sge_len;
			sge_len = umr_sge_len;
		} else {
			size_t remaining;

			remaining = umr_sge_len - rdma_sge_len;
			while (remaining) {
				rdma_idx++;
				if (rdma_idx == (int)ACCEL_MLX5_MAX_SGE) {
					break;
				}
				rdma_sge_len = rdma_iovs->iov[rdma_idx].iov_len;
				if (remaining == rdma_sge_len) {
					rdma_idx++;
					rdma_iov_offset = 0;
					umr_iov_offset = 0;
					remaining = 0;
					break;
				}
				if (remaining < rdma_sge_len) {
					rdma_iov_offset = remaining;
					umr_iov_offset = 0;
					remaining = 0;
					break;
				}
				remaining -= rdma_sge_len;
			}
			sge_len = umr_sge_len - remaining;
		}
		rc = accel_mlx5_translate_addr(sge_addr, sge_len, domain, domain_ctx, qp->dev, &sge[umr_idx]);
		if (spdk_unlikely(rc)) {
			return -EINVAL;
		}
		SPDK_DEBUGLOG(accel_mlx5, "\t sge[%d] lkey %u, addr %p, len %u\n", umr_idx, sge[umr_idx].lkey,
			      (void *)sge[umr_idx].addr, sge[umr_idx].length);
		umr_len += sge_len;
		umr_idx++;
	}
	accel_mlx5_iov_sgl_advance(umr_iovs, umr_len);
	accel_mlx5_iov_sgl_advance(rdma_iovs, umr_len);
	*len = umr_len;

	return umr_idx;
}

static inline int
accel_mlx5_crc_task_process_multi_req(struct accel_mlx5_task *mlx5_task)
{
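	/*
	 * Multi-request CRC32C: the payload is covered by several UMRs, each backed by
	 * its own signature mkey. umr_len[i] remembers how many bytes the i-th UMR
	 * covers so that the paired RDMA_READ below uses exactly the same length. The
	 * last RDMA_READ of the batch requests a CQE and, when it is also the final
	 * request of the task, carries the CRC destination SGE.
	 */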
	size_t umr_len[ACCEL_MLX5_MAX_MKEYS_IN_TASK];
	struct ibv_sge sges[ACCEL_MLX5_MAX_SGE];
	struct spdk_accel_task *task = &mlx5_task->base;
	struct accel_mlx5_qp *qp = mlx5_task->qp;
	struct accel_mlx5_dev *dev = qp->dev;
	struct accel_mlx5_iov_sgl umr_sgl;
	struct accel_mlx5_iov_sgl *umr_sgl_ptr;
	struct accel_mlx5_iov_sgl rdma_sgl;
	uint64_t umr_offset;
	uint32_t rdma_fence = SPDK_MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE;
	int sge_count;
	uint32_t remaining;
	int rc;
	uint16_t i;
	uint16_t num_ops = spdk_min(mlx5_task->num_reqs - mlx5_task->num_completed_reqs,
				    mlx5_task->num_ops);
	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
	bool sig_init, sig_check_gen = false;

	num_ops = spdk_min(num_ops, qp_slot >> 1);
	if (spdk_unlikely(!num_ops)) {
		return -EINVAL;
	}
	/* Init signature on the first UMR */
	sig_init = !mlx5_task->num_submitted_reqs;

	/*
	 * accel_mlx5_crc_task_fill_umr_sge() and accel_mlx5_fill_block_sge() advance an IOV during iteration
	 * on it. We must copy accel_mlx5_iov_sgl to iterate twice or more on the same IOV.
	 *
	 * In the in-place case, we iterate on the source IOV three times. That's why we need two copies of
	 * the source accel_mlx5_iov_sgl.
	 *
	 * In the out-of-place case, we iterate on the source IOV once and on the destination IOV two times.
	 * So, we need one copy of the destination accel_mlx5_iov_sgl.
	 */
	if (mlx5_task->inplace) {
		accel_mlx5_iov_sgl_init(&umr_sgl, mlx5_task->src.iov, mlx5_task->src.iovcnt);
		umr_sgl_ptr = &umr_sgl;
		accel_mlx5_iov_sgl_init(&rdma_sgl, mlx5_task->src.iov, mlx5_task->src.iovcnt);
	} else {
		umr_sgl_ptr = &mlx5_task->src;
		accel_mlx5_iov_sgl_init(&rdma_sgl, mlx5_task->dst.iov, mlx5_task->dst.iovcnt);
	}
	mlx5_task->num_wrs = 0;
	for (i = 0; i < num_ops; i++) {
		/*
		 * The last request may have only CRC. Skip UMR in this case because the MKey from
		 * the previous request is used.
		 */
		if (umr_sgl_ptr->iovcnt == 0) {
			assert((mlx5_task->num_completed_reqs + i + 1) == mlx5_task->num_reqs);
			break;
		}
		sge_count = accel_mlx5_crc_task_fill_umr_sge(qp, sges, umr_sgl_ptr, task->src_domain,
				task->src_domain_ctx, &rdma_sgl, &umr_len[i]);
		if (spdk_unlikely(sge_count <= 0)) {
			rc = (sge_count == 0) ? -EINVAL : sge_count;
			SPDK_ERRLOG("failed set UMR sge, rc %d\n", rc);
			return rc;
		}
		if (umr_sgl_ptr->iovcnt == 0) {
			/*
			 * We post RDMA without UMR if the last request has only CRC. We use an MKey from
			 * the last UMR in this case. Since the last request can be postponed to the next
			 * call of this function, we must save the MKey to the task structure.
			 */
			mlx5_task->last_umr_len = umr_len[i];
			mlx5_task->last_mkey_idx = i;
			sig_check_gen = true;
		}
		rc = accel_mlx5_crc_task_configure_umr(mlx5_task, sges, sge_count, mlx5_task->mkeys[i],
						       SPDK_MLX5_UMR_SIG_DOMAIN_WIRE, umr_len[i], sig_init,
						       sig_check_gen);
		if (spdk_unlikely(rc)) {
			SPDK_ERRLOG("UMR configure failed with %d\n", rc);
			return rc;
		}
		sig_init = false;
		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
		dev->stats.sig_umrs++;
	}

	if (spdk_unlikely(mlx5_task->psv->bits.error)) {
		rc = spdk_mlx5_qp_set_psv(qp->qp, mlx5_task->psv->psv_index, *mlx5_task->base.crc_dst, 0, 0);
		if (spdk_unlikely(rc)) {
			SPDK_ERRLOG("SET_PSV failed with %d\n", rc);
			return rc;
		}
		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
	}

	for (i = 0; i < num_ops - 1; i++) {
		if (mlx5_task->inplace) {
			sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->src, umr_len[i], &remaining,
							      task->src_domain, task->src_domain_ctx);
		} else {
			sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->dst, umr_len[i], &remaining,
							      task->dst_domain, task->dst_domain_ctx);
		}
		if (spdk_unlikely(sge_count <= 0)) {
			rc = (sge_count == 0) ? -EINVAL : sge_count;
			SPDK_ERRLOG("failed set RDMA sge, rc %d\n", rc);
			return rc;
		}
		rc = spdk_mlx5_qp_rdma_read(qp->qp, sges, sge_count, 0, mlx5_task->mkeys[i]->mkey,
					    0, rdma_fence);
		if (spdk_unlikely(rc)) {
			SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc);
			return rc;
		}
		mlx5_task->num_submitted_reqs++;
		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
		dev->stats.rdma_reads++;
		rdma_fence = SPDK_MLX5_WQE_CTRL_STRONG_ORDERING;
	}
	if ((mlx5_task->inplace && mlx5_task->src.iovcnt == 0) ||
	    (!mlx5_task->inplace && mlx5_task->dst.iovcnt == 0)) {
		/*
		 * The last RDMA does not have any data, only CRC. It also does not have a paired Mkey.
		 * The CRC is handled in the previous MKey in this case.
		 */
		sge_count = 0;
		umr_offset = mlx5_task->last_umr_len;
	} else {
		umr_offset = 0;
		mlx5_task->last_mkey_idx = i;
		if (mlx5_task->inplace) {
			sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->src, umr_len[i], &remaining,
							      task->src_domain, task->src_domain_ctx);
		} else {
			sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->dst, umr_len[i], &remaining,
							      task->dst_domain, task->dst_domain_ctx);
		}
		if (spdk_unlikely(sge_count <= 0)) {
			rc = (sge_count == 0) ? -EINVAL : sge_count;
			SPDK_ERRLOG("failed set RDMA sge, rc %d\n", rc);
			return rc;
		}
		assert(remaining == 0);
	}
	if ((mlx5_task->num_completed_reqs + i + 1) == mlx5_task->num_reqs) {
		/* Ensure that there is a free sge for the CRC destination. */
		assert(sge_count < (int)ACCEL_MLX5_MAX_SGE);
		/* Add the crc destination to the end of sges.
		 */
		sges[sge_count].lkey = mlx5_task->psv->crc_lkey;
		sges[sge_count].addr = (uintptr_t)&mlx5_task->psv->crc;
		sges[sge_count++].length = sizeof(uint32_t);
	}
	rdma_fence |= SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE;
	rc = spdk_mlx5_qp_rdma_read(qp->qp, sges, sge_count, umr_offset,
				    mlx5_task->mkeys[mlx5_task->last_mkey_idx]->mkey,
				    (uint64_t)mlx5_task, rdma_fence);
	if (spdk_unlikely(rc)) {
		SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc);
		return rc;
	}
	mlx5_task->num_submitted_reqs++;
	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task);
	dev->stats.rdma_reads++;

	return 0;
}

static inline int
accel_mlx5_crc_task_process(struct accel_mlx5_task *mlx5_task)
{
	int rc;

	assert(mlx5_task->mlx5_opcode == ACCEL_MLX5_OPC_CRC32C);

	SPDK_DEBUGLOG(accel_mlx5, "begin, crc task, %p, reqs: total %u, submitted %u, completed %u\n",
		      mlx5_task, mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs);

	if (mlx5_task->num_reqs == 1) {
		rc = accel_mlx5_crc_task_process_one_req(mlx5_task);
	} else {
		rc = accel_mlx5_crc_task_process_multi_req(mlx5_task);
	}

	if (rc == 0) {
		STAILQ_INSERT_TAIL(&mlx5_task->qp->in_hw, mlx5_task, link);
		SPDK_DEBUGLOG(accel_mlx5, "end, crc task, %p, reqs: total %u, submitted %u, completed %u\n",
			      mlx5_task, mlx5_task->num_reqs, mlx5_task->num_submitted_reqs,
			      mlx5_task->num_completed_reqs);
	}

	return rc;
}

static inline int
accel_mlx5_task_alloc_crc_ctx(struct accel_mlx5_task *task, uint32_t qp_slot)
{
	struct accel_mlx5_qp *qp = task->qp;
	struct accel_mlx5_dev *dev = qp->dev;

	if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(task, dev->sig_mkeys))) {
		SPDK_DEBUGLOG(accel_mlx5, "no mkeys in signature mkey pool, dev %s\n",
			      dev->dev_ctx->context->device->name);
		dev->stats.nomem_mkey++;
		return -ENOMEM;
	}
	task->psv = spdk_mempool_get(dev->dev_ctx->psv_pool);
	if (spdk_unlikely(!task->psv)) {
		SPDK_DEBUGLOG(accel_mlx5, "no reqs in psv pool, dev %s\n", dev->dev_ctx->context->device->name);
		spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, task->mkeys, task->num_ops);
		task->num_ops = 0;
		dev->stats.nomem_mkey++;
		return -ENOMEM;
	}
	/* One extra slot is needed for SET_PSV WQE to reset the error state in PSV. */
	if (spdk_unlikely(task->psv->bits.error)) {
		uint32_t n_slots = task->num_ops * 2 + 1;

		if (qp_slot < n_slots) {
			spdk_mempool_put(dev->dev_ctx->psv_pool, task->psv);
			spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, task->mkeys, task->num_ops);
			dev->stats.nomem_qdepth++;
			task->num_ops = 0;
			return -ENOMEM;
		}
	}

	return 0;
}

static inline int
accel_mlx5_crc_task_continue(struct accel_mlx5_task *task)
{
	struct accel_mlx5_qp *qp = task->qp;
	struct accel_mlx5_dev *dev = qp->dev;
	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
	int rc;

	assert(task->num_reqs > task->num_completed_reqs);
	if (task->num_ops == 0) {
		/* No mkeys allocated, try to allocate now.
		 */
		rc = accel_mlx5_task_alloc_crc_ctx(task, qp_slot);
		if (spdk_unlikely(rc)) {
			STAILQ_INSERT_TAIL(&dev->nomem, task, link);
			return -ENOMEM;
		}
	}
	/* We need to post at least 1 UMR and 1 RDMA operation */
	if (spdk_unlikely(qp_slot < 2)) {
		STAILQ_INSERT_TAIL(&dev->nomem, task, link);
		dev->stats.nomem_qdepth++;
		return -ENOMEM;
	}

	return accel_mlx5_crc_task_process(task);
}

static inline uint32_t
accel_mlx5_get_crc_task_count(struct iovec *src_iov, uint32_t src_iovcnt, struct iovec *dst_iov,
			      uint32_t dst_iovcnt)
{
	uint32_t src_idx = 0;
	uint32_t dst_idx = 0;
	uint32_t num_ops = 1;
	uint32_t num_src_sge = 1;
	uint32_t num_dst_sge = 1;
	size_t src_offset = 0;
	size_t dst_offset = 0;
	uint32_t num_sge;
	size_t src_len;
	size_t dst_len;

	/* One operation is enough if both iovs fit into ACCEL_MLX5_MAX_SGE. One SGE is reserved for CRC on dst_iov. */
	if (src_iovcnt <= ACCEL_MLX5_MAX_SGE && (dst_iovcnt + 1) <= ACCEL_MLX5_MAX_SGE) {
		return 1;
	}

	while (src_idx < src_iovcnt && dst_idx < dst_iovcnt) {
		if (num_src_sge > ACCEL_MLX5_MAX_SGE || num_dst_sge > ACCEL_MLX5_MAX_SGE) {
			num_ops++;
			num_src_sge = 1;
			num_dst_sge = 1;
		}
		src_len = src_iov[src_idx].iov_len - src_offset;
		dst_len = dst_iov[dst_idx].iov_len - dst_offset;

		if (src_len == dst_len) {
			num_src_sge++;
			num_dst_sge++;
			src_offset = 0;
			dst_offset = 0;
			src_idx++;
			dst_idx++;
			continue;
		}
		if (src_len < dst_len) {
			/* Advance src_iov to reach the point that corresponds to the end of the current dst_iov. */
			num_sge = accel_mlx5_advance_iovec(&src_iov[src_idx],
							   spdk_min(ACCEL_MLX5_MAX_SGE + 1 - num_src_sge,
								    src_iovcnt - src_idx),
							   &src_offset, &dst_len);
			src_idx += num_sge;
			num_src_sge += num_sge;
			if (dst_len != 0) {
				/*
				 * ACCEL_MLX5_MAX_SGE is reached on src_iov, and dst_len bytes
				 * are left on the current dst_iov.
				 */
				dst_offset = dst_iov[dst_idx].iov_len - dst_len;
			} else {
				/* The src_iov advance is completed, shift to the next dst_iov. */
				dst_idx++;
				num_dst_sge++;
				dst_offset = 0;
			}
		} else { /* src_len > dst_len */
			/* Advance dst_iov to reach the point that corresponds to the end of the current src_iov. */
			num_sge = accel_mlx5_advance_iovec(&dst_iov[dst_idx],
							   spdk_min(ACCEL_MLX5_MAX_SGE + 1 - num_dst_sge,
								    dst_iovcnt - dst_idx),
							   &dst_offset, &src_len);
			dst_idx += num_sge;
			num_dst_sge += num_sge;
			if (src_len != 0) {
				/*
				 * ACCEL_MLX5_MAX_SGE is reached on dst_iov, and src_len bytes
				 * are left on the current src_iov.
				 */
				src_offset = src_iov[src_idx].iov_len - src_len;
			} else {
				/* The dst_iov advance is completed, shift to the next src_iov. */
				src_idx++;
				num_src_sge++;
				src_offset = 0;
			}
		}
	}
	/* An extra operation is needed if no space is left on dst_iov because CRC takes one SGE. */
	if (num_dst_sge > ACCEL_MLX5_MAX_SGE) {
		num_ops++;
	}

	/* The above loop must reach the end of both iovs simultaneously because their size is the same.
	 */
	assert(src_idx == src_iovcnt);
	assert(dst_idx == dst_iovcnt);
	assert(src_offset == 0);
	assert(dst_offset == 0);

	return num_ops;
}

static inline int
accel_mlx5_crc_task_init(struct accel_mlx5_task *mlx5_task)
{
	struct spdk_accel_task *task = &mlx5_task->base;
	struct accel_mlx5_qp *qp = mlx5_task->qp;
	uint32_t qp_slot = accel_mlx5_dev_get_available_slots(qp->dev, qp);
	int rc;

	accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt);
	if (mlx5_task->inplace) {
		/* One entry is reserved for CRC */
		mlx5_task->num_reqs = SPDK_CEIL_DIV(mlx5_task->src.iovcnt + 1, ACCEL_MLX5_MAX_SGE);
	} else {
		accel_mlx5_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt);
		mlx5_task->num_reqs = accel_mlx5_get_crc_task_count(mlx5_task->src.iov, mlx5_task->src.iovcnt,
				      mlx5_task->dst.iov, mlx5_task->dst.iovcnt);
	}

	rc = accel_mlx5_task_alloc_crc_ctx(mlx5_task, qp_slot);
	if (spdk_unlikely(rc)) {
		return rc;
	}

	if (spdk_unlikely(qp_slot < 2)) {
		/* Queue is full, queue this task */
		SPDK_DEBUGLOG(accel_mlx5, "dev %s qp %p is full\n", qp->dev->dev_ctx->context->device->name,
			      mlx5_task->qp);
		qp->dev->stats.nomem_qdepth++;
		return -ENOMEM;
	}
	return 0;
}

static inline int
accel_mlx5_crypto_mkey_task_init(struct accel_mlx5_task *mlx5_task)
{
	struct spdk_accel_task *task = &mlx5_task->base;
	struct accel_mlx5_qp *qp = mlx5_task->qp;
	struct accel_mlx5_dev *dev = qp->dev;
	uint32_t num_blocks;
	int rc;
	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
	bool crypto_key_ok;

	if (spdk_unlikely(task->s.iovcnt > ACCEL_MLX5_MAX_SGE)) {
		/* With `external mkey` we can't split task or register several UMRs */
		SPDK_ERRLOG("src buffer is too fragmented\n");
		return -EINVAL;
	}
	if (spdk_unlikely(task->src_domain == spdk_accel_get_memory_domain())) {
		SPDK_ERRLOG("accel domain is not supported\n");
		return -ENOTSUP;
	}
	if (spdk_unlikely(spdk_accel_sequence_next_task(task) != NULL)) {
		SPDK_ERRLOG("Mkey registration is only supported for single task\n");
		return -ENOTSUP;
	}

	crypto_key_ok = (task->crypto_key && task->crypto_key->module_if == &g_accel_mlx5.module &&
			 task->crypto_key->priv);
	if (spdk_unlikely(!crypto_key_ok)) {
		SPDK_ERRLOG("Wrong crypto key provided\n");
		return -EINVAL;
	}
	if (spdk_unlikely(task->nbytes % mlx5_task->base.block_size != 0)) {
		SPDK_ERRLOG("src length %"PRIu64" is not a multiple of the block size %u\n", task->nbytes,
			    mlx5_task->base.block_size);
		return -EINVAL;
	}

	num_blocks = task->nbytes / mlx5_task->base.block_size;
	if (dev->crypto_multi_block) {
		if (spdk_unlikely(g_accel_mlx5.attr.crypto_split_blocks &&
				  num_blocks > g_accel_mlx5.attr.crypto_split_blocks)) {
			SPDK_ERRLOG("Number of blocks in task %u exceeds split threshold %u, can't handle\n",
				    num_blocks, g_accel_mlx5.attr.crypto_split_blocks);
			return -E2BIG;
		}
	} else if (num_blocks != 1) {
		SPDK_ERRLOG("Task contains more than 1 block, can't handle\n");
		return -E2BIG;
	}

	accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt);
	mlx5_task->num_blocks = num_blocks;
	mlx5_task->num_processed_blocks = 0;
	mlx5_task->num_reqs = 1;
	mlx5_task->blocks_per_req = num_blocks;
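	/*
	 * An external-mkey task always maps to exactly one UMR: one request, one mkey,
	 * all blocks at once. The checks below only verify that a qp slot and a crypto
	 * mkey are available; on -ENOMEM the task is expected to be retried later
	 * (see accel_mlx5_crypto_mkey_task_continue()).
	 */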

	if (spdk_unlikely(qp_slot == 0)) {
		mlx5_task->num_ops = 0;
		dev->stats.nomem_qdepth++;
		return -ENOMEM;
	}
	rc = spdk_mlx5_mkey_pool_get_bulk(dev->crypto_mkeys, mlx5_task->mkeys, 1);
	if (spdk_unlikely(rc)) {
		mlx5_task->num_ops = 0;
		dev->stats.nomem_mkey++;
		return -ENOMEM;
	}
	mlx5_task->num_ops = 1;

	SPDK_DEBUGLOG(accel_mlx5, "crypto_mkey task num_blocks %u, src_len %zu\n", mlx5_task->num_blocks,
		      task->nbytes);

	return 0;
}

static inline int
accel_mlx5_crypto_mkey_task_process(struct accel_mlx5_task *mlx5_task)
{
	struct accel_mlx5_sge sge;
	struct spdk_accel_task *task = &mlx5_task->base;
	struct accel_mlx5_qp *qp = mlx5_task->qp;
	struct accel_mlx5_dev *dev = qp->dev;
	struct spdk_mlx5_crypto_dek_data dek_data;
	int rc;

	if (spdk_unlikely(!mlx5_task->num_ops)) {
		return -EINVAL;
	}
	SPDK_DEBUGLOG(accel_mlx5, "begin, task %p, dst_domain_ctx %p\n", mlx5_task, task->dst_domain_ctx);

	mlx5_task->num_wrs = 0;
	rc = spdk_mlx5_crypto_get_dek_data(task->crypto_key->priv, dev->dev_ctx->pd, &dek_data);
	if (spdk_unlikely(rc)) {
		return rc;
	}

	rc = accel_mlx5_configure_crypto_umr(mlx5_task, &sge, mlx5_task->mkeys[0]->mkey,
					     mlx5_task->num_blocks, &dek_data, (uint64_t)mlx5_task,
					     SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
	if (spdk_unlikely(rc)) {
		SPDK_ERRLOG("UMR configure failed with %d\n", rc);
		return rc;
	}
	dev->stats.crypto_umrs++;
	mlx5_task->num_submitted_reqs++;
	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task);
	STAILQ_INSERT_TAIL(&qp->in_hw, mlx5_task, link);

	SPDK_DEBUGLOG(accel_mlx5, "end, task %p, dst_domain_ctx %p\n", mlx5_task, task->dst_domain_ctx);

	return 0;
}

static inline int
accel_mlx5_crypto_mkey_task_continue(struct accel_mlx5_task *task)
{
	struct accel_mlx5_qp *qp = task->qp;
	struct accel_mlx5_dev *dev = qp->dev;
	int rc;
	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);

	if (task->num_ops == 0) {
		rc = spdk_mlx5_mkey_pool_get_bulk(dev->crypto_mkeys, task->mkeys, 1);
		if (spdk_unlikely(rc)) {
			dev->stats.nomem_mkey++;
			STAILQ_INSERT_TAIL(&dev->nomem, task, link);
			return -ENOMEM;
		}
		task->num_ops = 1;
	}
	if (spdk_unlikely(qp_slot == 0)) {
		dev->stats.nomem_qdepth++;
		STAILQ_INSERT_TAIL(&dev->nomem, task, link);
		return -ENOMEM;
	}
	return accel_mlx5_crypto_mkey_task_process(task);
}

static inline void
accel_mlx5_crypto_mkey_task_complete(struct accel_mlx5_task *mlx5_task)
{
	struct accel_mlx5_dev *dev = mlx5_task->qp->dev;

	assert(mlx5_task->num_ops);
	assert(mlx5_task->num_processed_blocks == mlx5_task->num_blocks);
	assert(mlx5_task->base.seq);

	spdk_mlx5_mkey_pool_put_bulk(dev->crypto_mkeys, mlx5_task->mkeys, 1);
	spdk_accel_task_complete(&mlx5_task->base, 0);
}

static int
accel_mlx5_task_op_not_implemented(struct accel_mlx5_task *mlx5_task)
{
	SPDK_ERRLOG("wrong function called\n");
	SPDK_UNREACHABLE();
}

static void
accel_mlx5_task_op_not_implemented_v(struct accel_mlx5_task *mlx5_task)
{
	SPDK_ERRLOG("wrong function called\n");
	SPDK_UNREACHABLE();
}

static int
accel_mlx5_task_op_not_supported(struct accel_mlx5_task *mlx5_task)
{
	SPDK_ERRLOG("Unsupported opcode %d\n", mlx5_task->base.op_code);

	return -ENOTSUP;
}

static struct accel_mlx5_task_operations g_accel_mlx5_tasks_ops[] = {
	[ACCEL_MLX5_OPC_COPY] = {
		.init = accel_mlx5_copy_task_init,
		.process = accel_mlx5_copy_task_process,
		.cont = accel_mlx5_copy_task_continue,
		.complete = accel_mlx5_copy_task_complete,
	},
	[ACCEL_MLX5_OPC_CRYPTO] = {
		.init = accel_mlx5_crypto_task_init,
		.process = accel_mlx5_crypto_task_process,
		.cont = accel_mlx5_crypto_task_continue,
		.complete = accel_mlx5_crypto_task_complete,
	},
	[ACCEL_MLX5_OPC_CRC32C] = {
		.init = accel_mlx5_crc_task_init,
		.process = accel_mlx5_crc_task_process,
		.cont = accel_mlx5_crc_task_continue,
		.complete = accel_mlx5_crc_task_complete,
	},
	[ACCEL_MLX5_OPC_CRYPTO_MKEY] = {
		.init = accel_mlx5_crypto_mkey_task_init,
		.process = accel_mlx5_crypto_mkey_task_process,
		.cont = accel_mlx5_crypto_mkey_task_continue,
		.complete = accel_mlx5_crypto_mkey_task_complete,
	},
	[ACCEL_MLX5_OPC_LAST] = {
		.init = accel_mlx5_task_op_not_supported,
		.process = accel_mlx5_task_op_not_implemented,
		.cont = accel_mlx5_task_op_not_implemented,
		.complete = accel_mlx5_task_op_not_implemented_v
	},
};

static void
accel_mlx5_memory_domain_transfer_cpl(void *ctx, int rc)
{
	struct accel_mlx5_task *task = ctx;

	assert(task->needs_data_transfer);
	task->needs_data_transfer = 0;

	if (spdk_likely(!rc)) {
		SPDK_DEBUGLOG(accel_mlx5, "task %p, data transfer done\n", task);
		accel_mlx5_task_complete(task);
	} else {
		SPDK_ERRLOG("Task %p, data transfer failed, rc %d\n", task, rc);
		accel_mlx5_task_fail(task, rc);
	}
}

static inline void
accel_mlx5_memory_domain_transfer(struct accel_mlx5_task *task)
{
	struct spdk_memory_domain_translation_result translation;
	struct spdk_accel_task *base = &task->base;
	struct accel_mlx5_dev *dev = task->qp->dev;
	int rc;

	assert(task->mlx5_opcode == ACCEL_MLX5_OPC_CRYPTO_MKEY);
	/* UMR is an offset in the address space, so the start address is 0 */
	translation.iov.iov_base = NULL;
	translation.iov.iov_len = base->nbytes;
	translation.iov_count = 1;
	translation.size = sizeof(translation);
	translation.rdma.rkey = task->mkeys[0]->mkey;
	translation.rdma.lkey = task->mkeys[0]->mkey;

	SPDK_DEBUGLOG(accel_mlx5, "start transfer, task %p, dst_domain_ctx %p, mkey %u\n", task,
		      task->base.dst_domain_ctx, task->mkeys[0]->mkey);
	rc = spdk_memory_domain_transfer_data(base->dst_domain, base->dst_domain_ctx, &translation.iov, 1,
					      dev->dev_ctx->domain, task, &translation.iov, 1, &translation,
					      accel_mlx5_memory_domain_transfer_cpl, task);
	if (spdk_unlikely(rc)) {
		SPDK_ERRLOG("Failed to start data transfer, task %p rc %d\n", task, rc);
		accel_mlx5_task_fail(task, rc);
	}
}

static inline void
accel_mlx5_task_complete(struct accel_mlx5_task *task)
{
	struct spdk_accel_sequence *seq = task->base.seq;
	struct spdk_accel_task *next;
	bool driver_seq;

	if (task->needs_data_transfer) {
		accel_mlx5_memory_domain_transfer(task);
		return;
	}

	next = spdk_accel_sequence_next_task(&task->base);
	driver_seq = task->driver_seq;

	assert(task->num_reqs == task->num_completed_reqs);
	SPDK_DEBUGLOG(accel_mlx5, "Complete task %p, opc 
%d\n", task, task->mlx5_opcode); 1956 1957 g_accel_mlx5_tasks_ops[task->mlx5_opcode].complete(task); 1958 1959 if (driver_seq) { 1960 struct spdk_io_channel *ch = task->qp->dev->ch; 1961 1962 assert(seq); 1963 if (next) { 1964 accel_mlx5_execute_sequence(ch, seq); 1965 } else { 1966 spdk_accel_sequence_continue(seq); 1967 } 1968 } 1969 } 1970 1971 static inline int 1972 accel_mlx5_task_continue(struct accel_mlx5_task *task) 1973 { 1974 struct accel_mlx5_qp *qp = task->qp; 1975 struct accel_mlx5_dev *dev = qp->dev; 1976 1977 if (spdk_unlikely(qp->recovering)) { 1978 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 1979 return 0; 1980 } 1981 1982 return g_accel_mlx5_tasks_ops[task->mlx5_opcode].cont(task); 1983 } 1984 static inline void 1985 accel_mlx5_task_init_opcode(struct accel_mlx5_task *mlx5_task) 1986 { 1987 uint8_t base_opcode = mlx5_task->base.op_code; 1988 1989 switch (base_opcode) { 1990 case SPDK_ACCEL_OPC_COPY: 1991 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_COPY; 1992 break; 1993 case SPDK_ACCEL_OPC_ENCRYPT: 1994 assert(g_accel_mlx5.crypto_supported); 1995 mlx5_task->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_WIRE; 1996 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO; 1997 break; 1998 case SPDK_ACCEL_OPC_DECRYPT: 1999 assert(g_accel_mlx5.crypto_supported); 2000 mlx5_task->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_MEMORY; 2001 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO; 2002 break; 2003 case SPDK_ACCEL_OPC_CRC32C: 2004 mlx5_task->inplace = 1; 2005 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRC32C; 2006 break; 2007 case SPDK_ACCEL_OPC_COPY_CRC32C: 2008 mlx5_task->inplace = 0; 2009 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRC32C; 2010 break; 2011 default: 2012 SPDK_ERRLOG("wrong opcode %d\n", base_opcode); 2013 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_LAST; 2014 } 2015 } 2016 2017 static inline int 2018 _accel_mlx5_submit_tasks(struct accel_mlx5_io_channel *accel_ch, struct spdk_accel_task *task) 2019 { 2020 struct accel_mlx5_task *mlx5_task = SPDK_CONTAINEROF(task, struct accel_mlx5_task, base); 2021 struct accel_mlx5_dev *dev = mlx5_task->qp->dev; 2022 int rc; 2023 2024 /* We should not receive any tasks if the module was not enabled */ 2025 assert(g_accel_mlx5.enabled); 2026 2027 dev->stats.opcodes[mlx5_task->mlx5_opcode]++; 2028 rc = g_accel_mlx5_tasks_ops[mlx5_task->mlx5_opcode].init(mlx5_task); 2029 if (spdk_unlikely(rc)) { 2030 if (rc == -ENOMEM) { 2031 SPDK_DEBUGLOG(accel_mlx5, "no reqs to handle new task %p (required %u), put to queue\n", mlx5_task, 2032 mlx5_task->num_reqs); 2033 STAILQ_INSERT_TAIL(&dev->nomem, mlx5_task, link); 2034 return 0; 2035 } 2036 SPDK_ERRLOG("Task opc %d init failed, rc %d\n", task->op_code, rc); 2037 return rc; 2038 } 2039 2040 if (spdk_unlikely(mlx5_task->qp->recovering)) { 2041 STAILQ_INSERT_TAIL(&dev->nomem, mlx5_task, link); 2042 return 0; 2043 } 2044 2045 return g_accel_mlx5_tasks_ops[mlx5_task->mlx5_opcode].process(mlx5_task); 2046 } 2047 2048 static inline void 2049 accel_mlx5_task_assign_qp(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_io_channel *accel_ch) 2050 { 2051 struct accel_mlx5_dev *dev; 2052 2053 dev = &accel_ch->devs[accel_ch->dev_idx]; 2054 accel_ch->dev_idx++; 2055 if (accel_ch->dev_idx == accel_ch->num_devs) { 2056 accel_ch->dev_idx = 0; 2057 } 2058 2059 mlx5_task->qp = &dev->qp; 2060 } 2061 2062 static inline void 2063 accel_mlx5_task_reset(struct accel_mlx5_task *mlx5_task) 2064 { 2065 mlx5_task->num_completed_reqs = 0; 2066 mlx5_task->num_submitted_reqs = 0; 2067 mlx5_task->num_ops = 0; 2068 
mlx5_task->num_processed_blocks = 0; 2069 mlx5_task->raw = 0; 2070 } 2071 2072 static int 2073 accel_mlx5_submit_tasks(struct spdk_io_channel *ch, struct spdk_accel_task *task) 2074 { 2075 struct accel_mlx5_task *mlx5_task = SPDK_CONTAINEROF(task, struct accel_mlx5_task, base); 2076 struct accel_mlx5_io_channel *accel_ch = spdk_io_channel_get_ctx(ch); 2077 2078 accel_mlx5_task_assign_qp(mlx5_task, accel_ch); 2079 accel_mlx5_task_reset(mlx5_task); 2080 accel_mlx5_task_init_opcode(mlx5_task); 2081 2082 return _accel_mlx5_submit_tasks(accel_ch, task); 2083 } 2084 2085 static void accel_mlx5_recover_qp(struct accel_mlx5_qp *qp); 2086 2087 static int 2088 accel_mlx5_recover_qp_poller(void *arg) 2089 { 2090 struct accel_mlx5_qp *qp = arg; 2091 2092 spdk_poller_unregister(&qp->recover_poller); 2093 accel_mlx5_recover_qp(qp); 2094 return SPDK_POLLER_BUSY; 2095 } 2096 2097 static void 2098 accel_mlx5_recover_qp(struct accel_mlx5_qp *qp) 2099 { 2100 struct accel_mlx5_dev *dev = qp->dev; 2101 struct spdk_mlx5_qp_attr mlx5_qp_attr = {}; 2102 int rc; 2103 2104 SPDK_NOTICELOG("Recovering qp %p, core %u\n", qp, spdk_env_get_current_core()); 2105 if (qp->qp) { 2106 spdk_mlx5_qp_destroy(qp->qp); 2107 qp->qp = NULL; 2108 } 2109 2110 mlx5_qp_attr.cap.max_send_wr = g_accel_mlx5.attr.qp_size; 2111 mlx5_qp_attr.cap.max_recv_wr = 0; 2112 mlx5_qp_attr.cap.max_send_sge = ACCEL_MLX5_MAX_SGE; 2113 mlx5_qp_attr.cap.max_inline_data = sizeof(struct ibv_sge) * ACCEL_MLX5_MAX_SGE; 2114 2115 rc = spdk_mlx5_qp_create(dev->dev_ctx->pd, dev->cq, &mlx5_qp_attr, &qp->qp); 2116 if (rc) { 2117 SPDK_ERRLOG("Failed to create mlx5 dma QP, rc %d. Retry in %d usec\n", 2118 rc, ACCEL_MLX5_RECOVER_POLLER_PERIOD_US); 2119 qp->recover_poller = SPDK_POLLER_REGISTER(accel_mlx5_recover_qp_poller, qp, 2120 ACCEL_MLX5_RECOVER_POLLER_PERIOD_US); 2121 return; 2122 } 2123 2124 qp->recovering = false; 2125 } 2126 2127 static inline void 2128 accel_mlx5_process_error_cpl(struct spdk_mlx5_cq_completion *wc, struct accel_mlx5_task *task) 2129 { 2130 struct accel_mlx5_qp *qp = task->qp; 2131 2132 if (wc->status != IBV_WC_WR_FLUSH_ERR) { 2133 SPDK_WARNLOG("RDMA: qp %p, task %p, WC status %d, core %u\n", 2134 qp, task, wc->status, spdk_env_get_current_core()); 2135 } else { 2136 SPDK_DEBUGLOG(accel_mlx5, 2137 "RDMA: qp %p, task %p, WC status %d, core %u\n", 2138 qp, task, wc->status, spdk_env_get_current_core()); 2139 } 2140 2141 qp->recovering = true; 2142 assert(task->num_completed_reqs <= task->num_submitted_reqs); 2143 if (task->num_completed_reqs == task->num_submitted_reqs) { 2144 STAILQ_REMOVE_HEAD(&qp->in_hw, link); 2145 accel_mlx5_task_fail(task, -EIO); 2146 } 2147 } 2148 2149 static inline int64_t 2150 accel_mlx5_poll_cq(struct accel_mlx5_dev *dev) 2151 { 2152 struct spdk_mlx5_cq_completion wc[ACCEL_MLX5_MAX_WC]; 2153 struct accel_mlx5_task *task; 2154 struct accel_mlx5_qp *qp; 2155 int reaped, i, rc; 2156 uint16_t completed; 2157 2158 dev->stats.polls++; 2159 reaped = spdk_mlx5_cq_poll_completions(dev->cq, wc, ACCEL_MLX5_MAX_WC); 2160 if (spdk_unlikely(reaped < 0)) { 2161 SPDK_ERRLOG("Error polling CQ! 
(%d): %s\n", errno, spdk_strerror(errno)); 2162 return reaped; 2163 } else if (reaped == 0) { 2164 dev->stats.idle_polls++; 2165 return 0; 2166 } 2167 dev->stats.completions += reaped; 2168 2169 SPDK_DEBUGLOG(accel_mlx5, "Reaped %d cpls on dev %s\n", reaped, 2170 dev->dev_ctx->context->device->name); 2171 2172 for (i = 0; i < reaped; i++) { 2173 if (spdk_unlikely(!wc[i].wr_id)) { 2174 /* Unsignaled completion with error, ignore */ 2175 continue; 2176 } 2177 task = (struct accel_mlx5_task *)wc[i].wr_id; 2178 qp = task->qp; 2179 assert(task == STAILQ_FIRST(&qp->in_hw) && "submission mismatch"); 2180 assert(task->num_submitted_reqs > task->num_completed_reqs); 2181 completed = task->num_submitted_reqs - task->num_completed_reqs; 2182 assert((uint32_t)task->num_completed_reqs + completed <= UINT16_MAX); 2183 task->num_completed_reqs += completed; 2184 assert(qp->wrs_submitted >= task->num_wrs); 2185 qp->wrs_submitted -= task->num_wrs; 2186 assert(dev->wrs_in_cq > 0); 2187 dev->wrs_in_cq--; 2188 2189 if (spdk_unlikely(wc[i].status)) { 2190 accel_mlx5_process_error_cpl(&wc[i], task); 2191 if (qp->wrs_submitted == 0) { 2192 assert(STAILQ_EMPTY(&qp->in_hw)); 2193 accel_mlx5_recover_qp(qp); 2194 } 2195 continue; 2196 } 2197 2198 SPDK_DEBUGLOG(accel_mlx5, "task %p, remaining %u\n", task, 2199 task->num_reqs - task->num_completed_reqs); 2200 if (task->num_completed_reqs == task->num_reqs) { 2201 STAILQ_REMOVE_HEAD(&qp->in_hw, link); 2202 accel_mlx5_task_complete(task); 2203 } else { 2204 assert(task->num_submitted_reqs < task->num_reqs); 2205 assert(task->num_completed_reqs == task->num_submitted_reqs); 2206 STAILQ_REMOVE_HEAD(&qp->in_hw, link); 2207 rc = accel_mlx5_task_continue(task); 2208 if (spdk_unlikely(rc)) { 2209 if (rc != -ENOMEM) { 2210 accel_mlx5_task_fail(task, rc); 2211 } 2212 } 2213 } 2214 } 2215 2216 return reaped; 2217 } 2218 2219 static inline void 2220 accel_mlx5_resubmit_nomem_tasks(struct accel_mlx5_dev *dev) 2221 { 2222 struct accel_mlx5_task *task, *tmp, *last; 2223 int rc; 2224 2225 last = STAILQ_LAST(&dev->nomem, accel_mlx5_task, link); 2226 STAILQ_FOREACH_SAFE(task, &dev->nomem, link, tmp) { 2227 STAILQ_REMOVE_HEAD(&dev->nomem, link); 2228 rc = accel_mlx5_task_continue(task); 2229 if (spdk_unlikely(rc)) { 2230 if (rc != -ENOMEM) { 2231 accel_mlx5_task_fail(task, rc); 2232 } 2233 break; 2234 } 2235 /* If qpair is recovering, task is added back to the nomem list and 0 is returned. 
In that case we 2236 * need a special condition to iterate the list once and stop this FOREACH loop */ 2237 if (task == last) { 2238 break; 2239 } 2240 } 2241 } 2242 2243 static int 2244 accel_mlx5_poller(void *ctx) 2245 { 2246 struct accel_mlx5_io_channel *ch = ctx; 2247 struct accel_mlx5_dev *dev; 2248 2249 int64_t completions = 0, rc; 2250 uint32_t i; 2251 2252 for (i = 0; i < ch->num_devs; i++) { 2253 dev = &ch->devs[i]; 2254 if (dev->wrs_in_cq) { 2255 rc = accel_mlx5_poll_cq(dev); 2256 if (spdk_unlikely(rc < 0)) { 2257 SPDK_ERRLOG("Error %"PRId64" on CQ, dev %s\n", rc, dev->dev_ctx->context->device->name); 2258 } 2259 completions += rc; 2260 if (dev->qp.wrs_submitted) { 2261 spdk_mlx5_qp_complete_send(dev->qp.qp); 2262 } 2263 } 2264 if (!STAILQ_EMPTY(&dev->nomem)) { 2265 accel_mlx5_resubmit_nomem_tasks(dev); 2266 } 2267 } 2268 2269 return !!completions; 2270 } 2271 2272 static bool 2273 accel_mlx5_supports_opcode(enum spdk_accel_opcode opc) 2274 { 2275 assert(g_accel_mlx5.enabled); 2276 2277 switch (opc) { 2278 case SPDK_ACCEL_OPC_COPY: 2279 return true; 2280 case SPDK_ACCEL_OPC_ENCRYPT: 2281 case SPDK_ACCEL_OPC_DECRYPT: 2282 return g_accel_mlx5.crypto_supported; 2283 case SPDK_ACCEL_OPC_CRC32C: 2284 case SPDK_ACCEL_OPC_COPY_CRC32C: 2285 return g_accel_mlx5.crc32c_supported; 2286 default: 2287 return false; 2288 } 2289 } 2290 2291 static struct spdk_io_channel * 2292 accel_mlx5_get_io_channel(void) 2293 { 2294 assert(g_accel_mlx5.enabled); 2295 return spdk_get_io_channel(&g_accel_mlx5); 2296 } 2297 2298 static int 2299 accel_mlx5_create_qp(struct accel_mlx5_dev *dev, struct accel_mlx5_qp *qp) 2300 { 2301 struct spdk_mlx5_qp_attr mlx5_qp_attr = {}; 2302 int rc; 2303 2304 mlx5_qp_attr.cap.max_send_wr = g_accel_mlx5.attr.qp_size; 2305 mlx5_qp_attr.cap.max_recv_wr = 0; 2306 mlx5_qp_attr.cap.max_send_sge = ACCEL_MLX5_MAX_SGE; 2307 mlx5_qp_attr.cap.max_inline_data = sizeof(struct ibv_sge) * ACCEL_MLX5_MAX_SGE; 2308 2309 rc = spdk_mlx5_qp_create(dev->dev_ctx->pd, dev->cq, &mlx5_qp_attr, &qp->qp); 2310 if (rc) { 2311 return rc; 2312 } 2313 2314 STAILQ_INIT(&qp->in_hw); 2315 qp->dev = dev; 2316 qp->verbs_qp = spdk_mlx5_qp_get_verbs_qp(qp->qp); 2317 assert(qp->verbs_qp); 2318 qp->wrs_max = g_accel_mlx5.attr.qp_size; 2319 2320 return 0; 2321 } 2322 2323 static void 2324 accel_mlx5_add_stats(struct accel_mlx5_stats *stats, const struct accel_mlx5_stats *to_add) 2325 { 2326 int i; 2327 2328 stats->crypto_umrs += to_add->crypto_umrs; 2329 stats->sig_umrs += to_add->sig_umrs; 2330 stats->rdma_reads += to_add->rdma_reads; 2331 stats->rdma_writes += to_add->rdma_writes; 2332 stats->polls += to_add->polls; 2333 stats->idle_polls += to_add->idle_polls; 2334 stats->completions += to_add->completions; 2335 stats->nomem_qdepth += to_add->nomem_qdepth; 2336 stats->nomem_mkey += to_add->nomem_mkey; 2337 for (i = 0; i < ACCEL_MLX5_OPC_LAST; i++) { 2338 stats->opcodes[i] += to_add->opcodes[i]; 2339 } 2340 } 2341 2342 static void 2343 accel_mlx5_destroy_cb(void *io_device, void *ctx_buf) 2344 { 2345 struct accel_mlx5_io_channel *ch = ctx_buf; 2346 struct accel_mlx5_dev *dev; 2347 uint32_t i; 2348 2349 spdk_poller_unregister(&ch->poller); 2350 for (i = 0; i < ch->num_devs; i++) { 2351 dev = &ch->devs[i]; 2352 spdk_mlx5_qp_destroy(dev->qp.qp); 2353 if (dev->cq) { 2354 spdk_mlx5_cq_destroy(dev->cq); 2355 } 2356 spdk_poller_unregister(&dev->qp.recover_poller); 2357 if (dev->crypto_mkeys) { 2358 spdk_mlx5_mkey_pool_put_ref(dev->crypto_mkeys); 2359 } 2360 if (dev->sig_mkeys) { 2361 
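		/* Only the per-channel reference taken in accel_mlx5_create_cb() is dropped here;
		 * the pool itself is destroyed in accel_mlx5_free_resources(). */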
spdk_mlx5_mkey_pool_put_ref(dev->sig_mkeys); 2362 } 2363 spdk_rdma_utils_free_mem_map(&dev->mmap); 2364 spdk_spin_lock(&g_accel_mlx5.lock); 2365 accel_mlx5_add_stats(&g_accel_mlx5.stats, &dev->stats); 2366 spdk_spin_unlock(&g_accel_mlx5.lock); 2367 } 2368 free(ch->devs); 2369 } 2370 2371 static int 2372 accel_mlx5_create_cb(void *io_device, void *ctx_buf) 2373 { 2374 struct spdk_mlx5_cq_attr cq_attr = {}; 2375 struct accel_mlx5_io_channel *ch = ctx_buf; 2376 struct accel_mlx5_dev_ctx *dev_ctx; 2377 struct accel_mlx5_dev *dev; 2378 uint32_t i; 2379 int rc; 2380 2381 ch->devs = calloc(g_accel_mlx5.num_ctxs, sizeof(*ch->devs)); 2382 if (!ch->devs) { 2383 SPDK_ERRLOG("Memory allocation failed\n"); 2384 return -ENOMEM; 2385 } 2386 2387 for (i = 0; i < g_accel_mlx5.num_ctxs; i++) { 2388 dev_ctx = &g_accel_mlx5.dev_ctxs[i]; 2389 dev = &ch->devs[i]; 2390 dev->dev_ctx = dev_ctx; 2391 2392 if (dev_ctx->crypto_mkeys) { 2393 dev->crypto_mkeys = spdk_mlx5_mkey_pool_get_ref(dev_ctx->pd, SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO); 2394 if (!dev->crypto_mkeys) { 2395 SPDK_ERRLOG("Failed to get crypto mkey pool channel, dev %s\n", dev_ctx->context->device->name); 2396 /* Should not happen since mkey pool is created on accel_mlx5 initialization. 2397 * We should not be here if pool creation failed */ 2398 assert(0); 2399 goto err_out; 2400 } 2401 } 2402 if (dev_ctx->sig_mkeys) { 2403 dev->sig_mkeys = spdk_mlx5_mkey_pool_get_ref(dev_ctx->pd, SPDK_MLX5_MKEY_POOL_FLAG_SIGNATURE); 2404 if (!dev->sig_mkeys) { 2405 SPDK_ERRLOG("Failed to get sig mkey pool channel, dev %s\n", dev_ctx->context->device->name); 2406 /* Should not happen since mkey pool is created on accel_mlx5 initialization. 2407 * We should not be here if pool creation failed */ 2408 assert(0); 2409 goto err_out; 2410 } 2411 } 2412 2413 memset(&cq_attr, 0, sizeof(cq_attr)); 2414 cq_attr.cqe_cnt = g_accel_mlx5.attr.qp_size; 2415 cq_attr.cqe_size = 64; 2416 cq_attr.cq_context = dev; 2417 2418 ch->num_devs++; 2419 rc = spdk_mlx5_cq_create(dev_ctx->pd, &cq_attr, &dev->cq); 2420 if (rc) { 2421 SPDK_ERRLOG("Failed to create mlx5 CQ, rc %d\n", rc); 2422 goto err_out; 2423 } 2424 2425 rc = accel_mlx5_create_qp(dev, &dev->qp); 2426 if (rc) { 2427 SPDK_ERRLOG("Failed to create mlx5 QP, rc %d\n", rc); 2428 goto err_out; 2429 } 2430 2431 dev->mmap = spdk_rdma_utils_create_mem_map(dev_ctx->pd, NULL, 2432 IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE); 2433 if (!dev->mmap) { 2434 SPDK_ERRLOG("Failed to create memory map\n"); 2435 rc = -ENOMEM; 2436 goto err_out; 2437 } 2438 dev->crypto_multi_block = dev_ctx->crypto_multi_block; 2439 dev->crypto_split_blocks = dev_ctx->crypto_multi_block ? 
g_accel_mlx5.attr.crypto_split_blocks : 0; 2440 dev->wrs_in_cq_max = g_accel_mlx5.attr.qp_size; 2441 dev->ch = spdk_io_channel_from_ctx(ctx_buf); 2442 STAILQ_INIT(&dev->nomem); 2443 } 2444 2445 ch->poller = SPDK_POLLER_REGISTER(accel_mlx5_poller, ch, 0); 2446 2447 return 0; 2448 2449 err_out: 2450 accel_mlx5_destroy_cb(&g_accel_mlx5, ctx_buf); 2451 return rc; 2452 } 2453 2454 void 2455 accel_mlx5_get_default_attr(struct accel_mlx5_attr *attr) 2456 { 2457 assert(attr); 2458 2459 attr->qp_size = ACCEL_MLX5_QP_SIZE; 2460 attr->num_requests = ACCEL_MLX5_NUM_REQUESTS; 2461 attr->allowed_devs = NULL; 2462 attr->crypto_split_blocks = 0; 2463 attr->enable_driver = false; 2464 } 2465 2466 static void 2467 accel_mlx5_allowed_devs_free(void) 2468 { 2469 size_t i; 2470 2471 if (!g_accel_mlx5.allowed_devs) { 2472 return; 2473 } 2474 2475 for (i = 0; i < g_accel_mlx5.allowed_devs_count; i++) { 2476 free(g_accel_mlx5.allowed_devs[i]); 2477 } 2478 free(g_accel_mlx5.attr.allowed_devs); 2479 free(g_accel_mlx5.allowed_devs); 2480 g_accel_mlx5.attr.allowed_devs = NULL; 2481 g_accel_mlx5.allowed_devs = NULL; 2482 g_accel_mlx5.allowed_devs_count = 0; 2483 } 2484 2485 static int 2486 accel_mlx5_allowed_devs_parse(const char *allowed_devs) 2487 { 2488 char *str, *tmp, *tok, *sp = NULL; 2489 size_t devs_count = 0; 2490 2491 str = strdup(allowed_devs); 2492 if (!str) { 2493 return -ENOMEM; 2494 } 2495 2496 accel_mlx5_allowed_devs_free(); 2497 2498 tmp = str; 2499 while ((tmp = strchr(tmp, ',')) != NULL) { 2500 tmp++; 2501 devs_count++; 2502 } 2503 devs_count++; 2504 2505 g_accel_mlx5.allowed_devs = calloc(devs_count, sizeof(char *)); 2506 if (!g_accel_mlx5.allowed_devs) { 2507 free(str); 2508 return -ENOMEM; 2509 } 2510 2511 devs_count = 0; 2512 tok = strtok_r(str, ",", &sp); 2513 while (tok) { 2514 g_accel_mlx5.allowed_devs[devs_count] = strdup(tok); 2515 if (!g_accel_mlx5.allowed_devs[devs_count]) { 2516 free(str); 2517 accel_mlx5_allowed_devs_free(); 2518 return -ENOMEM; 2519 } 2520 tok = strtok_r(NULL, ",", &sp); 2521 devs_count++; 2522 g_accel_mlx5.allowed_devs_count++; 2523 } 2524 2525 free(str); 2526 2527 return 0; 2528 } 2529 2530 int 2531 accel_mlx5_enable(struct accel_mlx5_attr *attr) 2532 { 2533 int rc; 2534 2535 if (g_accel_mlx5.enabled) { 2536 return -EEXIST; 2537 } 2538 if (attr) { 2539 if (attr->num_requests / spdk_env_get_core_count() < ACCEL_MLX5_MAX_MKEYS_IN_TASK) { 2540 SPDK_ERRLOG("num requests per core must not be less than %u, current value %u\n", 2541 ACCEL_MLX5_MAX_MKEYS_IN_TASK, attr->num_requests / spdk_env_get_core_count()); 2542 return -EINVAL; 2543 } 2544 if (attr->qp_size < 8) { 2545 SPDK_ERRLOG("qp_size must be at least 8\n"); 2546 return -EINVAL; 2547 } 2548 g_accel_mlx5.attr = *attr; 2549 g_accel_mlx5.attr.allowed_devs = NULL; 2550 2551 if (attr->allowed_devs) { 2552 /* Contains a copy of user's string */ 2553 g_accel_mlx5.attr.allowed_devs = strndup(attr->allowed_devs, ACCEL_MLX5_ALLOWED_DEVS_MAX_LEN); 2554 if (!g_accel_mlx5.attr.allowed_devs) { 2555 return -ENOMEM; 2556 } 2557 rc = accel_mlx5_allowed_devs_parse(g_accel_mlx5.attr.allowed_devs); 2558 if (rc) { 2559 return rc; 2560 } 2561 rc = spdk_mlx5_crypto_devs_allow((const char *const *)g_accel_mlx5.allowed_devs, 2562 g_accel_mlx5.allowed_devs_count); 2563 if (rc) { 2564 accel_mlx5_allowed_devs_free(); 2565 return rc; 2566 } 2567 } 2568 } else { 2569 accel_mlx5_get_default_attr(&g_accel_mlx5.attr); 2570 } 2571 2572 g_accel_mlx5.enabled = true; 2573 spdk_accel_module_list_add(&g_accel_mlx5.module); 2574 2575 return 0; 2576 } 2577 
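/*
 * Illustrative sketch (not part of this file): how a configuration path, for example the
 * mlx5_scan_accel_module RPC handler, might enable the module. The caller and the attribute
 * values below are hypothetical; only accel_mlx5_get_default_attr(), accel_mlx5_enable() and
 * the struct accel_mlx5_attr fields are defined by this module.
 *
 *	struct accel_mlx5_attr attr;
 *	int rc;
 *
 *	accel_mlx5_get_default_attr(&attr);
 *	attr.qp_size = 64;
 *	attr.allowed_devs = "mlx5_0,mlx5_1";
 *	attr.enable_driver = true;
 *	rc = accel_mlx5_enable(&attr);
 *	if (rc == -EEXIST) {
 *		// already enabled, nothing to do
 *	}
 */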
2578 static void 2579 accel_mlx5_psvs_release(struct accel_mlx5_dev_ctx *dev_ctx) 2580 { 2581 uint32_t i, num_psvs, num_psvs_in_pool; 2582 2583 if (!dev_ctx->psvs) { 2584 return; 2585 } 2586 2587 num_psvs = g_accel_mlx5.attr.num_requests; 2588 2589 for (i = 0; i < num_psvs; i++) { 2590 if (dev_ctx->psvs[i]) { 2591 spdk_mlx5_destroy_psv(dev_ctx->psvs[i]); 2592 dev_ctx->psvs[i] = NULL; 2593 } 2594 } 2595 free(dev_ctx->psvs); 2596 2597 if (!dev_ctx->psv_pool) { 2598 return; 2599 } 2600 num_psvs_in_pool = spdk_mempool_count(dev_ctx->psv_pool); 2601 if (num_psvs_in_pool != num_psvs) { 2602 SPDK_ERRLOG("Expected %u reqs in the pool, but got only %u\n", num_psvs, num_psvs_in_pool); 2603 } 2604 spdk_mempool_free(dev_ctx->psv_pool); 2605 } 2606 2607 static void 2608 accel_mlx5_free_resources(void) 2609 { 2610 struct accel_mlx5_dev_ctx *dev_ctx; 2611 uint32_t i; 2612 2613 for (i = 0; i < g_accel_mlx5.num_ctxs; i++) { 2614 dev_ctx = &g_accel_mlx5.dev_ctxs[i]; 2615 accel_mlx5_psvs_release(dev_ctx); 2616 if (dev_ctx->pd) { 2617 if (dev_ctx->crypto_mkeys) { 2618 spdk_mlx5_mkey_pool_destroy(SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO, dev_ctx->pd); 2619 } 2620 if (dev_ctx->sig_mkeys) { 2621 spdk_mlx5_mkey_pool_destroy(SPDK_MLX5_MKEY_POOL_FLAG_SIGNATURE, dev_ctx->pd); 2622 } 2623 spdk_rdma_utils_put_pd(dev_ctx->pd); 2624 } 2625 if (dev_ctx->domain) { 2626 spdk_rdma_utils_put_memory_domain(dev_ctx->domain); 2627 } 2628 } 2629 2630 free(g_accel_mlx5.dev_ctxs); 2631 g_accel_mlx5.dev_ctxs = NULL; 2632 g_accel_mlx5.initialized = false; 2633 } 2634 2635 static void 2636 accel_mlx5_deinit_cb(void *ctx) 2637 { 2638 accel_mlx5_free_resources(); 2639 spdk_spin_destroy(&g_accel_mlx5.lock); 2640 spdk_mlx5_umr_implementer_register(false); 2641 spdk_accel_module_finish(); 2642 } 2643 2644 static void 2645 accel_mlx5_deinit(void *ctx) 2646 { 2647 if (g_accel_mlx5.allowed_devs) { 2648 accel_mlx5_allowed_devs_free(); 2649 } 2650 spdk_mlx5_crypto_devs_allow(NULL, 0); 2651 if (g_accel_mlx5.initialized) { 2652 spdk_io_device_unregister(&g_accel_mlx5, accel_mlx5_deinit_cb); 2653 } else { 2654 spdk_accel_module_finish(); 2655 } 2656 } 2657 2658 static int 2659 accel_mlx5_mkeys_create(struct ibv_pd *pd, uint32_t num_mkeys, uint32_t flags) 2660 { 2661 struct spdk_mlx5_mkey_pool_param pool_param = {}; 2662 2663 pool_param.mkey_count = num_mkeys; 2664 pool_param.cache_per_thread = num_mkeys * 3 / 4 / spdk_env_get_core_count(); 2665 pool_param.flags = flags; 2666 2667 return spdk_mlx5_mkey_pool_init(&pool_param, pd); 2668 } 2669 2670 static void 2671 accel_mlx5_set_psv_in_pool(struct spdk_mempool *mp, void *cb_arg, void *_psv, unsigned obj_idx) 2672 { 2673 struct spdk_rdma_utils_memory_translation translation = {}; 2674 struct accel_mlx5_psv_pool_iter_cb_args *args = cb_arg; 2675 struct accel_mlx5_psv_wrapper *wrapper = _psv; 2676 struct accel_mlx5_dev_ctx *dev_ctx = args->dev; 2677 int rc; 2678 2679 if (args->rc) { 2680 return; 2681 } 2682 assert(obj_idx < g_accel_mlx5.attr.num_requests); 2683 assert(dev_ctx->psvs[obj_idx] != NULL); 2684 memset(wrapper, 0, sizeof(*wrapper)); 2685 wrapper->psv_index = dev_ctx->psvs[obj_idx]->index; 2686 2687 rc = spdk_rdma_utils_get_translation(args->map, &wrapper->crc, sizeof(uint32_t), &translation); 2688 if (rc) { 2689 SPDK_ERRLOG("Memory translation failed, addr %p, length %zu\n", &wrapper->crc, sizeof(uint32_t)); 2690 args->rc = -EINVAL; 2691 } else { 2692 wrapper->crc_lkey = spdk_rdma_utils_memory_translation_get_lkey(&translation); 2693 } 2694 } 2695 2696 static int 2697 accel_mlx5_psvs_create(struct 
accel_mlx5_dev_ctx *dev_ctx) 2698 { 2699 struct accel_mlx5_psv_pool_iter_cb_args args = { 2700 .dev = dev_ctx 2701 }; 2702 char pool_name[32]; 2703 uint32_t i; 2704 uint32_t num_psvs = g_accel_mlx5.attr.num_requests; 2705 uint32_t cache_size; 2706 int rc; 2707 2708 dev_ctx->psvs = calloc(num_psvs, (sizeof(struct spdk_mlx5_psv *))); 2709 if (!dev_ctx->psvs) { 2710 SPDK_ERRLOG("Failed to alloc PSVs array\n"); 2711 return -ENOMEM; 2712 } 2713 for (i = 0; i < num_psvs; i++) { 2714 dev_ctx->psvs[i] = spdk_mlx5_create_psv(dev_ctx->pd); 2715 if (!dev_ctx->psvs[i]) { 2716 SPDK_ERRLOG("Failed to create PSV on dev %s\n", dev_ctx->context->device->name); 2717 return -EINVAL; 2718 } 2719 } 2720 2721 rc = snprintf(pool_name, sizeof(pool_name), "accel_psv_%s", dev_ctx->context->device->name); 2722 if (rc < 0) { 2723 assert(0); 2724 return -EINVAL; 2725 } 2726 cache_size = num_psvs * 3 / 4 / spdk_env_get_core_count(); 2727 args.map = spdk_rdma_utils_create_mem_map(dev_ctx->pd, NULL, 2728 IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE); 2729 if (!args.map) { 2730 return -ENOMEM; 2731 } 2732 dev_ctx->psv_pool = spdk_mempool_create_ctor(pool_name, num_psvs, 2733 sizeof(struct accel_mlx5_psv_wrapper), 2734 cache_size, SPDK_ENV_SOCKET_ID_ANY, 2735 accel_mlx5_set_psv_in_pool, &args); 2736 spdk_rdma_utils_free_mem_map(&args.map); 2737 if (!dev_ctx->psv_pool) { 2738 SPDK_ERRLOG("Failed to create PSV memory pool\n"); 2739 return -ENOMEM; 2740 } 2741 if (args.rc) { 2742 SPDK_ERRLOG("Failed to init PSV memory pool objects, rc %d\n", args.rc); 2743 return args.rc; 2744 } 2745 2746 return 0; 2747 } 2748 2749 2750 static int 2751 accel_mlx5_dev_ctx_init(struct accel_mlx5_dev_ctx *dev_ctx, struct ibv_context *dev, 2752 struct spdk_mlx5_device_caps *caps) 2753 { 2754 struct ibv_pd *pd; 2755 int rc; 2756 2757 pd = spdk_rdma_utils_get_pd(dev); 2758 if (!pd) { 2759 SPDK_ERRLOG("Failed to get PD for context %p, dev %s\n", dev, dev->device->name); 2760 return -EINVAL; 2761 } 2762 dev_ctx->context = dev; 2763 dev_ctx->pd = pd; 2764 dev_ctx->domain = spdk_rdma_utils_get_memory_domain(pd); 2765 if (!dev_ctx->domain) { 2766 return -ENOMEM; 2767 } 2768 2769 if (g_accel_mlx5.crypto_supported) { 2770 dev_ctx->crypto_multi_block = caps->crypto.multi_block_be_tweak; 2771 if (!dev_ctx->crypto_multi_block && g_accel_mlx5.attr.crypto_split_blocks) { 2772 SPDK_WARNLOG("\"crypto_split_blocks\" is set but dev %s doesn't support multi block crypto\n", 2773 dev->device->name); 2774 } 2775 rc = accel_mlx5_mkeys_create(pd, g_accel_mlx5.attr.num_requests, SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO); 2776 if (rc) { 2777 SPDK_ERRLOG("Failed to create crypto mkeys pool, rc %d, dev %s\n", rc, dev->device->name); 2778 return rc; 2779 } 2780 dev_ctx->crypto_mkeys = true; 2781 } 2782 if (g_accel_mlx5.crc32c_supported) { 2783 rc = accel_mlx5_mkeys_create(pd, g_accel_mlx5.attr.num_requests, 2784 SPDK_MLX5_MKEY_POOL_FLAG_SIGNATURE); 2785 if (rc) { 2786 SPDK_ERRLOG("Failed to create signature mkeys pool, rc %d, dev %s\n", rc, dev->device->name); 2787 return rc; 2788 } 2789 dev_ctx->sig_mkeys = true; 2790 rc = accel_mlx5_psvs_create(dev_ctx); 2791 if (rc) { 2792 SPDK_ERRLOG("Failed to create PSVs pool, rc %d, dev %s\n", rc, dev->device->name); 2793 return rc; 2794 } 2795 } 2796 2797 return 0; 2798 } 2799 2800 static struct ibv_context ** 2801 accel_mlx5_get_devices(int *_num_devs) 2802 { 2803 struct ibv_context **rdma_devs, **rdma_devs_out = NULL, *dev; 2804 struct ibv_device_attr dev_attr; 2805 size_t j; 2806 int num_devs = 0, i, rc; 2807 
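	/* Counts only Mellanox devices that also pass the allowed_devs filter applied below */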
int num_devs_out = 0; 2808 bool dev_allowed; 2809 2810 rdma_devs = rdma_get_devices(&num_devs); 2811 if (!rdma_devs || !num_devs) { 2812 *_num_devs = 0; 2813 return NULL; 2814 } 2815 2816 rdma_devs_out = calloc(num_devs + 1, sizeof(struct ibv_context *)); 2817 if (!rdma_devs_out) { 2818 SPDK_ERRLOG("Memory allocation failed\n"); 2819 rdma_free_devices(rdma_devs); 2820 *_num_devs = 0; 2821 return NULL; 2822 } 2823 2824 for (i = 0; i < num_devs; i++) { 2825 dev = rdma_devs[i]; 2826 rc = ibv_query_device(dev, &dev_attr); 2827 if (rc) { 2828 SPDK_ERRLOG("Failed to query dev %s, skipping\n", dev->device->name); 2829 continue; 2830 } 2831 if (dev_attr.vendor_id != SPDK_MLX5_VENDOR_ID_MELLANOX) { 2832 SPDK_DEBUGLOG(accel_mlx5, "dev %s is not Mellanox device, skipping\n", dev->device->name); 2833 continue; 2834 } 2835 2836 if (g_accel_mlx5.allowed_devs_count) { 2837 dev_allowed = false; 2838 for (j = 0; j < g_accel_mlx5.allowed_devs_count; j++) { 2839 if (strcmp(g_accel_mlx5.allowed_devs[j], dev->device->name) == 0) { 2840 dev_allowed = true; 2841 break; 2842 } 2843 } 2844 if (!dev_allowed) { 2845 continue; 2846 } 2847 } 2848 2849 rdma_devs_out[num_devs_out] = dev; 2850 num_devs_out++; 2851 } 2852 2853 rdma_free_devices(rdma_devs); 2854 *_num_devs = num_devs_out; 2855 2856 return rdma_devs_out; 2857 } 2858 2859 static inline bool 2860 accel_mlx5_dev_supports_crypto(struct spdk_mlx5_device_caps *caps) 2861 { 2862 return caps->crypto_supported && !caps->crypto.wrapped_import_method_aes_xts && 2863 (caps->crypto.single_block_le_tweak || 2864 caps->crypto.multi_block_le_tweak || caps->crypto.multi_block_be_tweak); 2865 } 2866 2867 static int 2868 accel_mlx5_init(void) 2869 { 2870 struct spdk_mlx5_device_caps *caps; 2871 struct ibv_context **rdma_devs, *dev; 2872 int num_devs = 0, rc = 0, i; 2873 int best_dev = -1, first_dev = 0; 2874 int best_dev_stat = 0, dev_stat; 2875 bool supports_crypto; 2876 bool find_best_dev = g_accel_mlx5.allowed_devs_count == 0; 2877 2878 if (!g_accel_mlx5.enabled) { 2879 return -EINVAL; 2880 } 2881 2882 spdk_spin_init(&g_accel_mlx5.lock); 2883 rdma_devs = accel_mlx5_get_devices(&num_devs); 2884 if (!rdma_devs || !num_devs) { 2885 return -ENODEV; 2886 } 2887 caps = calloc(num_devs, sizeof(*caps)); 2888 if (!caps) { 2889 rc = -ENOMEM; 2890 goto cleanup; 2891 } 2892 2893 g_accel_mlx5.crypto_supported = true; 2894 g_accel_mlx5.crc32c_supported = true; 2895 g_accel_mlx5.num_ctxs = 0; 2896 2897 /* Iterate devices. 
We support an offload if all devices support it */ 2898 for (i = 0; i < num_devs; i++) { 2899 dev = rdma_devs[i]; 2900 2901 rc = spdk_mlx5_device_query_caps(dev, &caps[i]); 2902 if (rc) { 2903 SPDK_ERRLOG("Failed to get crypto caps, dev %s\n", dev->device->name); 2904 goto cleanup; 2905 } 2906 supports_crypto = accel_mlx5_dev_supports_crypto(&caps[i]); 2907 if (!supports_crypto) { 2908 SPDK_DEBUGLOG(accel_mlx5, "Disable crypto support because dev %s doesn't support it\n", 2909 rdma_devs[i]->device->name); 2910 g_accel_mlx5.crypto_supported = false; 2911 } 2912 if (!caps[i].crc32c_supported) { 2913 SPDK_DEBUGLOG(accel_mlx5, "Disable crc32c support because dev %s doesn't support it\n", 2914 rdma_devs[i]->device->name); 2915 g_accel_mlx5.crc32c_supported = false; 2916 } 2917 if (find_best_dev) { 2918 /* Find device which supports max number of offloads */ 2919 dev_stat = (int)supports_crypto + (int)caps[i].crc32c_supported; 2920 if (dev_stat > best_dev_stat) { 2921 best_dev_stat = dev_stat; 2922 best_dev = i; 2923 } 2924 } 2925 } 2926 2927 /* User didn't specify devices to use, try to select the best one */ 2928 if (find_best_dev) { 2929 if (best_dev == -1) { 2930 best_dev = 0; 2931 } 2932 g_accel_mlx5.crypto_supported = accel_mlx5_dev_supports_crypto(&caps[best_dev]); 2933 g_accel_mlx5.crc32c_supported = caps[best_dev].crc32c_supported; 2934 SPDK_NOTICELOG("Select dev %s, crypto %d, crc32c %d\n", rdma_devs[best_dev]->device->name, 2935 g_accel_mlx5.crypto_supported, g_accel_mlx5.crc32c_supported); 2936 first_dev = best_dev; 2937 num_devs = 1; 2938 if (g_accel_mlx5.crypto_supported) { 2939 const char *const dev_name[] = { rdma_devs[best_dev]->device->name }; 2940 /* Let mlx5 library know which device to use */ 2941 spdk_mlx5_crypto_devs_allow(dev_name, 1); 2942 } 2943 } else { 2944 SPDK_NOTICELOG("Found %d devices, crypto %d\n", num_devs, g_accel_mlx5.crypto_supported); 2945 } 2946 2947 g_accel_mlx5.dev_ctxs = calloc(num_devs, sizeof(*g_accel_mlx5.dev_ctxs)); 2948 if (!g_accel_mlx5.dev_ctxs) { 2949 SPDK_ERRLOG("Memory allocation failed\n"); 2950 rc = -ENOMEM; 2951 goto cleanup; 2952 } 2953 2954 for (i = first_dev; i < first_dev + num_devs; i++) { 2955 rc = accel_mlx5_dev_ctx_init(&g_accel_mlx5.dev_ctxs[g_accel_mlx5.num_ctxs++], 2956 rdma_devs[i], &caps[i]); 2957 if (rc) { 2958 goto cleanup; 2959 } 2960 } 2961 2962 SPDK_NOTICELOG("Accel framework mlx5 initialized, found %d devices.\n", num_devs); 2963 spdk_io_device_register(&g_accel_mlx5, accel_mlx5_create_cb, accel_mlx5_destroy_cb, 2964 sizeof(struct accel_mlx5_io_channel), "accel_mlx5"); 2965 g_accel_mlx5.initialized = true; 2966 free(rdma_devs); 2967 free(caps); 2968 2969 if (g_accel_mlx5.attr.enable_driver) { 2970 SPDK_NOTICELOG("Enabling mlx5 platform driver\n"); 2971 spdk_accel_driver_register(&g_accel_mlx5_driver); 2972 spdk_accel_set_driver(g_accel_mlx5_driver.name); 2973 spdk_mlx5_umr_implementer_register(true); 2974 } 2975 2976 return 0; 2977 2978 cleanup: 2979 free(rdma_devs); 2980 free(caps); 2981 accel_mlx5_free_resources(); 2982 spdk_spin_destroy(&g_accel_mlx5.lock); 2983 2984 return rc; 2985 } 2986 2987 static void 2988 accel_mlx5_write_config_json(struct spdk_json_write_ctx *w) 2989 { 2990 if (g_accel_mlx5.enabled) { 2991 spdk_json_write_object_begin(w); 2992 spdk_json_write_named_string(w, "method", "mlx5_scan_accel_module"); 2993 spdk_json_write_named_object_begin(w, "params"); 2994 spdk_json_write_named_uint16(w, "qp_size", g_accel_mlx5.attr.qp_size); 2995 spdk_json_write_named_uint32(w, "num_requests", 
g_accel_mlx5.attr.num_requests); 2996 if (g_accel_mlx5.attr.allowed_devs) { 2997 spdk_json_write_named_string(w, "allowed_devs", g_accel_mlx5.attr.allowed_devs); 2998 } 2999 spdk_json_write_named_uint16(w, "crypto_split_blocks", g_accel_mlx5.attr.crypto_split_blocks); 3000 spdk_json_write_named_bool(w, "enable_driver", g_accel_mlx5.attr.enable_driver); 3001 spdk_json_write_object_end(w); 3002 spdk_json_write_object_end(w); 3003 } 3004 } 3005 3006 static size_t 3007 accel_mlx5_get_ctx_size(void) 3008 { 3009 return sizeof(struct accel_mlx5_task); 3010 } 3011 3012 static int 3013 accel_mlx5_crypto_key_init(struct spdk_accel_crypto_key *key) 3014 { 3015 struct spdk_mlx5_crypto_dek_create_attr attr = {}; 3016 struct spdk_mlx5_crypto_keytag *keytag; 3017 int rc; 3018 3019 if (!key || !key->key || !key->key2 || !key->key_size || !key->key2_size) { 3020 return -EINVAL; 3021 } 3022 3023 attr.dek = calloc(1, key->key_size + key->key2_size); 3024 if (!attr.dek) { 3025 return -ENOMEM; 3026 } 3027 3028 memcpy(attr.dek, key->key, key->key_size); 3029 memcpy(attr.dek + key->key_size, key->key2, key->key2_size); 3030 attr.dek_len = key->key_size + key->key2_size; 3031 3032 rc = spdk_mlx5_crypto_keytag_create(&attr, &keytag); 3033 spdk_memset_s(attr.dek, attr.dek_len, 0, attr.dek_len); 3034 free(attr.dek); 3035 if (rc) { 3036 SPDK_ERRLOG("Failed to create a keytag, rc %d\n", rc); 3037 return rc; 3038 } 3039 3040 key->priv = keytag; 3041 3042 return 0; 3043 } 3044 3045 static void 3046 accel_mlx5_crypto_key_deinit(struct spdk_accel_crypto_key *key) 3047 { 3048 if (!key || key->module_if != &g_accel_mlx5.module || !key->priv) { 3049 return; 3050 } 3051 3052 spdk_mlx5_crypto_keytag_destroy(key->priv); 3053 } 3054 3055 static void 3056 accel_mlx5_dump_stats_json(struct spdk_json_write_ctx *w, const char *header, 3057 const struct accel_mlx5_stats *stats) 3058 { 3059 double idle_polls_percentage = 0; 3060 double cpls_per_poll = 0; 3061 uint64_t total_tasks = 0; 3062 int i; 3063 3064 if (stats->polls) { 3065 idle_polls_percentage = (double) stats->idle_polls * 100 / stats->polls; 3066 } 3067 if (stats->polls > stats->idle_polls) { 3068 cpls_per_poll = (double) stats->completions / (stats->polls - stats->idle_polls); 3069 } 3070 for (i = 0; i < ACCEL_MLX5_OPC_LAST; i++) { 3071 total_tasks += stats->opcodes[i]; 3072 } 3073 3074 spdk_json_write_named_object_begin(w, header); 3075 3076 spdk_json_write_named_object_begin(w, "umrs"); 3077 spdk_json_write_named_uint64(w, "crypto_umrs", stats->crypto_umrs); 3078 spdk_json_write_named_uint64(w, "sig_umrs", stats->sig_umrs); 3079 spdk_json_write_named_uint64(w, "total", stats->crypto_umrs + stats->sig_umrs); 3080 spdk_json_write_object_end(w); 3081 3082 spdk_json_write_named_object_begin(w, "rdma"); 3083 spdk_json_write_named_uint64(w, "read", stats->rdma_reads); 3084 spdk_json_write_named_uint64(w, "write", stats->rdma_writes); 3085 spdk_json_write_named_uint64(w, "total", stats->rdma_reads + stats->rdma_writes); 3086 spdk_json_write_object_end(w); 3087 3088 spdk_json_write_named_object_begin(w, "polling"); 3089 spdk_json_write_named_uint64(w, "polls", stats->polls); 3090 spdk_json_write_named_uint64(w, "idle_polls", stats->idle_polls); 3091 spdk_json_write_named_uint64(w, "completions", stats->completions); 3092 spdk_json_write_named_double(w, "idle_polls_percentage", idle_polls_percentage); 3093 spdk_json_write_named_double(w, "cpls_per_poll", cpls_per_poll); 3094 spdk_json_write_named_uint64(w, "nomem_qdepth", stats->nomem_qdepth); 3095 spdk_json_write_named_uint64(w, 
"nomem_mkey", stats->nomem_mkey); 3096 spdk_json_write_object_end(w); 3097 3098 spdk_json_write_named_object_begin(w, "tasks"); 3099 spdk_json_write_named_uint64(w, "copy", stats->opcodes[ACCEL_MLX5_OPC_COPY]); 3100 spdk_json_write_named_uint64(w, "crypto", stats->opcodes[ACCEL_MLX5_OPC_CRYPTO]); 3101 spdk_json_write_named_uint64(w, "crypto_mkey", stats->opcodes[ACCEL_MLX5_OPC_CRYPTO_MKEY]); 3102 spdk_json_write_named_uint64(w, "crc32c", stats->opcodes[ACCEL_MLX5_OPC_CRC32C]); 3103 spdk_json_write_named_uint64(w, "total", total_tasks); 3104 spdk_json_write_object_end(w); 3105 3106 spdk_json_write_object_end(w); 3107 } 3108 3109 static void 3110 accel_mlx5_dump_channel_stat(struct spdk_io_channel_iter *i) 3111 { 3112 struct accel_mlx5_stats ch_stat = {}; 3113 struct accel_mlx5_dump_stats_ctx *ctx; 3114 struct spdk_io_channel *_ch; 3115 struct accel_mlx5_io_channel *ch; 3116 struct accel_mlx5_dev *dev; 3117 uint32_t j; 3118 3119 ctx = spdk_io_channel_iter_get_ctx(i); 3120 _ch = spdk_io_channel_iter_get_channel(i); 3121 ch = spdk_io_channel_get_ctx(_ch); 3122 3123 if (ctx->level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) { 3124 spdk_json_write_object_begin(ctx->w); 3125 spdk_json_write_named_object_begin(ctx->w, spdk_thread_get_name(spdk_get_thread())); 3126 } 3127 if (ctx->level == ACCEL_MLX5_DUMP_STAT_LEVEL_DEV) { 3128 spdk_json_write_named_array_begin(ctx->w, "devices"); 3129 } 3130 3131 for (j = 0; j < ch->num_devs; j++) { 3132 dev = &ch->devs[j]; 3133 /* Save grand total and channel stats */ 3134 accel_mlx5_add_stats(&ctx->total, &dev->stats); 3135 accel_mlx5_add_stats(&ch_stat, &dev->stats); 3136 if (ctx->level == ACCEL_MLX5_DUMP_STAT_LEVEL_DEV) { 3137 spdk_json_write_object_begin(ctx->w); 3138 accel_mlx5_dump_stats_json(ctx->w, dev->dev_ctx->context->device->name, &dev->stats); 3139 spdk_json_write_object_end(ctx->w); 3140 } 3141 } 3142 3143 if (ctx->level == ACCEL_MLX5_DUMP_STAT_LEVEL_DEV) { 3144 spdk_json_write_array_end(ctx->w); 3145 } 3146 if (ctx->level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) { 3147 accel_mlx5_dump_stats_json(ctx->w, "channel_total", &ch_stat); 3148 spdk_json_write_object_end(ctx->w); 3149 spdk_json_write_object_end(ctx->w); 3150 } 3151 3152 spdk_for_each_channel_continue(i, 0); 3153 } 3154 3155 static void 3156 accel_mlx5_dump_channel_stat_done(struct spdk_io_channel_iter *i, int status) 3157 { 3158 struct accel_mlx5_dump_stats_ctx *ctx; 3159 3160 ctx = spdk_io_channel_iter_get_ctx(i); 3161 3162 spdk_spin_lock(&g_accel_mlx5.lock); 3163 /* Add statistics from destroyed channels */ 3164 accel_mlx5_add_stats(&ctx->total, &g_accel_mlx5.stats); 3165 spdk_spin_unlock(&g_accel_mlx5.lock); 3166 3167 if (ctx->level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) { 3168 /* channels[] */ 3169 spdk_json_write_array_end(ctx->w); 3170 } 3171 3172 accel_mlx5_dump_stats_json(ctx->w, "total", &ctx->total); 3173 3174 /* Ends the whole response which was begun in accel_mlx5_dump_stats */ 3175 spdk_json_write_object_end(ctx->w); 3176 3177 ctx->cb(ctx->ctx, 0); 3178 free(ctx); 3179 } 3180 3181 int 3182 accel_mlx5_dump_stats(struct spdk_json_write_ctx *w, enum accel_mlx5_dump_state_level level, 3183 accel_mlx5_dump_stat_done_cb cb, void *ctx) 3184 { 3185 struct accel_mlx5_dump_stats_ctx *stat_ctx; 3186 3187 if (!w || !cb) { 3188 return -EINVAL; 3189 } 3190 if (!g_accel_mlx5.initialized) { 3191 return -ENODEV; 3192 } 3193 3194 stat_ctx = calloc(1, sizeof(*stat_ctx)); 3195 if (!stat_ctx) { 3196 return -ENOMEM; 3197 } 3198 stat_ctx->cb = cb; 3199 stat_ctx->ctx = ctx; 3200 stat_ctx->level = level; 3201 
	stat_ctx->w = w;

	spdk_json_write_object_begin(w);

	if (level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) {
		spdk_json_write_named_array_begin(w, "channels");
	}

	spdk_for_each_channel(&g_accel_mlx5, accel_mlx5_dump_channel_stat, stat_ctx,
			      accel_mlx5_dump_channel_stat_done);

	return 0;
}

static bool
accel_mlx5_crypto_supports_cipher(enum spdk_accel_cipher cipher, size_t key_size)
{
	switch (cipher) {
	case SPDK_ACCEL_CIPHER_AES_XTS:
		return key_size == SPDK_ACCEL_AES_XTS_128_KEY_SIZE || key_size == SPDK_ACCEL_AES_XTS_256_KEY_SIZE;
	default:
		return false;
	}
}

static int
accel_mlx5_get_memory_domains(struct spdk_memory_domain **domains, int array_size)
{
	int i, size;

	if (!domains || !array_size) {
		return (int)g_accel_mlx5.num_ctxs;
	}

	size = spdk_min(array_size, (int)g_accel_mlx5.num_ctxs);

	for (i = 0; i < size; i++) {
		domains[i] = g_accel_mlx5.dev_ctxs[i].domain;
	}

	return (int)g_accel_mlx5.num_ctxs;
}

static inline struct accel_mlx5_dev *
accel_mlx5_ch_get_dev_by_pd(struct accel_mlx5_io_channel *accel_ch, struct ibv_pd *pd)
{
	uint32_t i;

	for (i = 0; i < accel_ch->num_devs; i++) {
		if (accel_ch->devs[i].dev_ctx->pd == pd) {
			return &accel_ch->devs[i];
		}
	}

	return NULL;
}

static inline int
accel_mlx5_task_assign_qp_by_domain_pd(struct accel_mlx5_task *task,
				       struct accel_mlx5_io_channel *accel_ch, struct spdk_memory_domain *domain)
{
	struct spdk_memory_domain_rdma_ctx *domain_ctx;
	struct accel_mlx5_dev *dev;
	struct ibv_pd *domain_pd;
	size_t ctx_size;

	domain_ctx = spdk_memory_domain_get_user_context(domain, &ctx_size);
	if (spdk_unlikely(!domain_ctx || domain_ctx->size != ctx_size)) {
		SPDK_ERRLOG("no domain context or wrong size, ctx ptr %p, size %zu\n", domain_ctx, ctx_size);
		return -ENOTSUP;
	}
	domain_pd = domain_ctx->ibv_pd;
	if (spdk_unlikely(!domain_pd)) {
		SPDK_ERRLOG("no destination domain PD, task %p\n", task);
		return -ENOTSUP;
	}
	dev = accel_mlx5_ch_get_dev_by_pd(accel_ch, domain_pd);
	if (spdk_unlikely(!dev)) {
		SPDK_ERRLOG("No dev for PD %p dev %s\n", domain_pd, domain_pd->context->device->name);
		return -ENODEV;
	}

	task->qp = &dev->qp;

	return 0;
}

static inline int
accel_mlx5_driver_examine_sequence(struct spdk_accel_sequence *seq,
				   struct accel_mlx5_io_channel *accel_ch)
{
	struct spdk_accel_task *first_base = spdk_accel_sequence_first_task(seq);
	struct accel_mlx5_task *first = SPDK_CONTAINEROF(first_base, struct accel_mlx5_task, base);
	struct spdk_accel_task *next_base = TAILQ_NEXT(first_base, seq_link);
	struct accel_mlx5_task *next;
	int rc;

	accel_mlx5_task_reset(first);
	SPDK_DEBUGLOG(accel_mlx5, "first %p, opc %d; next %p, opc %d\n", first_base, first_base->op_code,
		      next_base, next_base ? next_base->op_code : -1);
	if (next_base) {
		switch (first_base->op_code) {
		case SPDK_ACCEL_OPC_COPY:
			if (next_base->op_code == SPDK_ACCEL_OPC_DECRYPT &&
			    first_base->dst_domain && spdk_memory_domain_get_dma_device_type(first_base->dst_domain) ==
			    SPDK_DMA_DEVICE_TYPE_RDMA && TAILQ_NEXT(next_base, seq_link) == NULL) {
				next = SPDK_CONTAINEROF(next_base, struct accel_mlx5_task, base);
				rc = accel_mlx5_task_assign_qp_by_domain_pd(next, accel_ch, first_base->dst_domain);
				if (spdk_unlikely(rc)) {
					return rc;
				}
				/* Update decrypt task memory domain, complete copy task */
				SPDK_DEBUGLOG(accel_mlx5, "Merge copy task (%p) and decrypt (%p)\n", first, next);
				next_base->dst_domain = first_base->dst_domain;
				next_base->dst_domain_ctx = first_base->dst_domain_ctx;
				accel_mlx5_task_reset(next);
				next->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO_MKEY;
				next->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_WIRE;
				next->needs_data_transfer = 1;
				next->inplace = 1;
				spdk_accel_task_complete(first_base, 0);
				return 0;
			}
			break;
		case SPDK_ACCEL_OPC_ENCRYPT:
			if (next_base->op_code == SPDK_ACCEL_OPC_COPY &&
			    next_base->dst_domain && spdk_memory_domain_get_dma_device_type(next_base->dst_domain) ==
			    SPDK_DMA_DEVICE_TYPE_RDMA && TAILQ_NEXT(next_base, seq_link) == NULL) {
				rc = accel_mlx5_task_assign_qp_by_domain_pd(first, accel_ch, next_base->dst_domain);
				if (spdk_unlikely(rc)) {
					return rc;
				}

				/* Update encrypt task memory domain, complete copy task */
				SPDK_DEBUGLOG(accel_mlx5, "Merge copy task (%p) and encrypt (%p)\n",
					      SPDK_CONTAINEROF(next_base, struct accel_mlx5_task, base), first);
				first_base->dst_domain = next_base->dst_domain;
				first_base->dst_domain_ctx = next_base->dst_domain_ctx;
				first->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO_MKEY;
				first->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_WIRE;
				first->needs_data_transfer = 1;
				first->inplace = 1;
				spdk_accel_task_complete(next_base, 0);
				return 0;
			}
			break;

		default:
			break;
		}
	}

	SPDK_DEBUGLOG(accel_mlx5, "seq %p, task %p nothing to merge\n", seq, first_base);
	/* Nothing to merge, execute tasks one by one */
	accel_mlx5_task_assign_qp(first, accel_ch);
	accel_mlx5_task_init_opcode(first);

	return 0;
}

static inline int
accel_mlx5_execute_sequence(struct spdk_io_channel *ch, struct spdk_accel_sequence *seq)
{
	struct accel_mlx5_io_channel *accel_ch = spdk_io_channel_get_ctx(ch);
	struct spdk_accel_task *task;
	struct accel_mlx5_task *mlx5_task;
	int rc;

	rc = accel_mlx5_driver_examine_sequence(seq, accel_ch);
	if (spdk_unlikely(rc)) {
		return rc;
	}
	task = spdk_accel_sequence_first_task(seq);
	assert(task);
	mlx5_task = SPDK_CONTAINEROF(task, struct accel_mlx5_task, base);
	mlx5_task->driver_seq = 1;

	SPDK_DEBUGLOG(accel_mlx5, "driver starts seq %p, ch %p, task %p\n", seq, accel_ch, task);

	return _accel_mlx5_submit_tasks(accel_ch, task);
}

static struct accel_mlx5_module g_accel_mlx5 = {
	.module = {
		.module_init = accel_mlx5_init,
		.module_fini = accel_mlx5_deinit,
		.write_config_json = accel_mlx5_write_config_json,
		.get_ctx_size = accel_mlx5_get_ctx_size,
		.name = "mlx5",
		.supports_opcode = accel_mlx5_supports_opcode,
		.get_io_channel =
accel_mlx5_get_io_channel, 3396 .submit_tasks = accel_mlx5_submit_tasks, 3397 .crypto_key_init = accel_mlx5_crypto_key_init, 3398 .crypto_key_deinit = accel_mlx5_crypto_key_deinit, 3399 .crypto_supports_cipher = accel_mlx5_crypto_supports_cipher, 3400 .get_memory_domains = accel_mlx5_get_memory_domains, 3401 } 3402 }; 3403 3404 static struct spdk_accel_driver g_accel_mlx5_driver = { 3405 .name = "mlx5", 3406 .execute_sequence = accel_mlx5_execute_sequence, 3407 .get_io_channel = accel_mlx5_get_io_channel 3408 }; 3409 3410 SPDK_LOG_REGISTER_COMPONENT(accel_mlx5) 3411
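/*
 * Example (illustrative only) of the configuration JSON emitted by accel_mlx5_write_config_json()
 * and accepted by the mlx5_scan_accel_module RPC; the parameter values shown are arbitrary:
 *
 *	{
 *	  "method": "mlx5_scan_accel_module",
 *	  "params": {
 *	    "qp_size": 256,
 *	    "num_requests": 2047,
 *	    "allowed_devs": "mlx5_0",
 *	    "crypto_split_blocks": 0,
 *	    "enable_driver": false
 *	  }
 *	}
 */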