/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#include "spdk/env.h"
#include "spdk/thread.h"
#include "spdk/queue.h"
#include "spdk/log.h"
#include "spdk/string.h"
#include "spdk/likely.h"
#include "spdk/dma.h"
#include "spdk/json.h"
#include "spdk/util.h"

#include "spdk_internal/mlx5.h"
#include "spdk_internal/rdma_utils.h"
#include "spdk/accel_module.h"
#include "spdk_internal/assert.h"
#include "spdk_internal/sgl.h"
#include "accel_mlx5.h"

#include <infiniband/mlx5dv.h>
#include <rdma/rdma_cma.h>

#define ACCEL_MLX5_QP_SIZE (256u)
#define ACCEL_MLX5_NUM_REQUESTS (2048u - 1)
#define ACCEL_MLX5_RECOVER_POLLER_PERIOD_US (10000)
#define ACCEL_MLX5_MAX_SGE (16u)
#define ACCEL_MLX5_MAX_WC (64u)
#define ACCEL_MLX5_MAX_MKEYS_IN_TASK (16u)

/* Assume we have up to 16 devices */
#define ACCEL_MLX5_ALLOWED_DEVS_MAX_LEN ((SPDK_MLX5_DEV_MAX_NAME_LEN + 1) * 16)

#define ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, task)	\
do {							\
	assert((qp)->wrs_submitted < (qp)->wrs_max);	\
	(qp)->wrs_submitted++;				\
	(qp)->ring_db = true;				\
	assert((task)->num_wrs < UINT16_MAX);		\
	(task)->num_wrs++;				\
} while (0)

#define ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, task)	\
do {									\
	assert((dev)->wrs_in_cq < (dev)->wrs_in_cq_max);		\
	(dev)->wrs_in_cq++;						\
	assert((qp)->wrs_submitted < (qp)->wrs_max);			\
	(qp)->wrs_submitted++;						\
	(qp)->ring_db = true;						\
	assert((task)->num_wrs < UINT16_MAX);				\
	(task)->num_wrs++;						\
} while (0)

struct accel_mlx5_io_channel;
struct accel_mlx5_task;

struct accel_mlx5_dev_ctx {
	struct ibv_context *context;
	struct ibv_pd *pd;
	struct spdk_memory_domain *domain;
	struct spdk_mempool *psv_pool;
	TAILQ_ENTRY(accel_mlx5_dev_ctx) link;
	struct spdk_mlx5_psv **psvs;
	bool mkeys;
	bool crypto_mkeys;
	bool sig_mkeys;
	bool crypto_multi_block;
};

enum accel_mlx5_opcode {
	ACCEL_MLX5_OPC_COPY,
	ACCEL_MLX5_OPC_CRYPTO,
	ACCEL_MLX5_OPC_CRC32C,
	ACCEL_MLX5_OPC_CRYPTO_MKEY,
	ACCEL_MLX5_OPC_MKEY,
	ACCEL_MLX5_OPC_LAST
};

SPDK_STATIC_ASSERT(ACCEL_MLX5_OPC_LAST <= 0xf,
		   "accel opcode exceeds 4 bits, update accel_mlx5 struct");

struct accel_mlx5_stats {
	uint64_t crypto_umrs;
	uint64_t sig_umrs;
	uint64_t umrs;
	uint64_t rdma_reads;
	uint64_t rdma_writes;
	uint64_t polls;
	uint64_t idle_polls;
	uint64_t completions;
	uint64_t nomem_qdepth;
	uint64_t nomem_mkey;
	uint64_t opcodes[ACCEL_MLX5_OPC_LAST];
};

struct accel_mlx5_module {
	struct spdk_accel_module_if module;
	struct accel_mlx5_stats stats;
	struct spdk_spinlock lock;
	struct accel_mlx5_dev_ctx *dev_ctxs;
	uint32_t num_ctxs;
	struct accel_mlx5_attr attr;
	char **allowed_devs;
	size_t allowed_devs_count;
	bool initialized;
	bool enabled;
	bool crypto_supported;
	bool crc32c_supported;
};

struct accel_mlx5_sge {
	uint32_t src_sge_count;
	uint32_t dst_sge_count;
	struct ibv_sge src_sge[ACCEL_MLX5_MAX_SGE];
	struct ibv_sge dst_sge[ACCEL_MLX5_MAX_SGE];
};

struct accel_mlx5_iov_sgl {
	struct iovec *iov;
	uint32_t iovcnt;
	uint32_t iov_offset;
};

struct accel_mlx5_psv_wrapper {
	uint32_t psv_index;
	struct {
		uint32_t error : 1;
		uint32_t reserved : 31;
	} bits;
	/* mlx5 engine requires DMAable memory, use this member to copy user's crc
	 * value since we don't know which memory it is in */
	uint32_t crc;
	uint32_t crc_lkey;
};

struct accel_mlx5_task {
	struct spdk_accel_task base;
	struct accel_mlx5_iov_sgl src;
	struct accel_mlx5_iov_sgl dst;
	struct accel_mlx5_qp *qp;
	STAILQ_ENTRY(accel_mlx5_task) link;
	uint16_t num_reqs;
	uint16_t num_completed_reqs;
	uint16_t num_submitted_reqs;
	uint16_t num_ops; /* number of allocated mkeys or number of operations */
	uint16_t num_wrs; /* Number of outstanding operations which consume qp slot */
	union {
		struct {
			uint16_t blocks_per_req;
			uint16_t num_processed_blocks;
			uint16_t num_blocks;
		};
		struct {
			struct accel_mlx5_psv_wrapper *psv;
			uint32_t last_umr_len;
			uint8_t last_mkey_idx;
		};
	};
	union {
		uint16_t raw;
		struct {
			uint16_t inplace : 1;
			uint16_t driver_seq : 1;
			uint16_t needs_data_transfer : 1;
			uint16_t enc_order : 2;
			uint16_t mlx5_opcode: 4;
		};
	};
	/* Keep this array last since not all elements might be accessed, this reduces the amount of data to be
	 * cached */
	struct spdk_mlx5_mkey_pool_obj *mkeys[ACCEL_MLX5_MAX_MKEYS_IN_TASK];
};

SPDK_STATIC_ASSERT(ACCEL_MLX5_MAX_MKEYS_IN_TASK <= UINT8_MAX, "uint8_t is used to iterate mkeys");

struct accel_mlx5_qp {
	struct spdk_mlx5_qp *qp;
	struct ibv_qp *verbs_qp;
	struct accel_mlx5_dev *dev;
	/* tasks submitted to HW. We can't complete a task even in error case until we reap completions for all
	 * submitted requests */
	STAILQ_HEAD(, accel_mlx5_task) in_hw;
	uint16_t wrs_submitted;
	uint16_t wrs_max;
	bool ring_db;
	bool recovering;
	struct spdk_poller *recover_poller;
};

struct accel_mlx5_dev {
	struct accel_mlx5_qp qp;
	struct spdk_mlx5_cq *cq;
	struct spdk_mlx5_mkey_pool *mkeys;
	struct spdk_mlx5_mkey_pool *crypto_mkeys;
	struct spdk_mlx5_mkey_pool *sig_mkeys;
	struct spdk_rdma_utils_mem_map *mmap;
	struct accel_mlx5_dev_ctx *dev_ctx;
	struct spdk_io_channel *ch;
	uint16_t wrs_in_cq;
	uint16_t wrs_in_cq_max;
	uint16_t crypto_split_blocks;
	bool crypto_multi_block;
	/* Pending tasks waiting for request resources */
	STAILQ_HEAD(, accel_mlx5_task) nomem;
	TAILQ_ENTRY(accel_mlx5_dev) link;
	struct accel_mlx5_stats stats;
};

struct accel_mlx5_io_channel {
	struct accel_mlx5_dev *devs;
	struct spdk_poller *poller;
	uint16_t num_devs;
	/* Index in \b devs to be used for operations in round-robin way */
	uint16_t dev_idx;
	bool poller_handler_registered;
};

struct accel_mlx5_task_operations {
	int (*init)(struct accel_mlx5_task *task);
	int (*process)(struct accel_mlx5_task *task);
	int (*cont)(struct accel_mlx5_task *task);
	void (*complete)(struct accel_mlx5_task *task);
};

struct accel_mlx5_psv_pool_iter_cb_args {
	struct accel_mlx5_dev_ctx *dev;
	struct spdk_rdma_utils_mem_map *map;
	int rc;
};

struct accel_mlx5_dump_stats_ctx {
	struct accel_mlx5_stats total;
	struct spdk_json_write_ctx *w;
	enum accel_mlx5_dump_state_level level;
	accel_mlx5_dump_stat_done_cb cb;
	void *ctx;
};

static struct accel_mlx5_module g_accel_mlx5;
static struct spdk_accel_driver g_accel_mlx5_driver;

static inline int accel_mlx5_execute_sequence(struct spdk_io_channel *ch,
		struct spdk_accel_sequence *seq);
static inline void accel_mlx5_task_complete(struct accel_mlx5_task
		*mlx5_task);

static inline void
accel_mlx5_iov_sgl_init(struct accel_mlx5_iov_sgl *s, struct iovec *iov, uint32_t iovcnt)
{
	s->iov = iov;
	s->iovcnt = iovcnt;
	s->iov_offset = 0;
}

static inline void
accel_mlx5_iov_sgl_advance(struct accel_mlx5_iov_sgl *s, uint32_t step)
{
	s->iov_offset += step;
	while (s->iovcnt > 0) {
		assert(s->iov != NULL);
		if (s->iov_offset < s->iov->iov_len) {
			break;
		}

		s->iov_offset -= s->iov->iov_len;
		s->iov++;
		s->iovcnt--;
	}
}

static inline void
accel_mlx5_iov_sgl_unwind(struct accel_mlx5_iov_sgl *s, uint32_t max_iovs, uint32_t step)
{
	SPDK_DEBUGLOG(accel_mlx5, "iov %p, iovcnt %u, max %u, offset %u, step %u\n", s->iov, s->iovcnt,
		      max_iovs, s->iov_offset, step);
	while (s->iovcnt <= max_iovs) {
		assert(s->iov != NULL);
		if (s->iov_offset >= step) {
			s->iov_offset -= step;
			SPDK_DEBUGLOG(accel_mlx5, "\tEND, iov %p, iovcnt %u, offset %u\n", s->iov, s->iovcnt,
				      s->iov_offset);
			return;
		}
		step -= s->iov_offset;
		s->iov--;
		s->iovcnt++;
		s->iov_offset = s->iov->iov_len;
		SPDK_DEBUGLOG(accel_mlx5, "\tiov %p, iovcnt %u, offset %u, step %u\n", s->iov, s->iovcnt,
			      s->iov_offset, step);
	}

	SPDK_ERRLOG("Can't unwind iovs, remaining %u\n", step);
	assert(0);
}

static inline int
accel_mlx5_sge_unwind(struct ibv_sge *sge, uint32_t sge_count, uint32_t step)
{
	int i;

	assert(sge_count > 0);
	SPDK_DEBUGLOG(accel_mlx5, "sge %p, count %u, step %u\n", sge, sge_count, step);
	for (i = (int)sge_count - 1; i >= 0; i--) {
		if (sge[i].length > step) {
			sge[i].length -= step;
			SPDK_DEBUGLOG(accel_mlx5, "\tsge[%u] len %u, step %u\n", i, sge[i].length, step);
			return (int)i + 1;
		}
		SPDK_DEBUGLOG(accel_mlx5, "\tsge[%u] len %u, step %u\n", i, sge[i].length, step);
		step -= sge[i].length;
	}

	SPDK_ERRLOG("Can't unwind sge, remaining %u\n", step);
	assert(step == 0);

	return 0;
}

static inline void
accel_mlx5_crypto_task_complete(struct accel_mlx5_task *task)
{
	struct accel_mlx5_dev *dev = task->qp->dev;

	assert(task->num_ops);
	spdk_mlx5_mkey_pool_put_bulk(dev->crypto_mkeys, task->mkeys, task->num_ops);
	spdk_accel_task_complete(&task->base, 0);
}

static inline void
accel_mlx5_task_fail(struct accel_mlx5_task *task, int rc)
{
	struct accel_mlx5_dev *dev = task->qp->dev;
	struct spdk_accel_task *next;
	struct spdk_accel_sequence *seq;
	bool driver_seq;

	assert(task->num_reqs == task->num_completed_reqs);
	SPDK_DEBUGLOG(accel_mlx5, "Fail task %p, opc %d, rc %d\n", task, task->base.op_code, rc);

	if (task->num_ops) {
		if (task->mlx5_opcode == ACCEL_MLX5_OPC_CRYPTO || task->mlx5_opcode == ACCEL_MLX5_OPC_CRYPTO_MKEY) {
			spdk_mlx5_mkey_pool_put_bulk(dev->crypto_mkeys, task->mkeys, task->num_ops);
		}
		if (task->mlx5_opcode == ACCEL_MLX5_OPC_CRC32C) {
			spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, task->mkeys, task->num_ops);
			spdk_mempool_put(dev->dev_ctx->psv_pool, task->psv);
		}
		if (task->mlx5_opcode == ACCEL_MLX5_OPC_MKEY) {
			spdk_mlx5_mkey_pool_put_bulk(dev->mkeys, task->mkeys, task->num_ops);
		}
	}
	next = spdk_accel_sequence_next_task(&task->base);
	seq = task->base.seq;
	driver_seq = task->driver_seq;

	assert(task->num_reqs == task->num_completed_reqs);
	SPDK_DEBUGLOG(accel_mlx5, "Fail task %p, opc %d, rc %d\n",
		      task, task->mlx5_opcode, rc);
	spdk_accel_task_complete(&task->base, rc);

	if (driver_seq) {
		struct spdk_io_channel *ch = task->qp->dev->ch;

		assert(seq);
		if (next) {
			accel_mlx5_execute_sequence(ch, seq);
		} else {
			spdk_accel_sequence_continue(seq);
		}
	}
}

static int
accel_mlx5_translate_addr(void *addr, size_t size, struct spdk_memory_domain *domain,
			  void *domain_ctx, struct accel_mlx5_dev *dev, struct ibv_sge *sge)
{
	struct spdk_rdma_utils_memory_translation map_translation;
	struct spdk_memory_domain_translation_result domain_translation;
	struct spdk_memory_domain_translation_ctx local_ctx;
	int rc;

	if (domain) {
		domain_translation.size = sizeof(struct spdk_memory_domain_translation_result);
		local_ctx.size = sizeof(local_ctx);
		local_ctx.rdma.ibv_qp = dev->qp.verbs_qp;
		rc = spdk_memory_domain_translate_data(domain, domain_ctx, dev->dev_ctx->domain,
						       &local_ctx, addr, size, &domain_translation);
		if (spdk_unlikely(rc || domain_translation.iov_count != 1)) {
			SPDK_ERRLOG("Memory domain translation failed, addr %p, length %zu, iovcnt %u\n", addr, size,
				    domain_translation.iov_count);
			if (rc == 0) {
				rc = -EINVAL;
			}

			return rc;
		}
		sge->lkey = domain_translation.rdma.lkey;
		sge->addr = (uint64_t) domain_translation.iov.iov_base;
		sge->length = domain_translation.iov.iov_len;
	} else {
		rc = spdk_rdma_utils_get_translation(dev->mmap, addr, size,
						     &map_translation);
		if (spdk_unlikely(rc)) {
			SPDK_ERRLOG("Memory translation failed, addr %p, length %zu\n", addr, size);
			return rc;
		}
		sge->lkey = spdk_rdma_utils_memory_translation_get_lkey(&map_translation);
		sge->addr = (uint64_t)addr;
		sge->length = size;
	}

	return 0;
}

static inline int
accel_mlx5_fill_block_sge(struct accel_mlx5_dev *dev, struct ibv_sge *sge,
			  struct accel_mlx5_iov_sgl *iovs, uint32_t len, uint32_t *_remaining,
			  struct spdk_memory_domain *domain, void *domain_ctx)
{
	void *addr;
	uint32_t remaining = len;
	uint32_t size;
	int i = 0;
	int rc;

	while (remaining && i < (int)ACCEL_MLX5_MAX_SGE) {
		size = spdk_min(remaining, iovs->iov->iov_len - iovs->iov_offset);
		addr = (void *)iovs->iov->iov_base + iovs->iov_offset;
		rc = accel_mlx5_translate_addr(addr, size, domain, domain_ctx, dev, &sge[i]);
		if (spdk_unlikely(rc)) {
			return rc;
		}
		SPDK_DEBUGLOG(accel_mlx5, "\t sge[%d]: lkey %u, len %u, addr %"PRIx64"\n", i, sge[i].lkey,
			      sge[i].length, sge[i].addr);
		accel_mlx5_iov_sgl_advance(iovs, size);
		i++;
		assert(remaining >= size);
		remaining -= size;
	}
	*_remaining = remaining;

	return i;
}

static inline bool
accel_mlx5_compare_iovs(struct iovec *v1, struct iovec *v2, uint32_t iovcnt)
{
	return memcmp(v1, v2, sizeof(*v1) * iovcnt) == 0;
}

static inline uint16_t
accel_mlx5_dev_get_available_slots(struct accel_mlx5_dev *dev, struct accel_mlx5_qp *qp)
{
	assert(qp->wrs_max >= qp->wrs_submitted);
	assert(dev->wrs_in_cq_max >= dev->wrs_in_cq);

	/* Each time we produce only 1 CQE, so we need 1 CQ slot */
	if (spdk_unlikely(dev->wrs_in_cq == dev->wrs_in_cq_max)) {
		return 0;
	}

	return qp->wrs_max - qp->wrs_submitted;
}

static inline uint32_t
accel_mlx5_task_alloc_mkeys(struct accel_mlx5_task *task, struct spdk_mlx5_mkey_pool *pool)
{
	uint32_t num_ops;
	int rc;

	assert(task->num_reqs > task->num_completed_reqs);
	num_ops = task->num_reqs - task->num_completed_reqs;
	num_ops = spdk_min(num_ops, ACCEL_MLX5_MAX_MKEYS_IN_TASK);
	if (!num_ops) {
		return 0;
	}
	rc = spdk_mlx5_mkey_pool_get_bulk(pool, task->mkeys, num_ops);
	if (spdk_unlikely(rc)) {
		return 0;
	}
	assert(num_ops <= UINT16_MAX);
	task->num_ops = num_ops;

	return num_ops;
}

static inline uint8_t
bs_to_bs_selector(uint32_t bs)
{
	switch (bs) {
	case 512:
		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_512;
	case 520:
		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_520;
	case 4096:
		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_4096;
	case 4160:
		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_4160;
	default:
		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_RESERVED;
	}
}

static inline int
accel_mlx5_configure_crypto_umr(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_sge *sge,
				uint32_t mkey, uint32_t num_blocks, struct spdk_mlx5_crypto_dek_data *dek_data,
				uint64_t wr_id, uint32_t flags)
{
	struct spdk_mlx5_umr_crypto_attr cattr;
	struct spdk_mlx5_umr_attr umr_attr;
	struct accel_mlx5_qp *qp = mlx5_task->qp;
	struct accel_mlx5_dev *dev = qp->dev;
	struct spdk_accel_task *task = &mlx5_task->base;
	uint32_t length, remaining = 0, block_size = task->block_size;
	int rc;

	length = num_blocks * block_size;
	SPDK_DEBUGLOG(accel_mlx5, "task %p, domain %p, len %u, blocks %u\n", task, task->src_domain, length,
		      num_blocks);
	rc = accel_mlx5_fill_block_sge(dev, sge->src_sge, &mlx5_task->src, length, &remaining,
				       task->src_domain, task->src_domain_ctx);
	if (spdk_unlikely(rc <= 0)) {
		if (rc == 0) {
			rc = -EINVAL;
		}
		SPDK_ERRLOG("failed set src sge, rc %d\n", rc);
		return rc;
	}
	sge->src_sge_count = rc;
	if (spdk_unlikely(remaining)) {
		uint32_t new_len = length - remaining;
		uint32_t aligned_len, updated_num_blocks;

		SPDK_DEBUGLOG(accel_mlx5, "Incorrect src iovs, handled %u out of %u bytes\n", new_len, length);
		if (new_len < block_size) {
			/* We need to process at least 1 block.
			 * If buffer is too fragmented, we can't do anything */
			return -ERANGE;
		}

		/* Regular integer division, we need to round down to prev block size */
		updated_num_blocks = new_len / block_size;
		assert(updated_num_blocks);
		assert(updated_num_blocks < num_blocks);
		aligned_len = updated_num_blocks * block_size;

		if (aligned_len < new_len) {
			uint32_t dt = new_len - aligned_len;

			/* We can't process part of block, need to unwind src iov_sgl and sge to the
			 * prev block boundary */
			SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind src sge for %u bytes\n", task, dt);
			accel_mlx5_iov_sgl_unwind(&mlx5_task->src, task->s.iovcnt, dt);
			sge->src_sge_count = accel_mlx5_sge_unwind(sge->src_sge, sge->src_sge_count, dt);
			if (!sge->src_sge_count) {
				return -ERANGE;
			}
		}
		SPDK_DEBUGLOG(accel_mlx5, "task %p, UMR len %u -> %u\n", task, length, aligned_len);
		length = aligned_len;
		num_blocks = updated_num_blocks;
	}

	cattr.xts_iv = task->iv + mlx5_task->num_processed_blocks;
	cattr.keytag = 0;
	cattr.dek_obj_id = dek_data->dek_obj_id;
	cattr.tweak_mode = dek_data->tweak_mode;
	cattr.enc_order = mlx5_task->enc_order;
	cattr.bs_selector = bs_to_bs_selector(mlx5_task->base.block_size);
	if (spdk_unlikely(cattr.bs_selector == SPDK_MLX5_BLOCK_SIZE_SELECTOR_RESERVED)) {
		SPDK_ERRLOG("unsupported block size %u\n", mlx5_task->base.block_size);
		return -EINVAL;
	}
	umr_attr.mkey = mkey;
	umr_attr.sge = sge->src_sge;

	if (!mlx5_task->inplace) {
		SPDK_DEBUGLOG(accel_mlx5, "task %p, dst sge, domain %p, len %u\n", task, task->dst_domain, length);
		rc = accel_mlx5_fill_block_sge(dev, sge->dst_sge, &mlx5_task->dst, length, &remaining,
					       task->dst_domain, task->dst_domain_ctx);
		if (spdk_unlikely(rc <= 0)) {
			if (rc == 0) {
				rc = -EINVAL;
			}
			SPDK_ERRLOG("failed set dst sge, rc %d\n", rc);
			return rc;
		}
		sge->dst_sge_count = rc;
		if (spdk_unlikely(remaining)) {
			uint32_t new_len = length - remaining;
			uint32_t aligned_len, updated_num_blocks, dt;

			SPDK_DEBUGLOG(accel_mlx5, "Incorrect dst iovs, handled %u out of %u bytes\n", new_len, length);
			if (new_len < block_size) {
				/* We need to process at least 1 block. If buffer is too fragmented, we can't do
				 * anything */
				return -ERANGE;
			}

			/* Regular integer division, we need to round down to prev block size */
			updated_num_blocks = new_len / block_size;
			assert(updated_num_blocks);
			assert(updated_num_blocks < num_blocks);
			aligned_len = updated_num_blocks * block_size;

			if (aligned_len < new_len) {
				dt = new_len - aligned_len;
				assert(dt > 0 && dt < length);
				/* We can't process part of block, need to unwind src and dst iov_sgl and sge to the
				 * prev block boundary */
				SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind dst sge for %u bytes\n", task, dt);
				accel_mlx5_iov_sgl_unwind(&mlx5_task->dst, task->d.iovcnt, dt);
				sge->dst_sge_count = accel_mlx5_sge_unwind(sge->dst_sge, sge->dst_sge_count, dt);
				assert(sge->dst_sge_count > 0 && sge->dst_sge_count <= ACCEL_MLX5_MAX_SGE);
				if (!sge->dst_sge_count) {
					return -ERANGE;
				}
			}
			assert(length > aligned_len);
			dt = length - aligned_len;
			SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind src sge for %u bytes\n", task, dt);
			/* The same for src iov_sgl and sge.
			 * In worst case we can unwind SRC 2 times */
			accel_mlx5_iov_sgl_unwind(&mlx5_task->src, task->s.iovcnt, dt);
			sge->src_sge_count = accel_mlx5_sge_unwind(sge->src_sge, sge->src_sge_count, dt);
			assert(sge->src_sge_count > 0 && sge->src_sge_count <= ACCEL_MLX5_MAX_SGE);
			if (!sge->src_sge_count) {
				return -ERANGE;
			}
			SPDK_DEBUGLOG(accel_mlx5, "task %p, UMR len %u -> %u\n", task, length, aligned_len);
			length = aligned_len;
			num_blocks = updated_num_blocks;
		}
	}

	SPDK_DEBUGLOG(accel_mlx5,
		      "task %p: bs %u, iv %"PRIu64", enc_on_tx %d, tweak_mode %d, len %u, mkey %x, blocks %u\n",
		      mlx5_task, task->block_size, cattr.xts_iv, mlx5_task->enc_order, cattr.tweak_mode, length, mkey,
		      num_blocks);

	umr_attr.sge_count = sge->src_sge_count;
	umr_attr.umr_len = length;
	assert((uint32_t)mlx5_task->num_processed_blocks + num_blocks <= UINT16_MAX);
	mlx5_task->num_processed_blocks += num_blocks;

	rc = spdk_mlx5_umr_configure_crypto(qp->qp, &umr_attr, &cattr, wr_id, flags);

	return rc;
}

static inline int
accel_mlx5_crypto_task_process(struct accel_mlx5_task *mlx5_task)
{
	struct accel_mlx5_sge sges[ACCEL_MLX5_MAX_MKEYS_IN_TASK];
	struct spdk_mlx5_crypto_dek_data dek_data;
	struct accel_mlx5_qp *qp = mlx5_task->qp;
	struct accel_mlx5_dev *dev = qp->dev;
	/* First RDMA after UMR must have a SMALL_FENCE */
	uint32_t first_rdma_fence = SPDK_MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE;
	uint16_t num_blocks;
	uint16_t num_ops = spdk_min(mlx5_task->num_reqs - mlx5_task->num_completed_reqs,
				    mlx5_task->num_ops);
	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
	uint16_t i;
	int rc;

	assert(qp_slot > 1);
	num_ops = spdk_min(num_ops, qp_slot >> 1);
	if (spdk_unlikely(!num_ops)) {
		return -EINVAL;
	}

	rc = spdk_mlx5_crypto_get_dek_data(mlx5_task->base.crypto_key->priv, dev->dev_ctx->pd, &dek_data);
	if (spdk_unlikely(rc)) {
		return rc;
	}

	mlx5_task->num_wrs = 0;
	SPDK_DEBUGLOG(accel_mlx5, "begin, task, %p, reqs: total %u, submitted %u, completed %u\n",
		      mlx5_task, mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs);
	for (i = 0; i < num_ops; i++) {
		if (mlx5_task->num_submitted_reqs + i + 1 == mlx5_task->num_reqs) {
			/* Last request may consume less than calculated if crypto_multi_block is true */
			assert(mlx5_task->num_blocks > mlx5_task->num_submitted_reqs);
			num_blocks = mlx5_task->num_blocks - mlx5_task->num_processed_blocks;
		} else {
			num_blocks = mlx5_task->blocks_per_req;
		}

		rc = accel_mlx5_configure_crypto_umr(mlx5_task, &sges[i], mlx5_task->mkeys[i]->mkey, num_blocks,
						     &dek_data, 0, 0);
		if (spdk_unlikely(rc)) {
			SPDK_ERRLOG("UMR configure failed with %d\n", rc);
			return rc;
		}
		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
		dev->stats.crypto_umrs++;
	}

	/* Loop `num_ops - 1` for easy flags handling */
	for (i = 0; i < num_ops - 1; i++) {
		/* UMR is used as a destination for RDMA_READ - from UMR to sge */
		if (mlx5_task->inplace) {
			rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].src_sge, sges[i].src_sge_count, 0,
						    mlx5_task->mkeys[i]->mkey, 0, first_rdma_fence);
		} else {
			rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].dst_sge, sges[i].dst_sge_count, 0,
						    mlx5_task->mkeys[i]->mkey, 0, first_rdma_fence);
		}
		if (spdk_unlikely(rc)) {
			SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc);
			return rc;
		}

		first_rdma_fence = 0;
		assert(mlx5_task->num_submitted_reqs < mlx5_task->num_reqs);
		assert(mlx5_task->num_submitted_reqs < UINT16_MAX);
		mlx5_task->num_submitted_reqs++;
		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
		dev->stats.rdma_reads++;
	}

	if (mlx5_task->inplace) {
		rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].src_sge, sges[i].src_sge_count, 0,
					    mlx5_task->mkeys[i]->mkey, (uint64_t)mlx5_task,
					    first_rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
	} else {
		rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].dst_sge, sges[i].dst_sge_count, 0,
					    mlx5_task->mkeys[i]->mkey, (uint64_t)mlx5_task,
					    first_rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
	}
	if (spdk_unlikely(rc)) {
		SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc);
		return rc;
	}

	assert(mlx5_task->num_submitted_reqs < mlx5_task->num_reqs);
	assert(mlx5_task->num_submitted_reqs < UINT16_MAX);
	mlx5_task->num_submitted_reqs++;
	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task);
	dev->stats.rdma_reads++;
	STAILQ_INSERT_TAIL(&qp->in_hw, mlx5_task, link);

	if (spdk_unlikely(mlx5_task->num_submitted_reqs == mlx5_task->num_reqs &&
			  mlx5_task->num_blocks > mlx5_task->num_processed_blocks)) {
		/* We hit the "out of sge entries" case with a highly fragmented payload. In that case
		 * the accel_mlx5_configure_crypto_umr function handled fewer data blocks than expected.
		 * That means we need at least 1 more request to complete this task; this request will be
		 * executed once all submitted ones are completed */
		SPDK_DEBUGLOG(accel_mlx5, "task %p, processed %u/%u blocks, add extra req\n", mlx5_task,
			      mlx5_task->num_processed_blocks, mlx5_task->num_blocks);
		mlx5_task->num_reqs++;
	}

	SPDK_DEBUGLOG(accel_mlx5, "end, task, %p, reqs: total %u, submitted %u, completed %u\n", mlx5_task,
		      mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs);

	return 0;
}

static inline int
accel_mlx5_crypto_task_continue(struct accel_mlx5_task *task)
{
	struct accel_mlx5_qp *qp = task->qp;
	struct accel_mlx5_dev *dev = qp->dev;
	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);

	assert(task->num_reqs > task->num_completed_reqs);
	if (task->num_ops == 0) {
		/* No mkeys allocated, try to allocate now */
		if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(task, dev->crypto_mkeys))) {
			/* Pool is empty, queue this task */
			STAILQ_INSERT_TAIL(&dev->nomem, task, link);
			dev->stats.nomem_mkey++;
			return -ENOMEM;
		}
	}
	/* We need to post at least 1 UMR and 1 RDMA operation */
	if (spdk_unlikely(qp_slot < 2)) {
		/* QP is full, queue this task */
		STAILQ_INSERT_TAIL(&dev->nomem, task, link);
		task->qp->dev->stats.nomem_qdepth++;
		return -ENOMEM;
	}

	return accel_mlx5_crypto_task_process(task);
}

static inline int
accel_mlx5_crypto_task_init(struct accel_mlx5_task *mlx5_task)
{
	struct spdk_accel_task *task = &mlx5_task->base;
	struct accel_mlx5_dev *dev = mlx5_task->qp->dev;
	uint64_t src_nbytes = task->nbytes;
#ifdef DEBUG
	uint64_t dst_nbytes;
	uint32_t i;
#endif
	bool crypto_key_ok;

	crypto_key_ok = (task->crypto_key && task->crypto_key->module_if == &g_accel_mlx5.module &&
			 task->crypto_key->priv);
	if (spdk_unlikely((task->nbytes % mlx5_task->base.block_size != 0) || !crypto_key_ok)) {
		if (crypto_key_ok) {
length %"PRIu64" is not a multiple of the block size %u\n", task->nbytes, 803 mlx5_task->base.block_size); 804 } else { 805 SPDK_ERRLOG("Wrong crypto key provided\n"); 806 } 807 return -EINVAL; 808 } 809 810 assert(src_nbytes / mlx5_task->base.block_size <= UINT16_MAX); 811 mlx5_task->num_blocks = src_nbytes / mlx5_task->base.block_size; 812 accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt); 813 if (task->d.iovcnt == 0 || (task->d.iovcnt == task->s.iovcnt && 814 accel_mlx5_compare_iovs(task->d.iovs, task->s.iovs, task->s.iovcnt))) { 815 mlx5_task->inplace = 1; 816 } else { 817 #ifdef DEBUG 818 dst_nbytes = 0; 819 for (i = 0; i < task->d.iovcnt; i++) { 820 dst_nbytes += task->d.iovs[i].iov_len; 821 } 822 823 if (spdk_unlikely(src_nbytes != dst_nbytes)) { 824 return -EINVAL; 825 } 826 #endif 827 mlx5_task->inplace = 0; 828 accel_mlx5_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt); 829 } 830 831 if (dev->crypto_multi_block) { 832 if (dev->crypto_split_blocks) { 833 assert(SPDK_CEIL_DIV(mlx5_task->num_blocks, dev->crypto_split_blocks) <= UINT16_MAX); 834 mlx5_task->num_reqs = SPDK_CEIL_DIV(mlx5_task->num_blocks, dev->crypto_split_blocks); 835 /* Last req may consume less blocks */ 836 mlx5_task->blocks_per_req = spdk_min(mlx5_task->num_blocks, dev->crypto_split_blocks); 837 } else { 838 if (task->s.iovcnt > ACCEL_MLX5_MAX_SGE || task->d.iovcnt > ACCEL_MLX5_MAX_SGE) { 839 uint32_t max_sge_count = spdk_max(task->s.iovcnt, task->d.iovcnt); 840 841 assert(SPDK_CEIL_DIV(max_sge_count, ACCEL_MLX5_MAX_SGE) <= UINT16_MAX); 842 mlx5_task->num_reqs = SPDK_CEIL_DIV(max_sge_count, ACCEL_MLX5_MAX_SGE); 843 mlx5_task->blocks_per_req = SPDK_CEIL_DIV(mlx5_task->num_blocks, mlx5_task->num_reqs); 844 } else { 845 mlx5_task->num_reqs = 1; 846 mlx5_task->blocks_per_req = mlx5_task->num_blocks; 847 } 848 } 849 } else { 850 mlx5_task->num_reqs = mlx5_task->num_blocks; 851 mlx5_task->blocks_per_req = 1; 852 } 853 854 if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(mlx5_task, dev->crypto_mkeys))) { 855 /* Pool is empty, queue this task */ 856 SPDK_DEBUGLOG(accel_mlx5, "no reqs in pool, dev %s\n", dev->dev_ctx->context->device->name); 857 dev->stats.nomem_mkey++; 858 return -ENOMEM; 859 } 860 if (spdk_unlikely(accel_mlx5_dev_get_available_slots(dev, &dev->qp) < 2)) { 861 /* Queue is full, queue this task */ 862 SPDK_DEBUGLOG(accel_mlx5, "dev %s qp %p is full\n", dev->dev_ctx->context->device->name, 863 mlx5_task->qp); 864 dev->stats.nomem_qdepth++; 865 return -ENOMEM; 866 } 867 868 SPDK_DEBUGLOG(accel_mlx5, "task %p, src_iovs %u, dst_iovs %u, num_reqs %u, " 869 "blocks/req %u, blocks %u, inplace %d\n", task, task->s.iovcnt, task->d.iovcnt, 870 mlx5_task->num_reqs, mlx5_task->blocks_per_req, mlx5_task->num_blocks, mlx5_task->inplace); 871 872 return 0; 873 } 874 875 static inline void 876 accel_mlx5_copy_task_complete(struct accel_mlx5_task *mlx5_task) 877 { 878 spdk_accel_task_complete(&mlx5_task->base, 0); 879 } 880 881 static inline int 882 accel_mlx5_copy_task_process_one(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_qp *qp, 883 uint64_t wrid, uint32_t fence) 884 { 885 struct spdk_accel_task *task = &mlx5_task->base; 886 struct accel_mlx5_sge sge; 887 uint32_t remaining = 0; 888 uint32_t dst_len; 889 int rc; 890 891 /* Limit one RDMA_WRITE by length of dst buffer. Not all src buffers may fit into one dst buffer due to 892 * limitation on ACCEL_MLX5_MAX_SGE. 
	 * If this is the case then remaining is not zero */
	assert(mlx5_task->dst.iov->iov_len > mlx5_task->dst.iov_offset);
	dst_len = mlx5_task->dst.iov->iov_len - mlx5_task->dst.iov_offset;
	rc = accel_mlx5_fill_block_sge(qp->dev, sge.src_sge, &mlx5_task->src, dst_len, &remaining,
				       task->src_domain, task->src_domain_ctx);
	if (spdk_unlikely(rc <= 0)) {
		if (rc == 0) {
			rc = -EINVAL;
		}
		SPDK_ERRLOG("failed set src sge, rc %d\n", rc);
		return rc;
	}
	sge.src_sge_count = rc;
	assert(dst_len > remaining);
	dst_len -= remaining;

	rc = accel_mlx5_fill_block_sge(qp->dev, sge.dst_sge, &mlx5_task->dst, dst_len, &remaining,
				       task->dst_domain, task->dst_domain_ctx);
	if (spdk_unlikely(rc != 1)) {
		/* We use single dst entry, any result other than 1 is an error */
		if (rc == 0) {
			rc = -EINVAL;
		}
		SPDK_ERRLOG("failed set dst sge, rc %d\n", rc);
		return rc;
	}
	if (spdk_unlikely(remaining)) {
		SPDK_ERRLOG("Incorrect dst length, remaining %u\n", remaining);
		assert(0);
		return -EINVAL;
	}

	rc = spdk_mlx5_qp_rdma_write(mlx5_task->qp->qp, sge.src_sge, sge.src_sge_count,
				     sge.dst_sge[0].addr, sge.dst_sge[0].lkey, wrid, fence);
	if (spdk_unlikely(rc)) {
		SPDK_ERRLOG("new RDMA WRITE failed with %d\n", rc);
		return rc;
	}
	qp->dev->stats.rdma_writes++;

	return 0;
}

static inline int
accel_mlx5_copy_task_process(struct accel_mlx5_task *mlx5_task)
{
	struct accel_mlx5_qp *qp = mlx5_task->qp;
	struct accel_mlx5_dev *dev = qp->dev;
	uint16_t i;
	int rc;

	mlx5_task->num_wrs = 0;
	assert(mlx5_task->num_reqs > 0);
	assert(mlx5_task->num_ops > 0);

	/* Handle n-1 reqs in order to simplify wrid and fence handling */
	for (i = 0; i < mlx5_task->num_ops - 1; i++) {
		rc = accel_mlx5_copy_task_process_one(mlx5_task, qp, 0, 0);
		if (spdk_unlikely(rc)) {
			return rc;
		}
		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
		mlx5_task->num_submitted_reqs++;
	}

	rc = accel_mlx5_copy_task_process_one(mlx5_task, qp, (uint64_t)mlx5_task,
					      SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
	if (spdk_unlikely(rc)) {
		return rc;
	}
	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task);
	mlx5_task->num_submitted_reqs++;
	STAILQ_INSERT_TAIL(&qp->in_hw, mlx5_task, link);

	SPDK_DEBUGLOG(accel_mlx5, "end, copy task, %p\n", mlx5_task);

	return 0;
}

static inline int
accel_mlx5_copy_task_continue(struct accel_mlx5_task *task)
{
	struct accel_mlx5_qp *qp = task->qp;
	struct accel_mlx5_dev *dev = qp->dev;
	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);

	task->num_ops = spdk_min(qp_slot, task->num_reqs - task->num_completed_reqs);
	if (spdk_unlikely(task->num_ops == 0)) {
		STAILQ_INSERT_TAIL(&dev->nomem, task, link);
		dev->stats.nomem_qdepth++;
		return -ENOMEM;
	}
	return accel_mlx5_copy_task_process(task);
}

static inline uint32_t
accel_mlx5_get_copy_task_count(struct iovec *src_iov, uint32_t src_iovcnt,
			       struct iovec *dst_iov, uint32_t dst_iovcnt)
{
	uint32_t src = 0;
	uint32_t dst = 0;
	uint64_t src_offset = 0;
	uint64_t dst_offset = 0;
	uint32_t num_ops = 0;
	uint32_t src_sge_count = 0;

	while (src < src_iovcnt && dst < dst_iovcnt) {
		uint64_t src_len = src_iov[src].iov_len - src_offset;
		uint64_t dst_len = dst_iov[dst].iov_len - dst_offset;

		if (dst_len < src_len) {
			dst_offset = 0;
			src_offset += dst_len;
			dst++;
			num_ops++;
			src_sge_count = 0;
		} else if (src_len < dst_len) {
			dst_offset += src_len;
			src_offset = 0;
			src++;
			if (++src_sge_count >= ACCEL_MLX5_MAX_SGE) {
				num_ops++;
				src_sge_count = 0;
			}
		} else {
			dst_offset = 0;
			src_offset = 0;
			dst++;
			src++;
			num_ops++;
			src_sge_count = 0;
		}
	}

	assert(src == src_iovcnt);
	assert(dst == dst_iovcnt);
	assert(src_offset == 0);
	assert(dst_offset == 0);
	return num_ops;
}

static inline int
accel_mlx5_copy_task_init(struct accel_mlx5_task *mlx5_task)
{
	struct spdk_accel_task *task = &mlx5_task->base;
	struct accel_mlx5_qp *qp = mlx5_task->qp;
	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(qp->dev, qp);

	if (spdk_likely(task->s.iovcnt <= ACCEL_MLX5_MAX_SGE)) {
		mlx5_task->num_reqs = task->d.iovcnt;
	} else if (task->d.iovcnt == 1) {
		mlx5_task->num_reqs = SPDK_CEIL_DIV(task->s.iovcnt, ACCEL_MLX5_MAX_SGE);
	} else {
		mlx5_task->num_reqs = accel_mlx5_get_copy_task_count(task->s.iovs, task->s.iovcnt,
				      task->d.iovs, task->d.iovcnt);
	}
	mlx5_task->inplace = 0;
	accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt);
	accel_mlx5_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt);
	mlx5_task->num_ops = spdk_min(qp_slot, mlx5_task->num_reqs);
	if (spdk_unlikely(!mlx5_task->num_ops)) {
		qp->dev->stats.nomem_qdepth++;
		return -ENOMEM;
	}
	SPDK_DEBUGLOG(accel_mlx5, "copy task num_reqs %u, num_ops %u\n", mlx5_task->num_reqs,
		      mlx5_task->num_ops);

	return 0;
}

static inline uint32_t
accel_mlx5_advance_iovec(struct iovec *iov, uint32_t iovcnt, size_t *iov_offset, size_t *len)
{
	uint32_t i;
	size_t iov_len;

	for (i = 0; *len != 0 && i < iovcnt; i++) {
		iov_len = iov[i].iov_len - *iov_offset;

		if (iov_len < *len) {
			*iov_offset = 0;
			*len -= iov_len;
			continue;
		}
		if (iov_len == *len) {
			*iov_offset = 0;
			i++;
		} else { /* iov_len > *len */
			*iov_offset += *len;
		}
		*len = 0;
		break;
	}

	return i;
}

static inline void
accel_mlx5_crc_task_complete(struct accel_mlx5_task *mlx5_task)
{
	struct accel_mlx5_dev *dev = mlx5_task->qp->dev;

	*mlx5_task->base.crc_dst = mlx5_task->psv->crc ^ UINT32_MAX;
	/* Normal task completion without allocated mkeys is not possible */
	assert(mlx5_task->num_ops);
	spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, mlx5_task->mkeys, mlx5_task->num_ops);
	spdk_mempool_put(dev->dev_ctx->psv_pool, mlx5_task->psv);
	spdk_accel_task_complete(&mlx5_task->base, 0);
}

static inline int
accel_mlx5_crc_task_configure_umr(struct accel_mlx5_task *mlx5_task, struct ibv_sge *sge,
				  uint32_t sge_count, struct spdk_mlx5_mkey_pool_obj *mkey,
				  enum spdk_mlx5_umr_sig_domain sig_domain, uint32_t umr_len,
				  bool sig_init, bool sig_check_gen)
{
	struct spdk_mlx5_umr_sig_attr sattr = {
		.seed = mlx5_task->base.seed ^ UINT32_MAX,
		.psv_index = mlx5_task->psv->psv_index,
		.domain = sig_domain,
		.sigerr_count = mkey->sig.sigerr_count,
		.raw_data_size = umr_len,
		.init = sig_init,
		.check_gen = sig_check_gen,
	};
	struct spdk_mlx5_umr_attr umr_attr = {
		.mkey = mkey->mkey,
		.umr_len = umr_len,
		.sge_count = sge_count,
		.sge = sge,
	};

	return spdk_mlx5_umr_configure_sig(mlx5_task->qp->qp, &umr_attr, &sattr, 0, 0);
}

static inline int
accel_mlx5_crc_task_fill_sge(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_sge *sge)
{
	struct spdk_accel_task *task = &mlx5_task->base;
	struct accel_mlx5_qp *qp = mlx5_task->qp;
	struct accel_mlx5_dev *dev = qp->dev;
	uint32_t remaining;
	int rc;

	rc = accel_mlx5_fill_block_sge(dev, sge->src_sge, &mlx5_task->src, task->nbytes, &remaining,
				       task->src_domain, task->src_domain_ctx);
	if (spdk_unlikely(rc <= 0)) {
		if (rc == 0) {
			rc = -EINVAL;
		}
		SPDK_ERRLOG("failed set src sge, rc %d\n", rc);
		return rc;
	}
	assert(remaining == 0);
	sge->src_sge_count = rc;

	if (!mlx5_task->inplace) {
		rc = accel_mlx5_fill_block_sge(dev, sge->dst_sge, &mlx5_task->dst, task->nbytes, &remaining,
					       task->dst_domain, task->dst_domain_ctx);
		if (spdk_unlikely(rc <= 0)) {
			if (rc == 0) {
				rc = -EINVAL;
			}
			SPDK_ERRLOG("failed set dst sge, rc %d\n", rc);
			return rc;
		}
		assert(remaining == 0);
		sge->dst_sge_count = rc;
	}

	return 0;
}

static inline int
accel_mlx5_crc_task_process_one_req(struct accel_mlx5_task *mlx5_task)
{
	struct accel_mlx5_sge sges;
	struct accel_mlx5_qp *qp = mlx5_task->qp;
	struct accel_mlx5_dev *dev = qp->dev;
	uint32_t num_ops = spdk_min(mlx5_task->num_reqs - mlx5_task->num_completed_reqs,
				    mlx5_task->num_ops);
	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
	uint32_t rdma_fence = SPDK_MLX5_WQE_CTRL_STRONG_ORDERING;
	struct ibv_sge *sge;
	int rc;
	uint16_t sge_count;

	num_ops = spdk_min(num_ops, qp_slot >> 1);
	if (spdk_unlikely(!num_ops)) {
		return -EINVAL;
	}

	mlx5_task->num_wrs = 0;
	/* At this moment we have as many requests as can be submitted to a qp */
	rc = accel_mlx5_crc_task_fill_sge(mlx5_task, &sges);
	if (spdk_unlikely(rc)) {
		return rc;
	}
	rc = accel_mlx5_crc_task_configure_umr(mlx5_task, sges.src_sge, sges.src_sge_count,
					       mlx5_task->mkeys[0], SPDK_MLX5_UMR_SIG_DOMAIN_WIRE,
					       mlx5_task->base.nbytes, true, true);
	if (spdk_unlikely(rc)) {
		SPDK_ERRLOG("UMR configure failed with %d\n", rc);
		return rc;
	}
	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
	dev->stats.sig_umrs++;

	if (mlx5_task->inplace) {
		sge = sges.src_sge;
		sge_count = sges.src_sge_count;
	} else {
		sge = sges.dst_sge;
		sge_count = sges.dst_sge_count;
	}

	/*
	 * Add the crc destination to the end of sges. A free entry must be available for CRC
	 * because the task init function reserved it.
	 */
	assert(sge_count < ACCEL_MLX5_MAX_SGE);
	sge[sge_count].lkey = mlx5_task->psv->crc_lkey;
	sge[sge_count].addr = (uintptr_t)&mlx5_task->psv->crc;
	sge[sge_count++].length = sizeof(uint32_t);

	if (spdk_unlikely(mlx5_task->psv->bits.error)) {
		rc = spdk_mlx5_qp_set_psv(qp->qp, mlx5_task->psv->psv_index, *mlx5_task->base.crc_dst, 0, 0);
		if (spdk_unlikely(rc)) {
			SPDK_ERRLOG("SET_PSV failed with %d\n", rc);
			return rc;
		}
		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
	}

	rc = spdk_mlx5_qp_rdma_read(qp->qp, sge, sge_count, 0, mlx5_task->mkeys[0]->mkey,
				    (uint64_t)mlx5_task, rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
	if (spdk_unlikely(rc)) {
		SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc);
		return rc;
	}
	mlx5_task->num_submitted_reqs++;
	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task);
	dev->stats.rdma_reads++;

	return 0;
}

static inline int
accel_mlx5_crc_task_fill_umr_sge(struct accel_mlx5_qp *qp, struct ibv_sge *sge,
				 struct accel_mlx5_iov_sgl *umr_iovs, struct spdk_memory_domain *domain,
				 void *domain_ctx, struct accel_mlx5_iov_sgl *rdma_iovs, size_t *len)
{
	int umr_idx = 0;
	int rdma_idx = 0;
	int umr_iovcnt = spdk_min(umr_iovs->iovcnt, (int)ACCEL_MLX5_MAX_SGE);
	int rdma_iovcnt = spdk_min(rdma_iovs->iovcnt, (int)ACCEL_MLX5_MAX_SGE);
	size_t umr_iov_offset;
	size_t rdma_iov_offset;
	size_t umr_len = 0;
	void *sge_addr;
	size_t sge_len;
	size_t umr_sge_len;
	size_t rdma_sge_len;
	int rc;

	umr_iov_offset = umr_iovs->iov_offset;
	rdma_iov_offset = rdma_iovs->iov_offset;

	while (umr_idx < umr_iovcnt && rdma_idx < rdma_iovcnt) {
		umr_sge_len = umr_iovs->iov[umr_idx].iov_len - umr_iov_offset;
		rdma_sge_len = rdma_iovs->iov[rdma_idx].iov_len - rdma_iov_offset;
		sge_addr = umr_iovs->iov[umr_idx].iov_base + umr_iov_offset;

		if (umr_sge_len == rdma_sge_len) {
			rdma_idx++;
			umr_iov_offset = 0;
			rdma_iov_offset = 0;
			sge_len = umr_sge_len;
		} else if (umr_sge_len < rdma_sge_len) {
			umr_iov_offset = 0;
			rdma_iov_offset += umr_sge_len;
			sge_len = umr_sge_len;
		} else {
			size_t remaining;

			remaining = umr_sge_len - rdma_sge_len;
			while (remaining) {
				rdma_idx++;
				if (rdma_idx == (int)ACCEL_MLX5_MAX_SGE) {
					break;
				}
				rdma_sge_len = rdma_iovs->iov[rdma_idx].iov_len;
				if (remaining == rdma_sge_len) {
					rdma_idx++;
					rdma_iov_offset = 0;
					umr_iov_offset = 0;
					remaining = 0;
					break;
				}
				if (remaining < rdma_sge_len) {
					rdma_iov_offset = remaining;
					umr_iov_offset = 0;
					remaining = 0;
					break;
				}
				remaining -= rdma_sge_len;
			}
			sge_len = umr_sge_len - remaining;
		}
		rc = accel_mlx5_translate_addr(sge_addr, sge_len, domain, domain_ctx, qp->dev, &sge[umr_idx]);
		if (spdk_unlikely(rc)) {
			return -EINVAL;
		}
		SPDK_DEBUGLOG(accel_mlx5, "\t sge[%d] lkey %u, addr %p, len %u\n", umr_idx, sge[umr_idx].lkey,
			      (void *)sge[umr_idx].addr, sge[umr_idx].length);
		umr_len += sge_len;
		umr_idx++;
	}
	accel_mlx5_iov_sgl_advance(umr_iovs, umr_len);
	accel_mlx5_iov_sgl_advance(rdma_iovs, umr_len);
	*len = umr_len;

	return umr_idx;
}

static inline int
accel_mlx5_crc_task_process_multi_req(struct accel_mlx5_task *mlx5_task)
{
	size_t umr_len[ACCEL_MLX5_MAX_MKEYS_IN_TASK];
	struct ibv_sge sges[ACCEL_MLX5_MAX_SGE];
	struct spdk_accel_task *task = &mlx5_task->base;
	struct accel_mlx5_qp *qp = mlx5_task->qp;
	struct accel_mlx5_dev *dev = qp->dev;
	struct accel_mlx5_iov_sgl umr_sgl;
	struct accel_mlx5_iov_sgl *umr_sgl_ptr;
	struct accel_mlx5_iov_sgl rdma_sgl;
	uint64_t umr_offset;
	uint32_t rdma_fence = SPDK_MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE;
	int sge_count;
	uint32_t remaining;
	int rc;
	uint16_t i;
	uint16_t num_ops = spdk_min(mlx5_task->num_reqs - mlx5_task->num_completed_reqs,
				    mlx5_task->num_ops);
	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
	bool sig_init, sig_check_gen = false;

	num_ops = spdk_min(num_ops, qp_slot >> 1);
	if (spdk_unlikely(!num_ops)) {
		return -EINVAL;
	}
	/* Init signature on the first UMR */
	sig_init = !mlx5_task->num_submitted_reqs;

	/*
	 * accel_mlx5_crc_task_fill_umr_sge() and accel_mlx5_fill_block_sge() advance an IOV during iteration
	 * on it. We must copy accel_mlx5_iov_sgl to iterate twice or more on the same IOV.
	 *
	 * In the in-place case, we iterate on the source IOV three times. That's why we need two copies of
	 * the source accel_mlx5_iov_sgl.
	 *
	 * In the out-of-place case, we iterate on the source IOV once and on the destination IOV two times.
	 * So, we need one copy of the destination accel_mlx5_iov_sgl.
	 */
	if (mlx5_task->inplace) {
		accel_mlx5_iov_sgl_init(&umr_sgl, mlx5_task->src.iov, mlx5_task->src.iovcnt);
		umr_sgl_ptr = &umr_sgl;
		accel_mlx5_iov_sgl_init(&rdma_sgl, mlx5_task->src.iov, mlx5_task->src.iovcnt);
	} else {
		umr_sgl_ptr = &mlx5_task->src;
		accel_mlx5_iov_sgl_init(&rdma_sgl, mlx5_task->dst.iov, mlx5_task->dst.iovcnt);
	}
	mlx5_task->num_wrs = 0;
	for (i = 0; i < num_ops; i++) {
		/*
		 * The last request may have only CRC. Skip UMR in this case because the MKey from
		 * the previous request is used.
		 */
		if (umr_sgl_ptr->iovcnt == 0) {
			assert((mlx5_task->num_completed_reqs + i + 1) == mlx5_task->num_reqs);
			break;
		}
		sge_count = accel_mlx5_crc_task_fill_umr_sge(qp, sges, umr_sgl_ptr, task->src_domain,
				task->src_domain_ctx, &rdma_sgl, &umr_len[i]);
		if (spdk_unlikely(sge_count <= 0)) {
			rc = (sge_count == 0) ? -EINVAL : sge_count;
			SPDK_ERRLOG("failed set UMR sge, rc %d\n", rc);
			return rc;
		}
		if (umr_sgl_ptr->iovcnt == 0) {
			/*
			 * We post RDMA without UMR if the last request has only CRC. We use an MKey from
			 * the last UMR in this case. Since the last request can be postponed to the next
			 * call of this function, we must save the MKey to the task structure.
			 */
			mlx5_task->last_umr_len = umr_len[i];
			mlx5_task->last_mkey_idx = i;
			sig_check_gen = true;
		}
		rc = accel_mlx5_crc_task_configure_umr(mlx5_task, sges, sge_count, mlx5_task->mkeys[i],
						       SPDK_MLX5_UMR_SIG_DOMAIN_WIRE, umr_len[i], sig_init,
						       sig_check_gen);
		if (spdk_unlikely(rc)) {
			SPDK_ERRLOG("UMR configure failed with %d\n", rc);
			return rc;
		}
		sig_init = false;
		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
		dev->stats.sig_umrs++;
	}

	if (spdk_unlikely(mlx5_task->psv->bits.error)) {
		rc = spdk_mlx5_qp_set_psv(qp->qp, mlx5_task->psv->psv_index, *mlx5_task->base.crc_dst, 0, 0);
		if (spdk_unlikely(rc)) {
			SPDK_ERRLOG("SET_PSV failed with %d\n", rc);
			return rc;
		}
		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
	}

	for (i = 0; i < num_ops - 1; i++) {
		if (mlx5_task->inplace) {
			sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->src, umr_len[i], &remaining,
							      task->src_domain, task->src_domain_ctx);
		} else {
			sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->dst, umr_len[i], &remaining,
							      task->dst_domain, task->dst_domain_ctx);
		}
		if (spdk_unlikely(sge_count <= 0)) {
			rc = (sge_count == 0) ? -EINVAL : sge_count;
			SPDK_ERRLOG("failed set RDMA sge, rc %d\n", rc);
			return rc;
		}
		rc = spdk_mlx5_qp_rdma_read(qp->qp, sges, sge_count, 0, mlx5_task->mkeys[i]->mkey,
					    0, rdma_fence);
		if (spdk_unlikely(rc)) {
			SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc);
			return rc;
		}
		mlx5_task->num_submitted_reqs++;
		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
		dev->stats.rdma_reads++;
		rdma_fence = SPDK_MLX5_WQE_CTRL_STRONG_ORDERING;
	}
	if ((mlx5_task->inplace && mlx5_task->src.iovcnt == 0) ||
	    (!mlx5_task->inplace && mlx5_task->dst.iovcnt == 0)) {
		/*
		 * The last RDMA does not have any data, only CRC. It also does not have a paired Mkey.
		 * The CRC is handled in the previous MKey in this case.
		 */
		sge_count = 0;
		umr_offset = mlx5_task->last_umr_len;
	} else {
		umr_offset = 0;
		mlx5_task->last_mkey_idx = i;
		if (mlx5_task->inplace) {
			sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->src, umr_len[i], &remaining,
							      task->src_domain, task->src_domain_ctx);
		} else {
			sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->dst, umr_len[i], &remaining,
							      task->dst_domain, task->dst_domain_ctx);
		}
		if (spdk_unlikely(sge_count <= 0)) {
			rc = (sge_count == 0) ? -EINVAL : sge_count;
			SPDK_ERRLOG("failed set RDMA sge, rc %d\n", rc);
			return rc;
		}
		assert(remaining == 0);
	}
	if ((mlx5_task->num_completed_reqs + i + 1) == mlx5_task->num_reqs) {
		/* Ensure that there is a free sge for the CRC destination. */
		assert(sge_count < (int)ACCEL_MLX5_MAX_SGE);
		/* Add the crc destination to the end of sges. */
		sges[sge_count].lkey = mlx5_task->psv->crc_lkey;
		sges[sge_count].addr = (uintptr_t)&mlx5_task->psv->crc;
		sges[sge_count++].length = sizeof(uint32_t);
	}
	rdma_fence |= SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE;
	rc = spdk_mlx5_qp_rdma_read(qp->qp, sges, sge_count, umr_offset,
				    mlx5_task->mkeys[mlx5_task->last_mkey_idx]->mkey,
				    (uint64_t)mlx5_task, rdma_fence);
	if (spdk_unlikely(rc)) {
		SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc);
		return rc;
	}
	mlx5_task->num_submitted_reqs++;
	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task);
	dev->stats.rdma_reads++;

	return 0;
}

static inline int
accel_mlx5_crc_task_process(struct accel_mlx5_task *mlx5_task)
{
	int rc;

	assert(mlx5_task->mlx5_opcode == ACCEL_MLX5_OPC_CRC32C);

	SPDK_DEBUGLOG(accel_mlx5, "begin, crc task, %p, reqs: total %u, submitted %u, completed %u\n",
		      mlx5_task, mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs);

	if (mlx5_task->num_reqs == 1) {
		rc = accel_mlx5_crc_task_process_one_req(mlx5_task);
	} else {
		rc = accel_mlx5_crc_task_process_multi_req(mlx5_task);
	}

	if (rc == 0) {
		STAILQ_INSERT_TAIL(&mlx5_task->qp->in_hw, mlx5_task, link);
		SPDK_DEBUGLOG(accel_mlx5, "end, crc task, %p, reqs: total %u, submitted %u, completed %u\n",
			      mlx5_task, mlx5_task->num_reqs, mlx5_task->num_submitted_reqs,
			      mlx5_task->num_completed_reqs);
	}

	return rc;
}

static inline int
accel_mlx5_task_alloc_crc_ctx(struct accel_mlx5_task *task, uint32_t qp_slot)
{
	struct accel_mlx5_qp *qp = task->qp;
	struct accel_mlx5_dev *dev = qp->dev;

	if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(task, dev->sig_mkeys))) {
		SPDK_DEBUGLOG(accel_mlx5, "no mkeys in signature mkey pool, dev %s\n",
			      dev->dev_ctx->context->device->name);
		dev->stats.nomem_mkey++;
		return -ENOMEM;
	}
	task->psv = spdk_mempool_get(dev->dev_ctx->psv_pool);
	if (spdk_unlikely(!task->psv)) {
		SPDK_DEBUGLOG(accel_mlx5, "no reqs in psv pool, dev %s\n", dev->dev_ctx->context->device->name);
		spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, task->mkeys, task->num_ops);
		task->num_ops = 0;
		dev->stats.nomem_mkey++;
		return -ENOMEM;
	}
	/* One extra slot is needed for SET_PSV WQE to reset the error state in PSV. */
	if (spdk_unlikely(task->psv->bits.error)) {
		uint32_t n_slots = task->num_ops * 2 + 1;

		if (qp_slot < n_slots) {
			spdk_mempool_put(dev->dev_ctx->psv_pool, task->psv);
			spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, task->mkeys, task->num_ops);
			dev->stats.nomem_qdepth++;
			task->num_ops = 0;
			return -ENOMEM;
		}
	}

	return 0;
}

static inline int
accel_mlx5_crc_task_continue(struct accel_mlx5_task *task)
{
	struct accel_mlx5_qp *qp = task->qp;
	struct accel_mlx5_dev *dev = qp->dev;
	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
	int rc;

	assert(task->num_reqs > task->num_completed_reqs);
	if (task->num_ops == 0) {
		/* No mkeys allocated, try to allocate now. */
		rc = accel_mlx5_task_alloc_crc_ctx(task, qp_slot);
		if (spdk_unlikely(rc)) {
			STAILQ_INSERT_TAIL(&dev->nomem, task, link);
			return -ENOMEM;
		}
	}
	/* We need to post at least 1 UMR and 1 RDMA operation */
	if (spdk_unlikely(qp_slot < 2)) {
		STAILQ_INSERT_TAIL(&dev->nomem, task, link);
		dev->stats.nomem_qdepth++;
		return -ENOMEM;
	}

	return accel_mlx5_crc_task_process(task);
}

static inline uint32_t
accel_mlx5_get_crc_task_count(struct iovec *src_iov, uint32_t src_iovcnt, struct iovec *dst_iov,
			      uint32_t dst_iovcnt)
{
	uint32_t src_idx = 0;
	uint32_t dst_idx = 0;
	uint32_t num_ops = 1;
	uint32_t num_src_sge = 1;
	uint32_t num_dst_sge = 1;
	size_t src_offset = 0;
	size_t dst_offset = 0;
	uint32_t num_sge;
	size_t src_len;
	size_t dst_len;

	/* One operation is enough if both iovs fit into ACCEL_MLX5_MAX_SGE. One SGE is reserved for CRC on dst_iov. */
	if (src_iovcnt <= ACCEL_MLX5_MAX_SGE && (dst_iovcnt + 1) <= ACCEL_MLX5_MAX_SGE) {
		return 1;
	}

	while (src_idx < src_iovcnt && dst_idx < dst_iovcnt) {
		if (num_src_sge > ACCEL_MLX5_MAX_SGE || num_dst_sge > ACCEL_MLX5_MAX_SGE) {
			num_ops++;
			num_src_sge = 1;
			num_dst_sge = 1;
		}
		src_len = src_iov[src_idx].iov_len - src_offset;
		dst_len = dst_iov[dst_idx].iov_len - dst_offset;

		if (src_len == dst_len) {
			num_src_sge++;
			num_dst_sge++;
			src_offset = 0;
			dst_offset = 0;
			src_idx++;
			dst_idx++;
			continue;
		}
		if (src_len < dst_len) {
			/* Advance src_iov to reach the point that corresponds to the end of the current dst_iov. */
			num_sge = accel_mlx5_advance_iovec(&src_iov[src_idx],
							   spdk_min(ACCEL_MLX5_MAX_SGE + 1 - num_src_sge,
								    src_iovcnt - src_idx),
							   &src_offset, &dst_len);
			src_idx += num_sge;
			num_src_sge += num_sge;
			if (dst_len != 0) {
				/*
				 * ACCEL_MLX5_MAX_SGE is reached on src_iov, and dst_len bytes
				 * are left on the current dst_iov.
				 */
				dst_offset = dst_iov[dst_idx].iov_len - dst_len;
			} else {
				/* The src_iov advance is completed, shift to the next dst_iov. */
				dst_idx++;
				num_dst_sge++;
				dst_offset = 0;
			}
		} else { /* src_len > dst_len */
			/* Advance dst_iov to reach the point that corresponds to the end of the current src_iov. */
			num_sge = accel_mlx5_advance_iovec(&dst_iov[dst_idx],
							   spdk_min(ACCEL_MLX5_MAX_SGE + 1 - num_dst_sge,
								    dst_iovcnt - dst_idx),
							   &dst_offset, &src_len);
			dst_idx += num_sge;
			num_dst_sge += num_sge;
			if (src_len != 0) {
				/*
				 * ACCEL_MLX5_MAX_SGE is reached on dst_iov, and src_len bytes
				 * are left on the current src_iov.
				 */
				src_offset = src_iov[src_idx].iov_len - src_len;
			} else {
				/* The dst_iov advance is completed, shift to the next src_iov. */
				src_idx++;
				num_src_sge++;
				src_offset = 0;
			}
		}
	}
	/* An extra operation is needed if no space is left on dst_iov because CRC takes one SGE. */
	if (num_dst_sge > ACCEL_MLX5_MAX_SGE) {
		num_ops++;
	}

	/* The above loop must reach the end of both iovs simultaneously because their size is the same. */
	assert(src_idx == src_iovcnt);
	assert(dst_idx == dst_iovcnt);
	assert(src_offset == 0);
	assert(dst_offset == 0);

	return num_ops;
}

static inline int
accel_mlx5_crc_task_init(struct accel_mlx5_task *mlx5_task)
{
	struct spdk_accel_task *task = &mlx5_task->base;
	struct accel_mlx5_qp *qp = mlx5_task->qp;
	uint32_t qp_slot = accel_mlx5_dev_get_available_slots(qp->dev, qp);
	int rc;

	accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt);
	if (mlx5_task->inplace) {
		/* One entry is reserved for CRC */
		mlx5_task->num_reqs = SPDK_CEIL_DIV(mlx5_task->src.iovcnt + 1, ACCEL_MLX5_MAX_SGE);
	} else {
		accel_mlx5_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt);
		mlx5_task->num_reqs = accel_mlx5_get_crc_task_count(mlx5_task->src.iov, mlx5_task->src.iovcnt,
				      mlx5_task->dst.iov, mlx5_task->dst.iovcnt);
	}

	rc = accel_mlx5_task_alloc_crc_ctx(mlx5_task, qp_slot);
	if (spdk_unlikely(rc)) {
		return rc;
	}

	if (spdk_unlikely(qp_slot < 2)) {
		/* Queue is full, queue this task */
		SPDK_DEBUGLOG(accel_mlx5, "dev %s qp %p is full\n", qp->dev->dev_ctx->context->device->name,
			      mlx5_task->qp);
		qp->dev->stats.nomem_qdepth++;
		return -ENOMEM;
	}
	return 0;
}

static inline int
accel_mlx5_crypto_mkey_task_init(struct accel_mlx5_task *mlx5_task)
{
	struct spdk_accel_task *task = &mlx5_task->base;
	struct accel_mlx5_qp *qp = mlx5_task->qp;
	struct accel_mlx5_dev *dev = qp->dev;
	uint32_t num_blocks;
	int rc;
	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
	bool crypto_key_ok;

	if (spdk_unlikely(task->s.iovcnt > ACCEL_MLX5_MAX_SGE)) {
		/* With `external mkey` we can't split task or register several UMRs */
		SPDK_ERRLOG("src buffer is too fragmented\n");
		return -EINVAL;
	}
	if (spdk_unlikely(task->src_domain == spdk_accel_get_memory_domain())) {
		SPDK_ERRLOG("accel domain is not supported\n");
		return -ENOTSUP;
	}
	if (spdk_unlikely(spdk_accel_sequence_next_task(task) != NULL)) {
		SPDK_ERRLOG("Mkey registration is only supported for single task\n");
		return -ENOTSUP;
	}

	crypto_key_ok = (task->crypto_key && task->crypto_key->module_if == &g_accel_mlx5.module &&
			 task->crypto_key->priv);
	if (spdk_unlikely(!crypto_key_ok)) {
		SPDK_ERRLOG("Wrong crypto key provided\n");
		return -EINVAL;
	}
	if (spdk_unlikely(task->nbytes % mlx5_task->base.block_size != 0)) {
		SPDK_ERRLOG("src length %"PRIu64" is not a multiple of the block size %u\n", task->nbytes,
			    mlx5_task->base.block_size);
		return -EINVAL;
	}

	num_blocks = task->nbytes / mlx5_task->base.block_size;
	if (dev->crypto_multi_block) {
		if (spdk_unlikely(g_accel_mlx5.attr.crypto_split_blocks &&
				  num_blocks > g_accel_mlx5.attr.crypto_split_blocks)) {
			SPDK_ERRLOG("Number of blocks in task %u exceeds split threshold %u, can't handle\n",
				    num_blocks, g_accel_mlx5.attr.crypto_split_blocks);
			return -E2BIG;
		}
	} else if (num_blocks != 1) {
		SPDK_ERRLOG("Task contains more than 1 block, can't handle\n");
		return -E2BIG;
	}

	accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt);
	mlx5_task->num_blocks = num_blocks;
	mlx5_task->num_processed_blocks = 0;
	mlx5_task->num_reqs = 1;
	mlx5_task->blocks_per_req = num_blocks;
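
	/* Both a free QP slot and one crypto mkey must be reserved before this task can be
	 * submitted; the checks below bail out with -ENOMEM and bump the matching counter
	 * (nomem_qdepth vs nomem_mkey). */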
1755 1756 if (spdk_unlikely(qp_slot == 0)) { 1757 mlx5_task->num_ops = 0; 1758 dev->stats.nomem_qdepth++; 1759 return -ENOMEM; 1760 } 1761 rc = spdk_mlx5_mkey_pool_get_bulk(dev->crypto_mkeys, mlx5_task->mkeys, 1); 1762 if (spdk_unlikely(rc)) { 1763 mlx5_task->num_ops = 0; 1764 dev->stats.nomem_mkey++; 1765 return -ENOMEM; 1766 } 1767 mlx5_task->num_ops = 1; 1768 1769 SPDK_DEBUGLOG(accel_mlx5, "crypto_mkey task num_blocks %u, src_len %zu\n", mlx5_task->num_reqs, 1770 task->nbytes); 1771 1772 return 0; 1773 } 1774 1775 static inline int 1776 accel_mlx5_crypto_mkey_task_process(struct accel_mlx5_task *mlx5_task) 1777 { 1778 struct accel_mlx5_sge sge; 1779 struct spdk_accel_task *task = &mlx5_task->base; 1780 struct accel_mlx5_qp *qp = mlx5_task->qp; 1781 struct accel_mlx5_dev *dev = qp->dev; 1782 struct spdk_mlx5_crypto_dek_data dek_data; 1783 int rc; 1784 1785 if (spdk_unlikely(!mlx5_task->num_ops)) { 1786 return -EINVAL; 1787 } 1788 SPDK_DEBUGLOG(accel_mlx5, "begin, task %p, dst_domain_ctx %p\n", mlx5_task, task->dst_domain_ctx); 1789 1790 mlx5_task->num_wrs = 0; 1791 rc = spdk_mlx5_crypto_get_dek_data(task->crypto_key->priv, dev->dev_ctx->pd, &dek_data); 1792 if (spdk_unlikely(rc)) { 1793 return rc; 1794 } 1795 1796 rc = accel_mlx5_configure_crypto_umr(mlx5_task, &sge, mlx5_task->mkeys[0]->mkey, 1797 mlx5_task->num_blocks, &dek_data, (uint64_t)mlx5_task, SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE); 1798 if (spdk_unlikely(rc)) { 1799 SPDK_ERRLOG("UMR configure failed with %d\n", rc); 1800 return rc; 1801 } 1802 dev->stats.crypto_umrs++; 1803 mlx5_task->num_submitted_reqs++; 1804 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task); 1805 STAILQ_INSERT_TAIL(&qp->in_hw, mlx5_task, link); 1806 1807 SPDK_DEBUGLOG(accel_mlx5, "end, task %p, dst_domain_ctx %p\n", mlx5_task, task->dst_domain_ctx); 1808 1809 return 0; 1810 } 1811 1812 static inline int 1813 accel_mlx5_crypto_mkey_task_continue(struct accel_mlx5_task *task) 1814 { 1815 struct accel_mlx5_qp *qp = task->qp; 1816 struct accel_mlx5_dev *dev = qp->dev; 1817 int rc; 1818 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp); 1819 1820 if (task->num_ops == 0) { 1821 rc = spdk_mlx5_mkey_pool_get_bulk(dev->crypto_mkeys, task->mkeys, 1); 1822 if (spdk_unlikely(rc)) { 1823 dev->stats.nomem_mkey++; 1824 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 1825 return -ENOMEM; 1826 } 1827 task->num_ops = 1; 1828 } 1829 if (spdk_unlikely(qp_slot == 0)) { 1830 dev->stats.nomem_qdepth++; 1831 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 1832 return -ENOMEM; 1833 } 1834 return accel_mlx5_crypto_mkey_task_process(task); 1835 } 1836 1837 static inline void 1838 accel_mlx5_crypto_mkey_task_complete(struct accel_mlx5_task *mlx5_task) 1839 { 1840 struct accel_mlx5_dev *dev = mlx5_task->qp->dev; 1841 1842 assert(mlx5_task->num_ops); 1843 assert(mlx5_task->num_processed_blocks == mlx5_task->num_blocks); 1844 assert(mlx5_task->base.seq); 1845 1846 spdk_mlx5_mkey_pool_put_bulk(dev->crypto_mkeys, mlx5_task->mkeys, 1); 1847 spdk_accel_task_complete(&mlx5_task->base, 0); 1848 } 1849 1850 static inline int 1851 accel_mlx5_mkey_task_init(struct accel_mlx5_task *mlx5_task) 1852 { 1853 struct spdk_accel_task *task = &mlx5_task->base; 1854 struct accel_mlx5_qp *qp = mlx5_task->qp; 1855 struct accel_mlx5_dev *dev = qp->dev; 1856 int rc; 1857 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp); 1858 1859 if (spdk_unlikely(task->s.iovcnt > ACCEL_MLX5_MAX_SGE)) { 1860 /* With `external mkey` we can't split task or register several UMRs */ 1861 SPDK_ERRLOG("src 
buffer is too fragmented\n"); 1862 return -EINVAL; 1863 } 1864 if (spdk_unlikely(task->src_domain == spdk_accel_get_memory_domain())) { 1865 SPDK_ERRLOG("accel domain is not supported\n"); 1866 return -EINVAL; 1867 } 1868 if (spdk_unlikely(spdk_accel_sequence_next_task(task) != NULL)) { 1869 SPDK_ERRLOG("Mkey registration is only supported for single task\n"); 1870 return -ENOTSUP; 1871 } 1872 1873 accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt); 1874 mlx5_task->num_reqs = 1; 1875 1876 if (spdk_unlikely(qp_slot == 0)) { 1877 mlx5_task->num_ops = 0; 1878 dev->stats.nomem_qdepth++; 1879 return -ENOMEM; 1880 } 1881 rc = spdk_mlx5_mkey_pool_get_bulk(dev->mkeys, mlx5_task->mkeys, 1); 1882 if (spdk_unlikely(rc)) { 1883 mlx5_task->num_ops = 0; 1884 dev->stats.nomem_mkey++; 1885 return -ENOMEM; 1886 } 1887 mlx5_task->num_ops = 1; 1888 1889 SPDK_DEBUGLOG(accel_mlx5, "mkey task num_reqs %u, src_len %zu\n", mlx5_task->num_reqs, 1890 task->nbytes); 1891 1892 return 0; 1893 } 1894 1895 static inline int 1896 accel_mlx5_mkey_task_process(struct accel_mlx5_task *mlx5_task) 1897 { 1898 struct spdk_mlx5_umr_attr umr_attr; 1899 struct ibv_sge src_sge[ACCEL_MLX5_MAX_SGE]; 1900 struct spdk_accel_task *task = &mlx5_task->base; 1901 struct accel_mlx5_qp *qp = mlx5_task->qp; 1902 struct accel_mlx5_dev *dev = qp->dev; 1903 uint32_t remaining = 0; 1904 int rc; 1905 1906 if (spdk_unlikely(!mlx5_task->num_ops)) { 1907 return -EINVAL; 1908 } 1909 SPDK_DEBUGLOG(accel_mlx5, "begin, task %p, dst_domain_ctx %p\n", mlx5_task, task->dst_domain_ctx); 1910 1911 mlx5_task->num_wrs = 0; 1912 1913 rc = accel_mlx5_fill_block_sge(dev, src_sge, &mlx5_task->src, task->nbytes, &remaining, 1914 task->src_domain, task->src_domain_ctx); 1915 if (spdk_unlikely(rc <= 0 || remaining)) { 1916 rc = rc ? 
rc : -EINVAL; 1917 SPDK_ERRLOG("Failed to set src sge, rc %d, remaining %u\n", rc, remaining); 1918 return rc; 1919 } 1920 umr_attr.mkey = mlx5_task->mkeys[0]->mkey; 1921 umr_attr.sge = src_sge; 1922 umr_attr.sge_count = rc; 1923 umr_attr.umr_len = task->nbytes; 1924 1925 rc = spdk_mlx5_umr_configure(qp->qp, &umr_attr, (uint64_t)mlx5_task, 1926 SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE); 1927 if (spdk_unlikely(rc)) { 1928 SPDK_ERRLOG("UMR configure failed with %d\n", rc); 1929 return rc; 1930 } 1931 dev->stats.umrs++; 1932 mlx5_task->num_submitted_reqs++; 1933 ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task); 1934 STAILQ_INSERT_TAIL(&qp->in_hw, mlx5_task, link); 1935 1936 SPDK_DEBUGLOG(accel_mlx5, "end, task %p, dst_domain_ctx %p\n", mlx5_task, task->dst_domain_ctx); 1937 1938 return 0; 1939 } 1940 1941 static inline int 1942 accel_mlx5_mkey_task_continue(struct accel_mlx5_task *task) 1943 { 1944 struct accel_mlx5_qp *qp = task->qp; 1945 struct accel_mlx5_dev *dev = qp->dev; 1946 int rc; 1947 uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp); 1948 1949 if (task->num_ops == 0) { 1950 rc = spdk_mlx5_mkey_pool_get_bulk(dev->mkeys, task->mkeys, 1); 1951 if (spdk_unlikely(rc)) { 1952 dev->stats.nomem_mkey++; 1953 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 1954 return -ENOMEM; 1955 } 1956 task->num_ops = 1; 1957 } 1958 if (spdk_unlikely(qp_slot == 0)) { 1959 dev->stats.nomem_qdepth++; 1960 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 1961 return -ENOMEM; 1962 } 1963 return accel_mlx5_mkey_task_process(task); 1964 } 1965 1966 static inline void 1967 accel_mlx5_mkey_task_complete(struct accel_mlx5_task *mlx5_task) 1968 { 1969 struct accel_mlx5_dev *dev = mlx5_task->qp->dev; 1970 1971 assert(mlx5_task->num_ops); 1972 assert(mlx5_task->base.seq); 1973 1974 spdk_mlx5_mkey_pool_put_bulk(dev->mkeys, mlx5_task->mkeys, 1); 1975 spdk_accel_task_complete(&mlx5_task->base, 0); 1976 } 1977 1978 static int 1979 accel_mlx5_task_op_not_implemented(struct accel_mlx5_task *mlx5_task) 1980 { 1981 SPDK_ERRLOG("wrong function called\n"); 1982 SPDK_UNREACHABLE(); 1983 } 1984 1985 static void 1986 accel_mlx5_task_op_not_implemented_v(struct accel_mlx5_task *mlx5_task) 1987 { 1988 SPDK_ERRLOG("wrong function called\n"); 1989 SPDK_UNREACHABLE(); 1990 } 1991 1992 static int 1993 accel_mlx5_task_op_not_supported(struct accel_mlx5_task *mlx5_task) 1994 { 1995 SPDK_ERRLOG("Unsupported opcode %d\n", mlx5_task->base.op_code); 1996 1997 return -ENOTSUP; 1998 } 1999 2000 static struct accel_mlx5_task_operations g_accel_mlx5_tasks_ops[] = { 2001 [ACCEL_MLX5_OPC_COPY] = { 2002 .init = accel_mlx5_copy_task_init, 2003 .process = accel_mlx5_copy_task_process, 2004 .cont = accel_mlx5_copy_task_continue, 2005 .complete = accel_mlx5_copy_task_complete, 2006 }, 2007 [ACCEL_MLX5_OPC_CRYPTO] = { 2008 .init = accel_mlx5_crypto_task_init, 2009 .process = accel_mlx5_crypto_task_process, 2010 .cont = accel_mlx5_crypto_task_continue, 2011 .complete = accel_mlx5_crypto_task_complete, 2012 }, 2013 [ACCEL_MLX5_OPC_CRC32C] = { 2014 .init = accel_mlx5_crc_task_init, 2015 .process = accel_mlx5_crc_task_process, 2016 .cont = accel_mlx5_crc_task_continue, 2017 .complete = accel_mlx5_crc_task_complete, 2018 }, 2019 [ACCEL_MLX5_OPC_CRYPTO_MKEY] = { 2020 .init = accel_mlx5_crypto_mkey_task_init, 2021 .process = accel_mlx5_crypto_mkey_task_process, 2022 .cont = accel_mlx5_crypto_mkey_task_continue, 2023 .complete = accel_mlx5_crypto_mkey_task_complete, 2024 }, 2025 [ACCEL_MLX5_OPC_MKEY] = { 2026 .init = accel_mlx5_mkey_task_init, 2027 
.process = accel_mlx5_mkey_task_process, 2028 .cont = accel_mlx5_mkey_task_continue, 2029 .complete = accel_mlx5_mkey_task_complete, 2030 }, 2031 [ACCEL_MLX5_OPC_LAST] = { 2032 .init = accel_mlx5_task_op_not_supported, 2033 .process = accel_mlx5_task_op_not_implemented, 2034 .cont = accel_mlx5_task_op_not_implemented, 2035 .complete = accel_mlx5_task_op_not_implemented_v 2036 }, 2037 }; 2038 2039 static void 2040 accel_mlx5_memory_domain_transfer_cpl(void *ctx, int rc) 2041 { 2042 struct accel_mlx5_task *task = ctx; 2043 2044 assert(task->needs_data_transfer); 2045 task->needs_data_transfer = 0; 2046 2047 if (spdk_likely(!rc)) { 2048 SPDK_DEBUGLOG(accel_mlx5, "task %p, data transfer done\n", task); 2049 accel_mlx5_task_complete(task); 2050 } else { 2051 SPDK_ERRLOG("Task %p, data transfer failed, rc %d\n", task, rc); 2052 accel_mlx5_task_fail(task, rc); 2053 } 2054 } 2055 2056 static inline void 2057 accel_mlx5_memory_domain_transfer(struct accel_mlx5_task *task) 2058 { 2059 struct spdk_memory_domain_translation_result translation; 2060 struct spdk_accel_task *base = &task->base; 2061 struct accel_mlx5_dev *dev = task->qp->dev; 2062 int rc; 2063 2064 assert(task->mlx5_opcode == ACCEL_MLX5_OPC_CRYPTO_MKEY || task->mlx5_opcode == ACCEL_MLX5_OPC_MKEY); 2065 /* UMR is an offset in the addess space, so the start address is 0 */ 2066 translation.iov.iov_base = NULL; 2067 translation.iov.iov_len = base->nbytes; 2068 translation.iov_count = 1; 2069 translation.size = sizeof(translation); 2070 translation.rdma.rkey = task->mkeys[0]->mkey; 2071 translation.rdma.lkey = task->mkeys[0]->mkey; 2072 2073 SPDK_DEBUGLOG(accel_mlx5, "start transfer, task %p, dst_domain_ctx %p, mkey %u\n", task, 2074 task->base.dst_domain_ctx, task->mkeys[0]->mkey); 2075 rc = spdk_memory_domain_transfer_data(base->dst_domain, base->dst_domain_ctx, &translation.iov, 1, 2076 dev->dev_ctx->domain, task, &translation.iov, 1, &translation, 2077 accel_mlx5_memory_domain_transfer_cpl, task); 2078 if (spdk_unlikely(rc)) { 2079 SPDK_ERRLOG("Failed to start data transfer, task %p rc %d\n", task, rc); 2080 accel_mlx5_task_fail(task, rc); 2081 } 2082 } 2083 2084 static inline void 2085 accel_mlx5_task_complete(struct accel_mlx5_task *task) 2086 { 2087 struct spdk_accel_sequence *seq = task->base.seq; 2088 struct spdk_accel_task *next; 2089 bool driver_seq; 2090 2091 if (task->needs_data_transfer) { 2092 accel_mlx5_memory_domain_transfer(task); 2093 return; 2094 } 2095 2096 next = spdk_accel_sequence_next_task(&task->base); 2097 driver_seq = task->driver_seq; 2098 2099 assert(task->num_reqs == task->num_completed_reqs); 2100 SPDK_DEBUGLOG(accel_mlx5, "Complete task %p, opc %d\n", task, task->mlx5_opcode); 2101 2102 g_accel_mlx5_tasks_ops[task->mlx5_opcode].complete(task); 2103 2104 if (driver_seq) { 2105 struct spdk_io_channel *ch = task->qp->dev->ch; 2106 2107 assert(seq); 2108 if (next) { 2109 accel_mlx5_execute_sequence(ch, seq); 2110 } else { 2111 spdk_accel_sequence_continue(seq); 2112 } 2113 } 2114 } 2115 2116 static inline int 2117 accel_mlx5_task_continue(struct accel_mlx5_task *task) 2118 { 2119 struct accel_mlx5_qp *qp = task->qp; 2120 struct accel_mlx5_dev *dev = qp->dev; 2121 2122 if (spdk_unlikely(qp->recovering)) { 2123 STAILQ_INSERT_TAIL(&dev->nomem, task, link); 2124 return 0; 2125 } 2126 2127 return g_accel_mlx5_tasks_ops[task->mlx5_opcode].cont(task); 2128 } 2129 static inline void 2130 accel_mlx5_task_init_opcode(struct accel_mlx5_task *mlx5_task) 2131 { 2132 uint8_t base_opcode = mlx5_task->base.op_code; 2133 2134 
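/*
 * Mapping overview (descriptive, mirrors the switch below): COPY maps to
 * ACCEL_MLX5_OPC_COPY; ENCRYPT and DECRYPT both map to ACCEL_MLX5_OPC_CRYPTO
 * and differ only in enc_order; CRC32C and COPY_CRC32C both map to
 * ACCEL_MLX5_OPC_CRC32C and differ only in the inplace flag. Any other opcode
 * (e.g. SPDK_ACCEL_OPC_COMPARE) falls into the default branch and gets
 * ACCEL_MLX5_OPC_LAST, whose init callback in g_accel_mlx5_tasks_ops rejects
 * the task with -ENOTSUP.
 */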
switch (base_opcode) { 2135 case SPDK_ACCEL_OPC_COPY: 2136 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_COPY; 2137 break; 2138 case SPDK_ACCEL_OPC_ENCRYPT: 2139 assert(g_accel_mlx5.crypto_supported); 2140 mlx5_task->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_WIRE; 2141 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO; 2142 break; 2143 case SPDK_ACCEL_OPC_DECRYPT: 2144 assert(g_accel_mlx5.crypto_supported); 2145 mlx5_task->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_MEMORY; 2146 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO; 2147 break; 2148 case SPDK_ACCEL_OPC_CRC32C: 2149 mlx5_task->inplace = 1; 2150 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRC32C; 2151 break; 2152 case SPDK_ACCEL_OPC_COPY_CRC32C: 2153 mlx5_task->inplace = 0; 2154 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRC32C; 2155 break; 2156 default: 2157 SPDK_ERRLOG("wrong opcode %d\n", base_opcode); 2158 mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_LAST; 2159 } 2160 } 2161 2162 static void 2163 accel_mlx5_post_poller_handler(void *fn_arg) 2164 { 2165 struct accel_mlx5_io_channel *ch = fn_arg; 2166 struct accel_mlx5_dev *dev; 2167 uint32_t i; 2168 2169 for (i = 0; i < ch->num_devs; i++) { 2170 dev = &ch->devs[i]; 2171 2172 if (dev->qp.ring_db) { 2173 spdk_mlx5_qp_complete_send(dev->qp.qp); 2174 dev->qp.ring_db = false; 2175 } 2176 } 2177 2178 ch->poller_handler_registered = false; 2179 } 2180 2181 static inline int 2182 _accel_mlx5_submit_tasks(struct accel_mlx5_io_channel *accel_ch, struct spdk_accel_task *task) 2183 { 2184 struct accel_mlx5_task *mlx5_task = SPDK_CONTAINEROF(task, struct accel_mlx5_task, base); 2185 struct accel_mlx5_dev *dev = mlx5_task->qp->dev; 2186 int rc; 2187 2188 /* We should not receive any tasks if the module was not enabled */ 2189 assert(g_accel_mlx5.enabled); 2190 2191 dev->stats.opcodes[mlx5_task->mlx5_opcode]++; 2192 rc = g_accel_mlx5_tasks_ops[mlx5_task->mlx5_opcode].init(mlx5_task); 2193 if (spdk_unlikely(rc)) { 2194 if (rc == -ENOMEM) { 2195 SPDK_DEBUGLOG(accel_mlx5, "no reqs to handle new task %p (required %u), put to queue\n", mlx5_task, 2196 mlx5_task->num_reqs); 2197 STAILQ_INSERT_TAIL(&dev->nomem, mlx5_task, link); 2198 return 0; 2199 } 2200 SPDK_ERRLOG("Task opc %d init failed, rc %d\n", task->op_code, rc); 2201 return rc; 2202 } 2203 2204 if (spdk_unlikely(mlx5_task->qp->recovering)) { 2205 STAILQ_INSERT_TAIL(&dev->nomem, mlx5_task, link); 2206 return 0; 2207 } 2208 2209 if (!accel_ch->poller_handler_registered) { 2210 spdk_thread_register_post_poller_handler(accel_mlx5_post_poller_handler, accel_ch); 2211 /* Function above may fail to register our handler, in that case we ring doorbells on next polling 2212 * cycle. 
That is less efficient but still works */ 2213 accel_ch->poller_handler_registered = true; 2214 } 2215 2216 return g_accel_mlx5_tasks_ops[mlx5_task->mlx5_opcode].process(mlx5_task); 2217 } 2218 2219 static inline void 2220 accel_mlx5_task_assign_qp(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_io_channel *accel_ch) 2221 { 2222 struct accel_mlx5_dev *dev; 2223 2224 dev = &accel_ch->devs[accel_ch->dev_idx]; 2225 accel_ch->dev_idx++; 2226 if (accel_ch->dev_idx == accel_ch->num_devs) { 2227 accel_ch->dev_idx = 0; 2228 } 2229 2230 mlx5_task->qp = &dev->qp; 2231 } 2232 2233 static inline void 2234 accel_mlx5_task_reset(struct accel_mlx5_task *mlx5_task) 2235 { 2236 mlx5_task->num_completed_reqs = 0; 2237 mlx5_task->num_submitted_reqs = 0; 2238 mlx5_task->num_ops = 0; 2239 mlx5_task->num_processed_blocks = 0; 2240 mlx5_task->raw = 0; 2241 } 2242 2243 static int 2244 accel_mlx5_submit_tasks(struct spdk_io_channel *ch, struct spdk_accel_task *task) 2245 { 2246 struct accel_mlx5_task *mlx5_task = SPDK_CONTAINEROF(task, struct accel_mlx5_task, base); 2247 struct accel_mlx5_io_channel *accel_ch = spdk_io_channel_get_ctx(ch); 2248 2249 accel_mlx5_task_assign_qp(mlx5_task, accel_ch); 2250 accel_mlx5_task_reset(mlx5_task); 2251 accel_mlx5_task_init_opcode(mlx5_task); 2252 2253 return _accel_mlx5_submit_tasks(accel_ch, task); 2254 } 2255 2256 static void accel_mlx5_recover_qp(struct accel_mlx5_qp *qp); 2257 2258 static int 2259 accel_mlx5_recover_qp_poller(void *arg) 2260 { 2261 struct accel_mlx5_qp *qp = arg; 2262 2263 spdk_poller_unregister(&qp->recover_poller); 2264 accel_mlx5_recover_qp(qp); 2265 return SPDK_POLLER_BUSY; 2266 } 2267 2268 static void 2269 accel_mlx5_recover_qp(struct accel_mlx5_qp *qp) 2270 { 2271 struct accel_mlx5_dev *dev = qp->dev; 2272 struct spdk_mlx5_qp_attr mlx5_qp_attr = {}; 2273 int rc; 2274 2275 SPDK_NOTICELOG("Recovering qp %p, core %u\n", qp, spdk_env_get_current_core()); 2276 if (qp->qp) { 2277 spdk_mlx5_qp_destroy(qp->qp); 2278 qp->qp = NULL; 2279 } 2280 2281 mlx5_qp_attr.cap.max_send_wr = g_accel_mlx5.attr.qp_size; 2282 mlx5_qp_attr.cap.max_recv_wr = 0; 2283 mlx5_qp_attr.cap.max_send_sge = ACCEL_MLX5_MAX_SGE; 2284 mlx5_qp_attr.cap.max_inline_data = sizeof(struct ibv_sge) * ACCEL_MLX5_MAX_SGE; 2285 2286 rc = spdk_mlx5_qp_create(dev->dev_ctx->pd, dev->cq, &mlx5_qp_attr, &qp->qp); 2287 if (rc) { 2288 SPDK_ERRLOG("Failed to create mlx5 dma QP, rc %d. 
Retry in %d usec\n", 2289 rc, ACCEL_MLX5_RECOVER_POLLER_PERIOD_US); 2290 qp->recover_poller = SPDK_POLLER_REGISTER(accel_mlx5_recover_qp_poller, qp, 2291 ACCEL_MLX5_RECOVER_POLLER_PERIOD_US); 2292 return; 2293 } 2294 2295 qp->recovering = false; 2296 } 2297 2298 static inline void 2299 accel_mlx5_process_error_cpl(struct spdk_mlx5_cq_completion *wc, struct accel_mlx5_task *task) 2300 { 2301 struct accel_mlx5_qp *qp = task->qp; 2302 2303 if (wc->status != IBV_WC_WR_FLUSH_ERR) { 2304 SPDK_WARNLOG("RDMA: qp %p, task %p, WC status %d, core %u\n", 2305 qp, task, wc->status, spdk_env_get_current_core()); 2306 } else { 2307 SPDK_DEBUGLOG(accel_mlx5, 2308 "RDMA: qp %p, task %p, WC status %d, core %u\n", 2309 qp, task, wc->status, spdk_env_get_current_core()); 2310 } 2311 2312 qp->recovering = true; 2313 assert(task->num_completed_reqs <= task->num_submitted_reqs); 2314 if (task->num_completed_reqs == task->num_submitted_reqs) { 2315 STAILQ_REMOVE_HEAD(&qp->in_hw, link); 2316 accel_mlx5_task_fail(task, -EIO); 2317 } 2318 } 2319 2320 static inline int64_t 2321 accel_mlx5_poll_cq(struct accel_mlx5_dev *dev) 2322 { 2323 struct spdk_mlx5_cq_completion wc[ACCEL_MLX5_MAX_WC]; 2324 struct accel_mlx5_task *task; 2325 struct accel_mlx5_qp *qp; 2326 int reaped, i, rc; 2327 uint16_t completed; 2328 2329 dev->stats.polls++; 2330 reaped = spdk_mlx5_cq_poll_completions(dev->cq, wc, ACCEL_MLX5_MAX_WC); 2331 if (spdk_unlikely(reaped < 0)) { 2332 SPDK_ERRLOG("Error polling CQ! (%d): %s\n", errno, spdk_strerror(errno)); 2333 return reaped; 2334 } else if (reaped == 0) { 2335 dev->stats.idle_polls++; 2336 return 0; 2337 } 2338 dev->stats.completions += reaped; 2339 2340 SPDK_DEBUGLOG(accel_mlx5, "Reaped %d cpls on dev %s\n", reaped, 2341 dev->dev_ctx->context->device->name); 2342 2343 for (i = 0; i < reaped; i++) { 2344 if (spdk_unlikely(!wc[i].wr_id)) { 2345 /* Unsignaled completion with error, ignore */ 2346 continue; 2347 } 2348 task = (struct accel_mlx5_task *)wc[i].wr_id; 2349 qp = task->qp; 2350 assert(task == STAILQ_FIRST(&qp->in_hw) && "submission mismatch"); 2351 assert(task->num_submitted_reqs > task->num_completed_reqs); 2352 completed = task->num_submitted_reqs - task->num_completed_reqs; 2353 assert((uint32_t)task->num_completed_reqs + completed <= UINT16_MAX); 2354 task->num_completed_reqs += completed; 2355 assert(qp->wrs_submitted >= task->num_wrs); 2356 qp->wrs_submitted -= task->num_wrs; 2357 assert(dev->wrs_in_cq > 0); 2358 dev->wrs_in_cq--; 2359 2360 if (spdk_unlikely(wc[i].status)) { 2361 accel_mlx5_process_error_cpl(&wc[i], task); 2362 if (qp->wrs_submitted == 0) { 2363 assert(STAILQ_EMPTY(&qp->in_hw)); 2364 accel_mlx5_recover_qp(qp); 2365 } 2366 continue; 2367 } 2368 2369 SPDK_DEBUGLOG(accel_mlx5, "task %p, remaining %u\n", task, 2370 task->num_reqs - task->num_completed_reqs); 2371 if (task->num_completed_reqs == task->num_reqs) { 2372 STAILQ_REMOVE_HEAD(&qp->in_hw, link); 2373 accel_mlx5_task_complete(task); 2374 } else { 2375 assert(task->num_submitted_reqs < task->num_reqs); 2376 assert(task->num_completed_reqs == task->num_submitted_reqs); 2377 STAILQ_REMOVE_HEAD(&qp->in_hw, link); 2378 rc = accel_mlx5_task_continue(task); 2379 if (spdk_unlikely(rc)) { 2380 if (rc != -ENOMEM) { 2381 accel_mlx5_task_fail(task, rc); 2382 } 2383 } 2384 } 2385 } 2386 2387 return reaped; 2388 } 2389 2390 static inline void 2391 accel_mlx5_resubmit_nomem_tasks(struct accel_mlx5_dev *dev) 2392 { 2393 struct accel_mlx5_task *task, *tmp, *last; 2394 int rc; 2395 2396 last = STAILQ_LAST(&dev->nomem, accel_mlx5_task, 
link); 2397 STAILQ_FOREACH_SAFE(task, &dev->nomem, link, tmp) { 2398 STAILQ_REMOVE_HEAD(&dev->nomem, link); 2399 rc = accel_mlx5_task_continue(task); 2400 if (spdk_unlikely(rc)) { 2401 if (rc != -ENOMEM) { 2402 accel_mlx5_task_fail(task, rc); 2403 } 2404 break; 2405 } 2406 /* If qpair is recovering, task is added back to the nomem list and 0 is returned. In that case we 2407 * need a special condition to iterate the list once and stop this FOREACH loop */ 2408 if (task == last) { 2409 break; 2410 } 2411 } 2412 } 2413 2414 static int 2415 accel_mlx5_poller(void *ctx) 2416 { 2417 struct accel_mlx5_io_channel *ch = ctx; 2418 struct accel_mlx5_dev *dev; 2419 2420 int64_t completions = 0, rc; 2421 uint32_t i; 2422 2423 /* reaped completions may register a post poller handler, that makes no sense in the scope of our own poller */ 2424 ch->poller_handler_registered = true; 2425 for (i = 0; i < ch->num_devs; i++) { 2426 dev = &ch->devs[i]; 2427 if (dev->wrs_in_cq) { 2428 rc = accel_mlx5_poll_cq(dev); 2429 if (spdk_unlikely(rc < 0)) { 2430 SPDK_ERRLOG("Error %"PRId64" on CQ, dev %s\n", rc, dev->dev_ctx->context->device->name); 2431 } 2432 completions += rc; 2433 if (dev->qp.ring_db) { 2434 spdk_mlx5_qp_complete_send(dev->qp.qp); 2435 dev->qp.ring_db = false; 2436 } 2437 } 2438 if (!STAILQ_EMPTY(&dev->nomem)) { 2439 accel_mlx5_resubmit_nomem_tasks(dev); 2440 } 2441 } 2442 ch->poller_handler_registered = false; 2443 2444 return !!completions; 2445 } 2446 2447 static bool 2448 accel_mlx5_supports_opcode(enum spdk_accel_opcode opc) 2449 { 2450 assert(g_accel_mlx5.enabled); 2451 2452 switch (opc) { 2453 case SPDK_ACCEL_OPC_COPY: 2454 return true; 2455 case SPDK_ACCEL_OPC_ENCRYPT: 2456 case SPDK_ACCEL_OPC_DECRYPT: 2457 return g_accel_mlx5.crypto_supported; 2458 case SPDK_ACCEL_OPC_CRC32C: 2459 case SPDK_ACCEL_OPC_COPY_CRC32C: 2460 return g_accel_mlx5.crc32c_supported; 2461 default: 2462 return false; 2463 } 2464 } 2465 2466 static struct spdk_io_channel * 2467 accel_mlx5_get_io_channel(void) 2468 { 2469 assert(g_accel_mlx5.enabled); 2470 return spdk_get_io_channel(&g_accel_mlx5); 2471 } 2472 2473 static int 2474 accel_mlx5_create_qp(struct accel_mlx5_dev *dev, struct accel_mlx5_qp *qp) 2475 { 2476 struct spdk_mlx5_qp_attr mlx5_qp_attr = {}; 2477 int rc; 2478 2479 mlx5_qp_attr.cap.max_send_wr = g_accel_mlx5.attr.qp_size; 2480 mlx5_qp_attr.cap.max_recv_wr = 0; 2481 mlx5_qp_attr.cap.max_send_sge = ACCEL_MLX5_MAX_SGE; 2482 mlx5_qp_attr.cap.max_inline_data = sizeof(struct ibv_sge) * ACCEL_MLX5_MAX_SGE; 2483 2484 rc = spdk_mlx5_qp_create(dev->dev_ctx->pd, dev->cq, &mlx5_qp_attr, &qp->qp); 2485 if (rc) { 2486 return rc; 2487 } 2488 2489 STAILQ_INIT(&qp->in_hw); 2490 qp->dev = dev; 2491 qp->verbs_qp = spdk_mlx5_qp_get_verbs_qp(qp->qp); 2492 assert(qp->verbs_qp); 2493 qp->wrs_max = g_accel_mlx5.attr.qp_size; 2494 2495 return 0; 2496 } 2497 2498 static void 2499 accel_mlx5_add_stats(struct accel_mlx5_stats *stats, const struct accel_mlx5_stats *to_add) 2500 { 2501 int i; 2502 2503 stats->crypto_umrs += to_add->crypto_umrs; 2504 stats->sig_umrs += to_add->sig_umrs; 2505 stats->umrs += to_add->umrs; 2506 stats->rdma_reads += to_add->rdma_reads; 2507 stats->rdma_writes += to_add->rdma_writes; 2508 stats->polls += to_add->polls; 2509 stats->idle_polls += to_add->idle_polls; 2510 stats->completions += to_add->completions; 2511 stats->nomem_qdepth += to_add->nomem_qdepth; 2512 stats->nomem_mkey += to_add->nomem_mkey; 2513 for (i = 0; i < ACCEL_MLX5_OPC_LAST; i++) { 2514 stats->opcodes[i] += to_add->opcodes[i]; 2515 } 
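/*
 * Note: this helper is called in two contexts with different locking rules.
 * On channel destruction (accel_mlx5_destroy_cb) per-device counters are
 * folded into g_accel_mlx5.stats under g_accel_mlx5.lock; during
 * accel_mlx5_dump_stats they are folded into the channel and grand totals on
 * each channel's own thread, so no lock is needed there.
 */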
2516 } 2517 2518 static void 2519 accel_mlx5_destroy_cb(void *io_device, void *ctx_buf) 2520 { 2521 struct accel_mlx5_io_channel *ch = ctx_buf; 2522 struct accel_mlx5_dev *dev; 2523 uint32_t i; 2524 2525 spdk_poller_unregister(&ch->poller); 2526 for (i = 0; i < ch->num_devs; i++) { 2527 dev = &ch->devs[i]; 2528 spdk_mlx5_qp_destroy(dev->qp.qp); 2529 if (dev->cq) { 2530 spdk_mlx5_cq_destroy(dev->cq); 2531 } 2532 spdk_poller_unregister(&dev->qp.recover_poller); 2533 if (dev->mkeys) { 2534 spdk_mlx5_mkey_pool_put_ref(dev->mkeys); 2535 } 2536 if (dev->crypto_mkeys) { 2537 spdk_mlx5_mkey_pool_put_ref(dev->crypto_mkeys); 2538 } 2539 if (dev->sig_mkeys) { 2540 spdk_mlx5_mkey_pool_put_ref(dev->sig_mkeys); 2541 } 2542 spdk_rdma_utils_free_mem_map(&dev->mmap); 2543 spdk_spin_lock(&g_accel_mlx5.lock); 2544 accel_mlx5_add_stats(&g_accel_mlx5.stats, &dev->stats); 2545 spdk_spin_unlock(&g_accel_mlx5.lock); 2546 } 2547 free(ch->devs); 2548 } 2549 2550 static int 2551 accel_mlx5_create_cb(void *io_device, void *ctx_buf) 2552 { 2553 struct spdk_mlx5_cq_attr cq_attr = {}; 2554 struct accel_mlx5_io_channel *ch = ctx_buf; 2555 struct accel_mlx5_dev_ctx *dev_ctx; 2556 struct accel_mlx5_dev *dev; 2557 uint32_t i; 2558 int rc; 2559 2560 ch->devs = calloc(g_accel_mlx5.num_ctxs, sizeof(*ch->devs)); 2561 if (!ch->devs) { 2562 SPDK_ERRLOG("Memory allocation failed\n"); 2563 return -ENOMEM; 2564 } 2565 2566 for (i = 0; i < g_accel_mlx5.num_ctxs; i++) { 2567 dev_ctx = &g_accel_mlx5.dev_ctxs[i]; 2568 dev = &ch->devs[i]; 2569 dev->dev_ctx = dev_ctx; 2570 2571 assert(dev_ctx->mkeys); 2572 dev->mkeys = spdk_mlx5_mkey_pool_get_ref(dev_ctx->pd, 0); 2573 if (!dev->mkeys) { 2574 SPDK_ERRLOG("Failed to get mkey pool channel, dev %s\n", dev_ctx->context->device->name); 2575 /* Should not happen since mkey pool is created on accel_mlx5 initialization. 2576 * We should not be here if pool creation failed */ 2577 assert(0); 2578 goto err_out; 2579 } 2580 2581 if (dev_ctx->crypto_mkeys) { 2582 dev->crypto_mkeys = spdk_mlx5_mkey_pool_get_ref(dev_ctx->pd, SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO); 2583 if (!dev->crypto_mkeys) { 2584 SPDK_ERRLOG("Failed to get crypto mkey pool channel, dev %s\n", dev_ctx->context->device->name); 2585 /* Should not happen since mkey pool is created on accel_mlx5 initialization. 2586 * We should not be here if pool creation failed */ 2587 assert(0); 2588 goto err_out; 2589 } 2590 } 2591 if (dev_ctx->sig_mkeys) { 2592 dev->sig_mkeys = spdk_mlx5_mkey_pool_get_ref(dev_ctx->pd, SPDK_MLX5_MKEY_POOL_FLAG_SIGNATURE); 2593 if (!dev->sig_mkeys) { 2594 SPDK_ERRLOG("Failed to get sig mkey pool channel, dev %s\n", dev_ctx->context->device->name); 2595 /* Should not happen since mkey pool is created on accel_mlx5 initialization. 
2596 * We should not be here if pool creation failed */ 2597 assert(0); 2598 goto err_out; 2599 } 2600 } 2601 2602 memset(&cq_attr, 0, sizeof(cq_attr)); 2603 cq_attr.cqe_cnt = g_accel_mlx5.attr.qp_size; 2604 cq_attr.cqe_size = 64; 2605 cq_attr.cq_context = dev; 2606 2607 ch->num_devs++; 2608 rc = spdk_mlx5_cq_create(dev_ctx->pd, &cq_attr, &dev->cq); 2609 if (rc) { 2610 SPDK_ERRLOG("Failed to create mlx5 CQ, rc %d\n", rc); 2611 goto err_out; 2612 } 2613 2614 rc = accel_mlx5_create_qp(dev, &dev->qp); 2615 if (rc) { 2616 SPDK_ERRLOG("Failed to create mlx5 QP, rc %d\n", rc); 2617 goto err_out; 2618 } 2619 2620 dev->mmap = spdk_rdma_utils_create_mem_map(dev_ctx->pd, NULL, 2621 IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE); 2622 if (!dev->mmap) { 2623 SPDK_ERRLOG("Failed to create memory map\n"); 2624 rc = -ENOMEM; 2625 goto err_out; 2626 } 2627 dev->crypto_multi_block = dev_ctx->crypto_multi_block; 2628 dev->crypto_split_blocks = dev_ctx->crypto_multi_block ? g_accel_mlx5.attr.crypto_split_blocks : 0; 2629 dev->wrs_in_cq_max = g_accel_mlx5.attr.qp_size; 2630 dev->ch = spdk_io_channel_from_ctx(ctx_buf); 2631 STAILQ_INIT(&dev->nomem); 2632 } 2633 2634 ch->poller = SPDK_POLLER_REGISTER(accel_mlx5_poller, ch, 0); 2635 2636 return 0; 2637 2638 err_out: 2639 accel_mlx5_destroy_cb(&g_accel_mlx5, ctx_buf); 2640 return rc; 2641 } 2642 2643 void 2644 accel_mlx5_get_default_attr(struct accel_mlx5_attr *attr) 2645 { 2646 assert(attr); 2647 2648 attr->qp_size = ACCEL_MLX5_QP_SIZE; 2649 attr->num_requests = ACCEL_MLX5_NUM_REQUESTS; 2650 attr->allowed_devs = NULL; 2651 attr->crypto_split_blocks = 0; 2652 attr->enable_driver = false; 2653 } 2654 2655 static void 2656 accel_mlx5_allowed_devs_free(void) 2657 { 2658 size_t i; 2659 2660 if (!g_accel_mlx5.allowed_devs) { 2661 return; 2662 } 2663 2664 for (i = 0; i < g_accel_mlx5.allowed_devs_count; i++) { 2665 free(g_accel_mlx5.allowed_devs[i]); 2666 } 2667 free(g_accel_mlx5.attr.allowed_devs); 2668 free(g_accel_mlx5.allowed_devs); 2669 g_accel_mlx5.attr.allowed_devs = NULL; 2670 g_accel_mlx5.allowed_devs = NULL; 2671 g_accel_mlx5.allowed_devs_count = 0; 2672 } 2673 2674 static int 2675 accel_mlx5_allowed_devs_parse(const char *allowed_devs) 2676 { 2677 char *str, *tmp, *tok, *sp = NULL; 2678 size_t devs_count = 0; 2679 2680 str = strdup(allowed_devs); 2681 if (!str) { 2682 return -ENOMEM; 2683 } 2684 2685 accel_mlx5_allowed_devs_free(); 2686 2687 tmp = str; 2688 while ((tmp = strchr(tmp, ',')) != NULL) { 2689 tmp++; 2690 devs_count++; 2691 } 2692 devs_count++; 2693 2694 g_accel_mlx5.allowed_devs = calloc(devs_count, sizeof(char *)); 2695 if (!g_accel_mlx5.allowed_devs) { 2696 free(str); 2697 return -ENOMEM; 2698 } 2699 2700 devs_count = 0; 2701 tok = strtok_r(str, ",", &sp); 2702 while (tok) { 2703 g_accel_mlx5.allowed_devs[devs_count] = strdup(tok); 2704 if (!g_accel_mlx5.allowed_devs[devs_count]) { 2705 free(str); 2706 accel_mlx5_allowed_devs_free(); 2707 return -ENOMEM; 2708 } 2709 tok = strtok_r(NULL, ",", &sp); 2710 devs_count++; 2711 g_accel_mlx5.allowed_devs_count++; 2712 } 2713 2714 free(str); 2715 2716 return 0; 2717 } 2718 2719 int 2720 accel_mlx5_enable(struct accel_mlx5_attr *attr) 2721 { 2722 int rc; 2723 2724 if (g_accel_mlx5.enabled) { 2725 return -EEXIST; 2726 } 2727 if (attr) { 2728 if (attr->num_requests / spdk_env_get_core_count() < ACCEL_MLX5_MAX_MKEYS_IN_TASK) { 2729 SPDK_ERRLOG("num requests per core must not be less than %u, current value %u\n", 2730 ACCEL_MLX5_MAX_MKEYS_IN_TASK, attr->num_requests / 
spdk_env_get_core_count()); 2731 return -EINVAL; 2732 } 2733 if (attr->qp_size < 8) { 2734 SPDK_ERRLOG("qp_size must be at least 8\n"); 2735 return -EINVAL; 2736 } 2737 g_accel_mlx5.attr = *attr; 2738 g_accel_mlx5.attr.allowed_devs = NULL; 2739 2740 if (attr->allowed_devs) { 2741 /* Contains a copy of user's string */ 2742 g_accel_mlx5.attr.allowed_devs = strndup(attr->allowed_devs, ACCEL_MLX5_ALLOWED_DEVS_MAX_LEN); 2743 if (!g_accel_mlx5.attr.allowed_devs) { 2744 return -ENOMEM; 2745 } 2746 rc = accel_mlx5_allowed_devs_parse(g_accel_mlx5.attr.allowed_devs); 2747 if (rc) { 2748 return rc; 2749 } 2750 rc = spdk_mlx5_crypto_devs_allow((const char *const *)g_accel_mlx5.allowed_devs, 2751 g_accel_mlx5.allowed_devs_count); 2752 if (rc) { 2753 accel_mlx5_allowed_devs_free(); 2754 return rc; 2755 } 2756 } 2757 } else { 2758 accel_mlx5_get_default_attr(&g_accel_mlx5.attr); 2759 } 2760 2761 g_accel_mlx5.enabled = true; 2762 spdk_accel_module_list_add(&g_accel_mlx5.module); 2763 2764 return 0; 2765 } 2766 2767 static void 2768 accel_mlx5_psvs_release(struct accel_mlx5_dev_ctx *dev_ctx) 2769 { 2770 uint32_t i, num_psvs, num_psvs_in_pool; 2771 2772 if (!dev_ctx->psvs) { 2773 return; 2774 } 2775 2776 num_psvs = g_accel_mlx5.attr.num_requests; 2777 2778 for (i = 0; i < num_psvs; i++) { 2779 if (dev_ctx->psvs[i]) { 2780 spdk_mlx5_destroy_psv(dev_ctx->psvs[i]); 2781 dev_ctx->psvs[i] = NULL; 2782 } 2783 } 2784 free(dev_ctx->psvs); 2785 2786 if (!dev_ctx->psv_pool) { 2787 return; 2788 } 2789 num_psvs_in_pool = spdk_mempool_count(dev_ctx->psv_pool); 2790 if (num_psvs_in_pool != num_psvs) { 2791 SPDK_ERRLOG("Expected %u reqs in the pool, but got only %u\n", num_psvs, num_psvs_in_pool); 2792 } 2793 spdk_mempool_free(dev_ctx->psv_pool); 2794 } 2795 2796 static void 2797 accel_mlx5_free_resources(void) 2798 { 2799 struct accel_mlx5_dev_ctx *dev_ctx; 2800 uint32_t i; 2801 2802 for (i = 0; i < g_accel_mlx5.num_ctxs; i++) { 2803 dev_ctx = &g_accel_mlx5.dev_ctxs[i]; 2804 accel_mlx5_psvs_release(dev_ctx); 2805 if (dev_ctx->pd) { 2806 if (dev_ctx->mkeys) { 2807 spdk_mlx5_mkey_pool_destroy(0, dev_ctx->pd); 2808 } 2809 if (dev_ctx->crypto_mkeys) { 2810 spdk_mlx5_mkey_pool_destroy(SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO, dev_ctx->pd); 2811 } 2812 if (dev_ctx->sig_mkeys) { 2813 spdk_mlx5_mkey_pool_destroy(SPDK_MLX5_MKEY_POOL_FLAG_SIGNATURE, dev_ctx->pd); 2814 } 2815 spdk_rdma_utils_put_pd(dev_ctx->pd); 2816 } 2817 if (dev_ctx->domain) { 2818 spdk_rdma_utils_put_memory_domain(dev_ctx->domain); 2819 } 2820 } 2821 2822 free(g_accel_mlx5.dev_ctxs); 2823 g_accel_mlx5.dev_ctxs = NULL; 2824 g_accel_mlx5.initialized = false; 2825 } 2826 2827 static void 2828 accel_mlx5_deinit_cb(void *ctx) 2829 { 2830 accel_mlx5_free_resources(); 2831 spdk_spin_destroy(&g_accel_mlx5.lock); 2832 spdk_mlx5_umr_implementer_register(false); 2833 spdk_accel_module_finish(); 2834 } 2835 2836 static void 2837 accel_mlx5_deinit(void *ctx) 2838 { 2839 if (g_accel_mlx5.allowed_devs) { 2840 accel_mlx5_allowed_devs_free(); 2841 } 2842 spdk_mlx5_crypto_devs_allow(NULL, 0); 2843 if (g_accel_mlx5.initialized) { 2844 spdk_io_device_unregister(&g_accel_mlx5, accel_mlx5_deinit_cb); 2845 } else { 2846 spdk_accel_module_finish(); 2847 } 2848 } 2849 2850 static int 2851 accel_mlx5_mkeys_create(struct ibv_pd *pd, uint32_t num_mkeys, uint32_t flags) 2852 { 2853 struct spdk_mlx5_mkey_pool_param pool_param = {}; 2854 2855 pool_param.mkey_count = num_mkeys; 2856 pool_param.cache_per_thread = num_mkeys * 3 / 4 / spdk_env_get_core_count(); 2857 pool_param.flags = flags; 2858 2859 
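/*
 * Worked example (illustrative, default configuration): with
 * num_mkeys = 2047 (ACCEL_MLX5_NUM_REQUESTS) and 8 reactor cores,
 * cache_per_thread = 2047 * 3 / 4 / 8 = 191, i.e. about 75% of the mkeys are
 * pre-cached across the per-thread caches and the rest stays in the shared
 * pool.
 */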
return spdk_mlx5_mkey_pool_init(&pool_param, pd); 2860 } 2861 2862 static void 2863 accel_mlx5_set_psv_in_pool(struct spdk_mempool *mp, void *cb_arg, void *_psv, unsigned obj_idx) 2864 { 2865 struct spdk_rdma_utils_memory_translation translation = {}; 2866 struct accel_mlx5_psv_pool_iter_cb_args *args = cb_arg; 2867 struct accel_mlx5_psv_wrapper *wrapper = _psv; 2868 struct accel_mlx5_dev_ctx *dev_ctx = args->dev; 2869 int rc; 2870 2871 if (args->rc) { 2872 return; 2873 } 2874 assert(obj_idx < g_accel_mlx5.attr.num_requests); 2875 assert(dev_ctx->psvs[obj_idx] != NULL); 2876 memset(wrapper, 0, sizeof(*wrapper)); 2877 wrapper->psv_index = dev_ctx->psvs[obj_idx]->index; 2878 2879 rc = spdk_rdma_utils_get_translation(args->map, &wrapper->crc, sizeof(uint32_t), &translation); 2880 if (rc) { 2881 SPDK_ERRLOG("Memory translation failed, addr %p, length %zu\n", &wrapper->crc, sizeof(uint32_t)); 2882 args->rc = -EINVAL; 2883 } else { 2884 wrapper->crc_lkey = spdk_rdma_utils_memory_translation_get_lkey(&translation); 2885 } 2886 } 2887 2888 static int 2889 accel_mlx5_psvs_create(struct accel_mlx5_dev_ctx *dev_ctx) 2890 { 2891 struct accel_mlx5_psv_pool_iter_cb_args args = { 2892 .dev = dev_ctx 2893 }; 2894 char pool_name[32]; 2895 uint32_t i; 2896 uint32_t num_psvs = g_accel_mlx5.attr.num_requests; 2897 uint32_t cache_size; 2898 int rc; 2899 2900 dev_ctx->psvs = calloc(num_psvs, (sizeof(struct spdk_mlx5_psv *))); 2901 if (!dev_ctx->psvs) { 2902 SPDK_ERRLOG("Failed to alloc PSVs array\n"); 2903 return -ENOMEM; 2904 } 2905 for (i = 0; i < num_psvs; i++) { 2906 dev_ctx->psvs[i] = spdk_mlx5_create_psv(dev_ctx->pd); 2907 if (!dev_ctx->psvs[i]) { 2908 SPDK_ERRLOG("Failed to create PSV on dev %s\n", dev_ctx->context->device->name); 2909 return -EINVAL; 2910 } 2911 } 2912 2913 rc = snprintf(pool_name, sizeof(pool_name), "accel_psv_%s", dev_ctx->context->device->name); 2914 if (rc < 0) { 2915 assert(0); 2916 return -EINVAL; 2917 } 2918 cache_size = num_psvs * 3 / 4 / spdk_env_get_core_count(); 2919 args.map = spdk_rdma_utils_create_mem_map(dev_ctx->pd, NULL, 2920 IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE); 2921 if (!args.map) { 2922 return -ENOMEM; 2923 } 2924 dev_ctx->psv_pool = spdk_mempool_create_ctor(pool_name, num_psvs, 2925 sizeof(struct accel_mlx5_psv_wrapper), 2926 cache_size, SPDK_ENV_SOCKET_ID_ANY, 2927 accel_mlx5_set_psv_in_pool, &args); 2928 spdk_rdma_utils_free_mem_map(&args.map); 2929 if (!dev_ctx->psv_pool) { 2930 SPDK_ERRLOG("Failed to create PSV memory pool\n"); 2931 return -ENOMEM; 2932 } 2933 if (args.rc) { 2934 SPDK_ERRLOG("Failed to init PSV memory pool objects, rc %d\n", args.rc); 2935 return args.rc; 2936 } 2937 2938 return 0; 2939 } 2940 2941 2942 static int 2943 accel_mlx5_dev_ctx_init(struct accel_mlx5_dev_ctx *dev_ctx, struct ibv_context *dev, 2944 struct spdk_mlx5_device_caps *caps) 2945 { 2946 struct ibv_pd *pd; 2947 int rc; 2948 2949 pd = spdk_rdma_utils_get_pd(dev); 2950 if (!pd) { 2951 SPDK_ERRLOG("Failed to get PD for context %p, dev %s\n", dev, dev->device->name); 2952 return -EINVAL; 2953 } 2954 dev_ctx->context = dev; 2955 dev_ctx->pd = pd; 2956 dev_ctx->domain = spdk_rdma_utils_get_memory_domain(pd); 2957 if (!dev_ctx->domain) { 2958 return -ENOMEM; 2959 } 2960 2961 rc = accel_mlx5_mkeys_create(pd, g_accel_mlx5.attr.num_requests, 0); 2962 if (rc) { 2963 SPDK_ERRLOG("Failed to create mkeys pool, rc %d, dev %s\n", rc, dev->device->name); 2964 return rc; 2965 } 2966 dev_ctx->mkeys = true; 2967 2968 if (g_accel_mlx5.crypto_supported) { 2969 
dev_ctx->crypto_multi_block = caps->crypto.multi_block_be_tweak; 2970 if (!dev_ctx->crypto_multi_block && g_accel_mlx5.attr.crypto_split_blocks) { 2971 SPDK_WARNLOG("\"crypto_split_blocks\" is set but dev %s doesn't support multi block crypto\n", 2972 dev->device->name); 2973 } 2974 rc = accel_mlx5_mkeys_create(pd, g_accel_mlx5.attr.num_requests, SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO); 2975 if (rc) { 2976 SPDK_ERRLOG("Failed to create crypto mkeys pool, rc %d, dev %s\n", rc, dev->device->name); 2977 return rc; 2978 } 2979 dev_ctx->crypto_mkeys = true; 2980 } 2981 if (g_accel_mlx5.crc32c_supported) { 2982 rc = accel_mlx5_mkeys_create(pd, g_accel_mlx5.attr.num_requests, 2983 SPDK_MLX5_MKEY_POOL_FLAG_SIGNATURE); 2984 if (rc) { 2985 SPDK_ERRLOG("Failed to create signature mkeys pool, rc %d, dev %s\n", rc, dev->device->name); 2986 return rc; 2987 } 2988 dev_ctx->sig_mkeys = true; 2989 rc = accel_mlx5_psvs_create(dev_ctx); 2990 if (rc) { 2991 SPDK_ERRLOG("Failed to create PSVs pool, rc %d, dev %s\n", rc, dev->device->name); 2992 return rc; 2993 } 2994 } 2995 2996 return 0; 2997 } 2998 2999 static struct ibv_context ** 3000 accel_mlx5_get_devices(int *_num_devs) 3001 { 3002 struct ibv_context **rdma_devs, **rdma_devs_out = NULL, *dev; 3003 struct ibv_device_attr dev_attr; 3004 size_t j; 3005 int num_devs = 0, i, rc; 3006 int num_devs_out = 0; 3007 bool dev_allowed; 3008 3009 rdma_devs = rdma_get_devices(&num_devs); 3010 if (!rdma_devs || !num_devs) { 3011 *_num_devs = 0; 3012 return NULL; 3013 } 3014 3015 rdma_devs_out = calloc(num_devs + 1, sizeof(struct ibv_context *)); 3016 if (!rdma_devs_out) { 3017 SPDK_ERRLOG("Memory allocation failed\n"); 3018 rdma_free_devices(rdma_devs); 3019 *_num_devs = 0; 3020 return NULL; 3021 } 3022 3023 for (i = 0; i < num_devs; i++) { 3024 dev = rdma_devs[i]; 3025 rc = ibv_query_device(dev, &dev_attr); 3026 if (rc) { 3027 SPDK_ERRLOG("Failed to query dev %s, skipping\n", dev->device->name); 3028 continue; 3029 } 3030 if (dev_attr.vendor_id != SPDK_MLX5_VENDOR_ID_MELLANOX) { 3031 SPDK_DEBUGLOG(accel_mlx5, "dev %s is not Mellanox device, skipping\n", dev->device->name); 3032 continue; 3033 } 3034 3035 if (g_accel_mlx5.allowed_devs_count) { 3036 dev_allowed = false; 3037 for (j = 0; j < g_accel_mlx5.allowed_devs_count; j++) { 3038 if (strcmp(g_accel_mlx5.allowed_devs[j], dev->device->name) == 0) { 3039 dev_allowed = true; 3040 break; 3041 } 3042 } 3043 if (!dev_allowed) { 3044 continue; 3045 } 3046 } 3047 3048 rdma_devs_out[num_devs_out] = dev; 3049 num_devs_out++; 3050 } 3051 3052 rdma_free_devices(rdma_devs); 3053 *_num_devs = num_devs_out; 3054 3055 return rdma_devs_out; 3056 } 3057 3058 static inline bool 3059 accel_mlx5_dev_supports_crypto(struct spdk_mlx5_device_caps *caps) 3060 { 3061 return caps->crypto_supported && !caps->crypto.wrapped_import_method_aes_xts && 3062 (caps->crypto.single_block_le_tweak || 3063 caps->crypto.multi_block_le_tweak || caps->crypto.multi_block_be_tweak); 3064 } 3065 3066 static int 3067 accel_mlx5_init(void) 3068 { 3069 struct spdk_mlx5_device_caps *caps; 3070 struct ibv_context **rdma_devs, *dev; 3071 int num_devs = 0, rc = 0, i; 3072 int best_dev = -1, first_dev = 0; 3073 int best_dev_stat = 0, dev_stat; 3074 bool supports_crypto; 3075 bool find_best_dev = g_accel_mlx5.allowed_devs_count == 0; 3076 3077 if (!g_accel_mlx5.enabled) { 3078 return -EINVAL; 3079 } 3080 3081 spdk_spin_init(&g_accel_mlx5.lock); 3082 rdma_devs = accel_mlx5_get_devices(&num_devs); 3083 if (!rdma_devs || !num_devs) { 3084 return -ENODEV; 3085 } 3086 caps = 
calloc(num_devs, sizeof(*caps)); 3087 if (!caps) { 3088 rc = -ENOMEM; 3089 goto cleanup; 3090 } 3091 3092 g_accel_mlx5.crypto_supported = true; 3093 g_accel_mlx5.crc32c_supported = true; 3094 g_accel_mlx5.num_ctxs = 0; 3095 3096 /* Iterate devices. We support an offload if all devices support it */ 3097 for (i = 0; i < num_devs; i++) { 3098 dev = rdma_devs[i]; 3099 3100 rc = spdk_mlx5_device_query_caps(dev, &caps[i]); 3101 if (rc) { 3102 SPDK_ERRLOG("Failed to get crypto caps, dev %s\n", dev->device->name); 3103 goto cleanup; 3104 } 3105 supports_crypto = accel_mlx5_dev_supports_crypto(&caps[i]); 3106 if (!supports_crypto) { 3107 SPDK_DEBUGLOG(accel_mlx5, "Disable crypto support because dev %s doesn't support it\n", 3108 rdma_devs[i]->device->name); 3109 g_accel_mlx5.crypto_supported = false; 3110 } 3111 if (!caps[i].crc32c_supported) { 3112 SPDK_DEBUGLOG(accel_mlx5, "Disable crc32c support because dev %s doesn't support it\n", 3113 rdma_devs[i]->device->name); 3114 g_accel_mlx5.crc32c_supported = false; 3115 } 3116 if (find_best_dev) { 3117 /* Find device which supports max number of offloads */ 3118 dev_stat = (int)supports_crypto + (int)caps[i].crc32c_supported; 3119 if (dev_stat > best_dev_stat) { 3120 best_dev_stat = dev_stat; 3121 best_dev = i; 3122 } 3123 } 3124 } 3125 3126 /* User didn't specify devices to use, try to select the best one */ 3127 if (find_best_dev) { 3128 if (best_dev == -1) { 3129 best_dev = 0; 3130 } 3131 g_accel_mlx5.crypto_supported = accel_mlx5_dev_supports_crypto(&caps[best_dev]); 3132 g_accel_mlx5.crc32c_supported = caps[best_dev].crc32c_supported; 3133 SPDK_NOTICELOG("Select dev %s, crypto %d, crc32c %d\n", rdma_devs[best_dev]->device->name, 3134 g_accel_mlx5.crypto_supported, g_accel_mlx5.crc32c_supported); 3135 first_dev = best_dev; 3136 num_devs = 1; 3137 if (g_accel_mlx5.crypto_supported) { 3138 const char *const dev_name[] = { rdma_devs[best_dev]->device->name }; 3139 /* Let mlx5 library know which device to use */ 3140 spdk_mlx5_crypto_devs_allow(dev_name, 1); 3141 } 3142 } else { 3143 SPDK_NOTICELOG("Found %d devices, crypto %d\n", num_devs, g_accel_mlx5.crypto_supported); 3144 } 3145 3146 g_accel_mlx5.dev_ctxs = calloc(num_devs, sizeof(*g_accel_mlx5.dev_ctxs)); 3147 if (!g_accel_mlx5.dev_ctxs) { 3148 SPDK_ERRLOG("Memory allocation failed\n"); 3149 rc = -ENOMEM; 3150 goto cleanup; 3151 } 3152 3153 for (i = first_dev; i < first_dev + num_devs; i++) { 3154 rc = accel_mlx5_dev_ctx_init(&g_accel_mlx5.dev_ctxs[g_accel_mlx5.num_ctxs++], 3155 rdma_devs[i], &caps[i]); 3156 if (rc) { 3157 goto cleanup; 3158 } 3159 } 3160 3161 SPDK_NOTICELOG("Accel framework mlx5 initialized, found %d devices.\n", num_devs); 3162 spdk_io_device_register(&g_accel_mlx5, accel_mlx5_create_cb, accel_mlx5_destroy_cb, 3163 sizeof(struct accel_mlx5_io_channel), "accel_mlx5"); 3164 g_accel_mlx5.initialized = true; 3165 free(rdma_devs); 3166 free(caps); 3167 3168 if (g_accel_mlx5.attr.enable_driver) { 3169 SPDK_NOTICELOG("Enabling mlx5 platform driver\n"); 3170 spdk_accel_driver_register(&g_accel_mlx5_driver); 3171 spdk_accel_set_driver(g_accel_mlx5_driver.name); 3172 spdk_mlx5_umr_implementer_register(true); 3173 } 3174 3175 return 0; 3176 3177 cleanup: 3178 free(rdma_devs); 3179 free(caps); 3180 accel_mlx5_free_resources(); 3181 spdk_spin_destroy(&g_accel_mlx5.lock); 3182 3183 return rc; 3184 } 3185 3186 static void 3187 accel_mlx5_write_config_json(struct spdk_json_write_ctx *w) 3188 { 3189 if (g_accel_mlx5.enabled) { 3190 spdk_json_write_object_begin(w); 3191 
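/*
 * Example of the object emitted below (illustrative, default attributes from
 * accel_mlx5_get_default_attr(); "allowed_devs" is added only when the user
 * restricted the device list):
 *
 *   {
 *     "method": "mlx5_scan_accel_module",
 *     "params": {
 *       "qp_size": 256,
 *       "num_requests": 2047,
 *       "crypto_split_blocks": 0,
 *       "enable_driver": false
 *     }
 *   }
 */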
spdk_json_write_named_string(w, "method", "mlx5_scan_accel_module"); 3192 spdk_json_write_named_object_begin(w, "params"); 3193 spdk_json_write_named_uint16(w, "qp_size", g_accel_mlx5.attr.qp_size); 3194 spdk_json_write_named_uint32(w, "num_requests", g_accel_mlx5.attr.num_requests); 3195 if (g_accel_mlx5.attr.allowed_devs) { 3196 spdk_json_write_named_string(w, "allowed_devs", g_accel_mlx5.attr.allowed_devs); 3197 } 3198 spdk_json_write_named_uint16(w, "crypto_split_blocks", g_accel_mlx5.attr.crypto_split_blocks); 3199 spdk_json_write_named_bool(w, "enable_driver", g_accel_mlx5.attr.enable_driver); 3200 spdk_json_write_object_end(w); 3201 spdk_json_write_object_end(w); 3202 } 3203 } 3204 3205 static size_t 3206 accel_mlx5_get_ctx_size(void) 3207 { 3208 return sizeof(struct accel_mlx5_task); 3209 } 3210 3211 static int 3212 accel_mlx5_crypto_key_init(struct spdk_accel_crypto_key *key) 3213 { 3214 struct spdk_mlx5_crypto_dek_create_attr attr = {}; 3215 struct spdk_mlx5_crypto_keytag *keytag; 3216 int rc; 3217 3218 if (!key || !key->key || !key->key2 || !key->key_size || !key->key2_size) { 3219 return -EINVAL; 3220 } 3221 3222 attr.dek = calloc(1, key->key_size + key->key2_size); 3223 if (!attr.dek) { 3224 return -ENOMEM; 3225 } 3226 3227 memcpy(attr.dek, key->key, key->key_size); 3228 memcpy(attr.dek + key->key_size, key->key2, key->key2_size); 3229 attr.dek_len = key->key_size + key->key2_size; 3230 3231 rc = spdk_mlx5_crypto_keytag_create(&attr, &keytag); 3232 spdk_memset_s(attr.dek, attr.dek_len, 0, attr.dek_len); 3233 free(attr.dek); 3234 if (rc) { 3235 SPDK_ERRLOG("Failed to create a keytag, rc %d\n", rc); 3236 return rc; 3237 } 3238 3239 key->priv = keytag; 3240 3241 return 0; 3242 } 3243 3244 static void 3245 accel_mlx5_crypto_key_deinit(struct spdk_accel_crypto_key *key) 3246 { 3247 if (!key || key->module_if != &g_accel_mlx5.module || !key->priv) { 3248 return; 3249 } 3250 3251 spdk_mlx5_crypto_keytag_destroy(key->priv); 3252 } 3253 3254 static void 3255 accel_mlx5_dump_stats_json(struct spdk_json_write_ctx *w, const char *header, 3256 const struct accel_mlx5_stats *stats) 3257 { 3258 double idle_polls_percentage = 0; 3259 double cpls_per_poll = 0; 3260 uint64_t total_tasks = 0; 3261 int i; 3262 3263 if (stats->polls) { 3264 idle_polls_percentage = (double) stats->idle_polls * 100 / stats->polls; 3265 } 3266 if (stats->polls > stats->idle_polls) { 3267 cpls_per_poll = (double) stats->completions / (stats->polls - stats->idle_polls); 3268 } 3269 for (i = 0; i < ACCEL_MLX5_OPC_LAST; i++) { 3270 total_tasks += stats->opcodes[i]; 3271 } 3272 3273 spdk_json_write_named_object_begin(w, header); 3274 3275 spdk_json_write_named_object_begin(w, "umrs"); 3276 spdk_json_write_named_uint64(w, "crypto_umrs", stats->crypto_umrs); 3277 spdk_json_write_named_uint64(w, "sig_umrs", stats->sig_umrs); 3278 spdk_json_write_named_uint64(w, "umrs", stats->umrs); 3279 spdk_json_write_named_uint64(w, "total", stats->crypto_umrs + stats->sig_umrs + stats->umrs); 3280 spdk_json_write_object_end(w); 3281 3282 spdk_json_write_named_object_begin(w, "rdma"); 3283 spdk_json_write_named_uint64(w, "read", stats->rdma_reads); 3284 spdk_json_write_named_uint64(w, "write", stats->rdma_writes); 3285 spdk_json_write_named_uint64(w, "total", stats->rdma_reads + stats->rdma_writes); 3286 spdk_json_write_object_end(w); 3287 3288 spdk_json_write_named_object_begin(w, "polling"); 3289 spdk_json_write_named_uint64(w, "polls", stats->polls); 3290 spdk_json_write_named_uint64(w, "idle_polls", stats->idle_polls); 3291 
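/*
 * Worked example for the derived metrics below (illustrative numbers):
 * polls = 1000, idle_polls = 900, completions = 400 gives
 * idle_polls_percentage = 900 * 100 / 1000 = 90.0 and
 * cpls_per_poll = 400 / (1000 - 900) = 4.0.
 */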
spdk_json_write_named_uint64(w, "completions", stats->completions); 3292 spdk_json_write_named_double(w, "idle_polls_percentage", idle_polls_percentage); 3293 spdk_json_write_named_double(w, "cpls_per_poll", cpls_per_poll); 3294 spdk_json_write_named_uint64(w, "nomem_qdepth", stats->nomem_qdepth); 3295 spdk_json_write_named_uint64(w, "nomem_mkey", stats->nomem_mkey); 3296 spdk_json_write_object_end(w); 3297 3298 spdk_json_write_named_object_begin(w, "tasks"); 3299 spdk_json_write_named_uint64(w, "copy", stats->opcodes[ACCEL_MLX5_OPC_COPY]); 3300 spdk_json_write_named_uint64(w, "crypto", stats->opcodes[ACCEL_MLX5_OPC_CRYPTO]); 3301 spdk_json_write_named_uint64(w, "crypto_mkey", stats->opcodes[ACCEL_MLX5_OPC_CRYPTO_MKEY]); 3302 spdk_json_write_named_uint64(w, "crc32c", stats->opcodes[ACCEL_MLX5_OPC_CRC32C]); 3303 spdk_json_write_named_uint64(w, "mkey", stats->opcodes[ACCEL_MLX5_OPC_MKEY]); 3304 spdk_json_write_named_uint64(w, "total", total_tasks); 3305 spdk_json_write_object_end(w); 3306 3307 spdk_json_write_object_end(w); 3308 } 3309 3310 static void 3311 accel_mlx5_dump_channel_stat(struct spdk_io_channel_iter *i) 3312 { 3313 struct accel_mlx5_stats ch_stat = {}; 3314 struct accel_mlx5_dump_stats_ctx *ctx; 3315 struct spdk_io_channel *_ch; 3316 struct accel_mlx5_io_channel *ch; 3317 struct accel_mlx5_dev *dev; 3318 uint32_t j; 3319 3320 ctx = spdk_io_channel_iter_get_ctx(i); 3321 _ch = spdk_io_channel_iter_get_channel(i); 3322 ch = spdk_io_channel_get_ctx(_ch); 3323 3324 if (ctx->level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) { 3325 spdk_json_write_object_begin(ctx->w); 3326 spdk_json_write_named_object_begin(ctx->w, spdk_thread_get_name(spdk_get_thread())); 3327 } 3328 if (ctx->level == ACCEL_MLX5_DUMP_STAT_LEVEL_DEV) { 3329 spdk_json_write_named_array_begin(ctx->w, "devices"); 3330 } 3331 3332 for (j = 0; j < ch->num_devs; j++) { 3333 dev = &ch->devs[j]; 3334 /* Save grand total and channel stats */ 3335 accel_mlx5_add_stats(&ctx->total, &dev->stats); 3336 accel_mlx5_add_stats(&ch_stat, &dev->stats); 3337 if (ctx->level == ACCEL_MLX5_DUMP_STAT_LEVEL_DEV) { 3338 spdk_json_write_object_begin(ctx->w); 3339 accel_mlx5_dump_stats_json(ctx->w, dev->dev_ctx->context->device->name, &dev->stats); 3340 spdk_json_write_object_end(ctx->w); 3341 } 3342 } 3343 3344 if (ctx->level == ACCEL_MLX5_DUMP_STAT_LEVEL_DEV) { 3345 spdk_json_write_array_end(ctx->w); 3346 } 3347 if (ctx->level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) { 3348 accel_mlx5_dump_stats_json(ctx->w, "channel_total", &ch_stat); 3349 spdk_json_write_object_end(ctx->w); 3350 spdk_json_write_object_end(ctx->w); 3351 } 3352 3353 spdk_for_each_channel_continue(i, 0); 3354 } 3355 3356 static void 3357 accel_mlx5_dump_channel_stat_done(struct spdk_io_channel_iter *i, int status) 3358 { 3359 struct accel_mlx5_dump_stats_ctx *ctx; 3360 3361 ctx = spdk_io_channel_iter_get_ctx(i); 3362 3363 spdk_spin_lock(&g_accel_mlx5.lock); 3364 /* Add statistics from destroyed channels */ 3365 accel_mlx5_add_stats(&ctx->total, &g_accel_mlx5.stats); 3366 spdk_spin_unlock(&g_accel_mlx5.lock); 3367 3368 if (ctx->level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) { 3369 /* channels[] */ 3370 spdk_json_write_array_end(ctx->w); 3371 } 3372 3373 accel_mlx5_dump_stats_json(ctx->w, "total", &ctx->total); 3374 3375 /* Ends the whole response which was begun in accel_mlx5_dump_stats */ 3376 spdk_json_write_object_end(ctx->w); 3377 3378 ctx->cb(ctx->ctx, 0); 3379 free(ctx); 3380 } 3381 3382 int 3383 accel_mlx5_dump_stats(struct spdk_json_write_ctx *w, enum accel_mlx5_dump_state_level 
level, 3384 accel_mlx5_dump_stat_done_cb cb, void *ctx) 3385 { 3386 struct accel_mlx5_dump_stats_ctx *stat_ctx; 3387 3388 if (!w || !cb) { 3389 return -EINVAL; 3390 } 3391 if (!g_accel_mlx5.initialized) { 3392 return -ENODEV; 3393 } 3394 3395 stat_ctx = calloc(1, sizeof(*stat_ctx)); 3396 if (!stat_ctx) { 3397 return -ENOMEM; 3398 } 3399 stat_ctx->cb = cb; 3400 stat_ctx->ctx = ctx; 3401 stat_ctx->level = level; 3402 stat_ctx->w = w; 3403 3404 spdk_json_write_object_begin(w); 3405 3406 if (level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) { 3407 spdk_json_write_named_array_begin(w, "channels"); 3408 } 3409 3410 spdk_for_each_channel(&g_accel_mlx5, accel_mlx5_dump_channel_stat, stat_ctx, 3411 accel_mlx5_dump_channel_stat_done); 3412 3413 return 0; 3414 } 3415 3416 static bool 3417 accel_mlx5_crypto_supports_cipher(enum spdk_accel_cipher cipher, size_t key_size) 3418 { 3419 switch (cipher) { 3420 case SPDK_ACCEL_CIPHER_AES_XTS: 3421 return key_size == SPDK_ACCEL_AES_XTS_128_KEY_SIZE || key_size == SPDK_ACCEL_AES_XTS_256_KEY_SIZE; 3422 default: 3423 return false; 3424 } 3425 } 3426 3427 static int 3428 accel_mlx5_get_memory_domains(struct spdk_memory_domain **domains, int array_size) 3429 { 3430 int i, size; 3431 3432 if (!domains || !array_size) { 3433 return (int)g_accel_mlx5.num_ctxs; 3434 } 3435 3436 size = spdk_min(array_size, (int)g_accel_mlx5.num_ctxs); 3437 3438 for (i = 0; i < size; i++) { 3439 domains[i] = g_accel_mlx5.dev_ctxs[i].domain; 3440 } 3441 3442 return (int)g_accel_mlx5.num_ctxs; 3443 } 3444 3445 static inline struct accel_mlx5_dev * 3446 accel_mlx5_ch_get_dev_by_pd(struct accel_mlx5_io_channel *accel_ch, struct ibv_pd *pd) 3447 { 3448 uint32_t i; 3449 3450 for (i = 0; i < accel_ch->num_devs; i++) { 3451 if (accel_ch->devs[i].dev_ctx->pd == pd) { 3452 return &accel_ch->devs[i]; 3453 } 3454 } 3455 3456 return NULL; 3457 } 3458 3459 static inline int 3460 accel_mlx5_task_assign_qp_by_domain_pd(struct accel_mlx5_task *task, 3461 struct accel_mlx5_io_channel *accel_ch, struct spdk_memory_domain *domain) 3462 { 3463 struct spdk_memory_domain_rdma_ctx *domain_ctx; 3464 struct accel_mlx5_dev *dev; 3465 struct ibv_pd *domain_pd; 3466 size_t ctx_size; 3467 3468 domain_ctx = spdk_memory_domain_get_user_context(domain, &ctx_size); 3469 if (spdk_unlikely(!domain_ctx || domain_ctx->size != ctx_size)) { 3470 SPDK_ERRLOG("no domain context or wrong size, ctx ptr %p, size %zu\n", domain_ctx, ctx_size); 3471 return -ENOTSUP; 3472 } 3473 domain_pd = domain_ctx->ibv_pd; 3474 if (spdk_unlikely(!domain_pd)) { 3475 SPDK_ERRLOG("no destination domain PD, task %p\n", task); 3476 return -ENOTSUP; 3477 } 3478 dev = accel_mlx5_ch_get_dev_by_pd(accel_ch, domain_pd); 3479 if (spdk_unlikely(!dev)) { 3480 SPDK_ERRLOG("No dev for PD %p dev %s\n", domain_pd, domain_pd->context->device->name); 3481 return -ENODEV; 3482 } 3483 3487 task->qp = &dev->qp; 3488 3489 return 0; 3490 } 3491 3492 static inline int 3493 accel_mlx5_driver_examine_sequence(struct spdk_accel_sequence *seq, 3494 struct accel_mlx5_io_channel *accel_ch) 3495 { 3496 struct spdk_accel_task *first_base = spdk_accel_sequence_first_task(seq); 3497 struct accel_mlx5_task *first = SPDK_CONTAINEROF(first_base, struct accel_mlx5_task, base); 3498 struct spdk_accel_task *next_base = TAILQ_NEXT(first_base, seq_link); 3499 struct accel_mlx5_task *next; 3500 int rc; 3501 3502 accel_mlx5_task_reset(first); 3503 SPDK_DEBUGLOG(accel_mlx5, "first %p, opc %d; next %p, opc %d\n", first_base, 
first_base->op_code, 3504 next_base, next_base ? next_base->op_code : -1); 3505 if (!next_base) { 3506 if (first_base->op_code == SPDK_ACCEL_OPC_COPY && first_base->dst_domain && 3507 spdk_memory_domain_get_dma_device_type(first_base->dst_domain) == 3508 SPDK_DMA_DEVICE_TYPE_RDMA && 3509 accel_mlx5_compare_iovs(first_base->d.iovs, first_base->s.iovs, first_base->s.iovcnt)) { 3510 SPDK_DEBUGLOG(accel_mlx5, "MKEY task %p\n", first); 3511 rc = accel_mlx5_task_assign_qp_by_domain_pd(first, accel_ch, first_base->dst_domain); 3512 if (spdk_unlikely(rc)) { 3513 return rc; 3514 } 3515 first->mlx5_opcode = ACCEL_MLX5_OPC_MKEY; 3516 first->needs_data_transfer = 1; 3517 first->inplace = 1; 3518 return 0; 3519 } 3520 } else { 3521 switch (first_base->op_code) { 3522 case SPDK_ACCEL_OPC_COPY: 3523 if (next_base->op_code == SPDK_ACCEL_OPC_DECRYPT && 3524 first_base->dst_domain && spdk_memory_domain_get_dma_device_type(first_base->dst_domain) == 3525 SPDK_DMA_DEVICE_TYPE_RDMA && TAILQ_NEXT(next_base, seq_link) == NULL) { 3526 next = SPDK_CONTAINEROF(next_base, struct accel_mlx5_task, base); 3527 rc = accel_mlx5_task_assign_qp_by_domain_pd(next, accel_ch, first_base->dst_domain); 3528 if (spdk_unlikely(rc)) { 3529 return rc; 3530 } 3531 /* Update decrypt task memory domain, complete copy task */ 3532 SPDK_DEBUGLOG(accel_mlx5, "Merge copy task (%p) and decrypt (%p)\n", first, next); 3533 next_base->dst_domain = first_base->dst_domain; 3534 next_base->dst_domain_ctx = first_base->dst_domain_ctx; 3535 accel_mlx5_task_reset(next); 3536 next->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO_MKEY; 3537 next->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_WIRE; 3538 next->needs_data_transfer = 1; 3539 next->inplace = 1; 3540 spdk_accel_task_complete(first_base, 0); 3541 return 0; 3542 } 3543 break; 3544 case SPDK_ACCEL_OPC_ENCRYPT: 3545 if (next_base->op_code == SPDK_ACCEL_OPC_COPY && 3546 next_base->dst_domain && spdk_memory_domain_get_dma_device_type(next_base->dst_domain) == 3547 SPDK_DMA_DEVICE_TYPE_RDMA && TAILQ_NEXT(next_base, seq_link) == NULL) { 3548 rc = accel_mlx5_task_assign_qp_by_domain_pd(first, accel_ch, next_base->dst_domain); 3549 if (spdk_unlikely(rc)) { 3550 return rc; 3551 } 3552 3553 /* Update encrypt task memory domain, complete copy task */ 3554 SPDK_DEBUGLOG(accel_mlx5, "Merge copy task (%p) and decrypt (%p)\n", 3555 SPDK_CONTAINEROF(next_base, 3556 struct accel_mlx5_task, base), first); 3557 first_base->dst_domain = next_base->dst_domain; 3558 first_base->dst_domain_ctx = next_base->dst_domain_ctx; 3559 first->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO_MKEY; 3560 first->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_WIRE; 3561 first->needs_data_transfer = 1; 3562 first->inplace = 1; 3563 spdk_accel_task_complete(next_base, 0); 3564 return 0; 3565 } 3566 break; 3567 3568 default: 3569 break; 3570 } 3571 } 3572 3573 SPDK_DEBUGLOG(accel_mlx5, "seq %p, task %p nothing to merge\n", seq, first_base); 3574 /* Nothing to merge, execute tasks one by one */ 3575 accel_mlx5_task_assign_qp(first, accel_ch); 3576 accel_mlx5_task_init_opcode(first); 3577 3578 return 0; 3579 } 3580 3581 static inline int 3582 accel_mlx5_execute_sequence(struct spdk_io_channel *ch, struct spdk_accel_sequence *seq) 3583 { 3584 struct accel_mlx5_io_channel *accel_ch = spdk_io_channel_get_ctx(ch); 3585 struct spdk_accel_task *task; 3586 struct accel_mlx5_task *mlx5_task; 3587 int rc; 3588 3589 rc = accel_mlx5_driver_examine_sequence(seq, accel_ch); 3590 if (spdk_unlikely(rc)) { 3591 return rc; 3592 } 3593 task = 
spdk_accel_sequence_first_task(seq); 3594 assert(task); 3595 mlx5_task = SPDK_CONTAINEROF(task, struct accel_mlx5_task, base); 3596 mlx5_task->driver_seq = 1; 3597 3598 SPDK_DEBUGLOG(accel_mlx5, "driver starts seq %p, ch %p, task %p\n", seq, accel_ch, task); 3599 3600 return _accel_mlx5_submit_tasks(accel_ch, task); 3601 } 3602 3603 static struct accel_mlx5_module g_accel_mlx5 = { 3604 .module = { 3605 .module_init = accel_mlx5_init, 3606 .module_fini = accel_mlx5_deinit, 3607 .write_config_json = accel_mlx5_write_config_json, 3608 .get_ctx_size = accel_mlx5_get_ctx_size, 3609 .name = "mlx5", 3610 .supports_opcode = accel_mlx5_supports_opcode, 3611 .get_io_channel = accel_mlx5_get_io_channel, 3612 .submit_tasks = accel_mlx5_submit_tasks, 3613 .crypto_key_init = accel_mlx5_crypto_key_init, 3614 .crypto_key_deinit = accel_mlx5_crypto_key_deinit, 3615 .crypto_supports_cipher = accel_mlx5_crypto_supports_cipher, 3616 .get_memory_domains = accel_mlx5_get_memory_domains, 3617 } 3618 }; 3619 3620 static struct spdk_accel_driver g_accel_mlx5_driver = { 3621 .name = "mlx5", 3622 .execute_sequence = accel_mlx5_execute_sequence, 3623 .get_io_channel = accel_mlx5_get_io_channel 3624 }; 3625 3626 SPDK_LOG_REGISTER_COMPONENT(accel_mlx5) 3627
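/*
 * Usage sketch (illustrative only, kept in a comment so it is not compiled):
 * how a control-plane caller such as an RPC handler is expected to drive the
 * public entry points defined above. The function and device names below are
 * hypothetical and error handling is trimmed.
 *
 *   static void
 *   dump_done(void *cb_ctx, int rc)
 *   {
 *           // Matches accel_mlx5_dump_stat_done_cb: invoked as cb(ctx, 0)
 *           // after the "total" section has been written.
 *   }
 *
 *   static int
 *   enable_and_dump(struct spdk_json_write_ctx *w)
 *   {
 *           struct accel_mlx5_attr attr;
 *           int rc;
 *
 *           accel_mlx5_get_default_attr(&attr);
 *           attr.qp_size = 512;                  // example override, must be >= 8
 *           attr.allowed_devs = "mlx5_0,mlx5_1"; // optional, comma-separated
 *
 *           rc = accel_mlx5_enable(&attr);       // before accel framework init
 *           if (rc) {
 *                   return rc;
 *           }
 *           // Later, once the module is initialized:
 *           return accel_mlx5_dump_stats(w, ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL,
 *                                        dump_done, NULL);
 *   }
 */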