/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2022 Intel Corporation.
 * Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
 * All rights reserved.
 */

#include "accel_dsa.h"

#include "spdk/stdinc.h"

#include "spdk/accel_module.h"
#include "spdk/log.h"
#include "spdk_internal/idxd.h"

#include "spdk/env.h"
#include "spdk/event.h"
#include "spdk/likely.h"
#include "spdk/thread.h"
#include "spdk/idxd.h"
#include "spdk/util.h"
#include "spdk/json.h"
#include "spdk/trace.h"
#include "spdk_internal/trace_defs.h"

static bool g_dsa_enable = false;
static bool g_kernel_mode = false;

enum channel_state {
	IDXD_CHANNEL_ACTIVE,
	IDXD_CHANNEL_ERROR,
};

static bool g_dsa_initialized = false;

struct idxd_device {
	struct spdk_idxd_device *dsa;
	TAILQ_ENTRY(idxd_device) tailq;
};
static TAILQ_HEAD(, idxd_device) g_dsa_devices = TAILQ_HEAD_INITIALIZER(g_dsa_devices);
static struct idxd_device *g_next_dev = NULL;
static uint32_t g_num_devices = 0;
static pthread_mutex_t g_dev_lock = PTHREAD_MUTEX_INITIALIZER;

struct idxd_task {
	struct spdk_accel_task task;
	struct idxd_io_channel *chan;
};

struct idxd_io_channel {
	struct spdk_idxd_io_channel *chan;
	struct idxd_device *dev;
	enum channel_state state;
	struct spdk_poller *poller;
	uint32_t num_outstanding;
	STAILQ_HEAD(, spdk_accel_task) queued_tasks;
};

static struct spdk_io_channel *dsa_get_io_channel(void);

static struct idxd_device *
idxd_select_device(struct idxd_io_channel *chan)
{
	uint32_t count = 0;
	struct idxd_device *dev;
	uint32_t socket_id = spdk_env_get_socket_id(spdk_env_get_current_core());

	/*
	 * We allow channels to share underlying devices,
	 * selection is round-robin based with a limitation
	 * on how many channels can share one device.
	 */
	do {
		/* select next device */
		pthread_mutex_lock(&g_dev_lock);
		g_next_dev = TAILQ_NEXT(g_next_dev, tailq);
		if (g_next_dev == NULL) {
			g_next_dev = TAILQ_FIRST(&g_dsa_devices);
		}
		dev = g_next_dev;
		pthread_mutex_unlock(&g_dev_lock);

		if (socket_id != spdk_idxd_get_socket(dev->dsa)) {
			continue;
		}

		/*
		 * Now see if a channel is available on this one. We only
		 * allow a specific number of channels to share a device
		 * to limit outstanding IO for flow control purposes.
		 */
		chan->chan = spdk_idxd_get_channel(dev->dsa);
		if (chan->chan != NULL) {
			SPDK_DEBUGLOG(accel_dsa, "On socket %d using device on socket %d\n",
				      socket_id, spdk_idxd_get_socket(dev->dsa));
			return dev;
		}
	} while (++count < g_num_devices);

	/* We are out of available channels and/or devices for the local socket. We fix the number
	 * of channels that we allocate per device and only allocate devices on the same socket
	 * that the current thread is on. On a 2 socket system it may be possible to avoid
	 * this situation by spreading threads across the sockets.
	 */
	SPDK_ERRLOG("No more DSA devices available on the local socket.\n");
	return NULL;
}

static void
dsa_done(void *cb_arg, int status)
{
	struct idxd_task *idxd_task = cb_arg;
	struct idxd_io_channel *chan;
	int rc;

	chan = idxd_task->chan;

	/* If the DSA DIF Check operation detects an error, detailed info about
	 * this error (like actual/expected values) needs to be obtained by
	 * calling the software DIF Verify operation.
	 */
	if (spdk_unlikely(status == -EIO)) {
		if (idxd_task->task.op_code == SPDK_ACCEL_OPC_DIF_VERIFY ||
		    idxd_task->task.op_code == SPDK_ACCEL_OPC_DIF_VERIFY_COPY) {
			rc = spdk_dif_verify(idxd_task->task.s.iovs, idxd_task->task.s.iovcnt,
					     idxd_task->task.dif.num_blocks,
					     idxd_task->task.dif.ctx, idxd_task->task.dif.err);
			if (rc != 0) {
				SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n",
					    idxd_task->task.dif.err->err_type,
					    idxd_task->task.dif.err->err_offset);
			}
		}
	}

	assert(chan->num_outstanding > 0);
	spdk_trace_record(TRACE_ACCEL_DSA_OP_COMPLETE, 0, 0, 0, chan->num_outstanding - 1);
	chan->num_outstanding--;

	spdk_accel_task_complete(&idxd_task->task, status);
}

static int
idxd_submit_dualcast(struct idxd_io_channel *ch, struct idxd_task *idxd_task, int flags)
{
	struct spdk_accel_task *task = &idxd_task->task;

	if (spdk_unlikely(task->d.iovcnt != 1 || task->d2.iovcnt != 1 || task->s.iovcnt != 1)) {
		return -EINVAL;
	}

	if (spdk_unlikely(task->d.iovs[0].iov_len != task->s.iovs[0].iov_len ||
			  task->d.iovs[0].iov_len != task->d2.iovs[0].iov_len)) {
		return -EINVAL;
	}

	return spdk_idxd_submit_dualcast(ch->chan, task->d.iovs[0].iov_base,
					 task->d2.iovs[0].iov_base, task->s.iovs[0].iov_base,
					 task->d.iovs[0].iov_len, flags, dsa_done, idxd_task);
}

static int
check_dsa_dif_strip_overlap_bufs(struct spdk_accel_task *task)
{
	uint64_t src_seg_addr_end_ext;
	uint64_t dst_seg_addr_end_ext;
	size_t i;

	/* The number of source and destination iovecs must be the same.
	 * If so, one of them can be used to iterate over both vectors
	 * later in the loop. */
	if (task->d.iovcnt != task->s.iovcnt) {
		SPDK_ERRLOG("Mismatched iovcnts: src=%d, dst=%d\n",
			    task->s.iovcnt, task->d.iovcnt);
		return -EINVAL;
	}

	for (i = 0; i < task->s.iovcnt; i++) {
		src_seg_addr_end_ext = (uint64_t)task->s.iovs[i].iov_base +
				       task->s.iovs[i].iov_len;

		dst_seg_addr_end_ext = (uint64_t)task->d.iovs[i].iov_base +
				       task->s.iovs[i].iov_len;

		if ((dst_seg_addr_end_ext >= (uint64_t)task->s.iovs[i].iov_base) &&
		    (dst_seg_addr_end_ext <= src_seg_addr_end_ext)) {
			return -EFAULT;
		}
	}

	return 0;
}

static void
spdk_accel_sw_task_complete(void *ctx)
{
	struct spdk_accel_task *task = (struct spdk_accel_task *)ctx;

	spdk_accel_task_complete(task, task->status);
}

static int
_process_single_task(struct spdk_io_channel *ch, struct spdk_accel_task *task)
{
	struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
	struct idxd_task *idxd_task;
	int rc = 0, flags = 0;

	idxd_task = SPDK_CONTAINEROF(task, struct idxd_task, task);
	idxd_task->chan = chan;

	switch (task->op_code) {
	case SPDK_ACCEL_OPC_COPY:
		rc = spdk_idxd_submit_copy(chan->chan, task->d.iovs, task->d.iovcnt,
					   task->s.iovs, task->s.iovcnt, flags, dsa_done, idxd_task);
		break;
	case SPDK_ACCEL_OPC_DUALCAST:
		rc = idxd_submit_dualcast(chan, idxd_task, flags);
		break;
	case SPDK_ACCEL_OPC_COMPARE:
		rc = spdk_idxd_submit_compare(chan->chan, task->s.iovs, task->s.iovcnt,
					      task->s2.iovs, task->s2.iovcnt, flags,
					      dsa_done, idxd_task);
		break;
	case SPDK_ACCEL_OPC_FILL:
		rc = spdk_idxd_submit_fill(chan->chan, task->d.iovs, task->d.iovcnt,
					   task->fill_pattern, flags, dsa_done, idxd_task);
		break;
	case SPDK_ACCEL_OPC_CRC32C:
		rc = spdk_idxd_submit_crc32c(chan->chan, task->s.iovs, task->s.iovcnt, task->seed,
					     task->crc_dst, flags, dsa_done, idxd_task);
		break;
	case SPDK_ACCEL_OPC_COPY_CRC32C:
		rc = spdk_idxd_submit_copy_crc32c(chan->chan, task->d.iovs, task->d.iovcnt,
						  task->s.iovs, task->s.iovcnt,
						  task->seed, task->crc_dst, flags,
						  dsa_done, idxd_task);
		break;
	case SPDK_ACCEL_OPC_DIF_VERIFY:
		rc = spdk_idxd_submit_dif_check(chan->chan,
						task->s.iovs, task->s.iovcnt,
						task->dif.num_blocks, task->dif.ctx, flags,
						dsa_done, idxd_task);
		break;
	case SPDK_ACCEL_OPC_DIF_GENERATE_COPY:
		rc = spdk_idxd_submit_dif_insert(chan->chan,
						 task->d.iovs, task->d.iovcnt,
						 task->s.iovs, task->s.iovcnt,
						 task->dif.num_blocks, task->dif.ctx, flags,
						 dsa_done, idxd_task);
		break;
	case SPDK_ACCEL_OPC_DIF_VERIFY_COPY:
		/* For DIF strip operations, DSA may incorrectly report an overlapping buffer
		 * error if the destination buffer immediately precedes the source buffer.
		 * This is because DSA uses the transfer size in the descriptor for both
		 * the source and destination buffers when checking for buffer overlap.
		 * Since the transfer size applies to the source buffer, which is larger
		 * than the destination buffer by the metadata, it should not be used as
		 * the destination buffer size. To avoid DSA reporting such errors, the
		 * software checks whether this error condition can occur, and if so the
		 * software fallback is performed. */
		rc = check_dsa_dif_strip_overlap_bufs(task);
		if (rc == 0) {
			rc = spdk_idxd_submit_dif_strip(chan->chan,
							task->d.iovs, task->d.iovcnt,
							task->s.iovs, task->s.iovcnt,
							task->dif.num_blocks, task->dif.ctx, flags,
							dsa_done, idxd_task);
		} else if (rc == -EFAULT) {
			rc = spdk_dif_verify_copy(task->d.iovs,
						  task->d.iovcnt,
						  task->s.iovs,
						  task->s.iovcnt,
						  task->dif.num_blocks,
						  task->dif.ctx,
						  task->dif.err);
			idxd_task->task.status = rc;
			spdk_thread_send_msg(spdk_get_thread(), spdk_accel_sw_task_complete,
					     (void *)&idxd_task->task);
			rc = 0;
		}
		break;
	default:
		assert(false);
		rc = -EINVAL;
		break;
	}

	if (rc == 0) {
		chan->num_outstanding++;
		spdk_trace_record(TRACE_ACCEL_DSA_OP_SUBMIT, 0, 0, 0, chan->num_outstanding);
	}

	return rc;
}

static int
dsa_submit_task(struct spdk_io_channel *ch, struct spdk_accel_task *task)
{
	struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
	int rc = 0;

	assert(STAILQ_NEXT(task, link) == NULL);

	if (spdk_unlikely(chan->state == IDXD_CHANNEL_ERROR)) {
		spdk_accel_task_complete(task, -EINVAL);
		return 0;
	}

	if (!STAILQ_EMPTY(&chan->queued_tasks)) {
		STAILQ_INSERT_TAIL(&chan->queued_tasks, task, link);
		return 0;
	}

	rc = _process_single_task(ch, task);
	if (rc == -EBUSY) {
		STAILQ_INSERT_TAIL(&chan->queued_tasks, task, link);
	} else if (rc) {
		spdk_accel_task_complete(task, rc);
	}

	return 0;
}

static int
dsa_submit_queued_tasks(struct idxd_io_channel *chan)
{
	struct spdk_accel_task *task, *tmp;
	struct spdk_io_channel *ch = spdk_io_channel_from_ctx(chan);
	int rc = 0;

	if (spdk_unlikely(chan->state == IDXD_CHANNEL_ERROR)) {
		/* Complete queued tasks with error and clear the list */
		while ((task = STAILQ_FIRST(&chan->queued_tasks))) {
			STAILQ_REMOVE_HEAD(&chan->queued_tasks, link);
			spdk_accel_task_complete(task, -EINVAL);
		}
		return 0;
	}

	STAILQ_FOREACH_SAFE(task, &chan->queued_tasks, link, tmp) {
		rc = _process_single_task(ch, task);
		if (rc == -EBUSY) {
			return rc;
		}
		STAILQ_REMOVE_HEAD(&chan->queued_tasks, link);
		if (rc) {
			spdk_accel_task_complete(task, rc);
		}
	}

	return 0;
}

static int
idxd_poll(void *arg)
{
	struct idxd_io_channel *chan = arg;
	int count;

	count = spdk_idxd_process_events(chan->chan);

	/* Check if there are any pending ops to process if the channel is active */
	if (!STAILQ_EMPTY(&chan->queued_tasks)) {
		dsa_submit_queued_tasks(chan);
	}

	return count > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
}

static size_t
accel_dsa_get_ctx_size(void)
{
	return sizeof(struct idxd_task);
}

static bool
dsa_supports_opcode(enum spdk_accel_opcode opc)
{
	if (!g_dsa_initialized) {
		assert(0);
		return false;
	}

	switch (opc) {
	case SPDK_ACCEL_OPC_COPY:
	case SPDK_ACCEL_OPC_FILL:
	case SPDK_ACCEL_OPC_DUALCAST:
	case SPDK_ACCEL_OPC_COMPARE:
	case SPDK_ACCEL_OPC_CRC32C:
	case SPDK_ACCEL_OPC_COPY_CRC32C:
		return true;
	case SPDK_ACCEL_OPC_DIF_VERIFY:
	case SPDK_ACCEL_OPC_DIF_GENERATE_COPY:
	case SPDK_ACCEL_OPC_DIF_VERIFY_COPY:
		/* Supported only if the IOMMU is enabled */
		return spdk_iommu_is_enabled();
	default:
		return false;
	}
}

static int accel_dsa_init(void);
static void accel_dsa_exit(void *ctx);
static void accel_dsa_write_config_json(struct spdk_json_write_ctx *w);

static struct spdk_accel_module_if g_dsa_module = {
	.module_init = accel_dsa_init,
	.module_fini = accel_dsa_exit,
	.write_config_json = accel_dsa_write_config_json,
	.get_ctx_size = accel_dsa_get_ctx_size,
	.name = "dsa",
	.supports_opcode = dsa_supports_opcode,
	.get_io_channel = dsa_get_io_channel,
	.submit_tasks = dsa_submit_task
};

static int
dsa_create_cb(void *io_device, void *ctx_buf)
{
	struct idxd_io_channel *chan = ctx_buf;
	struct idxd_device *dsa;

	dsa = idxd_select_device(chan);
	if (dsa == NULL) {
		SPDK_ERRLOG("Failed to get an idxd channel\n");
		return -EINVAL;
	}

	chan->dev = dsa;
	chan->poller = SPDK_POLLER_REGISTER(idxd_poll, chan, 0);
	STAILQ_INIT(&chan->queued_tasks);
	chan->num_outstanding = 0;
	chan->state = IDXD_CHANNEL_ACTIVE;

	return 0;
}

static void
dsa_destroy_cb(void *io_device, void *ctx_buf)
{
	struct idxd_io_channel *chan = ctx_buf;

	spdk_poller_unregister(&chan->poller);
	spdk_idxd_put_channel(chan->chan);
}

static struct spdk_io_channel *
dsa_get_io_channel(void)
{
	return spdk_get_io_channel(&g_dsa_module);
}

static void
attach_cb(void *cb_ctx, struct spdk_idxd_device *idxd)
{
	struct idxd_device *dev;

	dev = calloc(1, sizeof(*dev));
	if (dev == NULL) {
		SPDK_ERRLOG("Failed to allocate device struct\n");
		return;
	}

	dev->dsa = idxd;
	if (g_next_dev == NULL) {
		g_next_dev = dev;
	}

	TAILQ_INSERT_TAIL(&g_dsa_devices, dev, tailq);
	g_num_devices++;
}

int
accel_dsa_enable_probe(bool kernel_mode)
{
	int rc;

	if (g_dsa_enable) {
		return -EALREADY;
	}

	rc = spdk_idxd_set_config(kernel_mode);
	if (rc != 0) {
		return rc;
	}

	spdk_accel_module_list_add(&g_dsa_module);
	g_kernel_mode = kernel_mode;
	g_dsa_enable = true;

	return 0;
}

static bool
probe_cb(void *cb_ctx, struct spdk_pci_device *dev)
{
	if (dev->id.device_id == PCI_DEVICE_ID_INTEL_DSA) {
		return true;
	}

	return false;
}

static int
accel_dsa_init(void)
{
	if (!g_dsa_enable) {
		return -EINVAL;
	}

	if (spdk_idxd_probe(NULL, attach_cb, probe_cb) != 0) {
		SPDK_ERRLOG("spdk_idxd_probe() failed\n");
		return -EINVAL;
	}

	if (TAILQ_EMPTY(&g_dsa_devices)) {
		return -ENODEV;
	}

	g_dsa_initialized = true;
	spdk_io_device_register(&g_dsa_module, dsa_create_cb, dsa_destroy_cb,
				sizeof(struct idxd_io_channel), "dsa_accel_module");
	return 0;
}

static void
accel_dsa_exit(void *ctx)
{
	struct idxd_device *dev;

	if (g_dsa_initialized) {
		spdk_io_device_unregister(&g_dsa_module, NULL);
		g_dsa_initialized = false;
	}

	while (!TAILQ_EMPTY(&g_dsa_devices)) {
		dev = TAILQ_FIRST(&g_dsa_devices);
		TAILQ_REMOVE(&g_dsa_devices, dev, tailq);
		spdk_idxd_detach(dev->dsa);
		free(dev);
	}

	spdk_accel_module_finish();
}

static void
accel_dsa_write_config_json(struct spdk_json_write_ctx *w)
{
	if (g_dsa_enable) {
		spdk_json_write_object_begin(w);
		spdk_json_write_named_string(w, "method", "dsa_scan_accel_module");
		spdk_json_write_named_object_begin(w, "params");
		spdk_json_write_named_bool(w, "config_kernel_mode", g_kernel_mode);
		spdk_json_write_object_end(w);
		spdk_json_write_object_end(w);
	}
}

SPDK_TRACE_REGISTER_FN(dsa_trace, "dsa", TRACE_GROUP_ACCEL_DSA)
{
	spdk_trace_register_description("DSA_OP_SUBMIT", TRACE_ACCEL_DSA_OP_SUBMIT, OWNER_TYPE_NONE,
					OBJECT_NONE, 0,
					SPDK_TRACE_ARG_TYPE_INT, "count");
	spdk_trace_register_description("DSA_OP_COMPLETE", TRACE_ACCEL_DSA_OP_COMPLETE, OWNER_TYPE_NONE,
					OBJECT_NONE,
					0, SPDK_TRACE_ARG_TYPE_INT, "count");
}

SPDK_LOG_REGISTER_COMPONENT(accel_dsa)
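/*
 * For reference, accel_dsa_write_config_json() above emits the JSON-RPC entry used to
 * re-create this module's configuration at startup. In an application's JSON config it
 * corresponds to an object of roughly the following shape; the exact framing (such as
 * the enclosing subsystem/config arrays) depends on the application's config format.
 *
 *   {
 *     "method": "dsa_scan_accel_module",
 *     "params": {
 *       "config_kernel_mode": false
 *     }
 *   }
 */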