1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (c) Intel Corporation. 3 * All rights reserved. 4 * Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 5 */ 6 7 #include "spdk/stdinc.h" 8 9 #include "bdev_malloc.h" 10 #include "spdk/endian.h" 11 #include "spdk/env.h" 12 #include "spdk/accel.h" 13 #include "spdk/string.h" 14 15 #include "spdk/log.h" 16 17 struct malloc_disk { 18 struct spdk_bdev disk; 19 void *malloc_buf; 20 void *malloc_md_buf; 21 TAILQ_ENTRY(malloc_disk) link; 22 }; 23 24 struct malloc_task { 25 int num_outstanding; 26 enum spdk_bdev_io_status status; 27 TAILQ_ENTRY(malloc_task) tailq; 28 }; 29 30 struct malloc_channel { 31 struct spdk_io_channel *accel_channel; 32 struct spdk_poller *completion_poller; 33 TAILQ_HEAD(, malloc_task) completed_tasks; 34 }; 35 36 static int 37 malloc_verify_pi(struct spdk_bdev_io *bdev_io) 38 { 39 struct spdk_bdev *bdev = bdev_io->bdev; 40 struct spdk_dif_ctx dif_ctx; 41 struct spdk_dif_error err_blk; 42 int rc; 43 44 rc = spdk_dif_ctx_init(&dif_ctx, 45 bdev->blocklen, 46 bdev->md_len, 47 bdev->md_interleave, 48 bdev->dif_is_head_of_md, 49 bdev->dif_type, 50 bdev->dif_check_flags, 51 bdev_io->u.bdev.offset_blocks & 0xFFFFFFFF, 52 0xFFFF, 0, 0, 0); 53 if (rc != 0) { 54 SPDK_ERRLOG("Failed to initialize DIF/DIX context\n"); 55 return rc; 56 } 57 58 if (spdk_bdev_is_md_interleaved(bdev)) { 59 rc = spdk_dif_verify(bdev_io->u.bdev.iovs, 60 bdev_io->u.bdev.iovcnt, 61 bdev_io->u.bdev.num_blocks, 62 &dif_ctx, 63 &err_blk); 64 } else { 65 struct iovec md_iov = { 66 .iov_base = bdev_io->u.bdev.md_buf, 67 .iov_len = bdev_io->u.bdev.num_blocks * bdev->md_len, 68 }; 69 70 rc = spdk_dix_verify(bdev_io->u.bdev.iovs, 71 bdev_io->u.bdev.iovcnt, 72 &md_iov, 73 bdev_io->u.bdev.num_blocks, 74 &dif_ctx, 75 &err_blk); 76 } 77 78 if (rc != 0) { 79 SPDK_ERRLOG("DIF/DIX verify failed: lba %" PRIu64 ", num_blocks %" PRIu64 ", " 80 "err_type %u, expected %u, actual %u, err_offset %u\n", 81 bdev_io->u.bdev.offset_blocks, 82 bdev_io->u.bdev.num_blocks, 83 err_blk.err_type, 84 err_blk.expected, 85 err_blk.actual, 86 err_blk.err_offset); 87 } 88 89 return rc; 90 } 91 92 static void 93 malloc_done(void *ref, int status) 94 { 95 struct malloc_task *task = (struct malloc_task *)ref; 96 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(task); 97 int rc; 98 99 if (status != 0) { 100 if (status == -ENOMEM) { 101 task->status = SPDK_BDEV_IO_STATUS_NOMEM; 102 } else { 103 task->status = SPDK_BDEV_IO_STATUS_FAILED; 104 } 105 } 106 107 if (--task->num_outstanding != 0) { 108 return; 109 } 110 111 if (bdev_io->bdev->dif_type != SPDK_DIF_DISABLE && 112 bdev_io->type == SPDK_BDEV_IO_TYPE_READ && 113 task->status == SPDK_BDEV_IO_STATUS_SUCCESS) { 114 rc = malloc_verify_pi(bdev_io); 115 if (rc != 0) { 116 task->status = SPDK_BDEV_IO_STATUS_FAILED; 117 } 118 } 119 120 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), task->status); 121 } 122 123 static void 124 malloc_complete_task(struct malloc_task *task, struct malloc_channel *mch, 125 enum spdk_bdev_io_status status) 126 { 127 task->status = status; 128 TAILQ_INSERT_TAIL(&mch->completed_tasks, task, tailq); 129 } 130 131 static TAILQ_HEAD(, malloc_disk) g_malloc_disks = TAILQ_HEAD_INITIALIZER(g_malloc_disks); 132 133 int malloc_disk_count = 0; 134 135 static int bdev_malloc_initialize(void); 136 static void bdev_malloc_deinitialize(void); 137 138 static int 139 bdev_malloc_get_ctx_size(void) 140 { 141 return sizeof(struct malloc_task); 142 } 143 144 static struct spdk_bdev_module malloc_if = { 145 .name = "malloc", 146 .module_init = bdev_malloc_initialize, 147 .module_fini = bdev_malloc_deinitialize, 148 .get_ctx_size = bdev_malloc_get_ctx_size, 149 150 }; 151 152 SPDK_BDEV_MODULE_REGISTER(malloc, &malloc_if) 153 154 static void 155 malloc_disk_free(struct malloc_disk *malloc_disk) 156 { 157 if (!malloc_disk) { 158 return; 159 } 160 161 free(malloc_disk->disk.name); 162 spdk_free(malloc_disk->malloc_buf); 163 spdk_free(malloc_disk->malloc_md_buf); 164 free(malloc_disk); 165 } 166 167 static int 168 bdev_malloc_destruct(void *ctx) 169 { 170 struct malloc_disk *malloc_disk = ctx; 171 172 TAILQ_REMOVE(&g_malloc_disks, malloc_disk, link); 173 malloc_disk_free(malloc_disk); 174 return 0; 175 } 176 177 static int 178 bdev_malloc_check_iov_len(struct iovec *iovs, int iovcnt, size_t nbytes) 179 { 180 int i; 181 182 for (i = 0; i < iovcnt; i++) { 183 if (nbytes < iovs[i].iov_len) { 184 return 0; 185 } 186 187 nbytes -= iovs[i].iov_len; 188 } 189 190 return nbytes != 0; 191 } 192 193 static void 194 bdev_malloc_readv(struct malloc_disk *mdisk, struct spdk_io_channel *ch, 195 struct malloc_task *task, 196 struct iovec *iov, int iovcnt, size_t len, uint64_t offset, 197 void *md_buf, size_t md_len, uint64_t md_offset) 198 { 199 int64_t res = 0; 200 void *src; 201 void *md_src; 202 int i; 203 204 if (bdev_malloc_check_iov_len(iov, iovcnt, len)) { 205 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), 206 SPDK_BDEV_IO_STATUS_FAILED); 207 return; 208 } 209 210 task->status = SPDK_BDEV_IO_STATUS_SUCCESS; 211 task->num_outstanding = 0; 212 213 SPDK_DEBUGLOG(bdev_malloc, "read %zu bytes from offset %#" PRIx64 ", iovcnt=%d\n", 214 len, offset, iovcnt); 215 216 src = mdisk->malloc_buf + offset; 217 218 for (i = 0; i < iovcnt; i++) { 219 task->num_outstanding++; 220 res = spdk_accel_submit_copy(ch, iov[i].iov_base, 221 src, iov[i].iov_len, 0, malloc_done, task); 222 223 if (res != 0) { 224 malloc_done(task, res); 225 break; 226 } 227 228 src += iov[i].iov_len; 229 len -= iov[i].iov_len; 230 } 231 232 if (md_buf == NULL) { 233 return; 234 } 235 236 SPDK_DEBUGLOG(bdev_malloc, "read metadata %zu bytes from offset%#" PRIx64 "\n", 237 md_len, md_offset); 238 239 md_src = mdisk->malloc_md_buf + md_offset; 240 241 task->num_outstanding++; 242 res = spdk_accel_submit_copy(ch, md_buf, md_src, md_len, 0, malloc_done, task); 243 244 if (res != 0) { 245 malloc_done(task, res); 246 } 247 } 248 249 static void 250 bdev_malloc_writev(struct malloc_disk *mdisk, struct spdk_io_channel *ch, 251 struct malloc_task *task, 252 struct iovec *iov, int iovcnt, size_t len, uint64_t offset, 253 void *md_buf, size_t md_len, uint64_t md_offset) 254 { 255 256 int64_t res = 0; 257 void *dst; 258 void *md_dst; 259 int i; 260 261 if (bdev_malloc_check_iov_len(iov, iovcnt, len)) { 262 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), 263 SPDK_BDEV_IO_STATUS_FAILED); 264 return; 265 } 266 267 SPDK_DEBUGLOG(bdev_malloc, "wrote %zu bytes to offset %#" PRIx64 ", iovcnt=%d\n", 268 len, offset, iovcnt); 269 270 dst = mdisk->malloc_buf + offset; 271 272 task->status = SPDK_BDEV_IO_STATUS_SUCCESS; 273 task->num_outstanding = 0; 274 275 for (i = 0; i < iovcnt; i++) { 276 task->num_outstanding++; 277 res = spdk_accel_submit_copy(ch, dst, iov[i].iov_base, 278 iov[i].iov_len, 0, malloc_done, task); 279 280 if (res != 0) { 281 malloc_done(task, res); 282 break; 283 } 284 285 dst += iov[i].iov_len; 286 } 287 288 if (md_buf == NULL) { 289 return; 290 } 291 SPDK_DEBUGLOG(bdev_malloc, "wrote metadata %zu bytes to offset %#" PRIx64 "\n", 292 md_len, md_offset); 293 294 md_dst = mdisk->malloc_md_buf + md_offset; 295 296 task->num_outstanding++; 297 res = spdk_accel_submit_copy(ch, md_dst, md_buf, md_len, 0, malloc_done, task); 298 299 if (res != 0) { 300 malloc_done(task, res); 301 } 302 303 } 304 305 static int 306 bdev_malloc_unmap(struct malloc_disk *mdisk, 307 struct spdk_io_channel *ch, 308 struct malloc_task *task, 309 uint64_t offset, 310 uint64_t byte_count) 311 { 312 task->status = SPDK_BDEV_IO_STATUS_SUCCESS; 313 task->num_outstanding = 1; 314 315 return spdk_accel_submit_fill(ch, mdisk->malloc_buf + offset, 0, 316 byte_count, 0, malloc_done, task); 317 } 318 319 static int 320 _bdev_malloc_submit_request(struct malloc_channel *mch, struct spdk_bdev_io *bdev_io) 321 { 322 uint32_t block_size = bdev_io->bdev->blocklen; 323 uint32_t md_size = bdev_io->bdev->md_len; 324 int rc; 325 326 switch (bdev_io->type) { 327 case SPDK_BDEV_IO_TYPE_READ: 328 if (bdev_io->u.bdev.iovs[0].iov_base == NULL) { 329 assert(bdev_io->u.bdev.iovcnt == 1); 330 bdev_io->u.bdev.iovs[0].iov_base = 331 ((struct malloc_disk *)bdev_io->bdev->ctxt)->malloc_buf + 332 bdev_io->u.bdev.offset_blocks * block_size; 333 bdev_io->u.bdev.iovs[0].iov_len = bdev_io->u.bdev.num_blocks * block_size; 334 malloc_complete_task((struct malloc_task *)bdev_io->driver_ctx, mch, 335 SPDK_BDEV_IO_STATUS_SUCCESS); 336 return 0; 337 } 338 339 bdev_malloc_readv((struct malloc_disk *)bdev_io->bdev->ctxt, 340 mch->accel_channel, 341 (struct malloc_task *)bdev_io->driver_ctx, 342 bdev_io->u.bdev.iovs, 343 bdev_io->u.bdev.iovcnt, 344 bdev_io->u.bdev.num_blocks * block_size, 345 bdev_io->u.bdev.offset_blocks * block_size, 346 bdev_io->u.bdev.md_buf, 347 bdev_io->u.bdev.num_blocks * md_size, 348 bdev_io->u.bdev.offset_blocks * md_size); 349 return 0; 350 351 case SPDK_BDEV_IO_TYPE_WRITE: 352 if (bdev_io->bdev->dif_type != SPDK_DIF_DISABLE) { 353 rc = malloc_verify_pi(bdev_io); 354 if (rc != 0) { 355 malloc_complete_task((struct malloc_task *)bdev_io->driver_ctx, mch, 356 SPDK_BDEV_IO_STATUS_FAILED); 357 return 0; 358 } 359 } 360 361 bdev_malloc_writev((struct malloc_disk *)bdev_io->bdev->ctxt, 362 mch->accel_channel, 363 (struct malloc_task *)bdev_io->driver_ctx, 364 bdev_io->u.bdev.iovs, 365 bdev_io->u.bdev.iovcnt, 366 bdev_io->u.bdev.num_blocks * block_size, 367 bdev_io->u.bdev.offset_blocks * block_size, 368 bdev_io->u.bdev.md_buf, 369 bdev_io->u.bdev.num_blocks * md_size, 370 bdev_io->u.bdev.offset_blocks * md_size); 371 return 0; 372 373 case SPDK_BDEV_IO_TYPE_RESET: 374 malloc_complete_task((struct malloc_task *)bdev_io->driver_ctx, mch, 375 SPDK_BDEV_IO_STATUS_SUCCESS); 376 return 0; 377 378 case SPDK_BDEV_IO_TYPE_FLUSH: 379 malloc_complete_task((struct malloc_task *)bdev_io->driver_ctx, mch, 380 SPDK_BDEV_IO_STATUS_SUCCESS); 381 return 0; 382 383 case SPDK_BDEV_IO_TYPE_UNMAP: 384 return bdev_malloc_unmap((struct malloc_disk *)bdev_io->bdev->ctxt, 385 mch->accel_channel, 386 (struct malloc_task *)bdev_io->driver_ctx, 387 bdev_io->u.bdev.offset_blocks * block_size, 388 bdev_io->u.bdev.num_blocks * block_size); 389 390 case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: 391 /* bdev_malloc_unmap is implemented with a call to mem_cpy_fill which zeroes out all of the requested bytes. */ 392 return bdev_malloc_unmap((struct malloc_disk *)bdev_io->bdev->ctxt, 393 mch->accel_channel, 394 (struct malloc_task *)bdev_io->driver_ctx, 395 bdev_io->u.bdev.offset_blocks * block_size, 396 bdev_io->u.bdev.num_blocks * block_size); 397 398 case SPDK_BDEV_IO_TYPE_ZCOPY: 399 if (bdev_io->u.bdev.zcopy.start) { 400 void *buf; 401 size_t len; 402 403 buf = ((struct malloc_disk *)bdev_io->bdev->ctxt)->malloc_buf + 404 bdev_io->u.bdev.offset_blocks * block_size; 405 len = bdev_io->u.bdev.num_blocks * block_size; 406 spdk_bdev_io_set_buf(bdev_io, buf, len); 407 408 } 409 malloc_complete_task((struct malloc_task *)bdev_io->driver_ctx, mch, 410 SPDK_BDEV_IO_STATUS_SUCCESS); 411 return 0; 412 case SPDK_BDEV_IO_TYPE_ABORT: 413 malloc_complete_task((struct malloc_task *)bdev_io->driver_ctx, mch, 414 SPDK_BDEV_IO_STATUS_FAILED); 415 return 0; 416 default: 417 return -1; 418 } 419 return 0; 420 } 421 422 static void 423 bdev_malloc_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 424 { 425 struct malloc_channel *mch = spdk_io_channel_get_ctx(ch); 426 427 if (_bdev_malloc_submit_request(mch, bdev_io) != 0) { 428 malloc_complete_task((struct malloc_task *)bdev_io->driver_ctx, mch, 429 SPDK_BDEV_IO_STATUS_FAILED); 430 } 431 } 432 433 static bool 434 bdev_malloc_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 435 { 436 switch (io_type) { 437 case SPDK_BDEV_IO_TYPE_READ: 438 case SPDK_BDEV_IO_TYPE_WRITE: 439 case SPDK_BDEV_IO_TYPE_FLUSH: 440 case SPDK_BDEV_IO_TYPE_RESET: 441 case SPDK_BDEV_IO_TYPE_UNMAP: 442 case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: 443 case SPDK_BDEV_IO_TYPE_ZCOPY: 444 case SPDK_BDEV_IO_TYPE_ABORT: 445 return true; 446 447 default: 448 return false; 449 } 450 } 451 452 static struct spdk_io_channel * 453 bdev_malloc_get_io_channel(void *ctx) 454 { 455 return spdk_get_io_channel(&g_malloc_disks); 456 } 457 458 static void 459 bdev_malloc_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 460 { 461 char uuid_str[SPDK_UUID_STRING_LEN]; 462 463 spdk_json_write_object_begin(w); 464 465 spdk_json_write_named_string(w, "method", "bdev_malloc_create"); 466 467 spdk_json_write_named_object_begin(w, "params"); 468 spdk_json_write_named_string(w, "name", bdev->name); 469 spdk_json_write_named_uint64(w, "num_blocks", bdev->blockcnt); 470 spdk_json_write_named_uint32(w, "block_size", bdev->blocklen); 471 spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &bdev->uuid); 472 spdk_json_write_named_string(w, "uuid", uuid_str); 473 spdk_json_write_named_uint32(w, "optimal_io_boundary", bdev->optimal_io_boundary); 474 475 spdk_json_write_object_end(w); 476 477 spdk_json_write_object_end(w); 478 } 479 480 static const struct spdk_bdev_fn_table malloc_fn_table = { 481 .destruct = bdev_malloc_destruct, 482 .submit_request = bdev_malloc_submit_request, 483 .io_type_supported = bdev_malloc_io_type_supported, 484 .get_io_channel = bdev_malloc_get_io_channel, 485 .write_config_json = bdev_malloc_write_json_config, 486 }; 487 488 static int 489 malloc_disk_setup_pi(struct malloc_disk *mdisk) 490 { 491 struct spdk_bdev *bdev = &mdisk->disk; 492 struct spdk_dif_ctx dif_ctx; 493 struct iovec iov, md_iov; 494 int rc; 495 496 rc = spdk_dif_ctx_init(&dif_ctx, 497 bdev->blocklen, 498 bdev->md_len, 499 bdev->md_interleave, 500 bdev->dif_is_head_of_md, 501 bdev->dif_type, 502 bdev->dif_check_flags, 503 0, /* configure the whole buffers */ 504 0, 0, 0, 0); 505 if (rc != 0) { 506 SPDK_ERRLOG("Initialization of DIF/DIX context failed\n"); 507 return rc; 508 } 509 510 iov.iov_base = mdisk->malloc_buf; 511 iov.iov_len = bdev->blockcnt * bdev->blocklen; 512 513 if (mdisk->disk.md_interleave) { 514 rc = spdk_dif_generate(&iov, 1, bdev->blockcnt, &dif_ctx); 515 } else { 516 md_iov.iov_base = mdisk->malloc_md_buf; 517 md_iov.iov_len = bdev->blockcnt * bdev->md_len; 518 519 rc = spdk_dix_generate(&iov, 1, &md_iov, bdev->blockcnt, &dif_ctx); 520 } 521 522 if (rc != 0) { 523 SPDK_ERRLOG("Formatting by DIF/DIX failed\n"); 524 } 525 526 return rc; 527 } 528 529 int 530 create_malloc_disk(struct spdk_bdev **bdev, const struct malloc_bdev_opts *opts) 531 { 532 struct malloc_disk *mdisk; 533 uint32_t block_size; 534 int rc; 535 536 assert(opts != NULL); 537 538 if (opts->num_blocks == 0) { 539 SPDK_ERRLOG("Disk num_blocks must be greater than 0"); 540 return -EINVAL; 541 } 542 543 if (opts->block_size % 512) { 544 SPDK_ERRLOG("Data block size must be 512 bytes aligned\n"); 545 return -EINVAL; 546 } 547 548 switch (opts->md_size) { 549 case 0: 550 case 8: 551 case 16: 552 case 32: 553 case 64: 554 case 128: 555 break; 556 default: 557 SPDK_ERRLOG("metadata size %u is not supported\n", opts->md_size); 558 return -EINVAL; 559 } 560 561 if (opts->md_interleave) { 562 block_size = opts->block_size + opts->md_size; 563 } else { 564 block_size = opts->block_size; 565 } 566 567 if (opts->dif_type < SPDK_DIF_DISABLE || opts->dif_type > SPDK_DIF_TYPE3) { 568 SPDK_ERRLOG("DIF type is invalid\n"); 569 return -EINVAL; 570 } 571 572 if (opts->dif_type != SPDK_DIF_DISABLE && opts->md_size == 0) { 573 SPDK_ERRLOG("Metadata size should not be zero if DIF is enabled\n"); 574 return -EINVAL; 575 } 576 577 mdisk = calloc(1, sizeof(*mdisk)); 578 if (!mdisk) { 579 SPDK_ERRLOG("mdisk calloc() failed\n"); 580 return -ENOMEM; 581 } 582 583 /* 584 * Allocate the large backend memory buffer from pinned memory. 585 * 586 * TODO: need to pass a hint so we know which socket to allocate 587 * from on multi-socket systems. 588 */ 589 mdisk->malloc_buf = spdk_zmalloc(opts->num_blocks * block_size, 2 * 1024 * 1024, NULL, 590 SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); 591 if (!mdisk->malloc_buf) { 592 SPDK_ERRLOG("malloc_buf spdk_zmalloc() failed\n"); 593 malloc_disk_free(mdisk); 594 return -ENOMEM; 595 } 596 597 if (!opts->md_interleave && opts->md_size != 0) { 598 mdisk->malloc_md_buf = spdk_zmalloc(opts->num_blocks * opts->md_size, 2 * 1024 * 1024, NULL, 599 SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); 600 if (!mdisk->malloc_md_buf) { 601 SPDK_ERRLOG("malloc_md_buf spdk_zmalloc() failed\n"); 602 malloc_disk_free(mdisk); 603 return -ENOMEM; 604 } 605 } 606 607 if (opts->name) { 608 mdisk->disk.name = strdup(opts->name); 609 } else { 610 /* Auto-generate a name */ 611 mdisk->disk.name = spdk_sprintf_alloc("Malloc%d", malloc_disk_count); 612 malloc_disk_count++; 613 } 614 if (!mdisk->disk.name) { 615 malloc_disk_free(mdisk); 616 return -ENOMEM; 617 } 618 mdisk->disk.product_name = "Malloc disk"; 619 620 mdisk->disk.write_cache = 1; 621 mdisk->disk.blocklen = block_size; 622 mdisk->disk.blockcnt = opts->num_blocks; 623 mdisk->disk.md_len = opts->md_size; 624 mdisk->disk.md_interleave = opts->md_interleave; 625 mdisk->disk.dif_type = opts->dif_type; 626 mdisk->disk.dif_is_head_of_md = opts->dif_is_head_of_md; 627 /* Current block device layer API does not propagate 628 * any DIF related information from user. So, we can 629 * not generate or verify Application Tag. 630 */ 631 switch (opts->dif_type) { 632 case SPDK_DIF_TYPE1: 633 case SPDK_DIF_TYPE2: 634 mdisk->disk.dif_check_flags = SPDK_DIF_FLAGS_GUARD_CHECK | 635 SPDK_DIF_FLAGS_REFTAG_CHECK; 636 break; 637 case SPDK_DIF_TYPE3: 638 mdisk->disk.dif_check_flags = SPDK_DIF_FLAGS_GUARD_CHECK; 639 break; 640 case SPDK_DIF_DISABLE: 641 break; 642 } 643 644 if (opts->dif_type != SPDK_DIF_DISABLE) { 645 rc = malloc_disk_setup_pi(mdisk); 646 if (rc) { 647 SPDK_ERRLOG("Failed to set up protection information.\n"); 648 malloc_disk_free(mdisk); 649 return rc; 650 } 651 } 652 653 if (opts->optimal_io_boundary) { 654 mdisk->disk.optimal_io_boundary = opts->optimal_io_boundary; 655 mdisk->disk.split_on_optimal_io_boundary = true; 656 } 657 if (!spdk_mem_all_zero(&opts->uuid, sizeof(opts->uuid))) { 658 spdk_uuid_copy(&mdisk->disk.uuid, &opts->uuid); 659 } else { 660 spdk_uuid_generate(&mdisk->disk.uuid); 661 } 662 663 mdisk->disk.ctxt = mdisk; 664 mdisk->disk.fn_table = &malloc_fn_table; 665 mdisk->disk.module = &malloc_if; 666 667 rc = spdk_bdev_register(&mdisk->disk); 668 if (rc) { 669 malloc_disk_free(mdisk); 670 return rc; 671 } 672 673 *bdev = &(mdisk->disk); 674 675 TAILQ_INSERT_TAIL(&g_malloc_disks, mdisk, link); 676 677 return rc; 678 } 679 680 void 681 delete_malloc_disk(const char *name, spdk_delete_malloc_complete cb_fn, void *cb_arg) 682 { 683 int rc; 684 685 rc = spdk_bdev_unregister_by_name(name, &malloc_if, cb_fn, cb_arg); 686 if (rc != 0) { 687 cb_fn(cb_arg, rc); 688 } 689 } 690 691 static int 692 malloc_completion_poller(void *ctx) 693 { 694 struct malloc_channel *ch = ctx; 695 struct malloc_task *task; 696 TAILQ_HEAD(, malloc_task) completed_tasks; 697 uint32_t num_completions = 0; 698 699 TAILQ_INIT(&completed_tasks); 700 TAILQ_SWAP(&completed_tasks, &ch->completed_tasks, malloc_task, tailq); 701 702 while (!TAILQ_EMPTY(&completed_tasks)) { 703 task = TAILQ_FIRST(&completed_tasks); 704 TAILQ_REMOVE(&completed_tasks, task, tailq); 705 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), task->status); 706 num_completions++; 707 } 708 709 return num_completions > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE; 710 } 711 712 static int 713 malloc_create_channel_cb(void *io_device, void *ctx) 714 { 715 struct malloc_channel *ch = ctx; 716 717 ch->accel_channel = spdk_accel_get_io_channel(); 718 if (!ch->accel_channel) { 719 SPDK_ERRLOG("Failed to get accel framework's IO channel\n"); 720 return -ENOMEM; 721 } 722 723 ch->completion_poller = SPDK_POLLER_REGISTER(malloc_completion_poller, ch, 0); 724 if (!ch->completion_poller) { 725 SPDK_ERRLOG("Failed to register malloc completion poller\n"); 726 spdk_put_io_channel(ch->accel_channel); 727 return -ENOMEM; 728 } 729 730 TAILQ_INIT(&ch->completed_tasks); 731 732 return 0; 733 } 734 735 static void 736 malloc_destroy_channel_cb(void *io_device, void *ctx) 737 { 738 struct malloc_channel *ch = ctx; 739 740 assert(TAILQ_EMPTY(&ch->completed_tasks)); 741 742 spdk_put_io_channel(ch->accel_channel); 743 spdk_poller_unregister(&ch->completion_poller); 744 } 745 746 static int 747 bdev_malloc_initialize(void) 748 { 749 /* This needs to be reset for each reinitialization of submodules. 750 * Otherwise after enough devices or reinitializations the value gets too high. 751 * TODO: Make malloc bdev name mandatory and remove this counter. */ 752 malloc_disk_count = 0; 753 754 spdk_io_device_register(&g_malloc_disks, malloc_create_channel_cb, 755 malloc_destroy_channel_cb, sizeof(struct malloc_channel), 756 "bdev_malloc"); 757 758 return 0; 759 } 760 761 static void 762 bdev_malloc_deinitialize(void) 763 { 764 spdk_io_device_unregister(&g_malloc_disks, NULL); 765 } 766 767 SPDK_LOG_REGISTER_COMPONENT(bdev_malloc) 768