/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2018 Intel Corporation.
 * Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES.
 * All rights reserved.
 */

/*
 * Common code for partition-like virtual bdevs.
 */

#include "spdk/bdev.h"
#include "spdk/likely.h"
#include "spdk/log.h"
#include "spdk/string.h"
#include "spdk/thread.h"

#include "spdk/bdev_module.h"

struct spdk_bdev_part_base {
    struct spdk_bdev *bdev;
    struct spdk_bdev_desc *desc;
    uint32_t ref;
    uint32_t channel_size;
    spdk_bdev_part_base_free_fn base_free_fn;
    void *ctx;
    bool claimed;
    struct spdk_bdev_module *module;
    struct spdk_bdev_fn_table *fn_table;
    struct bdev_part_tailq *tailq;
    spdk_io_channel_create_cb ch_create_cb;
    spdk_io_channel_destroy_cb ch_destroy_cb;
    spdk_bdev_remove_cb_t remove_cb;
    struct spdk_thread *thread;
};

struct spdk_bdev *
spdk_bdev_part_base_get_bdev(struct spdk_bdev_part_base *part_base)
{
    return part_base->bdev;
}

struct spdk_bdev_desc *
spdk_bdev_part_base_get_desc(struct spdk_bdev_part_base *part_base)
{
    return part_base->desc;
}

struct bdev_part_tailq *
spdk_bdev_part_base_get_tailq(struct spdk_bdev_part_base *part_base)
{
    return part_base->tailq;
}

void *
spdk_bdev_part_base_get_ctx(struct spdk_bdev_part_base *part_base)
{
    return part_base->ctx;
}

const char *
spdk_bdev_part_base_get_bdev_name(struct spdk_bdev_part_base *part_base)
{
    return part_base->bdev->name;
}

static void
bdev_part_base_free(void *ctx)
{
    struct spdk_bdev_desc *desc = ctx;

    spdk_bdev_close(desc);
}

void
spdk_bdev_part_base_free(struct spdk_bdev_part_base *base)
{
    if (base->desc) {
        /* Close the underlying bdev on its same opened thread. */
        if (base->thread && base->thread != spdk_get_thread()) {
            spdk_thread_send_msg(base->thread, bdev_part_base_free, base->desc);
        } else {
            spdk_bdev_close(base->desc);
        }
    }

    if (base->base_free_fn != NULL) {
        base->base_free_fn(base->ctx);
    }

    free(base);
}
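
/*
 * Usage sketch (illustrative, not part of this file): consuming vbdev modules
 * treat spdk_bdev_part_base as opaque and reach the underlying bdev through
 * the accessors above, e.g. when logging which bdev a part base sits on:
 *
 *     SPDK_NOTICELOG("part base opened on bdev %s\n",
 *                    spdk_bdev_part_base_get_bdev_name(base));
 *
 * Note that spdk_bdev_part_base_free() may be called from any thread: if the
 * caller is not on the thread that opened the descriptor, the close is
 * forwarded to that thread with spdk_thread_send_msg().
 */
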
static void
bdev_part_free_cb(void *io_device)
{
    struct spdk_bdev_part *part = io_device;
    struct spdk_bdev_part_base *base;

    assert(part);
    assert(part->internal.base);

    base = part->internal.base;

    TAILQ_REMOVE(base->tailq, part, tailq);

    if (--base->ref == 0) {
        spdk_bdev_module_release_bdev(base->bdev);
        spdk_bdev_part_base_free(base);
    }

    spdk_bdev_destruct_done(&part->internal.bdev, 0);
    free(part->internal.bdev.name);
    free(part->internal.bdev.product_name);
    free(part);
}

int
spdk_bdev_part_free(struct spdk_bdev_part *part)
{
    spdk_io_device_unregister(part, bdev_part_free_cb);

    /* Return 1 to indicate that this is an asynchronous operation that isn't complete
     * until spdk_bdev_destruct_done is called */
    return 1;
}

void
spdk_bdev_part_base_hotremove(struct spdk_bdev_part_base *part_base, struct bdev_part_tailq *tailq)
{
    struct spdk_bdev_part *part, *tmp;

    TAILQ_FOREACH_SAFE(part, tailq, tailq, tmp) {
        if (part->internal.base == part_base) {
            spdk_bdev_unregister(&part->internal.bdev, NULL, NULL);
        }
    }
}
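
/*
 * Usage sketch (illustrative, not part of this file): the remove_cb passed to
 * spdk_bdev_part_base_construct_ext() is invoked with the part base when the
 * underlying bdev is hot-removed. A module usually reacts by unregistering
 * every partition it created on top of that base. The names
 * my_base_hotremove_cb and g_my_parts below are hypothetical.
 *
 *     static void
 *     my_base_hotremove_cb(void *_part_base)
 *     {
 *         struct spdk_bdev_part_base *part_base = _part_base;
 *
 *         spdk_bdev_part_base_hotremove(part_base, &g_my_parts);
 *     }
 *
 * Each unregistered partition eventually reaches spdk_bdev_part_free(), which
 * drops its reference on the base; the base itself is freed once the last
 * partition is gone.
 */
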
static bool
bdev_part_io_type_supported(void *_part, enum spdk_bdev_io_type io_type)
{
    struct spdk_bdev_part *part = _part;

    /* We can't decode/modify passthrough NVMe commands, so don't report
     * that a partition supports these io types, even if the underlying
     * bdev does.
     */
    switch (io_type) {
    case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
    case SPDK_BDEV_IO_TYPE_NVME_IO:
    case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
        return false;
    default:
        break;
    }

    return part->internal.base->bdev->fn_table->io_type_supported(part->internal.base->bdev->ctxt,
                                                                  io_type);
}

static struct spdk_io_channel *
bdev_part_get_io_channel(void *_part)
{
    struct spdk_bdev_part *part = _part;

    return spdk_get_io_channel(part);
}

struct spdk_bdev *
spdk_bdev_part_get_bdev(struct spdk_bdev_part *part)
{
    return &part->internal.bdev;
}

struct spdk_bdev_part_base *
spdk_bdev_part_get_base(struct spdk_bdev_part *part)
{
    return part->internal.base;
}

struct spdk_bdev *
spdk_bdev_part_get_base_bdev(struct spdk_bdev_part *part)
{
    return part->internal.base->bdev;
}

uint64_t
spdk_bdev_part_get_offset_blocks(struct spdk_bdev_part *part)
{
    return part->internal.offset_blocks;
}

static int
bdev_part_remap_dif(struct spdk_bdev_io *bdev_io, uint32_t offset,
                    uint32_t remapped_offset)
{
    struct spdk_bdev *bdev = bdev_io->bdev;
    struct spdk_dif_ctx dif_ctx;
    struct spdk_dif_error err_blk = {};
    int rc;

    if (spdk_likely(!(bdev->dif_check_flags & SPDK_DIF_FLAGS_REFTAG_CHECK))) {
        return 0;
    }

    rc = spdk_dif_ctx_init(&dif_ctx,
                           bdev->blocklen, bdev->md_len, bdev->md_interleave,
                           bdev->dif_is_head_of_md, bdev->dif_type, bdev->dif_check_flags,
                           offset, 0, 0, 0, 0);
    if (rc != 0) {
        SPDK_ERRLOG("Initialization of DIF context failed\n");
        return rc;
    }

    spdk_dif_ctx_set_remapped_init_ref_tag(&dif_ctx, remapped_offset);

    if (bdev->md_interleave) {
        rc = spdk_dif_remap_ref_tag(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
                                    bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
    } else {
        struct iovec md_iov = {
            .iov_base = bdev_io->u.bdev.md_buf,
            .iov_len = bdev_io->u.bdev.num_blocks * bdev->md_len,
        };

        rc = spdk_dix_remap_ref_tag(&md_iov, bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
    }

    if (rc != 0) {
        SPDK_ERRLOG("Remapping reference tag failed. type=%d, offset=%" PRIu32 "\n",
                    err_blk.err_type, err_blk.err_offset);
    }

    return rc;
}
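
/*
 * Worked example (assumed numbers, for illustration only): suppose a partition
 * starts at base block 2048 and REFTAG checking is enabled. A write to
 * partition blocks 16..19 carries DIF reference tags 16..19, but it lands on
 * base blocks 2064..2067, so bdev_part_remap_dif() rewrites the expected tags
 * to 2064..2067 before submission. On read completion the data comes back
 * tagged with base block numbers and is remapped back to 16..19 so that
 * verification against the partition-relative offsets still passes.
 */
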
static void
bdev_part_complete_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
    struct spdk_bdev_io *part_io = cb_arg;
    uint32_t offset, remapped_offset;
    spdk_bdev_io_completion_cb cb;
    int rc, status;

    switch (bdev_io->type) {
    case SPDK_BDEV_IO_TYPE_READ:
        if (success) {
            offset = bdev_io->u.bdev.offset_blocks;
            remapped_offset = part_io->u.bdev.offset_blocks;

            rc = bdev_part_remap_dif(bdev_io, offset, remapped_offset);
            if (rc != 0) {
                success = false;
            }
        }
        break;
    case SPDK_BDEV_IO_TYPE_ZCOPY:
        spdk_bdev_io_set_buf(part_io, bdev_io->u.bdev.iovs[0].iov_base,
                             bdev_io->u.bdev.iovs[0].iov_len);
        break;
    default:
        break;
    }

    cb = part_io->u.bdev.stored_user_cb;
    if (cb != NULL) {
        cb(part_io, success, NULL);
    } else {
        status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;

        spdk_bdev_io_complete(part_io, status);
    }

    spdk_bdev_free_io(bdev_io);
}

static inline void
bdev_part_init_ext_io_opts(struct spdk_bdev_io *bdev_io, struct spdk_bdev_ext_io_opts *opts)
{
    memset(opts, 0, sizeof(*opts));
    opts->size = sizeof(*opts);
    opts->memory_domain = bdev_io->u.bdev.memory_domain;
    opts->memory_domain_ctx = bdev_io->u.bdev.memory_domain_ctx;
    opts->metadata = bdev_io->u.bdev.md_buf;
}

int
spdk_bdev_part_submit_request_ext(struct spdk_bdev_part_channel *ch, struct spdk_bdev_io *bdev_io,
                                  spdk_bdev_io_completion_cb cb)
{
    struct spdk_bdev_part *part = ch->part;
    struct spdk_io_channel *base_ch = ch->base_ch;
    struct spdk_bdev_desc *base_desc = part->internal.base->desc;
    struct spdk_bdev_ext_io_opts io_opts;
    uint64_t offset, remapped_offset, remapped_src_offset;
    int rc = 0;

    bdev_io->u.bdev.stored_user_cb = cb;

    offset = bdev_io->u.bdev.offset_blocks;
    remapped_offset = offset + part->internal.offset_blocks;

    /* Modify the I/O to adjust for the offset within the base bdev. */
    switch (bdev_io->type) {
    case SPDK_BDEV_IO_TYPE_READ:
        bdev_part_init_ext_io_opts(bdev_io, &io_opts);
        rc = spdk_bdev_readv_blocks_ext(base_desc, base_ch, bdev_io->u.bdev.iovs,
                                        bdev_io->u.bdev.iovcnt, remapped_offset,
                                        bdev_io->u.bdev.num_blocks,
                                        bdev_part_complete_io, bdev_io, &io_opts);
        break;
    case SPDK_BDEV_IO_TYPE_WRITE:
        rc = bdev_part_remap_dif(bdev_io, offset, remapped_offset);
        if (rc != 0) {
            return SPDK_BDEV_IO_STATUS_FAILED;
        }
        bdev_part_init_ext_io_opts(bdev_io, &io_opts);
        rc = spdk_bdev_writev_blocks_ext(base_desc, base_ch, bdev_io->u.bdev.iovs,
                                         bdev_io->u.bdev.iovcnt, remapped_offset,
                                         bdev_io->u.bdev.num_blocks,
                                         bdev_part_complete_io, bdev_io, &io_opts);
        break;
    case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
        rc = spdk_bdev_write_zeroes_blocks(base_desc, base_ch, remapped_offset,
                                           bdev_io->u.bdev.num_blocks, bdev_part_complete_io,
                                           bdev_io);
        break;
    case SPDK_BDEV_IO_TYPE_UNMAP:
        rc = spdk_bdev_unmap_blocks(base_desc, base_ch, remapped_offset,
                                    bdev_io->u.bdev.num_blocks, bdev_part_complete_io,
                                    bdev_io);
        break;
    case SPDK_BDEV_IO_TYPE_FLUSH:
        rc = spdk_bdev_flush_blocks(base_desc, base_ch, remapped_offset,
                                    bdev_io->u.bdev.num_blocks, bdev_part_complete_io,
                                    bdev_io);
        break;
    case SPDK_BDEV_IO_TYPE_RESET:
        rc = spdk_bdev_reset(base_desc, base_ch,
                             bdev_part_complete_io, bdev_io);
        break;
    case SPDK_BDEV_IO_TYPE_ZCOPY:
        rc = spdk_bdev_zcopy_start(base_desc, base_ch, NULL, 0, remapped_offset,
                                   bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.zcopy.populate,
                                   bdev_part_complete_io, bdev_io);
        break;
    case SPDK_BDEV_IO_TYPE_COMPARE:
        if (!bdev_io->u.bdev.md_buf) {
            rc = spdk_bdev_comparev_blocks(base_desc, base_ch,
                                           bdev_io->u.bdev.iovs,
                                           bdev_io->u.bdev.iovcnt,
                                           remapped_offset,
                                           bdev_io->u.bdev.num_blocks,
                                           bdev_part_complete_io, bdev_io);
        } else {
            rc = spdk_bdev_comparev_blocks_with_md(base_desc, base_ch,
                                                   bdev_io->u.bdev.iovs,
                                                   bdev_io->u.bdev.iovcnt,
                                                   bdev_io->u.bdev.md_buf,
                                                   remapped_offset,
                                                   bdev_io->u.bdev.num_blocks,
                                                   bdev_part_complete_io, bdev_io);
        }
        break;
    case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
        rc = spdk_bdev_comparev_and_writev_blocks(base_desc, base_ch, bdev_io->u.bdev.iovs,
                                                  bdev_io->u.bdev.iovcnt,
                                                  bdev_io->u.bdev.fused_iovs,
                                                  bdev_io->u.bdev.fused_iovcnt,
                                                  remapped_offset,
                                                  bdev_io->u.bdev.num_blocks,
                                                  bdev_part_complete_io, bdev_io);
        break;
    case SPDK_BDEV_IO_TYPE_COPY:
        remapped_src_offset = bdev_io->u.bdev.copy.src_offset_blocks + part->internal.offset_blocks;
        rc = spdk_bdev_copy_blocks(base_desc, base_ch, remapped_offset, remapped_src_offset,
                                   bdev_io->u.bdev.num_blocks, bdev_part_complete_io,
                                   bdev_io);
        break;
    default:
        SPDK_ERRLOG("unknown I/O type %d\n", bdev_io->type);
        return SPDK_BDEV_IO_STATUS_FAILED;
    }

    return rc;
}

int
spdk_bdev_part_submit_request(struct spdk_bdev_part_channel *ch, struct spdk_bdev_io *bdev_io)
{
    return spdk_bdev_part_submit_request_ext(ch, bdev_io, NULL);
}
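
/*
 * Usage sketch (illustrative, not part of this file): a consuming vbdev module
 * forwards I/O from its fn_table submit_request callback through the part
 * channel that lives in its per-channel context. The type my_channel and its
 * part_ch member are hypothetical; my_channel is assumed to embed
 * struct spdk_bdev_part_channel as its first member and to have been sized via
 * the channel_size argument of spdk_bdev_part_base_construct_ext(). Error
 * handling is kept minimal here.
 *
 *     static void
 *     my_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io)
 *     {
 *         struct my_channel *ch = spdk_io_channel_get_ctx(_ch);
 *         int rc;
 *
 *         rc = spdk_bdev_part_submit_request(&ch->part_ch, bdev_io);
 *         if (rc != 0) {
 *             spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
 *         }
 *     }
 */
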
static int
bdev_part_channel_create_cb(void *io_device, void *ctx_buf)
{
    struct spdk_bdev_part *part = (struct spdk_bdev_part *)io_device;
    struct spdk_bdev_part_channel *ch = ctx_buf;

    ch->part = part;
    ch->base_ch = spdk_bdev_get_io_channel(part->internal.base->desc);
    if (ch->base_ch == NULL) {
        return -1;
    }

    if (part->internal.base->ch_create_cb) {
        return part->internal.base->ch_create_cb(io_device, ctx_buf);
    } else {
        return 0;
    }
}

static void
bdev_part_channel_destroy_cb(void *io_device, void *ctx_buf)
{
    struct spdk_bdev_part *part = (struct spdk_bdev_part *)io_device;
    struct spdk_bdev_part_channel *ch = ctx_buf;

    if (part->internal.base->ch_destroy_cb) {
        part->internal.base->ch_destroy_cb(io_device, ctx_buf);
    }
    spdk_put_io_channel(ch->base_ch);
}

static void
bdev_part_base_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev,
                        void *event_ctx)
{
    struct spdk_bdev_part_base *base = event_ctx;

    switch (type) {
    case SPDK_BDEV_EVENT_REMOVE:
        base->remove_cb(base);
        break;
    default:
        SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type);
        break;
    }
}

int
spdk_bdev_part_base_construct_ext(const char *bdev_name,
                                  spdk_bdev_remove_cb_t remove_cb, struct spdk_bdev_module *module,
                                  struct spdk_bdev_fn_table *fn_table, struct bdev_part_tailq *tailq,
                                  spdk_bdev_part_base_free_fn free_fn, void *ctx,
                                  uint32_t channel_size, spdk_io_channel_create_cb ch_create_cb,
                                  spdk_io_channel_destroy_cb ch_destroy_cb,
                                  struct spdk_bdev_part_base **_base)
{
    int rc;
    struct spdk_bdev_part_base *base;

    if (_base == NULL) {
        return -EINVAL;
    }

    base = calloc(1, sizeof(*base));
    if (!base) {
        SPDK_ERRLOG("Memory allocation failure\n");
        return -ENOMEM;
    }
    fn_table->get_io_channel = bdev_part_get_io_channel;
    fn_table->io_type_supported = bdev_part_io_type_supported;

    base->desc = NULL;
    base->ref = 0;
    base->module = module;
    base->fn_table = fn_table;
    base->tailq = tailq;
    base->base_free_fn = free_fn;
    base->ctx = ctx;
    base->claimed = false;
    base->channel_size = channel_size;
    base->ch_create_cb = ch_create_cb;
    base->ch_destroy_cb = ch_destroy_cb;
    base->remove_cb = remove_cb;

    rc = spdk_bdev_open_ext(bdev_name, false, bdev_part_base_event_cb, base, &base->desc);
    if (rc) {
        if (rc == -ENODEV) {
            free(base);
        } else {
            SPDK_ERRLOG("could not open bdev %s: %s\n", bdev_name, spdk_strerror(-rc));
            spdk_bdev_part_base_free(base);
        }
        return rc;
    }

    base->bdev = spdk_bdev_desc_get_bdev(base->desc);

    /* Save the thread where the base device is opened */
    base->thread = spdk_get_thread();

    *_base = base;

    return 0;
}
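
/*
 * Usage sketch (illustrative, not part of this file): a module constructs one
 * part base per underlying bdev, typically from its examine or RPC path. The
 * bdev name "Nvme0n1" and the identifiers my_if, my_fn_table, g_my_parts,
 * my_base_hotremove_cb, my_base_free and struct my_channel are hypothetical.
 *
 *     struct spdk_bdev_part_base *base = NULL;
 *     int rc;
 *
 *     rc = spdk_bdev_part_base_construct_ext("Nvme0n1", my_base_hotremove_cb,
 *                                            &my_if, &my_fn_table, &g_my_parts,
 *                                            my_base_free, NULL,
 *                                            sizeof(struct my_channel),
 *                                            NULL, NULL, &base);
 *     if (rc != 0) {
 *         // e.g. -ENODEV if "Nvme0n1" does not exist yet
 *     }
 */
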
int
spdk_bdev_part_construct(struct spdk_bdev_part *part, struct spdk_bdev_part_base *base,
                         char *name, uint64_t offset_blocks, uint64_t num_blocks,
                         char *product_name)
{
    int rc;
    bool first_claimed = false;

    part->internal.bdev.blocklen = base->bdev->blocklen;
    part->internal.bdev.blockcnt = num_blocks;
    part->internal.offset_blocks = offset_blocks;

    part->internal.bdev.write_cache = base->bdev->write_cache;
    part->internal.bdev.required_alignment = base->bdev->required_alignment;
    part->internal.bdev.ctxt = part;
    part->internal.bdev.module = base->module;
    part->internal.bdev.fn_table = base->fn_table;

    part->internal.bdev.md_interleave = base->bdev->md_interleave;
    part->internal.bdev.md_len = base->bdev->md_len;
    part->internal.bdev.dif_type = base->bdev->dif_type;
    part->internal.bdev.dif_is_head_of_md = base->bdev->dif_is_head_of_md;
    part->internal.bdev.dif_check_flags = base->bdev->dif_check_flags;

    part->internal.bdev.name = strdup(name);
    if (part->internal.bdev.name == NULL) {
        SPDK_ERRLOG("Failed to allocate name for new part of bdev %s\n", spdk_bdev_get_name(base->bdev));
        return -1;
    }

    part->internal.bdev.product_name = strdup(product_name);
    if (part->internal.bdev.product_name == NULL) {
        free(part->internal.bdev.name);
        SPDK_ERRLOG("Failed to allocate product name for new part of bdev %s\n",
                    spdk_bdev_get_name(base->bdev));
        return -1;
    }

    base->ref++;
    part->internal.base = base;

    if (!base->claimed) {
        int rc;

        rc = spdk_bdev_module_claim_bdev(base->bdev, base->desc, base->module);
        if (rc) {
            SPDK_ERRLOG("could not claim bdev %s\n", spdk_bdev_get_name(base->bdev));
            free(part->internal.bdev.name);
            free(part->internal.bdev.product_name);
            base->ref--;
            return -1;
        }
        base->claimed = true;
        first_claimed = true;
    }

    spdk_io_device_register(part, bdev_part_channel_create_cb,
                            bdev_part_channel_destroy_cb,
                            base->channel_size,
                            name);

    rc = spdk_bdev_register(&part->internal.bdev);
    if (rc == 0) {
        TAILQ_INSERT_TAIL(base->tailq, part, tailq);
    } else {
        spdk_io_device_unregister(part, NULL);
        if (--base->ref == 0) {
            spdk_bdev_module_release_bdev(base->bdev);
        }
        free(part->internal.bdev.name);
        free(part->internal.bdev.product_name);
        if (first_claimed == true) {
            base->claimed = false;
        }
    }

    return rc;
}
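
/*
 * Usage sketch (illustrative, not part of this file): once a base exists, each
 * partition is an spdk_bdev_part embedded in a module-specific structure and
 * registered on top of the base. struct my_part_disk, the partition name and
 * the block range below are hypothetical.
 *
 *     struct my_part_disk {
 *         struct spdk_bdev_part part;
 *         // module-specific fields may follow
 *     };
 *
 *     struct my_part_disk *d = calloc(1, sizeof(*d));
 *     int rc;
 *
 *     if (d == NULL) {
 *         return -ENOMEM;
 *     }
 *
 *     rc = spdk_bdev_part_construct(&d->part, base, "Nvme0n1p1",
 *                                   2048, 1048576, "Example Partition Disk");
 *     if (rc != 0) {
 *         // spdk_bdev_part_construct cleans up its own allocations on failure
 *         free(d);
 *     }
 */
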