/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Common code for partition-like virtual bdevs.
 */

#include "spdk/bdev.h"
#include "spdk/likely.h"
#include "spdk/log.h"
#include "spdk/string.h"
#include "spdk/thread.h"

#include "spdk/bdev_module.h"

struct spdk_bdev_part_base {
	struct spdk_bdev		*bdev;
	struct spdk_bdev_desc		*desc;
	uint32_t			ref;
	uint32_t			channel_size;
	spdk_bdev_part_base_free_fn	base_free_fn;
	void				*ctx;
	bool				claimed;
	struct spdk_bdev_module		*module;
	struct spdk_bdev_fn_table	*fn_table;
	struct bdev_part_tailq		*tailq;
	spdk_io_channel_create_cb	ch_create_cb;
	spdk_io_channel_destroy_cb	ch_destroy_cb;
	spdk_bdev_remove_cb_t		remove_cb;
	struct spdk_thread		*thread;
};

struct spdk_bdev *
spdk_bdev_part_base_get_bdev(struct spdk_bdev_part_base *part_base)
{
	return part_base->bdev;
}

struct spdk_bdev_desc *
spdk_bdev_part_base_get_desc(struct spdk_bdev_part_base *part_base)
{
	return part_base->desc;
}

struct bdev_part_tailq *
spdk_bdev_part_base_get_tailq(struct spdk_bdev_part_base *part_base)
{
	return part_base->tailq;
}

void *
spdk_bdev_part_base_get_ctx(struct spdk_bdev_part_base *part_base)
{
	return part_base->ctx;
}

const char *
spdk_bdev_part_base_get_bdev_name(struct spdk_bdev_part_base *part_base)
{
	return part_base->bdev->name;
}

static void
bdev_part_base_free(void *ctx)
{
	struct spdk_bdev_desc *desc = ctx;

	spdk_bdev_close(desc);
}

void
spdk_bdev_part_base_free(struct spdk_bdev_part_base *base)
{
	if (base->desc) {
		/* Close the underlying bdev on the same thread that opened it. */
		if (base->thread && base->thread != spdk_get_thread()) {
			spdk_thread_send_msg(base->thread, bdev_part_base_free, base->desc);
		} else {
			spdk_bdev_close(base->desc);
		}
	}

	if (base->base_free_fn != NULL) {
		base->base_free_fn(base->ctx);
	}

	free(base);
}

static void
bdev_part_free_cb(void *io_device)
{
	struct spdk_bdev_part *part = io_device;
	struct spdk_bdev_part_base *base;

	assert(part);
	assert(part->internal.base);

	base = part->internal.base;

	TAILQ_REMOVE(base->tailq, part, tailq);

	if (--base->ref == 0) {
		spdk_bdev_module_release_bdev(base->bdev);
		spdk_bdev_part_base_free(base);
	}

	spdk_bdev_destruct_done(&part->internal.bdev, 0);
	free(part->internal.bdev.name);
	free(part->internal.bdev.product_name);
	free(part);
}

int
spdk_bdev_part_free(struct spdk_bdev_part *part)
{
	spdk_io_device_unregister(part, bdev_part_free_cb);

	/* Return 1 to indicate that this is an asynchronous operation that isn't complete
	 * until spdk_bdev_destruct_done is called */
	return 1;
}

void
spdk_bdev_part_base_hotremove(struct spdk_bdev_part_base *part_base, struct bdev_part_tailq *tailq)
{
	struct spdk_bdev_part *part, *tmp;

	TAILQ_FOREACH_SAFE(part, tailq, tailq, tmp) {
		if (part->internal.base == part_base) {
			spdk_bdev_unregister(&part->internal.bdev, NULL, NULL);
		}
	}
}
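/*
 * Illustrative sketch, not part of this file's API: a virtual bdev module
 * built on this code typically reacts to hot removal of its base bdev by
 * unregistering every part that sits on top of it. The remove_cb handed to
 * spdk_bdev_part_base_construct_ext() is invoked with the part base as its
 * context and usually just forwards to spdk_bdev_part_base_hotremove() with
 * the module's own part tailq. The names g_my_parts and my_base_hotremove_cb
 * below are hypothetical.
 *
 *	static struct bdev_part_tailq g_my_parts = TAILQ_HEAD_INITIALIZER(g_my_parts);
 *
 *	static void
 *	my_base_hotremove_cb(void *_part_base)
 *	{
 *		struct spdk_bdev_part_base *part_base = _part_base;
 *
 *		spdk_bdev_part_base_hotremove(part_base, &g_my_parts);
 *	}
 */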
static bool
bdev_part_io_type_supported(void *_part, enum spdk_bdev_io_type io_type)
{
	struct spdk_bdev_part *part = _part;

	/* We can't decode/modify passthrough NVMe commands, so don't report
	 * that a partition supports these io types, even if the underlying
	 * bdev does.
	 */
	switch (io_type) {
	case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
	case SPDK_BDEV_IO_TYPE_NVME_IO:
	case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
		return false;
	default:
		break;
	}

	return part->internal.base->bdev->fn_table->io_type_supported(part->internal.base->bdev->ctxt,
			io_type);
}

static struct spdk_io_channel *
bdev_part_get_io_channel(void *_part)
{
	struct spdk_bdev_part *part = _part;

	return spdk_get_io_channel(part);
}

struct spdk_bdev *
spdk_bdev_part_get_bdev(struct spdk_bdev_part *part)
{
	return &part->internal.bdev;
}

struct spdk_bdev_part_base *
spdk_bdev_part_get_base(struct spdk_bdev_part *part)
{
	return part->internal.base;
}

struct spdk_bdev *
spdk_bdev_part_get_base_bdev(struct spdk_bdev_part *part)
{
	return part->internal.base->bdev;
}

uint64_t
spdk_bdev_part_get_offset_blocks(struct spdk_bdev_part *part)
{
	return part->internal.offset_blocks;
}

static int
bdev_part_remap_dif(struct spdk_bdev_io *bdev_io, uint32_t offset,
		    uint32_t remapped_offset)
{
	struct spdk_bdev *bdev = bdev_io->bdev;
	struct spdk_dif_ctx dif_ctx;
	struct spdk_dif_error err_blk = {};
	int rc;

	if (spdk_likely(!(bdev->dif_check_flags & SPDK_DIF_FLAGS_REFTAG_CHECK))) {
		return 0;
	}

	rc = spdk_dif_ctx_init(&dif_ctx,
			       bdev->blocklen, bdev->md_len, bdev->md_interleave,
			       bdev->dif_is_head_of_md, bdev->dif_type, bdev->dif_check_flags,
			       offset, 0, 0, 0, 0);
	if (rc != 0) {
		SPDK_ERRLOG("Initialization of DIF context failed\n");
		return rc;
	}

	spdk_dif_ctx_set_remapped_init_ref_tag(&dif_ctx, remapped_offset);

	if (bdev->md_interleave) {
		rc = spdk_dif_remap_ref_tag(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
					    bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
	} else {
		struct iovec md_iov = {
			.iov_base	= bdev_io->u.bdev.md_buf,
			.iov_len	= bdev_io->u.bdev.num_blocks * bdev->md_len,
		};

		rc = spdk_dix_remap_ref_tag(&md_iov, bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
	}

	if (rc != 0) {
		SPDK_ERRLOG("Remapping reference tag failed. type=%d, offset=%" PRIu32 "\n",
			    err_blk.err_type, err_blk.err_offset);
	}

	return rc;
}

static void
bdev_part_complete_read_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_bdev_io *part_io = cb_arg;
	uint32_t offset, remapped_offset;
	int rc, status;

	offset = bdev_io->u.bdev.offset_blocks;
	remapped_offset = part_io->u.bdev.offset_blocks;

	if (success) {
		rc = bdev_part_remap_dif(bdev_io, offset, remapped_offset);
		if (rc != 0) {
			success = false;
		}
	}

	status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;

	spdk_bdev_io_complete(part_io, status);
	spdk_bdev_free_io(bdev_io);
}

static void
bdev_part_complete_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_bdev_io *part_io = cb_arg;
	int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;

	spdk_bdev_io_complete(part_io, status);
	spdk_bdev_free_io(bdev_io);
}

static void
bdev_part_complete_zcopy_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_bdev_io *part_io = cb_arg;
	int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;

	spdk_bdev_io_set_buf(part_io, bdev_io->u.bdev.iovs[0].iov_base, bdev_io->u.bdev.iovs[0].iov_len);
	spdk_bdev_io_complete(part_io, status);
	spdk_bdev_free_io(bdev_io);
}
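/*
 * Note on offset remapping (descriptive comment added for clarity): a part
 * exposes LBAs 0 through blockcnt - 1, but its data lives in the base bdev
 * starting at internal.offset_blocks. spdk_bdev_part_submit_request() below
 * therefore reissues each I/O at remapped_offset = offset + offset_blocks.
 * For example, with offset_blocks = 2048, a read of the part at LBA 16 is
 * submitted to the base bdev at LBA 2064. When the base bdev has DIF
 * reference tag checking enabled, the expected initial reference tag depends
 * on the LBA as well, so writes remap it via bdev_part_remap_dif() before
 * submission and reads remap it back in bdev_part_complete_read_io().
 */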
int
spdk_bdev_part_submit_request(struct spdk_bdev_part_channel *ch, struct spdk_bdev_io *bdev_io)
{
	struct spdk_bdev_part *part = ch->part;
	struct spdk_io_channel *base_ch = ch->base_ch;
	struct spdk_bdev_desc *base_desc = part->internal.base->desc;
	uint64_t offset, remapped_offset;
	int rc = 0;

	offset = bdev_io->u.bdev.offset_blocks;
	remapped_offset = offset + part->internal.offset_blocks;

	/* Modify the I/O to adjust for the offset within the base bdev. */
	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		if (bdev_io->u.bdev.ext_opts || !bdev_io->u.bdev.md_buf) {
			rc = spdk_bdev_readv_blocks_ext(base_desc, base_ch, bdev_io->u.bdev.iovs,
							bdev_io->u.bdev.iovcnt, remapped_offset,
							bdev_io->u.bdev.num_blocks,
							bdev_part_complete_read_io, bdev_io,
							bdev_io->u.bdev.ext_opts);
		} else {
			rc = spdk_bdev_readv_blocks_with_md(base_desc, base_ch,
							    bdev_io->u.bdev.iovs,
							    bdev_io->u.bdev.iovcnt,
							    bdev_io->u.bdev.md_buf, remapped_offset,
							    bdev_io->u.bdev.num_blocks,
							    bdev_part_complete_read_io, bdev_io);
		}
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		rc = bdev_part_remap_dif(bdev_io, offset, remapped_offset);
		if (rc != 0) {
			return SPDK_BDEV_IO_STATUS_FAILED;
		}

		if (bdev_io->u.bdev.ext_opts || !bdev_io->u.bdev.md_buf) {
			rc = spdk_bdev_writev_blocks_ext(base_desc, base_ch, bdev_io->u.bdev.iovs,
							 bdev_io->u.bdev.iovcnt, remapped_offset,
							 bdev_io->u.bdev.num_blocks,
							 bdev_part_complete_io, bdev_io,
							 bdev_io->u.bdev.ext_opts);
		} else {
			rc = spdk_bdev_writev_blocks_with_md(base_desc, base_ch,
							     bdev_io->u.bdev.iovs,
							     bdev_io->u.bdev.iovcnt,
							     bdev_io->u.bdev.md_buf, remapped_offset,
							     bdev_io->u.bdev.num_blocks,
							     bdev_part_complete_io, bdev_io);
		}
		break;
	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
		rc = spdk_bdev_write_zeroes_blocks(base_desc, base_ch, remapped_offset,
						   bdev_io->u.bdev.num_blocks, bdev_part_complete_io,
						   bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_UNMAP:
		rc = spdk_bdev_unmap_blocks(base_desc, base_ch, remapped_offset,
					    bdev_io->u.bdev.num_blocks, bdev_part_complete_io,
					    bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_FLUSH:
		rc = spdk_bdev_flush_blocks(base_desc, base_ch, remapped_offset,
					    bdev_io->u.bdev.num_blocks, bdev_part_complete_io,
					    bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_RESET:
		rc = spdk_bdev_reset(base_desc, base_ch,
				     bdev_part_complete_io, bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_ZCOPY:
		rc = spdk_bdev_zcopy_start(base_desc, base_ch, NULL, 0, remapped_offset,
					   bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.zcopy.populate,
					   bdev_part_complete_zcopy_io, bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_COMPARE:
		if (!bdev_io->u.bdev.md_buf) {
			rc = spdk_bdev_comparev_blocks(base_desc, base_ch,
						       bdev_io->u.bdev.iovs,
						       bdev_io->u.bdev.iovcnt,
						       remapped_offset,
						       bdev_io->u.bdev.num_blocks,
						       bdev_part_complete_io, bdev_io);
		} else {
			rc = spdk_bdev_comparev_blocks_with_md(base_desc, base_ch,
							       bdev_io->u.bdev.iovs,
							       bdev_io->u.bdev.iovcnt,
							       bdev_io->u.bdev.md_buf,
							       remapped_offset,
							       bdev_io->u.bdev.num_blocks,
							       bdev_part_complete_io, bdev_io);
		}
		break;
	case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
		rc = spdk_bdev_comparev_and_writev_blocks(base_desc, base_ch, bdev_io->u.bdev.iovs,
							  bdev_io->u.bdev.iovcnt,
							  bdev_io->u.bdev.fused_iovs,
							  bdev_io->u.bdev.fused_iovcnt,
							  remapped_offset,
							  bdev_io->u.bdev.num_blocks,
							  bdev_part_complete_io, bdev_io);
		break;
	default:
		SPDK_ERRLOG("unknown I/O type %d\n", bdev_io->type);
		return SPDK_BDEV_IO_STATUS_FAILED;
	}

	return rc;
}
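/*
 * Illustrative sketch, not part of this file's API: the channel_size passed
 * to spdk_bdev_part_base_construct_ext() is forwarded to
 * spdk_io_device_register() for each part, and bdev_part_channel_create_cb()
 * below treats the resulting ctx_buf as a struct spdk_bdev_part_channel. A
 * module that wants per-channel state of its own therefore embeds that
 * struct as the first member of its channel context and passes the full
 * size as channel_size. The name my_part_channel is hypothetical.
 *
 *	struct my_part_channel {
 *		struct spdk_bdev_part_channel	part_ch;
 *		uint64_t			io_count;
 *	};
 *
 *	(channel_size would then be sizeof(struct my_part_channel), and the
 *	module's ch_create_cb/ch_destroy_cb cast ctx_buf to this type.)
 */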
static int
bdev_part_channel_create_cb(void *io_device, void *ctx_buf)
{
	struct spdk_bdev_part *part = (struct spdk_bdev_part *)io_device;
	struct spdk_bdev_part_channel *ch = ctx_buf;

	ch->part = part;
	ch->base_ch = spdk_bdev_get_io_channel(part->internal.base->desc);
	if (ch->base_ch == NULL) {
		return -1;
	}

	if (part->internal.base->ch_create_cb) {
		return part->internal.base->ch_create_cb(io_device, ctx_buf);
	} else {
		return 0;
	}
}

static void
bdev_part_channel_destroy_cb(void *io_device, void *ctx_buf)
{
	struct spdk_bdev_part *part = (struct spdk_bdev_part *)io_device;
	struct spdk_bdev_part_channel *ch = ctx_buf;

	if (part->internal.base->ch_destroy_cb) {
		part->internal.base->ch_destroy_cb(io_device, ctx_buf);
	}
	spdk_put_io_channel(ch->base_ch);
}

static void
bdev_part_base_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev,
			void *event_ctx)
{
	struct spdk_bdev_part_base *base = event_ctx;

	switch (type) {
	case SPDK_BDEV_EVENT_REMOVE:
		base->remove_cb(base);
		break;
	default:
		SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type);
		break;
	}
}

int
spdk_bdev_part_base_construct_ext(const char *bdev_name,
				  spdk_bdev_remove_cb_t remove_cb, struct spdk_bdev_module *module,
				  struct spdk_bdev_fn_table *fn_table, struct bdev_part_tailq *tailq,
				  spdk_bdev_part_base_free_fn free_fn, void *ctx,
				  uint32_t channel_size, spdk_io_channel_create_cb ch_create_cb,
				  spdk_io_channel_destroy_cb ch_destroy_cb,
				  struct spdk_bdev_part_base **_base)
{
	int rc;
	struct spdk_bdev_part_base *base;

	if (_base == NULL) {
		return -EINVAL;
	}

	base = calloc(1, sizeof(*base));
	if (!base) {
		SPDK_ERRLOG("Memory allocation failure\n");
		return -ENOMEM;
	}
	fn_table->get_io_channel = bdev_part_get_io_channel;
	fn_table->io_type_supported = bdev_part_io_type_supported;

	base->desc = NULL;
	base->ref = 0;
	base->module = module;
	base->fn_table = fn_table;
	base->tailq = tailq;
	base->base_free_fn = free_fn;
	base->ctx = ctx;
	base->claimed = false;
	base->channel_size = channel_size;
	base->ch_create_cb = ch_create_cb;
	base->ch_destroy_cb = ch_destroy_cb;
	base->remove_cb = remove_cb;

	rc = spdk_bdev_open_ext(bdev_name, false, bdev_part_base_event_cb, base, &base->desc);
	if (rc) {
		if (rc == -ENODEV) {
			free(base);
		} else {
			SPDK_ERRLOG("could not open bdev %s: %s\n", bdev_name, spdk_strerror(-rc));
			spdk_bdev_part_base_free(base);
		}
		return rc;
	}

	base->bdev = spdk_bdev_desc_get_bdev(base->desc);

	/* Save the thread where the base device is opened */
	base->thread = spdk_get_thread();

	*_base = base;

	return 0;
}
int
spdk_bdev_part_construct(struct spdk_bdev_part *part, struct spdk_bdev_part_base *base,
			 char *name, uint64_t offset_blocks, uint64_t num_blocks,
			 char *product_name)
{
	part->internal.bdev.blocklen = base->bdev->blocklen;
	part->internal.bdev.blockcnt = num_blocks;
	part->internal.offset_blocks = offset_blocks;

	part->internal.bdev.write_cache = base->bdev->write_cache;
	part->internal.bdev.required_alignment = base->bdev->required_alignment;
	part->internal.bdev.ctxt = part;
	part->internal.bdev.module = base->module;
	part->internal.bdev.fn_table = base->fn_table;

	part->internal.bdev.md_interleave = base->bdev->md_interleave;
	part->internal.bdev.md_len = base->bdev->md_len;
	part->internal.bdev.dif_type = base->bdev->dif_type;
	part->internal.bdev.dif_is_head_of_md = base->bdev->dif_is_head_of_md;
	part->internal.bdev.dif_check_flags = base->bdev->dif_check_flags;

	part->internal.bdev.name = strdup(name);
	if (part->internal.bdev.name == NULL) {
		SPDK_ERRLOG("Failed to allocate name for new part of bdev %s\n", spdk_bdev_get_name(base->bdev));
		return -1;
	}

	part->internal.bdev.product_name = strdup(product_name);
	if (part->internal.bdev.product_name == NULL) {
		free(part->internal.bdev.name);
		SPDK_ERRLOG("Failed to allocate product name for new part of bdev %s\n",
			    spdk_bdev_get_name(base->bdev));
		return -1;
	}

	base->ref++;
	part->internal.base = base;

	if (!base->claimed) {
		int rc;

		rc = spdk_bdev_module_claim_bdev(base->bdev, base->desc, base->module);
		if (rc) {
			SPDK_ERRLOG("could not claim bdev %s\n", spdk_bdev_get_name(base->bdev));
			free(part->internal.bdev.name);
			free(part->internal.bdev.product_name);
			return -1;
		}
		base->claimed = true;
	}

	spdk_io_device_register(part, bdev_part_channel_create_cb,
				bdev_part_channel_destroy_cb,
				base->channel_size,
				name);

	spdk_bdev_register(&part->internal.bdev);
	TAILQ_INSERT_TAIL(base->tailq, part, tailq);

	return 0;
}
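/*
 * Illustrative usage sketch, not part of this file's API: roughly how a
 * module built on this code ties the pieces together, reusing the
 * hypothetical g_my_parts and my_base_hotremove_cb names from the sketches
 * above. my_module and my_fn_table are also hypothetical; the fn_table is
 * typically expected to supply at least a destruct callback that calls
 * spdk_bdev_part_free() and a submit_request callback that calls
 * spdk_bdev_part_submit_request(), since get_io_channel and
 * io_type_supported are filled in by spdk_bdev_part_base_construct_ext().
 * Error handling and cleanup are abbreviated.
 *
 *	static int
 *	my_create_upper_half(const char *base_bdev_name)
 *	{
 *		struct spdk_bdev_part_base *base = NULL;
 *		struct spdk_bdev_part *part;
 *		uint64_t half;
 *		int rc;
 *
 *		rc = spdk_bdev_part_base_construct_ext(base_bdev_name,
 *						       my_base_hotremove_cb, &my_module,
 *						       &my_fn_table, &g_my_parts, NULL, NULL,
 *						       sizeof(struct spdk_bdev_part_channel),
 *						       NULL, NULL, &base);
 *		if (rc != 0) {
 *			return rc;
 *		}
 *
 *		part = calloc(1, sizeof(*part));
 *		if (part == NULL) {
 *			spdk_bdev_part_base_free(base);
 *			return -ENOMEM;
 *		}
 *
 *		half = spdk_bdev_get_num_blocks(spdk_bdev_part_base_get_bdev(base)) / 2;
 *
 *		return spdk_bdev_part_construct(part, base, "MyPart0",
 *						half, half, "My Part Disk");
 *	}
 */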