1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 /* 35 * Common code for partition-like virtual bdevs. 36 */ 37 38 #include "spdk/bdev.h" 39 #include "spdk/likely.h" 40 #include "spdk/log.h" 41 #include "spdk/string.h" 42 #include "spdk/thread.h" 43 44 #include "spdk/bdev_module.h" 45 46 struct spdk_bdev_part_base { 47 struct spdk_bdev *bdev; 48 struct spdk_bdev_desc *desc; 49 uint32_t ref; 50 uint32_t channel_size; 51 spdk_bdev_part_base_free_fn base_free_fn; 52 void *ctx; 53 bool claimed; 54 struct spdk_bdev_module *module; 55 struct spdk_bdev_fn_table *fn_table; 56 struct bdev_part_tailq *tailq; 57 spdk_io_channel_create_cb ch_create_cb; 58 spdk_io_channel_destroy_cb ch_destroy_cb; 59 struct spdk_thread *thread; 60 }; 61 62 struct spdk_bdev * 63 spdk_bdev_part_base_get_bdev(struct spdk_bdev_part_base *part_base) 64 { 65 return part_base->bdev; 66 } 67 68 struct spdk_bdev_desc * 69 spdk_bdev_part_base_get_desc(struct spdk_bdev_part_base *part_base) 70 { 71 return part_base->desc; 72 } 73 74 struct bdev_part_tailq * 75 spdk_bdev_part_base_get_tailq(struct spdk_bdev_part_base *part_base) 76 { 77 return part_base->tailq; 78 } 79 80 void * 81 spdk_bdev_part_base_get_ctx(struct spdk_bdev_part_base *part_base) 82 { 83 return part_base->ctx; 84 } 85 86 const char * 87 spdk_bdev_part_base_get_bdev_name(struct spdk_bdev_part_base *part_base) 88 { 89 return part_base->bdev->name; 90 } 91 92 static void 93 bdev_part_base_free(void *ctx) 94 { 95 struct spdk_bdev_desc *desc = ctx; 96 97 spdk_bdev_close(desc); 98 } 99 100 void 101 spdk_bdev_part_base_free(struct spdk_bdev_part_base *base) 102 { 103 if (base->desc) { 104 /* Close the underlying bdev on its same opened thread. */ 105 if (base->thread && base->thread != spdk_get_thread()) { 106 spdk_thread_send_msg(base->thread, bdev_part_base_free, base->desc); 107 } else { 108 spdk_bdev_close(base->desc); 109 } 110 } 111 112 if (base->base_free_fn != NULL) { 113 base->base_free_fn(base->ctx); 114 } 115 116 free(base); 117 } 118 119 static void 120 bdev_part_free_cb(void *io_device) 121 { 122 struct spdk_bdev_part *part = io_device; 123 struct spdk_bdev_part_base *base; 124 125 assert(part); 126 assert(part->internal.base); 127 128 base = part->internal.base; 129 130 TAILQ_REMOVE(base->tailq, part, tailq); 131 132 if (--base->ref == 0) { 133 spdk_bdev_module_release_bdev(base->bdev); 134 spdk_bdev_part_base_free(base); 135 } 136 137 spdk_bdev_destruct_done(&part->internal.bdev, 0); 138 free(part->internal.bdev.name); 139 free(part->internal.bdev.product_name); 140 free(part); 141 } 142 143 int 144 spdk_bdev_part_free(struct spdk_bdev_part *part) 145 { 146 spdk_io_device_unregister(part, bdev_part_free_cb); 147 148 /* Return 1 to indicate that this is an asynchronous operation that isn't complete 149 * until spdk_bdev_destruct_done is called */ 150 return 1; 151 } 152 153 void 154 spdk_bdev_part_base_hotremove(struct spdk_bdev_part_base *part_base, struct bdev_part_tailq *tailq) 155 { 156 struct spdk_bdev_part *part, *tmp; 157 158 TAILQ_FOREACH_SAFE(part, tailq, tailq, tmp) { 159 if (part->internal.base == part_base) { 160 spdk_bdev_unregister(&part->internal.bdev, NULL, NULL); 161 } 162 } 163 } 164 165 static bool 166 bdev_part_io_type_supported(void *_part, enum spdk_bdev_io_type io_type) 167 { 168 struct spdk_bdev_part *part = _part; 169 170 /* We can't decode/modify passthrough NVMe commands, so don't report 171 * that a partition supports these io types, even if the underlying 172 * bdev does. 173 */ 174 switch (io_type) { 175 case SPDK_BDEV_IO_TYPE_NVME_ADMIN: 176 case SPDK_BDEV_IO_TYPE_NVME_IO: 177 case SPDK_BDEV_IO_TYPE_NVME_IO_MD: 178 return false; 179 default: 180 break; 181 } 182 183 return part->internal.base->bdev->fn_table->io_type_supported(part->internal.base->bdev->ctxt, 184 io_type); 185 } 186 187 static struct spdk_io_channel * 188 bdev_part_get_io_channel(void *_part) 189 { 190 struct spdk_bdev_part *part = _part; 191 192 return spdk_get_io_channel(part); 193 } 194 195 struct spdk_bdev * 196 spdk_bdev_part_get_bdev(struct spdk_bdev_part *part) 197 { 198 return &part->internal.bdev; 199 } 200 201 struct spdk_bdev_part_base * 202 spdk_bdev_part_get_base(struct spdk_bdev_part *part) 203 { 204 return part->internal.base; 205 } 206 207 struct spdk_bdev * 208 spdk_bdev_part_get_base_bdev(struct spdk_bdev_part *part) 209 { 210 return part->internal.base->bdev; 211 } 212 213 uint64_t 214 spdk_bdev_part_get_offset_blocks(struct spdk_bdev_part *part) 215 { 216 return part->internal.offset_blocks; 217 } 218 219 static int 220 bdev_part_remap_dif(struct spdk_bdev_io *bdev_io, uint32_t offset, 221 uint32_t remapped_offset) 222 { 223 struct spdk_bdev *bdev = bdev_io->bdev; 224 struct spdk_dif_ctx dif_ctx; 225 struct spdk_dif_error err_blk = {}; 226 int rc; 227 228 if (spdk_likely(!(bdev->dif_check_flags & SPDK_DIF_FLAGS_REFTAG_CHECK))) { 229 return 0; 230 } 231 232 rc = spdk_dif_ctx_init(&dif_ctx, 233 bdev->blocklen, bdev->md_len, bdev->md_interleave, 234 bdev->dif_is_head_of_md, bdev->dif_type, bdev->dif_check_flags, 235 offset, 0, 0, 0, 0); 236 if (rc != 0) { 237 SPDK_ERRLOG("Initialization of DIF context failed\n"); 238 return rc; 239 } 240 241 spdk_dif_ctx_set_remapped_init_ref_tag(&dif_ctx, remapped_offset); 242 243 if (bdev->md_interleave) { 244 rc = spdk_dif_remap_ref_tag(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, 245 bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk); 246 } else { 247 struct iovec md_iov = { 248 .iov_base = bdev_io->u.bdev.md_buf, 249 .iov_len = bdev_io->u.bdev.num_blocks * bdev->md_len, 250 }; 251 252 rc = spdk_dix_remap_ref_tag(&md_iov, bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk); 253 } 254 255 if (rc != 0) { 256 SPDK_ERRLOG("Remapping reference tag failed. type=%d, offset=%" PRIu32 "\n", 257 err_blk.err_type, err_blk.err_offset); 258 } 259 260 return rc; 261 } 262 263 static void 264 bdev_part_complete_read_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 265 { 266 struct spdk_bdev_io *part_io = cb_arg; 267 uint32_t offset, remapped_offset; 268 int rc, status; 269 270 offset = bdev_io->u.bdev.offset_blocks; 271 remapped_offset = part_io->u.bdev.offset_blocks; 272 273 if (success) { 274 rc = bdev_part_remap_dif(bdev_io, offset, remapped_offset); 275 if (rc != 0) { 276 success = false; 277 } 278 } 279 280 status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED; 281 282 spdk_bdev_io_complete(part_io, status); 283 spdk_bdev_free_io(bdev_io); 284 } 285 286 static void 287 bdev_part_complete_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 288 { 289 struct spdk_bdev_io *part_io = cb_arg; 290 int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED; 291 292 spdk_bdev_io_complete(part_io, status); 293 spdk_bdev_free_io(bdev_io); 294 } 295 296 static void 297 bdev_part_complete_zcopy_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 298 { 299 struct spdk_bdev_io *part_io = cb_arg; 300 int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED; 301 302 spdk_bdev_io_set_buf(part_io, bdev_io->u.bdev.iovs[0].iov_base, bdev_io->u.bdev.iovs[0].iov_len); 303 spdk_bdev_io_complete(part_io, status); 304 spdk_bdev_free_io(bdev_io); 305 } 306 307 int 308 spdk_bdev_part_submit_request(struct spdk_bdev_part_channel *ch, struct spdk_bdev_io *bdev_io) 309 { 310 struct spdk_bdev_part *part = ch->part; 311 struct spdk_io_channel *base_ch = ch->base_ch; 312 struct spdk_bdev_desc *base_desc = part->internal.base->desc; 313 uint64_t offset, remapped_offset; 314 int rc = 0; 315 316 offset = bdev_io->u.bdev.offset_blocks; 317 remapped_offset = offset + part->internal.offset_blocks; 318 319 /* Modify the I/O to adjust for the offset within the base bdev. */ 320 switch (bdev_io->type) { 321 case SPDK_BDEV_IO_TYPE_READ: 322 if (bdev_io->u.bdev.md_buf == NULL) { 323 rc = spdk_bdev_readv_blocks(base_desc, base_ch, bdev_io->u.bdev.iovs, 324 bdev_io->u.bdev.iovcnt, remapped_offset, 325 bdev_io->u.bdev.num_blocks, 326 bdev_part_complete_read_io, bdev_io); 327 } else { 328 rc = spdk_bdev_readv_blocks_with_md(base_desc, base_ch, 329 bdev_io->u.bdev.iovs, 330 bdev_io->u.bdev.iovcnt, 331 bdev_io->u.bdev.md_buf, remapped_offset, 332 bdev_io->u.bdev.num_blocks, 333 bdev_part_complete_read_io, bdev_io); 334 } 335 break; 336 case SPDK_BDEV_IO_TYPE_WRITE: 337 rc = bdev_part_remap_dif(bdev_io, offset, remapped_offset); 338 if (rc != 0) { 339 return SPDK_BDEV_IO_STATUS_FAILED; 340 } 341 342 if (bdev_io->u.bdev.md_buf == NULL) { 343 rc = spdk_bdev_writev_blocks(base_desc, base_ch, bdev_io->u.bdev.iovs, 344 bdev_io->u.bdev.iovcnt, remapped_offset, 345 bdev_io->u.bdev.num_blocks, 346 bdev_part_complete_io, bdev_io); 347 } else { 348 rc = spdk_bdev_writev_blocks_with_md(base_desc, base_ch, 349 bdev_io->u.bdev.iovs, 350 bdev_io->u.bdev.iovcnt, 351 bdev_io->u.bdev.md_buf, remapped_offset, 352 bdev_io->u.bdev.num_blocks, 353 bdev_part_complete_io, bdev_io); 354 } 355 break; 356 case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: 357 rc = spdk_bdev_write_zeroes_blocks(base_desc, base_ch, remapped_offset, 358 bdev_io->u.bdev.num_blocks, bdev_part_complete_io, 359 bdev_io); 360 break; 361 case SPDK_BDEV_IO_TYPE_UNMAP: 362 rc = spdk_bdev_unmap_blocks(base_desc, base_ch, remapped_offset, 363 bdev_io->u.bdev.num_blocks, bdev_part_complete_io, 364 bdev_io); 365 break; 366 case SPDK_BDEV_IO_TYPE_FLUSH: 367 rc = spdk_bdev_flush_blocks(base_desc, base_ch, remapped_offset, 368 bdev_io->u.bdev.num_blocks, bdev_part_complete_io, 369 bdev_io); 370 break; 371 case SPDK_BDEV_IO_TYPE_RESET: 372 rc = spdk_bdev_reset(base_desc, base_ch, 373 bdev_part_complete_io, bdev_io); 374 break; 375 case SPDK_BDEV_IO_TYPE_ZCOPY: 376 rc = spdk_bdev_zcopy_start(base_desc, base_ch, remapped_offset, 377 bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.zcopy.populate, 378 bdev_part_complete_zcopy_io, bdev_io); 379 break; 380 default: 381 SPDK_ERRLOG("unknown I/O type %d\n", bdev_io->type); 382 return SPDK_BDEV_IO_STATUS_FAILED; 383 } 384 385 return rc; 386 } 387 388 static int 389 bdev_part_channel_create_cb(void *io_device, void *ctx_buf) 390 { 391 struct spdk_bdev_part *part = (struct spdk_bdev_part *)io_device; 392 struct spdk_bdev_part_channel *ch = ctx_buf; 393 394 ch->part = part; 395 ch->base_ch = spdk_bdev_get_io_channel(part->internal.base->desc); 396 if (ch->base_ch == NULL) { 397 return -1; 398 } 399 400 if (part->internal.base->ch_create_cb) { 401 return part->internal.base->ch_create_cb(io_device, ctx_buf); 402 } else { 403 return 0; 404 } 405 } 406 407 static void 408 bdev_part_channel_destroy_cb(void *io_device, void *ctx_buf) 409 { 410 struct spdk_bdev_part *part = (struct spdk_bdev_part *)io_device; 411 struct spdk_bdev_part_channel *ch = ctx_buf; 412 413 if (part->internal.base->ch_destroy_cb) { 414 part->internal.base->ch_destroy_cb(io_device, ctx_buf); 415 } 416 spdk_put_io_channel(ch->base_ch); 417 } 418 419 struct spdk_bdev_part_base * 420 spdk_bdev_part_base_construct(struct spdk_bdev *bdev, 421 spdk_bdev_remove_cb_t remove_cb, struct spdk_bdev_module *module, 422 struct spdk_bdev_fn_table *fn_table, struct bdev_part_tailq *tailq, 423 spdk_bdev_part_base_free_fn free_fn, void *ctx, 424 uint32_t channel_size, spdk_io_channel_create_cb ch_create_cb, 425 spdk_io_channel_destroy_cb ch_destroy_cb) 426 { 427 int rc; 428 struct spdk_bdev_part_base *base; 429 430 base = calloc(1, sizeof(*base)); 431 if (!base) { 432 SPDK_ERRLOG("Memory allocation failure\n"); 433 return NULL; 434 } 435 fn_table->get_io_channel = bdev_part_get_io_channel; 436 fn_table->io_type_supported = bdev_part_io_type_supported; 437 438 base->bdev = bdev; 439 base->desc = NULL; 440 base->ref = 0; 441 base->module = module; 442 base->fn_table = fn_table; 443 base->tailq = tailq; 444 base->base_free_fn = free_fn; 445 base->ctx = ctx; 446 base->claimed = false; 447 base->channel_size = channel_size; 448 base->ch_create_cb = ch_create_cb; 449 base->ch_destroy_cb = ch_destroy_cb; 450 451 rc = spdk_bdev_open(bdev, false, remove_cb, base, &base->desc); 452 if (rc) { 453 spdk_bdev_part_base_free(base); 454 SPDK_ERRLOG("could not open bdev %s: %s\n", spdk_bdev_get_name(bdev), 455 spdk_strerror(-rc)); 456 return NULL; 457 } 458 459 /* Save the thread where the base device is opened */ 460 base->thread = spdk_get_thread(); 461 462 return base; 463 } 464 465 int 466 spdk_bdev_part_construct(struct spdk_bdev_part *part, struct spdk_bdev_part_base *base, 467 char *name, uint64_t offset_blocks, uint64_t num_blocks, 468 char *product_name) 469 { 470 part->internal.bdev.blocklen = base->bdev->blocklen; 471 part->internal.bdev.blockcnt = num_blocks; 472 part->internal.offset_blocks = offset_blocks; 473 474 part->internal.bdev.write_cache = base->bdev->write_cache; 475 part->internal.bdev.required_alignment = base->bdev->required_alignment; 476 part->internal.bdev.ctxt = part; 477 part->internal.bdev.module = base->module; 478 part->internal.bdev.fn_table = base->fn_table; 479 480 part->internal.bdev.md_interleave = base->bdev->md_interleave; 481 part->internal.bdev.md_len = base->bdev->md_len; 482 part->internal.bdev.dif_type = base->bdev->dif_type; 483 part->internal.bdev.dif_is_head_of_md = base->bdev->dif_is_head_of_md; 484 part->internal.bdev.dif_check_flags = base->bdev->dif_check_flags; 485 486 part->internal.bdev.name = strdup(name); 487 part->internal.bdev.product_name = strdup(product_name); 488 489 if (part->internal.bdev.name == NULL) { 490 SPDK_ERRLOG("Failed to allocate name for new part of bdev %s\n", spdk_bdev_get_name(base->bdev)); 491 return -1; 492 } else if (part->internal.bdev.product_name == NULL) { 493 free(part->internal.bdev.name); 494 SPDK_ERRLOG("Failed to allocate product name for new part of bdev %s\n", 495 spdk_bdev_get_name(base->bdev)); 496 return -1; 497 } 498 499 base->ref++; 500 part->internal.base = base; 501 502 if (!base->claimed) { 503 int rc; 504 505 rc = spdk_bdev_module_claim_bdev(base->bdev, base->desc, base->module); 506 if (rc) { 507 SPDK_ERRLOG("could not claim bdev %s\n", spdk_bdev_get_name(base->bdev)); 508 free(part->internal.bdev.name); 509 free(part->internal.bdev.product_name); 510 return -1; 511 } 512 base->claimed = true; 513 } 514 515 spdk_io_device_register(part, bdev_part_channel_create_cb, 516 bdev_part_channel_destroy_cb, 517 base->channel_size, 518 name); 519 520 spdk_bdev_register(&part->internal.bdev); 521 TAILQ_INSERT_TAIL(base->tailq, part, tailq); 522 523 return 0; 524 } 525