1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 /* 35 * Common code for partition-like virtual bdevs. 36 */ 37 38 #include "spdk/bdev.h" 39 #include "spdk/likely.h" 40 #include "spdk/log.h" 41 #include "spdk/string.h" 42 43 #include "spdk/bdev_module.h" 44 45 struct spdk_bdev_part_base { 46 struct spdk_bdev *bdev; 47 struct spdk_bdev_desc *desc; 48 uint32_t ref; 49 uint32_t channel_size; 50 spdk_bdev_part_base_free_fn base_free_fn; 51 void *ctx; 52 bool claimed; 53 struct spdk_bdev_module *module; 54 struct spdk_bdev_fn_table *fn_table; 55 struct bdev_part_tailq *tailq; 56 spdk_io_channel_create_cb ch_create_cb; 57 spdk_io_channel_destroy_cb ch_destroy_cb; 58 }; 59 60 struct spdk_bdev * 61 spdk_bdev_part_base_get_bdev(struct spdk_bdev_part_base *part_base) 62 { 63 return part_base->bdev; 64 } 65 66 struct spdk_bdev_desc * 67 spdk_bdev_part_base_get_desc(struct spdk_bdev_part_base *part_base) 68 { 69 return part_base->desc; 70 } 71 72 struct bdev_part_tailq * 73 spdk_bdev_part_base_get_tailq(struct spdk_bdev_part_base *part_base) 74 { 75 return part_base->tailq; 76 } 77 78 void * 79 spdk_bdev_part_base_get_ctx(struct spdk_bdev_part_base *part_base) 80 { 81 return part_base->ctx; 82 } 83 84 const char * 85 spdk_bdev_part_base_get_bdev_name(struct spdk_bdev_part_base *part_base) 86 { 87 return part_base->bdev->name; 88 } 89 90 void 91 spdk_bdev_part_base_free(struct spdk_bdev_part_base *base) 92 { 93 if (base->desc) { 94 spdk_bdev_close(base->desc); 95 base->desc = NULL; 96 } 97 98 if (base->base_free_fn != NULL) { 99 base->base_free_fn(base->ctx); 100 } 101 102 free(base); 103 } 104 105 static void 106 spdk_bdev_part_free_cb(void *io_device) 107 { 108 struct spdk_bdev_part *part = io_device; 109 struct spdk_bdev_part_base *base; 110 111 assert(part); 112 assert(part->internal.base); 113 114 base = part->internal.base; 115 116 TAILQ_REMOVE(base->tailq, part, tailq); 117 118 if (--base->ref == 0) { 119 spdk_bdev_module_release_bdev(base->bdev); 120 spdk_bdev_part_base_free(base); 121 } 122 123 spdk_bdev_destruct_done(&part->internal.bdev, 0); 124 free(part->internal.bdev.name); 125 free(part->internal.bdev.product_name); 126 free(part); 127 } 128 129 int 130 spdk_bdev_part_free(struct spdk_bdev_part *part) 131 { 132 spdk_io_device_unregister(part, spdk_bdev_part_free_cb); 133 134 /* Return 1 to indicate that this is an asynchronous operation that isn't complete 135 * until spdk_bdev_destruct_done is called */ 136 return 1; 137 } 138 139 void 140 spdk_bdev_part_base_hotremove(struct spdk_bdev_part_base *part_base, struct bdev_part_tailq *tailq) 141 { 142 struct spdk_bdev_part *part, *tmp; 143 144 TAILQ_FOREACH_SAFE(part, tailq, tailq, tmp) { 145 if (part->internal.base == part_base) { 146 spdk_bdev_unregister(&part->internal.bdev, NULL, NULL); 147 } 148 } 149 } 150 151 static bool 152 spdk_bdev_part_io_type_supported(void *_part, enum spdk_bdev_io_type io_type) 153 { 154 struct spdk_bdev_part *part = _part; 155 156 /* We can't decode/modify passthrough NVMe commands, so don't report 157 * that a partition supports these io types, even if the underlying 158 * bdev does. 159 */ 160 switch (io_type) { 161 case SPDK_BDEV_IO_TYPE_NVME_ADMIN: 162 case SPDK_BDEV_IO_TYPE_NVME_IO: 163 case SPDK_BDEV_IO_TYPE_NVME_IO_MD: 164 return false; 165 default: 166 break; 167 } 168 169 return part->internal.base->bdev->fn_table->io_type_supported(part->internal.base->bdev->ctxt, 170 io_type); 171 } 172 173 static struct spdk_io_channel * 174 spdk_bdev_part_get_io_channel(void *_part) 175 { 176 struct spdk_bdev_part *part = _part; 177 178 return spdk_get_io_channel(part); 179 } 180 181 struct spdk_bdev * 182 spdk_bdev_part_get_bdev(struct spdk_bdev_part *part) 183 { 184 return &part->internal.bdev; 185 } 186 187 struct spdk_bdev_part_base * 188 spdk_bdev_part_get_base(struct spdk_bdev_part *part) 189 { 190 return part->internal.base; 191 } 192 193 struct spdk_bdev * 194 spdk_bdev_part_get_base_bdev(struct spdk_bdev_part *part) 195 { 196 return part->internal.base->bdev; 197 } 198 199 uint64_t 200 spdk_bdev_part_get_offset_blocks(struct spdk_bdev_part *part) 201 { 202 return part->internal.offset_blocks; 203 } 204 205 static int 206 spdk_bdev_part_remap_dif(struct spdk_bdev_io *bdev_io, uint32_t offset, 207 uint32_t remapped_offset) 208 { 209 struct spdk_bdev *bdev = bdev_io->bdev; 210 struct spdk_dif_ctx dif_ctx; 211 struct spdk_dif_error err_blk = {}; 212 int rc; 213 214 if (spdk_likely(!(bdev->dif_check_flags & SPDK_DIF_FLAGS_REFTAG_CHECK))) { 215 return 0; 216 } 217 218 rc = spdk_dif_ctx_init(&dif_ctx, 219 bdev->blocklen, bdev->md_len, bdev->md_interleave, 220 bdev->dif_is_head_of_md, bdev->dif_type, bdev->dif_check_flags, 221 offset, 0, 0, 0, 0); 222 if (rc != 0) { 223 SPDK_ERRLOG("Initialization of DIF context failed\n"); 224 return rc; 225 } 226 227 spdk_dif_ctx_set_remapped_init_ref_tag(&dif_ctx, remapped_offset); 228 229 if (bdev->md_interleave) { 230 rc = spdk_dif_remap_ref_tag(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, 231 bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk); 232 } else { 233 struct iovec md_iov = { 234 .iov_base = bdev_io->u.bdev.md_buf, 235 .iov_len = bdev_io->u.bdev.num_blocks * bdev->md_len, 236 }; 237 238 rc = spdk_dix_remap_ref_tag(&md_iov, bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk); 239 } 240 241 if (rc != 0) { 242 SPDK_ERRLOG("Remapping reference tag failed. type=%d, offset=%" PRIu32 "\n", 243 err_blk.err_type, err_blk.err_offset); 244 } 245 246 return rc; 247 } 248 249 static void 250 spdk_bdev_part_complete_read_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 251 { 252 struct spdk_bdev_io *part_io = cb_arg; 253 uint32_t offset, remapped_offset; 254 int rc, status; 255 256 offset = bdev_io->u.bdev.offset_blocks; 257 remapped_offset = part_io->u.bdev.offset_blocks; 258 259 if (success) { 260 rc = spdk_bdev_part_remap_dif(bdev_io, offset, remapped_offset); 261 if (rc != 0) { 262 success = false; 263 } 264 } 265 266 status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED; 267 268 spdk_bdev_io_complete(part_io, status); 269 spdk_bdev_free_io(bdev_io); 270 } 271 272 static void 273 spdk_bdev_part_complete_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 274 { 275 struct spdk_bdev_io *part_io = cb_arg; 276 int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED; 277 278 spdk_bdev_io_complete(part_io, status); 279 spdk_bdev_free_io(bdev_io); 280 } 281 282 static void 283 spdk_bdev_part_complete_zcopy_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 284 { 285 struct spdk_bdev_io *part_io = cb_arg; 286 int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED; 287 288 spdk_bdev_io_set_buf(part_io, bdev_io->u.bdev.iovs[0].iov_base, bdev_io->u.bdev.iovs[0].iov_len); 289 spdk_bdev_io_complete(part_io, status); 290 spdk_bdev_free_io(bdev_io); 291 } 292 293 int 294 spdk_bdev_part_submit_request(struct spdk_bdev_part_channel *ch, struct spdk_bdev_io *bdev_io) 295 { 296 struct spdk_bdev_part *part = ch->part; 297 struct spdk_io_channel *base_ch = ch->base_ch; 298 struct spdk_bdev_desc *base_desc = part->internal.base->desc; 299 uint64_t offset, remapped_offset; 300 int rc = 0; 301 302 offset = bdev_io->u.bdev.offset_blocks; 303 remapped_offset = offset + part->internal.offset_blocks; 304 305 /* Modify the I/O to adjust for the offset within the base bdev. */ 306 switch (bdev_io->type) { 307 case SPDK_BDEV_IO_TYPE_READ: 308 if (bdev_io->u.bdev.md_buf == NULL) { 309 rc = spdk_bdev_readv_blocks(base_desc, base_ch, bdev_io->u.bdev.iovs, 310 bdev_io->u.bdev.iovcnt, remapped_offset, 311 bdev_io->u.bdev.num_blocks, 312 spdk_bdev_part_complete_read_io, bdev_io); 313 } else { 314 rc = spdk_bdev_readv_blocks_with_md(base_desc, base_ch, 315 bdev_io->u.bdev.iovs, 316 bdev_io->u.bdev.iovcnt, 317 bdev_io->u.bdev.md_buf, remapped_offset, 318 bdev_io->u.bdev.num_blocks, 319 spdk_bdev_part_complete_read_io, bdev_io); 320 } 321 break; 322 case SPDK_BDEV_IO_TYPE_WRITE: 323 rc = spdk_bdev_part_remap_dif(bdev_io, offset, remapped_offset); 324 if (rc != 0) { 325 return SPDK_BDEV_IO_STATUS_FAILED; 326 } 327 328 if (bdev_io->u.bdev.md_buf == NULL) { 329 rc = spdk_bdev_writev_blocks(base_desc, base_ch, bdev_io->u.bdev.iovs, 330 bdev_io->u.bdev.iovcnt, remapped_offset, 331 bdev_io->u.bdev.num_blocks, 332 spdk_bdev_part_complete_io, bdev_io); 333 } else { 334 rc = spdk_bdev_writev_blocks_with_md(base_desc, base_ch, 335 bdev_io->u.bdev.iovs, 336 bdev_io->u.bdev.iovcnt, 337 bdev_io->u.bdev.md_buf, remapped_offset, 338 bdev_io->u.bdev.num_blocks, 339 spdk_bdev_part_complete_io, bdev_io); 340 } 341 break; 342 case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: 343 rc = spdk_bdev_write_zeroes_blocks(base_desc, base_ch, remapped_offset, 344 bdev_io->u.bdev.num_blocks, spdk_bdev_part_complete_io, 345 bdev_io); 346 break; 347 case SPDK_BDEV_IO_TYPE_UNMAP: 348 rc = spdk_bdev_unmap_blocks(base_desc, base_ch, remapped_offset, 349 bdev_io->u.bdev.num_blocks, spdk_bdev_part_complete_io, 350 bdev_io); 351 break; 352 case SPDK_BDEV_IO_TYPE_FLUSH: 353 rc = spdk_bdev_flush_blocks(base_desc, base_ch, remapped_offset, 354 bdev_io->u.bdev.num_blocks, spdk_bdev_part_complete_io, 355 bdev_io); 356 break; 357 case SPDK_BDEV_IO_TYPE_RESET: 358 rc = spdk_bdev_reset(base_desc, base_ch, 359 spdk_bdev_part_complete_io, bdev_io); 360 break; 361 case SPDK_BDEV_IO_TYPE_ZCOPY: 362 rc = spdk_bdev_zcopy_start(base_desc, base_ch, remapped_offset, 363 bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.zcopy.populate, 364 spdk_bdev_part_complete_zcopy_io, bdev_io); 365 break; 366 default: 367 SPDK_ERRLOG("unknown I/O type %d\n", bdev_io->type); 368 return SPDK_BDEV_IO_STATUS_FAILED; 369 } 370 371 return rc; 372 } 373 374 static int 375 spdk_bdev_part_channel_create_cb(void *io_device, void *ctx_buf) 376 { 377 struct spdk_bdev_part *part = (struct spdk_bdev_part *)io_device; 378 struct spdk_bdev_part_channel *ch = ctx_buf; 379 380 ch->part = part; 381 ch->base_ch = spdk_bdev_get_io_channel(part->internal.base->desc); 382 if (ch->base_ch == NULL) { 383 return -1; 384 } 385 386 if (part->internal.base->ch_create_cb) { 387 return part->internal.base->ch_create_cb(io_device, ctx_buf); 388 } else { 389 return 0; 390 } 391 } 392 393 static void 394 spdk_bdev_part_channel_destroy_cb(void *io_device, void *ctx_buf) 395 { 396 struct spdk_bdev_part *part = (struct spdk_bdev_part *)io_device; 397 struct spdk_bdev_part_channel *ch = ctx_buf; 398 399 if (part->internal.base->ch_destroy_cb) { 400 part->internal.base->ch_destroy_cb(io_device, ctx_buf); 401 } 402 spdk_put_io_channel(ch->base_ch); 403 } 404 405 struct spdk_bdev_part_base * 406 spdk_bdev_part_base_construct(struct spdk_bdev *bdev, 407 spdk_bdev_remove_cb_t remove_cb, struct spdk_bdev_module *module, 408 struct spdk_bdev_fn_table *fn_table, struct bdev_part_tailq *tailq, 409 spdk_bdev_part_base_free_fn free_fn, void *ctx, 410 uint32_t channel_size, spdk_io_channel_create_cb ch_create_cb, 411 spdk_io_channel_destroy_cb ch_destroy_cb) 412 { 413 int rc; 414 struct spdk_bdev_part_base *base; 415 416 base = calloc(1, sizeof(*base)); 417 if (!base) { 418 SPDK_ERRLOG("Memory allocation failure\n"); 419 return NULL; 420 } 421 fn_table->get_io_channel = spdk_bdev_part_get_io_channel; 422 fn_table->io_type_supported = spdk_bdev_part_io_type_supported; 423 424 base->bdev = bdev; 425 base->desc = NULL; 426 base->ref = 0; 427 base->module = module; 428 base->fn_table = fn_table; 429 base->tailq = tailq; 430 base->base_free_fn = free_fn; 431 base->ctx = ctx; 432 base->claimed = false; 433 base->channel_size = channel_size; 434 base->ch_create_cb = ch_create_cb; 435 base->ch_destroy_cb = ch_destroy_cb; 436 437 rc = spdk_bdev_open(bdev, false, remove_cb, base, &base->desc); 438 if (rc) { 439 spdk_bdev_part_base_free(base); 440 SPDK_ERRLOG("could not open bdev %s: %s\n", spdk_bdev_get_name(bdev), 441 spdk_strerror(-rc)); 442 return NULL; 443 } 444 445 return base; 446 } 447 448 int 449 spdk_bdev_part_construct(struct spdk_bdev_part *part, struct spdk_bdev_part_base *base, 450 char *name, uint64_t offset_blocks, uint64_t num_blocks, 451 char *product_name) 452 { 453 part->internal.bdev.blocklen = base->bdev->blocklen; 454 part->internal.bdev.blockcnt = num_blocks; 455 part->internal.offset_blocks = offset_blocks; 456 457 part->internal.bdev.write_cache = base->bdev->write_cache; 458 part->internal.bdev.required_alignment = base->bdev->required_alignment; 459 part->internal.bdev.ctxt = part; 460 part->internal.bdev.module = base->module; 461 part->internal.bdev.fn_table = base->fn_table; 462 463 part->internal.bdev.md_interleave = base->bdev->md_interleave; 464 part->internal.bdev.md_len = base->bdev->md_len; 465 part->internal.bdev.dif_type = base->bdev->dif_type; 466 part->internal.bdev.dif_is_head_of_md = base->bdev->dif_is_head_of_md; 467 part->internal.bdev.dif_check_flags = base->bdev->dif_check_flags; 468 469 part->internal.bdev.name = strdup(name); 470 part->internal.bdev.product_name = strdup(product_name); 471 472 if (part->internal.bdev.name == NULL) { 473 SPDK_ERRLOG("Failed to allocate name for new part of bdev %s\n", spdk_bdev_get_name(base->bdev)); 474 return -1; 475 } else if (part->internal.bdev.product_name == NULL) { 476 free(part->internal.bdev.name); 477 SPDK_ERRLOG("Failed to allocate product name for new part of bdev %s\n", 478 spdk_bdev_get_name(base->bdev)); 479 return -1; 480 } 481 482 base->ref++; 483 part->internal.base = base; 484 485 if (!base->claimed) { 486 int rc; 487 488 rc = spdk_bdev_module_claim_bdev(base->bdev, base->desc, base->module); 489 if (rc) { 490 SPDK_ERRLOG("could not claim bdev %s\n", spdk_bdev_get_name(base->bdev)); 491 free(part->internal.bdev.name); 492 free(part->internal.bdev.product_name); 493 return -1; 494 } 495 base->claimed = true; 496 } 497 498 spdk_io_device_register(part, spdk_bdev_part_channel_create_cb, 499 spdk_bdev_part_channel_destroy_cb, 500 base->channel_size, 501 name); 502 503 spdk_bdev_register(&part->internal.bdev); 504 TAILQ_INSERT_TAIL(base->tailq, part, tailq); 505 506 return 0; 507 } 508