/*-
 * BSD LICENSE
 *
 * Copyright (c) Intel Corporation.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in
 * the documentation and/or other materials provided with the
 * distribution.
 * * Neither the name of Intel Corporation nor the names of its
 * contributors may be used to endorse or promote products derived
 * from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * vbdev_zone_block: a virtual bdev that emulates a zoned block device
 * (zone states, write pointers, zone management commands) on top of a
 * regular, non-zoned base bdev.
 */

#include "spdk/stdinc.h"

#include "vbdev_zone_block.h"

#include "spdk/config.h"
#include "spdk/nvme.h"
#include "spdk/bdev_zone.h"

#include "spdk/log.h"

static int zone_block_init(void);
static int zone_block_get_ctx_size(void);
static void zone_block_finish(void);
static int zone_block_config_json(struct spdk_json_write_ctx *w);
static void zone_block_examine(struct spdk_bdev *bdev);

static struct spdk_bdev_module bdev_zoned_if = {
	.name = "bdev_zoned_block",
	.module_init = zone_block_init,
	.module_fini = zone_block_finish,
	.config_json = zone_block_config_json,
	.examine_config = zone_block_examine,
	.get_ctx_size = zone_block_get_ctx_size,
};

SPDK_BDEV_MODULE_REGISTER(bdev_zoned_block, &bdev_zoned_if)

/* List of block vbdev names and their base bdevs via configuration file.
 * Used so we can parse the conf once at init and use this list in examine().
 */
struct bdev_zone_block_config {
	char *vbdev_name;
	char *bdev_name;
	uint64_t zone_capacity;
	uint64_t optimal_open_zones;
	TAILQ_ENTRY(bdev_zone_block_config) link;
};
static TAILQ_HEAD(, bdev_zone_block_config) g_bdev_configs = TAILQ_HEAD_INITIALIZER(g_bdev_configs);

/* Per-zone emulated state (zone_id, write pointer, state, ...), protected
 * by a per-zone spinlock since IO may arrive on multiple threads.
 */
struct block_zone {
	struct spdk_bdev_zone_info zone_info;
	pthread_spinlock_t lock;
};

/* List of block vbdevs and associated info for each.
 */
struct bdev_zone_block {
	struct spdk_bdev bdev; /* the block zoned bdev */
	struct spdk_bdev_desc *base_desc; /* its descriptor we get from open */
	struct block_zone *zones; /* array of zones */
	uint64_t num_zones; /* number of zones */
	uint64_t zone_capacity; /* zone capacity */
	uint64_t zone_shift; /* log2 of zone_size */
	TAILQ_ENTRY(bdev_zone_block) link;
	struct spdk_thread *thread; /* thread where base device is opened */
};
static TAILQ_HEAD(, bdev_zone_block) g_bdev_nodes = TAILQ_HEAD_INITIALIZER(g_bdev_nodes);

struct zone_block_io_channel {
	struct spdk_io_channel *base_ch; /* IO channel of base device */
};

/* Per-IO context sized via get_ctx_size(). */
struct zone_block_io {
	/* vbdev to which IO was issued */
	struct bdev_zone_block *bdev_zone_block;
};

/* Module init hook: nothing to set up at module load time. */
static int
zone_block_init(void)
{
	return 0;
}

/* Remove one config entry from the global list and free its strings. */
static void
zone_block_remove_config(struct bdev_zone_block_config *name)
{
	TAILQ_REMOVE(&g_bdev_configs, name, link);
	free(name->bdev_name);
	free(name->vbdev_name);
	free(name);
}

/* Module teardown: release all remaining config entries. */
static void
zone_block_finish(void)
{
	struct bdev_zone_block_config *name;

	while ((name = TAILQ_FIRST(&g_bdev_configs))) {
		zone_block_remove_config(name);
	}
}

/* Size of the per-IO context the bdev layer allocates for this module. */
static int
zone_block_get_ctx_size(void)
{
	return sizeof(struct zone_block_io);
}

/* Emit a bdev_zone_block_create RPC call for every live vbdev so the
 * current configuration can be saved and replayed.
 */
static int
zone_block_config_json(struct spdk_json_write_ctx *w)
{
	struct bdev_zone_block *bdev_node;
	struct spdk_bdev *base_bdev = NULL;

	TAILQ_FOREACH(bdev_node, &g_bdev_nodes, link) {
		base_bdev = spdk_bdev_desc_get_bdev(bdev_node->base_desc);
		spdk_json_write_object_begin(w);
		spdk_json_write_named_string(w, "method", "bdev_zone_block_create");
		spdk_json_write_named_object_begin(w, "params");
		spdk_json_write_named_string(w, "base_bdev", spdk_bdev_get_name(base_bdev));
		spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&bdev_node->bdev));
		spdk_json_write_named_uint64(w, "zone_capacity", bdev_node->zone_capacity);
		spdk_json_write_named_uint64(w, "optimal_open_zones", bdev_node->bdev.optimal_open_zones);
		spdk_json_write_object_end(w);
		spdk_json_write_object_end(w);
	}

	return 0;
}

/* Callback for unregistering the IO device. */
static void
_device_unregister_cb(void *io_device)
{
	struct bdev_zone_block *bdev_node = io_device;
	uint64_t i;

	/* Done with this bdev_node. */
	free(bdev_node->bdev.name);
	for (i = 0; i < bdev_node->num_zones; i++) {
		pthread_spin_destroy(&bdev_node->zones[i].lock);
	}
	free(bdev_node->zones);
	free(bdev_node);
}

/* Message handler: close the base bdev descriptor on the thread that
 * originally opened it (spdk_bdev_close must run there).
 */
static void
_zone_block_destruct(void *ctx)
{
	struct spdk_bdev_desc *desc = ctx;

	spdk_bdev_close(desc);
}

/* bdev fn_table destruct: release the claim on the base bdev, close the
 * descriptor (possibly via message to its opening thread) and unregister
 * the io_device; final frees happen in _device_unregister_cb.
 */
static int
zone_block_destruct(void *ctx)
{
	struct bdev_zone_block *bdev_node = (struct bdev_zone_block *)ctx;

	TAILQ_REMOVE(&g_bdev_nodes, bdev_node, link);

	/* Unclaim the underlying bdev. */
	spdk_bdev_module_release_bdev(spdk_bdev_desc_get_bdev(bdev_node->base_desc));

	/* Close the underlying bdev on its same opened thread. */
	if (bdev_node->thread && bdev_node->thread != spdk_get_thread()) {
		spdk_thread_send_msg(bdev_node->thread, _zone_block_destruct, bdev_node->base_desc);
	} else {
		spdk_bdev_close(bdev_node->base_desc);
	}

	/* Unregister the io_device.
	 */
	spdk_io_device_unregister(bdev_node, _device_unregister_cb);

	return 0;
}

/* Map an LBA to the zone containing it; zone_size is a power of two so a
 * shift suffices. Returns NULL if the LBA is beyond the last zone.
 */
static struct block_zone *
zone_block_get_zone_containing_lba(struct bdev_zone_block *bdev_node, uint64_t lba)
{
	size_t index = lba >> bdev_node->zone_shift;

	if (index >= bdev_node->num_zones) {
		return NULL;
	}

	return &bdev_node->zones[index];
}

/* Like the above, but only accepts an LBA that is exactly a zone's start
 * (zone_id); returns NULL otherwise.
 */
static struct block_zone *
zone_block_get_zone_by_slba(struct bdev_zone_block *bdev_node, uint64_t start_lba)
{
	struct block_zone *zone = zone_block_get_zone_containing_lba(bdev_node, start_lba);

	if (zone && zone->zone_info.zone_id == start_lba) {
		return zone;
	} else {
		return NULL;
	}
}

/* Handle GET_ZONE_INFO: copy spdk_bdev_zone_info for num_zones zones
 * starting at zone_id into the caller's buffer. Returns -EINVAL if any
 * requested zone_id is not a valid zone start.
 */
static int
zone_block_get_zone_info(struct bdev_zone_block *bdev_node, struct spdk_bdev_io *bdev_io)
{
	struct block_zone *zone;
	struct spdk_bdev_zone_info *zone_info = bdev_io->u.zone_mgmt.buf;
	uint64_t zone_id = bdev_io->u.zone_mgmt.zone_id;
	size_t i;

	/* User can request info for more zones than exist, need to check both internal and user
	 * boundaries
	 */
	for (i = 0; i < bdev_io->u.zone_mgmt.num_zones; i++, zone_id += bdev_node->bdev.zone_size) {
		zone = zone_block_get_zone_by_slba(bdev_node, zone_id);
		if (!zone) {
			return -EINVAL;
		}
		memcpy(&zone_info[i], &zone->zone_info, sizeof(*zone_info));
	}

	spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
	return 0;
}

/* ZONE_OPEN: transition EMPTY/OPEN/CLOSED zones to OPEN; any other state
 * (e.g. FULL) is rejected with -EINVAL.
 */
static int
zone_block_open_zone(struct block_zone *zone, struct spdk_bdev_io *bdev_io)
{
	pthread_spin_lock(&zone->lock);

	switch (zone->zone_info.state) {
	case SPDK_BDEV_ZONE_STATE_EMPTY:
	case SPDK_BDEV_ZONE_STATE_OPEN:
	case SPDK_BDEV_ZONE_STATE_CLOSED:
		zone->zone_info.state = SPDK_BDEV_ZONE_STATE_OPEN;
		pthread_spin_unlock(&zone->lock);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
		return 0;
	default:
		pthread_spin_unlock(&zone->lock);
		return -EINVAL;
	}
}

/* Completion for the unmap issued by zone reset: propagate status to the
 * original zone-management IO.
 */
static void
_zone_block_complete_unmap(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_bdev_io *orig_io = cb_arg;
	int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;

	/* Complete the original IO and then free the one that we created here
	 * as a result of issuing an IO via submit_request.
	 */
	spdk_bdev_io_complete(orig_io, status);
	spdk_bdev_free_io(bdev_io);
}

/* ZONE_RESET: rewind the write pointer to the zone start, mark the zone
 * EMPTY, and (best-effort) unmap the zone's blocks on the base bdev.
 */
static int
zone_block_reset_zone(struct bdev_zone_block *bdev_node, struct zone_block_io_channel *ch,
		      struct block_zone *zone, struct spdk_bdev_io *bdev_io)
{
	pthread_spin_lock(&zone->lock);

	switch (zone->zone_info.state) {
	case SPDK_BDEV_ZONE_STATE_EMPTY:
		/* Resetting an already-empty zone is a no-op success. */
		pthread_spin_unlock(&zone->lock);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
		return 0;
	case SPDK_BDEV_ZONE_STATE_OPEN:
	case SPDK_BDEV_ZONE_STATE_FULL:
	case SPDK_BDEV_ZONE_STATE_CLOSED:
		zone->zone_info.state = SPDK_BDEV_ZONE_STATE_EMPTY;
		zone->zone_info.write_pointer = zone->zone_info.zone_id;
		pthread_spin_unlock(&zone->lock);

		/* The unmap isn't necessary, so if the base bdev doesn't support it, we're done */
		if (!spdk_bdev_io_type_supported(spdk_bdev_desc_get_bdev(bdev_node->base_desc),
						 SPDK_BDEV_IO_TYPE_UNMAP)) {
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
			return 0;
		}

		return spdk_bdev_unmap_blocks(bdev_node->base_desc, ch->base_ch,
					      zone->zone_info.zone_id, zone->zone_info.capacity,
					      _zone_block_complete_unmap, bdev_io);
	default:
		pthread_spin_unlock(&zone->lock);
		return -EINVAL;
	}
}

/* ZONE_CLOSE: transition OPEN/CLOSED zones to CLOSED; reject others. */
static int
zone_block_close_zone(struct block_zone *zone, struct spdk_bdev_io *bdev_io)
{
	pthread_spin_lock(&zone->lock);

	switch (zone->zone_info.state) {
	case SPDK_BDEV_ZONE_STATE_OPEN:
	case SPDK_BDEV_ZONE_STATE_CLOSED:
		zone->zone_info.state = SPDK_BDEV_ZONE_STATE_CLOSED;
		pthread_spin_unlock(&zone->lock);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
		return 0;
	default:
		pthread_spin_unlock(&zone->lock);
		return -EINVAL;
	}
}

/* ZONE_FINISH: move the write pointer to the end of the zone's capacity
 * and mark the zone FULL, regardless of its current state.
 */
static int
zone_block_finish_zone(struct block_zone *zone, struct spdk_bdev_io *bdev_io)
{
	pthread_spin_lock(&zone->lock);

	zone->zone_info.write_pointer = zone->zone_info.zone_id + zone->zone_info.capacity;
	zone->zone_info.state = SPDK_BDEV_ZONE_STATE_FULL;

	pthread_spin_unlock(&zone->lock);
	spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
	return 0;
}

/* Dispatch a zone management action (reset/open/close/finish) to the zone
 * whose start LBA matches the request's zone_id.
 */
static int
zone_block_zone_management(struct bdev_zone_block *bdev_node, struct zone_block_io_channel *ch,
			   struct spdk_bdev_io *bdev_io)
{
	struct block_zone *zone;

	zone = zone_block_get_zone_by_slba(bdev_node, bdev_io->u.zone_mgmt.zone_id);
	if (!zone) {
		return -EINVAL;
	}

	switch (bdev_io->u.zone_mgmt.zone_action) {
	case SPDK_BDEV_ZONE_RESET:
		return zone_block_reset_zone(bdev_node, ch, zone, bdev_io);
	case SPDK_BDEV_ZONE_OPEN:
		return zone_block_open_zone(zone, bdev_io);
	case SPDK_BDEV_ZONE_CLOSE:
		return zone_block_close_zone(zone, bdev_io);
	case SPDK_BDEV_ZONE_FINISH:
		return zone_block_finish_zone(zone, bdev_io);
	default:
		return -EINVAL;
	}
}

/* Completion for writes issued to the base bdev; for appends, report back
 * the LBA actually written (the write pointer at submission time).
 */
static void
_zone_block_complete_write(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_bdev_io *orig_io = cb_arg;
	int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;

	if (success && orig_io->type == SPDK_BDEV_IO_TYPE_ZONE_APPEND) {
		orig_io->u.bdev.offset_blocks = bdev_io->u.bdev.offset_blocks;
	}

	/* Complete the original IO and then free the one that we created here
	 * as a result of issuing an IO via submit_request.
	 */
	spdk_bdev_io_complete(orig_io, status);
	spdk_bdev_free_io(bdev_io);
}

/* Handle WRITE and ZONE_APPEND: validate zone state and write pointer,
 * advance the write pointer (marking the zone FULL if it reaches
 * capacity), then forward the write to the base bdev.
 */
static int
zone_block_write(struct bdev_zone_block *bdev_node, struct zone_block_io_channel *ch,
		 struct spdk_bdev_io *bdev_io)
{
	struct block_zone *zone;
	uint64_t len = bdev_io->u.bdev.num_blocks;
	uint64_t lba = bdev_io->u.bdev.offset_blocks;
	uint64_t num_blocks_left, wp;
	int rc = 0;
	bool is_append = bdev_io->type == SPDK_BDEV_IO_TYPE_ZONE_APPEND;

	/* Appends must target a zone start; plain writes may target any LBA
	 * inside a zone (but must equal the write pointer, checked below).
	 */
	if (is_append) {
		zone = zone_block_get_zone_by_slba(bdev_node, lba);
	} else {
		zone = zone_block_get_zone_containing_lba(bdev_node, lba);
	}
	if (!zone) {
		SPDK_ERRLOG("Trying to write to invalid zone (lba 0x%" PRIx64 ")\n", lba);
		return -EINVAL;
	}

	pthread_spin_lock(&zone->lock);

	switch (zone->zone_info.state) {
	case SPDK_BDEV_ZONE_STATE_OPEN:
	case SPDK_BDEV_ZONE_STATE_EMPTY:
	case SPDK_BDEV_ZONE_STATE_CLOSED:
		/* A write implicitly opens the zone. */
		zone->zone_info.state = SPDK_BDEV_ZONE_STATE_OPEN;
		break;
	default:
		SPDK_ERRLOG("Trying to write to zone in invalid state %u\n", zone->zone_info.state);
		rc = -EINVAL;
		goto write_fail;
	}

	wp = zone->zone_info.write_pointer;
	if (is_append) {
		/* Appends land at the current write pointer. */
		lba = wp;
	} else {
		if (lba != wp) {
			SPDK_ERRLOG("Trying to write to zone with invalid address (lba 0x%" PRIx64 ", wp 0x%" PRIx64 ")\n",
				    lba, wp);
			rc = -EINVAL;
			goto write_fail;
		}
	}

	num_blocks_left = zone->zone_info.zone_id + zone->zone_info.capacity - wp;
	if (len > num_blocks_left) {
		SPDK_ERRLOG("Write exceeds zone capacity (lba 0x%" PRIx64 ", len 0x%" PRIx64 ", wp 0x%" PRIx64
			    ")\n", lba, len, wp);
		rc = -EINVAL;
		goto write_fail;
	}

	zone->zone_info.write_pointer += bdev_io->u.bdev.num_blocks;
	assert(zone->zone_info.write_pointer <= zone->zone_info.zone_id + zone->zone_info.capacity);
	if (zone->zone_info.write_pointer == zone->zone_info.zone_id + zone->zone_info.capacity) {
		zone->zone_info.state = SPDK_BDEV_ZONE_STATE_FULL;
	}
	pthread_spin_unlock(&zone->lock);

	if (bdev_io->u.bdev.md_buf == NULL) {
		rc = spdk_bdev_writev_blocks(bdev_node->base_desc, ch->base_ch, bdev_io->u.bdev.iovs,
					     bdev_io->u.bdev.iovcnt, lba,
					     bdev_io->u.bdev.num_blocks, _zone_block_complete_write,
					     bdev_io);
	} else {
		rc = spdk_bdev_writev_blocks_with_md(bdev_node->base_desc, ch->base_ch,
						     bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
						     bdev_io->u.bdev.md_buf,
						     lba, bdev_io->u.bdev.num_blocks,
						     _zone_block_complete_write, bdev_io);
	}

	return rc;

write_fail:
	pthread_spin_unlock(&zone->lock);
	return rc;
}

/* Completion for reads issued to the base bdev: propagate status to the
 * original IO.
 */
static void
_zone_block_complete_read(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_bdev_io *orig_io = cb_arg;
	int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;

	/* Complete the original IO and then free the one that we created here
	 * as a result of issuing an IO via submit_request.
	 */
	spdk_bdev_io_complete(orig_io, status);
	spdk_bdev_free_io(bdev_io);
}

/* Handle READ: reject reads that cross the end of a zone's capacity, then
 * forward to the base bdev unchanged.
 */
static int
zone_block_read(struct bdev_zone_block *bdev_node, struct zone_block_io_channel *ch,
		struct spdk_bdev_io *bdev_io)
{
	struct block_zone *zone;
	uint64_t len = bdev_io->u.bdev.num_blocks;
	uint64_t lba = bdev_io->u.bdev.offset_blocks;
	int rc;

	zone = zone_block_get_zone_containing_lba(bdev_node, lba);
	if (!zone) {
		SPDK_ERRLOG("Trying to read from invalid zone (lba 0x%" PRIx64 ")\n", lba);
		return -EINVAL;
	}

	if ((lba + len) > (zone->zone_info.zone_id + zone->zone_info.capacity)) {
		SPDK_ERRLOG("Read exceeds zone capacity (lba 0x%" PRIx64 ", len 0x%" PRIx64 ")\n", lba, len);
		return -EINVAL;
	}

	if (bdev_io->u.bdev.md_buf == NULL) {
		rc = spdk_bdev_readv_blocks(bdev_node->base_desc, ch->base_ch, bdev_io->u.bdev.iovs,
					    bdev_io->u.bdev.iovcnt, lba,
					    len, _zone_block_complete_read,
					    bdev_io);
	} else {
		rc = spdk_bdev_readv_blocks_with_md(bdev_node->base_desc, ch->base_ch,
						    bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
						    bdev_io->u.bdev.md_buf,
						    lba, len,
						    _zone_block_complete_read, bdev_io);
	}

	return rc;
}

/* bdev fn_table submit_request: route each IO type to its handler and
 * translate handler errors into bdev IO completion statuses (-ENOMEM maps
 * to NOMEM so the bdev layer can retry).
 */
static void
zone_block_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	struct bdev_zone_block *bdev_node = SPDK_CONTAINEROF(bdev_io->bdev, struct bdev_zone_block, bdev);
	struct zone_block_io_channel *dev_ch = spdk_io_channel_get_ctx(ch);
	int rc = 0;

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
		rc = zone_block_get_zone_info(bdev_node, bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
		rc = zone_block_zone_management(bdev_node, dev_ch, bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
	case SPDK_BDEV_IO_TYPE_ZONE_APPEND:
		rc = zone_block_write(bdev_node, dev_ch, bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_READ:
		rc = zone_block_read(bdev_node, dev_ch, bdev_io);
		break;
	default:
		SPDK_ERRLOG("vbdev_block: unknown I/O type %u\n", bdev_io->type);
		rc = -ENOTSUP;
		break;
	}

	if (rc != 0) {
		if (rc == -ENOMEM) {
			SPDK_WARNLOG("ENOMEM, start to queue io for vbdev.\n");
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
		} else {
			SPDK_ERRLOG("ERROR on bdev_io submission!\n");
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		}
	}
}

/* bdev fn_table io_type_supported: advertise the IO types this vbdev
 * handles in submit_request.
 */
static bool
zone_block_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
{
	switch (io_type) {
	case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
	case SPDK_BDEV_IO_TYPE_WRITE:
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_ZONE_APPEND:
		return true;
	default:
		return false;
	}
}

/* bdev fn_table get_io_channel: the vbdev node itself is the io_device
 * registered in zone_block_register.
 */
static struct spdk_io_channel *
zone_block_get_io_channel(void *ctx)
{
	struct bdev_zone_block *bdev_node = (struct bdev_zone_block *)ctx;

	return spdk_get_io_channel(bdev_node);
}

/* bdev fn_table dump_info_json: write this vbdev's properties for
 * informational RPCs (e.g. bdev_get_bdevs).
 */
static int
zone_block_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
{
	struct bdev_zone_block *bdev_node = (struct bdev_zone_block *)ctx;
	struct spdk_bdev *base_bdev = spdk_bdev_desc_get_bdev(bdev_node->base_desc);

	spdk_json_write_name(w, "zoned_block");
	spdk_json_write_object_begin(w);
	spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&bdev_node->bdev));
	spdk_json_write_named_string(w, "base_bdev", spdk_bdev_get_name(base_bdev));
	spdk_json_write_named_uint64(w, "zone_capacity", bdev_node->zone_capacity);
	spdk_json_write_named_uint64(w, "optimal_open_zones", bdev_node->bdev.optimal_open_zones);
	spdk_json_write_object_end(w);

	return 0;
}

/* When we register our vbdev this is how we specify our entry points.
 */
static const struct spdk_bdev_fn_table zone_block_fn_table = {
	.destruct		= zone_block_destruct,
	.submit_request		= zone_block_submit_request,
	.io_type_supported	= zone_block_io_type_supported,
	.get_io_channel		= zone_block_get_io_channel,
	.dump_info_json		= zone_block_dump_info_json,
};

/* Unregister every vbdev built on top of a base bdev that is being
 * hot-removed.
 */
static void
zone_block_base_bdev_hotremove_cb(struct spdk_bdev *bdev_find)
{
	struct bdev_zone_block *bdev_node, *tmp;

	TAILQ_FOREACH_SAFE(bdev_node, &g_bdev_nodes, link, tmp) {
		if (bdev_find == spdk_bdev_desc_get_bdev(bdev_node->base_desc)) {
			spdk_bdev_unregister(&bdev_node->bdev, NULL, NULL);
		}
	}
}

/* Event callback passed to spdk_bdev_open_ext; only REMOVE is handled. */
static void
zone_block_base_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev,
			      void *event_ctx)
{
	switch (type) {
	case SPDK_BDEV_EVENT_REMOVE:
		zone_block_base_bdev_hotremove_cb(bdev);
		break;
	default:
		SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type);
		break;
	}
}

/* io_device channel create callback: acquire an IO channel on the base
 * bdev for this thread.
 */
static int
_zone_block_ch_create_cb(void *io_device, void *ctx_buf)
{
	struct zone_block_io_channel *bdev_ch = ctx_buf;
	struct bdev_zone_block *bdev_node = io_device;

	bdev_ch->base_ch = spdk_bdev_get_io_channel(bdev_node->base_desc);
	if (!bdev_ch->base_ch) {
		return -ENOMEM;
	}

	return 0;
}

/* io_device channel destroy callback: release the base bdev's channel. */
static void
_zone_block_ch_destroy_cb(void *io_device, void *ctx_buf)
{
	struct zone_block_io_channel *bdev_ch = ctx_buf;

	spdk_put_io_channel(bdev_ch->base_ch);
}

/* Record a (base bdev, vbdev) name pair plus zone parameters in the global
 * config list, rejecting duplicate vbdev names and already-claimed base
 * bdevs. Returns 0, -EEXIST, or -ENOMEM.
 */
static int
zone_block_insert_name(const char *bdev_name, const char *vbdev_name, uint64_t zone_capacity,
		       uint64_t optimal_open_zones)
{
	struct bdev_zone_block_config *name;

	TAILQ_FOREACH(name, &g_bdev_configs, link) {
		if (strcmp(vbdev_name, name->vbdev_name) == 0) {
			SPDK_ERRLOG("block zoned bdev %s already exists\n", vbdev_name);
			return -EEXIST;
		}
		if (strcmp(bdev_name, name->bdev_name) == 0) {
			SPDK_ERRLOG("base bdev %s already claimed\n", bdev_name);
			return -EEXIST;
		}
	}

	name = calloc(1, sizeof(*name));
	if (!name) {
		SPDK_ERRLOG("could not allocate bdev_names\n");
		return -ENOMEM;
	}

	name->bdev_name = strdup(bdev_name);
	if (!name->bdev_name) {
		SPDK_ERRLOG("could not allocate name->bdev_name\n");
		free(name);
		return -ENOMEM;
	}

	name->vbdev_name = strdup(vbdev_name);
	if (!name->vbdev_name) {
		SPDK_ERRLOG("could not allocate name->vbdev_name\n");
		free(name->bdev_name);
		free(name);
		return -ENOMEM;
	}

	name->zone_capacity = zone_capacity;
	name->optimal_open_zones = optimal_open_zones;

	TAILQ_INSERT_TAIL(&g_bdev_configs, name, link);

	return 0;
}

/* Initialize each zone's info and spinlock. Zones start FULL with the
 * write pointer at end-of-capacity; a RESET is required before writing.
 * On spinlock init failure, destroys the locks created so far.
 */
static int
zone_block_init_zone_info(struct bdev_zone_block *bdev_node)
{
	size_t i;
	struct block_zone *zone;
	int rc = 0;

	for (i = 0; i < bdev_node->num_zones; i++) {
		zone = &bdev_node->zones[i];
		zone->zone_info.zone_id = bdev_node->bdev.zone_size * i;
		zone->zone_info.capacity = bdev_node->zone_capacity;
		zone->zone_info.write_pointer = zone->zone_info.zone_id + zone->zone_info.capacity;
		zone->zone_info.state = SPDK_BDEV_ZONE_STATE_FULL;
		if (pthread_spin_init(&zone->lock, PTHREAD_PROCESS_PRIVATE)) {
			SPDK_ERRLOG("pthread_spin_init() failed\n");
			rc = -ENOMEM;
			break;
		}
	}

	if (rc) {
		/* Unwind: destroy the locks that were successfully created. */
		for (; i > 0; i--) {
			pthread_spin_destroy(&bdev_node->zones[i - 1].lock);
		}
	}

	return rc;
}

/* Create the zoned vbdev on top of base_bdev_name if a matching config
 * entry exists: open and claim the base bdev, size the zone array
 * (zone_size is zone_capacity rounded up to a power of two), copy base
 * bdev properties, and register the new bdev. Errors unwind via the goto
 * chain below; -ENODEV means the base bdev is not present yet.
 */
static int
zone_block_register(const char *base_bdev_name)
{
	struct spdk_bdev_desc *base_desc;
	struct spdk_bdev *base_bdev;
	struct bdev_zone_block_config *name, *tmp;
	struct bdev_zone_block *bdev_node;
	uint64_t zone_size;
	int rc = 0;

	/* Check our list of names from config versus this bdev and if
	 * there's a match, create the bdev_node & bdev accordingly.
	 */
	TAILQ_FOREACH_SAFE(name, &g_bdev_configs, link, tmp) {
		if (strcmp(name->bdev_name, base_bdev_name) != 0) {
			continue;
		}

		rc = spdk_bdev_open_ext(base_bdev_name, true, zone_block_base_bdev_event_cb,
					NULL, &base_desc);
		if (rc == -ENODEV) {
			/* Base bdev not present yet; keep the config entry for examine(). */
			return -ENODEV;
		} else if (rc) {
			SPDK_ERRLOG("could not open bdev %s\n", base_bdev_name);
			goto free_config;
		}

		base_bdev = spdk_bdev_desc_get_bdev(base_desc);

		if (spdk_bdev_is_zoned(base_bdev)) {
			SPDK_ERRLOG("Base bdev %s is already a zoned bdev\n", base_bdev_name);
			rc = -EEXIST;
			goto zone_exist;
		}

		bdev_node = calloc(1, sizeof(struct bdev_zone_block));
		if (!bdev_node) {
			rc = -ENOMEM;
			SPDK_ERRLOG("could not allocate bdev_node\n");
			goto zone_exist;
		}

		bdev_node->base_desc = base_desc;

		/* The base bdev that we're attaching to. */
		bdev_node->bdev.name = strdup(name->vbdev_name);
		if (!bdev_node->bdev.name) {
			rc = -ENOMEM;
			SPDK_ERRLOG("could not allocate bdev_node name\n");
			goto strdup_failed;
		}

		zone_size = spdk_align64pow2(name->zone_capacity);
		if (zone_size == 0) {
			rc = -EINVAL;
			SPDK_ERRLOG("invalid zone size\n");
			goto roundup_failed;
		}

		bdev_node->zone_shift = spdk_u64log2(zone_size);
		bdev_node->num_zones = base_bdev->blockcnt / zone_size;

		bdev_node->zones = calloc(bdev_node->num_zones, sizeof(struct block_zone));
		if (!bdev_node->zones) {
			rc = -ENOMEM;
			SPDK_ERRLOG("could not allocate zones\n");
			goto calloc_failed;
		}

		bdev_node->bdev.product_name = "zone_block";

		/* Copy some properties from the underlying base bdev.
		 */
		bdev_node->bdev.write_cache = base_bdev->write_cache;
		bdev_node->bdev.required_alignment = base_bdev->required_alignment;
		bdev_node->bdev.optimal_io_boundary = base_bdev->optimal_io_boundary;

		bdev_node->bdev.blocklen = base_bdev->blocklen;
		bdev_node->bdev.blockcnt = bdev_node->num_zones * zone_size;

		if (bdev_node->num_zones * name->zone_capacity != base_bdev->blockcnt) {
			SPDK_DEBUGLOG(vbdev_zone_block,
				      "Lost %" PRIu64 " blocks due to zone capacity and base bdev size misalignment\n",
				      base_bdev->blockcnt - bdev_node->num_zones * name->zone_capacity);
		}

		bdev_node->bdev.write_unit_size = base_bdev->write_unit_size;

		bdev_node->bdev.md_interleave = base_bdev->md_interleave;
		bdev_node->bdev.md_len = base_bdev->md_len;
		bdev_node->bdev.dif_type = base_bdev->dif_type;
		bdev_node->bdev.dif_is_head_of_md = base_bdev->dif_is_head_of_md;
		bdev_node->bdev.dif_check_flags = base_bdev->dif_check_flags;

		bdev_node->bdev.zoned = true;
		bdev_node->bdev.ctxt = bdev_node;
		bdev_node->bdev.fn_table = &zone_block_fn_table;
		bdev_node->bdev.module = &bdev_zoned_if;

		/* bdev specific info */
		bdev_node->bdev.zone_size = zone_size;

		bdev_node->zone_capacity = name->zone_capacity;
		bdev_node->bdev.optimal_open_zones = name->optimal_open_zones;
		bdev_node->bdev.max_open_zones = 0;
		rc = zone_block_init_zone_info(bdev_node);
		if (rc) {
			SPDK_ERRLOG("could not init zone info\n");
			goto zone_info_failed;
		}

		TAILQ_INSERT_TAIL(&g_bdev_nodes, bdev_node, link);

		spdk_io_device_register(bdev_node, _zone_block_ch_create_cb, _zone_block_ch_destroy_cb,
					sizeof(struct zone_block_io_channel),
					name->vbdev_name);

		/* Save the thread where the base device is opened */
		bdev_node->thread = spdk_get_thread();

		rc = spdk_bdev_module_claim_bdev(base_bdev, base_desc, bdev_node->bdev.module);
		if (rc) {
			SPDK_ERRLOG("could not claim bdev %s\n", base_bdev_name);
			goto claim_failed;
		}

		rc = spdk_bdev_register(&bdev_node->bdev);
		if (rc) {
			SPDK_ERRLOG("could not register zoned bdev\n");
			goto register_failed;
		}
	}

	return rc;

register_failed:
	spdk_bdev_module_release_bdev(&bdev_node->bdev);
claim_failed:
	TAILQ_REMOVE(&g_bdev_nodes, bdev_node, link);
	spdk_io_device_unregister(bdev_node, NULL);
zone_info_failed:
	free(bdev_node->zones);
calloc_failed:
roundup_failed:
	free(bdev_node->bdev.name);
strdup_failed:
	free(bdev_node);
zone_exist:
	spdk_bdev_close(base_desc);
free_config:
	zone_block_remove_config(name);
	return rc;
}

/* Public API: create a zoned vbdev named vbdev_name on top of bdev_name.
 * The config entry is stored even if the base bdev does not exist yet, so
 * the vbdev can be created later from examine(). Returns 0 on success (or
 * deferred creation), negative errno on error.
 */
int
vbdev_zone_block_create(const char *bdev_name, const char *vbdev_name, uint64_t zone_capacity,
			uint64_t optimal_open_zones)
{
	int rc = 0;

	if (zone_capacity == 0) {
		SPDK_ERRLOG("Zone capacity can't be 0\n");
		return -EINVAL;
	}

	if (optimal_open_zones == 0) {
		SPDK_ERRLOG("Optimal open zones can't be 0\n");
		return -EINVAL;
	}

	/* Insert the bdev into our global name list even if it doesn't exist yet,
	 * it may show up soon...
	 */
	rc = zone_block_insert_name(bdev_name, vbdev_name, zone_capacity, optimal_open_zones);
	if (rc) {
		return rc;
	}

	rc = zone_block_register(bdev_name);
	if (rc == -ENODEV) {
		/* This is not an error, even though the bdev is not present at this time it may
		 * still show up later.
		 */
		rc = 0;
	}
	return rc;
}

/* Public API: delete the named zoned vbdev (must belong to this module),
 * dropping its config entry and unregistering the bdev. cb_fn is invoked
 * with the unregister result, or -ENODEV if no such vbdev exists.
 */
void
vbdev_zone_block_delete(const char *name, spdk_bdev_unregister_cb cb_fn, void *cb_arg)
{
	struct bdev_zone_block_config *name_node;
	struct spdk_bdev *bdev = NULL;

	bdev = spdk_bdev_get_by_name(name);
	if (!bdev || bdev->module != &bdev_zoned_if) {
		cb_fn(cb_arg, -ENODEV);
		return;
	}

	TAILQ_FOREACH(name_node, &g_bdev_configs, link) {
		if (strcmp(name_node->vbdev_name, bdev->name) == 0) {
			zone_block_remove_config(name_node);
			break;
		}
	}

	spdk_bdev_unregister(bdev, cb_fn, cb_arg);
}

/* examine_config hook: try to build a vbdev on every newly appearing bdev
 * (no-op unless a config entry names it as a base bdev).
 */
static void
zone_block_examine(struct spdk_bdev *bdev)
{
	zone_block_register(bdev->name);

	spdk_bdev_module_examine_done(&bdev_zoned_if);
}

SPDK_LOG_REGISTER_COMPONENT(vbdev_zone_block)