1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include "spdk/stdinc.h" 35 36 #include "vbdev_zone_block.h" 37 38 #include "spdk/config.h" 39 #include "spdk/nvme.h" 40 #include "spdk/bdev_zone.h" 41 42 #include "spdk/log.h" 43 44 static int zone_block_init(void); 45 static int zone_block_get_ctx_size(void); 46 static void zone_block_finish(void); 47 static int zone_block_config_json(struct spdk_json_write_ctx *w); 48 static void zone_block_examine(struct spdk_bdev *bdev); 49 50 static struct spdk_bdev_module bdev_zoned_if = { 51 .name = "bdev_zoned_block", 52 .module_init = zone_block_init, 53 .module_fini = zone_block_finish, 54 .config_json = zone_block_config_json, 55 .examine_config = zone_block_examine, 56 .get_ctx_size = zone_block_get_ctx_size, 57 }; 58 59 SPDK_BDEV_MODULE_REGISTER(bdev_zoned_block, &bdev_zoned_if) 60 61 /* List of block vbdev names and their base bdevs via configuration file. 62 * Used so we can parse the conf once at init and use this list in examine(). 63 */ 64 struct bdev_zone_block_config { 65 char *vbdev_name; 66 char *bdev_name; 67 uint64_t zone_capacity; 68 uint64_t optimal_open_zones; 69 TAILQ_ENTRY(bdev_zone_block_config) link; 70 }; 71 static TAILQ_HEAD(, bdev_zone_block_config) g_bdev_configs = TAILQ_HEAD_INITIALIZER(g_bdev_configs); 72 73 struct block_zone { 74 struct spdk_bdev_zone_info zone_info; 75 pthread_spinlock_t lock; 76 }; 77 78 /* List of block vbdevs and associated info for each. */ 79 struct bdev_zone_block { 80 struct spdk_bdev bdev; /* the block zoned bdev */ 81 struct spdk_bdev_desc *base_desc; /* its descriptor we get from open */ 82 struct block_zone *zones; /* array of zones */ 83 uint64_t num_zones; /* number of zones */ 84 uint64_t zone_capacity; /* zone capacity */ 85 uint64_t zone_shift; /* log2 of zone_size */ 86 TAILQ_ENTRY(bdev_zone_block) link; 87 struct spdk_thread *thread; /* thread where base device is opened */ 88 }; 89 static TAILQ_HEAD(, bdev_zone_block) g_bdev_nodes = TAILQ_HEAD_INITIALIZER(g_bdev_nodes); 90 91 struct zone_block_io_channel { 92 struct spdk_io_channel *base_ch; /* IO channel of base device */ 93 }; 94 95 struct zone_block_io { 96 /* vbdev to which IO was issued */ 97 struct bdev_zone_block *bdev_zone_block; 98 }; 99 100 static int 101 zone_block_init(void) 102 { 103 return 0; 104 } 105 106 static void 107 zone_block_remove_config(struct bdev_zone_block_config *name) 108 { 109 TAILQ_REMOVE(&g_bdev_configs, name, link); 110 free(name->bdev_name); 111 free(name->vbdev_name); 112 free(name); 113 } 114 115 static void 116 zone_block_finish(void) 117 { 118 struct bdev_zone_block_config *name; 119 120 while ((name = TAILQ_FIRST(&g_bdev_configs))) { 121 zone_block_remove_config(name); 122 } 123 } 124 125 static int 126 zone_block_get_ctx_size(void) 127 { 128 return sizeof(struct zone_block_io); 129 } 130 131 static int 132 zone_block_config_json(struct spdk_json_write_ctx *w) 133 { 134 struct bdev_zone_block *bdev_node; 135 struct spdk_bdev *base_bdev = NULL; 136 137 TAILQ_FOREACH(bdev_node, &g_bdev_nodes, link) { 138 base_bdev = spdk_bdev_desc_get_bdev(bdev_node->base_desc); 139 spdk_json_write_object_begin(w); 140 spdk_json_write_named_string(w, "method", "bdev_zone_block_create"); 141 spdk_json_write_named_object_begin(w, "params"); 142 spdk_json_write_named_string(w, "base_bdev", spdk_bdev_get_name(base_bdev)); 143 spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&bdev_node->bdev)); 144 spdk_json_write_named_uint64(w, "zone_capacity", bdev_node->zone_capacity); 145 spdk_json_write_named_uint64(w, "optimal_open_zones", bdev_node->bdev.optimal_open_zones); 146 spdk_json_write_object_end(w); 147 spdk_json_write_object_end(w); 148 } 149 150 return 0; 151 } 152 153 /* Callback for unregistering the IO device. */ 154 static void 155 _device_unregister_cb(void *io_device) 156 { 157 struct bdev_zone_block *bdev_node = io_device; 158 uint64_t i; 159 160 free(bdev_node->bdev.name); 161 for (i = 0; i < bdev_node->num_zones; i++) { 162 pthread_spin_destroy(&bdev_node->zones[i].lock); 163 } 164 free(bdev_node->zones); 165 free(bdev_node); 166 } 167 168 static void 169 _zone_block_destruct(void *ctx) 170 { 171 struct spdk_bdev_desc *desc = ctx; 172 173 spdk_bdev_close(desc); 174 } 175 176 static int 177 zone_block_destruct(void *ctx) 178 { 179 struct bdev_zone_block *bdev_node = (struct bdev_zone_block *)ctx; 180 181 TAILQ_REMOVE(&g_bdev_nodes, bdev_node, link); 182 183 /* Unclaim the underlying bdev. */ 184 spdk_bdev_module_release_bdev(spdk_bdev_desc_get_bdev(bdev_node->base_desc)); 185 186 /* Close the underlying bdev on its same opened thread. */ 187 if (bdev_node->thread && bdev_node->thread != spdk_get_thread()) { 188 spdk_thread_send_msg(bdev_node->thread, _zone_block_destruct, bdev_node->base_desc); 189 } else { 190 spdk_bdev_close(bdev_node->base_desc); 191 } 192 193 /* Unregister the io_device. */ 194 spdk_io_device_unregister(bdev_node, _device_unregister_cb); 195 196 return 0; 197 } 198 199 static struct block_zone * 200 zone_block_get_zone_containing_lba(struct bdev_zone_block *bdev_node, uint64_t lba) 201 { 202 size_t index = lba >> bdev_node->zone_shift; 203 204 if (index >= bdev_node->num_zones) { 205 return NULL; 206 } 207 208 return &bdev_node->zones[index]; 209 } 210 211 static struct block_zone * 212 zone_block_get_zone_by_slba(struct bdev_zone_block *bdev_node, uint64_t start_lba) 213 { 214 struct block_zone *zone = zone_block_get_zone_containing_lba(bdev_node, start_lba); 215 216 if (zone && zone->zone_info.zone_id == start_lba) { 217 return zone; 218 } else { 219 return NULL; 220 } 221 } 222 223 static int 224 zone_block_get_zone_info(struct bdev_zone_block *bdev_node, struct spdk_bdev_io *bdev_io) 225 { 226 struct block_zone *zone; 227 struct spdk_bdev_zone_info *zone_info = bdev_io->u.zone_mgmt.buf; 228 uint64_t zone_id = bdev_io->u.zone_mgmt.zone_id; 229 size_t i; 230 231 /* User can request info for more zones than exist, need to check both internal and user 232 * boundaries 233 */ 234 for (i = 0; i < bdev_io->u.zone_mgmt.num_zones; i++, zone_id += bdev_node->bdev.zone_size) { 235 zone = zone_block_get_zone_by_slba(bdev_node, zone_id); 236 if (!zone) { 237 return -EINVAL; 238 } 239 memcpy(&zone_info[i], &zone->zone_info, sizeof(*zone_info)); 240 } 241 242 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 243 return 0; 244 } 245 246 static int 247 zone_block_open_zone(struct block_zone *zone, struct spdk_bdev_io *bdev_io) 248 { 249 pthread_spin_lock(&zone->lock); 250 251 switch (zone->zone_info.state) { 252 case SPDK_BDEV_ZONE_STATE_EMPTY: 253 case SPDK_BDEV_ZONE_STATE_OPEN: 254 case SPDK_BDEV_ZONE_STATE_CLOSED: 255 zone->zone_info.state = SPDK_BDEV_ZONE_STATE_OPEN; 256 pthread_spin_unlock(&zone->lock); 257 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 258 return 0; 259 default: 260 pthread_spin_unlock(&zone->lock); 261 return -EINVAL; 262 } 263 } 264 265 static void 266 _zone_block_complete_unmap(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 267 { 268 struct spdk_bdev_io *orig_io = cb_arg; 269 int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED; 270 271 /* Complete the original IO and then free the one that we created here 272 * as a result of issuing an IO via submit_reqeust. 273 */ 274 spdk_bdev_io_complete(orig_io, status); 275 spdk_bdev_free_io(bdev_io); 276 } 277 278 static int 279 zone_block_reset_zone(struct bdev_zone_block *bdev_node, struct zone_block_io_channel *ch, 280 struct block_zone *zone, struct spdk_bdev_io *bdev_io) 281 { 282 pthread_spin_lock(&zone->lock); 283 284 switch (zone->zone_info.state) { 285 case SPDK_BDEV_ZONE_STATE_EMPTY: 286 pthread_spin_unlock(&zone->lock); 287 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 288 return 0; 289 case SPDK_BDEV_ZONE_STATE_OPEN: 290 case SPDK_BDEV_ZONE_STATE_FULL: 291 case SPDK_BDEV_ZONE_STATE_CLOSED: 292 zone->zone_info.state = SPDK_BDEV_ZONE_STATE_EMPTY; 293 zone->zone_info.write_pointer = zone->zone_info.zone_id; 294 pthread_spin_unlock(&zone->lock); 295 return spdk_bdev_unmap_blocks(bdev_node->base_desc, ch->base_ch, 296 zone->zone_info.zone_id, zone->zone_info.capacity, 297 _zone_block_complete_unmap, bdev_io); 298 default: 299 pthread_spin_unlock(&zone->lock); 300 return -EINVAL; 301 } 302 } 303 304 static int 305 zone_block_close_zone(struct block_zone *zone, struct spdk_bdev_io *bdev_io) 306 { 307 pthread_spin_lock(&zone->lock); 308 309 switch (zone->zone_info.state) { 310 case SPDK_BDEV_ZONE_STATE_OPEN: 311 case SPDK_BDEV_ZONE_STATE_CLOSED: 312 zone->zone_info.state = SPDK_BDEV_ZONE_STATE_CLOSED; 313 pthread_spin_unlock(&zone->lock); 314 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 315 return 0; 316 default: 317 pthread_spin_unlock(&zone->lock); 318 return -EINVAL; 319 } 320 } 321 322 static int 323 zone_block_finish_zone(struct block_zone *zone, struct spdk_bdev_io *bdev_io) 324 { 325 pthread_spin_lock(&zone->lock); 326 327 zone->zone_info.write_pointer = zone->zone_info.zone_id + zone->zone_info.capacity; 328 zone->zone_info.state = SPDK_BDEV_ZONE_STATE_FULL; 329 330 pthread_spin_unlock(&zone->lock); 331 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 332 return 0; 333 } 334 335 static int 336 zone_block_zone_management(struct bdev_zone_block *bdev_node, struct zone_block_io_channel *ch, 337 struct spdk_bdev_io *bdev_io) 338 { 339 struct block_zone *zone; 340 341 zone = zone_block_get_zone_by_slba(bdev_node, bdev_io->u.zone_mgmt.zone_id); 342 if (!zone) { 343 return -EINVAL; 344 } 345 346 switch (bdev_io->u.zone_mgmt.zone_action) { 347 case SPDK_BDEV_ZONE_RESET: 348 return zone_block_reset_zone(bdev_node, ch, zone, bdev_io); 349 case SPDK_BDEV_ZONE_OPEN: 350 return zone_block_open_zone(zone, bdev_io); 351 case SPDK_BDEV_ZONE_CLOSE: 352 return zone_block_close_zone(zone, bdev_io); 353 case SPDK_BDEV_ZONE_FINISH: 354 return zone_block_finish_zone(zone, bdev_io); 355 default: 356 return -EINVAL; 357 } 358 } 359 360 static void 361 _zone_block_complete_write(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 362 { 363 struct spdk_bdev_io *orig_io = cb_arg; 364 int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED; 365 366 if (success && orig_io->type == SPDK_BDEV_IO_TYPE_ZONE_APPEND) { 367 orig_io->u.bdev.offset_blocks = bdev_io->u.bdev.offset_blocks; 368 } 369 370 /* Complete the original IO and then free the one that we created here 371 * as a result of issuing an IO via submit_reqeust. 372 */ 373 spdk_bdev_io_complete(orig_io, status); 374 spdk_bdev_free_io(bdev_io); 375 } 376 377 static int 378 zone_block_write(struct bdev_zone_block *bdev_node, struct zone_block_io_channel *ch, 379 struct spdk_bdev_io *bdev_io) 380 { 381 struct block_zone *zone; 382 uint64_t len = bdev_io->u.bdev.num_blocks; 383 uint64_t lba = bdev_io->u.bdev.offset_blocks; 384 uint64_t num_blocks_left, wp; 385 int rc = 0; 386 bool is_append = bdev_io->type == SPDK_BDEV_IO_TYPE_ZONE_APPEND; 387 388 if (is_append) { 389 zone = zone_block_get_zone_by_slba(bdev_node, lba); 390 } else { 391 zone = zone_block_get_zone_containing_lba(bdev_node, lba); 392 } 393 if (!zone) { 394 SPDK_ERRLOG("Trying to write to invalid zone (lba 0x%" PRIx64 ")\n", lba); 395 return -EINVAL; 396 } 397 398 pthread_spin_lock(&zone->lock); 399 400 switch (zone->zone_info.state) { 401 case SPDK_BDEV_ZONE_STATE_OPEN: 402 case SPDK_BDEV_ZONE_STATE_EMPTY: 403 case SPDK_BDEV_ZONE_STATE_CLOSED: 404 zone->zone_info.state = SPDK_BDEV_ZONE_STATE_OPEN; 405 break; 406 default: 407 SPDK_ERRLOG("Trying to write to zone in invalid state %u\n", zone->zone_info.state); 408 rc = -EINVAL; 409 goto write_fail; 410 } 411 412 wp = zone->zone_info.write_pointer; 413 if (is_append) { 414 lba = wp; 415 } else { 416 if (lba != wp) { 417 SPDK_ERRLOG("Trying to write to zone with invalid address (lba 0x%" PRIx64 ", wp 0x%" PRIx64 ")\n", 418 lba, wp); 419 rc = -EINVAL; 420 goto write_fail; 421 } 422 } 423 424 num_blocks_left = zone->zone_info.zone_id + zone->zone_info.capacity - wp; 425 if (len > num_blocks_left) { 426 SPDK_ERRLOG("Write exceeds zone capacity (lba 0x%" PRIx64 ", len 0x%" PRIx64 ", wp 0x%" PRIx64 427 ")\n", lba, len, wp); 428 rc = -EINVAL; 429 goto write_fail; 430 } 431 432 zone->zone_info.write_pointer += bdev_io->u.bdev.num_blocks; 433 assert(zone->zone_info.write_pointer <= zone->zone_info.zone_id + zone->zone_info.capacity); 434 if (zone->zone_info.write_pointer == zone->zone_info.zone_id + zone->zone_info.capacity) { 435 zone->zone_info.state = SPDK_BDEV_ZONE_STATE_FULL; 436 } 437 pthread_spin_unlock(&zone->lock); 438 439 if (bdev_io->u.bdev.md_buf == NULL) { 440 rc = spdk_bdev_writev_blocks(bdev_node->base_desc, ch->base_ch, bdev_io->u.bdev.iovs, 441 bdev_io->u.bdev.iovcnt, lba, 442 bdev_io->u.bdev.num_blocks, _zone_block_complete_write, 443 bdev_io); 444 } else { 445 rc = spdk_bdev_writev_blocks_with_md(bdev_node->base_desc, ch->base_ch, 446 bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, 447 bdev_io->u.bdev.md_buf, 448 lba, bdev_io->u.bdev.num_blocks, 449 _zone_block_complete_write, bdev_io); 450 } 451 452 return rc; 453 454 write_fail: 455 pthread_spin_unlock(&zone->lock); 456 return rc; 457 } 458 459 static void 460 _zone_block_complete_read(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 461 { 462 struct spdk_bdev_io *orig_io = cb_arg; 463 int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED; 464 465 /* Complete the original IO and then free the one that we created here 466 * as a result of issuing an IO via submit_reqeust. 467 */ 468 spdk_bdev_io_complete(orig_io, status); 469 spdk_bdev_free_io(bdev_io); 470 } 471 472 static int 473 zone_block_read(struct bdev_zone_block *bdev_node, struct zone_block_io_channel *ch, 474 struct spdk_bdev_io *bdev_io) 475 { 476 struct block_zone *zone; 477 uint64_t len = bdev_io->u.bdev.num_blocks; 478 uint64_t lba = bdev_io->u.bdev.offset_blocks; 479 int rc; 480 481 zone = zone_block_get_zone_containing_lba(bdev_node, lba); 482 if (!zone) { 483 SPDK_ERRLOG("Trying to read from invalid zone (lba 0x%" PRIx64 ")\n", lba); 484 return -EINVAL; 485 } 486 487 if ((lba + len) > (zone->zone_info.zone_id + zone->zone_info.capacity)) { 488 SPDK_ERRLOG("Read exceeds zone capacity (lba 0x%" PRIx64 ", len 0x%" PRIx64 ")\n", lba, len); 489 return -EINVAL; 490 } 491 492 if (bdev_io->u.bdev.md_buf == NULL) { 493 rc = spdk_bdev_readv_blocks(bdev_node->base_desc, ch->base_ch, bdev_io->u.bdev.iovs, 494 bdev_io->u.bdev.iovcnt, lba, 495 len, _zone_block_complete_read, 496 bdev_io); 497 } else { 498 rc = spdk_bdev_readv_blocks_with_md(bdev_node->base_desc, ch->base_ch, 499 bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, 500 bdev_io->u.bdev.md_buf, 501 lba, len, 502 _zone_block_complete_read, bdev_io); 503 } 504 505 return rc; 506 } 507 508 static void 509 zone_block_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 510 { 511 struct bdev_zone_block *bdev_node = SPDK_CONTAINEROF(bdev_io->bdev, struct bdev_zone_block, bdev); 512 struct zone_block_io_channel *dev_ch = spdk_io_channel_get_ctx(ch); 513 int rc = 0; 514 515 switch (bdev_io->type) { 516 case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO: 517 rc = zone_block_get_zone_info(bdev_node, bdev_io); 518 break; 519 case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT: 520 rc = zone_block_zone_management(bdev_node, dev_ch, bdev_io); 521 break; 522 case SPDK_BDEV_IO_TYPE_WRITE: 523 case SPDK_BDEV_IO_TYPE_ZONE_APPEND: 524 rc = zone_block_write(bdev_node, dev_ch, bdev_io); 525 break; 526 case SPDK_BDEV_IO_TYPE_READ: 527 rc = zone_block_read(bdev_node, dev_ch, bdev_io); 528 break; 529 default: 530 SPDK_ERRLOG("vbdev_block: unknown I/O type %u\n", bdev_io->type); 531 rc = -ENOTSUP; 532 break; 533 } 534 535 if (rc != 0) { 536 if (rc == -ENOMEM) { 537 SPDK_WARNLOG("ENOMEM, start to queue io for vbdev.\n"); 538 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM); 539 } else { 540 SPDK_ERRLOG("ERROR on bdev_io submission!\n"); 541 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 542 } 543 } 544 } 545 546 static bool 547 zone_block_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 548 { 549 switch (io_type) { 550 case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT: 551 case SPDK_BDEV_IO_TYPE_WRITE: 552 case SPDK_BDEV_IO_TYPE_READ: 553 case SPDK_BDEV_IO_TYPE_ZONE_APPEND: 554 return true; 555 default: 556 return false; 557 } 558 } 559 560 static struct spdk_io_channel * 561 zone_block_get_io_channel(void *ctx) 562 { 563 struct bdev_zone_block *bdev_node = (struct bdev_zone_block *)ctx; 564 565 return spdk_get_io_channel(bdev_node); 566 } 567 568 static int 569 zone_block_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) 570 { 571 struct bdev_zone_block *bdev_node = (struct bdev_zone_block *)ctx; 572 struct spdk_bdev *base_bdev = spdk_bdev_desc_get_bdev(bdev_node->base_desc); 573 574 spdk_json_write_name(w, "zoned_block"); 575 spdk_json_write_object_begin(w); 576 spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&bdev_node->bdev)); 577 spdk_json_write_named_string(w, "base_bdev", spdk_bdev_get_name(base_bdev)); 578 spdk_json_write_named_uint64(w, "zone_capacity", bdev_node->zone_capacity); 579 spdk_json_write_named_uint64(w, "optimal_open_zones", bdev_node->bdev.optimal_open_zones); 580 spdk_json_write_object_end(w); 581 582 return 0; 583 } 584 585 /* When we register our vbdev this is how we specify our entry points. */ 586 static const struct spdk_bdev_fn_table zone_block_fn_table = { 587 .destruct = zone_block_destruct, 588 .submit_request = zone_block_submit_request, 589 .io_type_supported = zone_block_io_type_supported, 590 .get_io_channel = zone_block_get_io_channel, 591 .dump_info_json = zone_block_dump_info_json, 592 }; 593 594 static void 595 zone_block_base_bdev_hotremove_cb(struct spdk_bdev *bdev_find) 596 { 597 struct bdev_zone_block *bdev_node, *tmp; 598 599 TAILQ_FOREACH_SAFE(bdev_node, &g_bdev_nodes, link, tmp) { 600 if (bdev_find == spdk_bdev_desc_get_bdev(bdev_node->base_desc)) { 601 spdk_bdev_unregister(&bdev_node->bdev, NULL, NULL); 602 } 603 } 604 } 605 606 static void 607 zone_block_base_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, 608 void *event_ctx) 609 { 610 switch (type) { 611 case SPDK_BDEV_EVENT_REMOVE: 612 zone_block_base_bdev_hotremove_cb(bdev); 613 break; 614 default: 615 SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type); 616 break; 617 } 618 } 619 620 static int 621 _zone_block_ch_create_cb(void *io_device, void *ctx_buf) 622 { 623 struct zone_block_io_channel *bdev_ch = ctx_buf; 624 struct bdev_zone_block *bdev_node = io_device; 625 626 bdev_ch->base_ch = spdk_bdev_get_io_channel(bdev_node->base_desc); 627 if (!bdev_ch->base_ch) { 628 return -ENOMEM; 629 } 630 631 return 0; 632 } 633 634 static void 635 _zone_block_ch_destroy_cb(void *io_device, void *ctx_buf) 636 { 637 struct zone_block_io_channel *bdev_ch = ctx_buf; 638 639 spdk_put_io_channel(bdev_ch->base_ch); 640 } 641 642 static int 643 zone_block_insert_name(const char *bdev_name, const char *vbdev_name, uint64_t zone_capacity, 644 uint64_t optimal_open_zones) 645 { 646 struct bdev_zone_block_config *name; 647 648 TAILQ_FOREACH(name, &g_bdev_configs, link) { 649 if (strcmp(vbdev_name, name->vbdev_name) == 0) { 650 SPDK_ERRLOG("block zoned bdev %s already exists\n", vbdev_name); 651 return -EEXIST; 652 } 653 if (strcmp(bdev_name, name->bdev_name) == 0) { 654 SPDK_ERRLOG("base bdev %s already claimed\n", bdev_name); 655 return -EEXIST; 656 } 657 } 658 659 name = calloc(1, sizeof(*name)); 660 if (!name) { 661 SPDK_ERRLOG("could not allocate bdev_names\n"); 662 return -ENOMEM; 663 } 664 665 name->bdev_name = strdup(bdev_name); 666 if (!name->bdev_name) { 667 SPDK_ERRLOG("could not allocate name->bdev_name\n"); 668 free(name); 669 return -ENOMEM; 670 } 671 672 name->vbdev_name = strdup(vbdev_name); 673 if (!name->vbdev_name) { 674 SPDK_ERRLOG("could not allocate name->vbdev_name\n"); 675 free(name->bdev_name); 676 free(name); 677 return -ENOMEM; 678 } 679 680 name->zone_capacity = zone_capacity; 681 name->optimal_open_zones = optimal_open_zones; 682 683 TAILQ_INSERT_TAIL(&g_bdev_configs, name, link); 684 685 return 0; 686 } 687 688 static int 689 zone_block_init_zone_info(struct bdev_zone_block *bdev_node) 690 { 691 size_t i; 692 struct block_zone *zone; 693 int rc = 0; 694 695 for (i = 0; i < bdev_node->num_zones; i++) { 696 zone = &bdev_node->zones[i]; 697 zone->zone_info.zone_id = bdev_node->bdev.zone_size * i; 698 zone->zone_info.capacity = bdev_node->zone_capacity; 699 zone->zone_info.write_pointer = zone->zone_info.zone_id + zone->zone_info.capacity; 700 zone->zone_info.state = SPDK_BDEV_ZONE_STATE_FULL; 701 if (pthread_spin_init(&zone->lock, PTHREAD_PROCESS_PRIVATE)) { 702 SPDK_ERRLOG("pthread_spin_init() failed\n"); 703 rc = -ENOMEM; 704 break; 705 } 706 } 707 708 if (rc) { 709 for (; i > 0; i--) { 710 pthread_spin_destroy(&bdev_node->zones[i - 1].lock); 711 } 712 } 713 714 return rc; 715 } 716 717 static int 718 zone_block_register(const char *base_bdev_name) 719 { 720 struct spdk_bdev_desc *base_desc; 721 struct spdk_bdev *base_bdev; 722 struct bdev_zone_block_config *name, *tmp; 723 struct bdev_zone_block *bdev_node; 724 uint64_t zone_size; 725 int rc = 0; 726 727 /* Check our list of names from config versus this bdev and if 728 * there's a match, create the bdev_node & bdev accordingly. 729 */ 730 TAILQ_FOREACH_SAFE(name, &g_bdev_configs, link, tmp) { 731 if (strcmp(name->bdev_name, base_bdev_name) != 0) { 732 continue; 733 } 734 735 rc = spdk_bdev_open_ext(base_bdev_name, true, zone_block_base_bdev_event_cb, 736 NULL, &base_desc); 737 if (rc == -ENODEV) { 738 return -ENODEV; 739 } else if (rc) { 740 SPDK_ERRLOG("could not open bdev %s\n", base_bdev_name); 741 goto free_config; 742 } 743 744 base_bdev = spdk_bdev_desc_get_bdev(base_desc); 745 746 if (spdk_bdev_is_zoned(base_bdev)) { 747 SPDK_ERRLOG("Base bdev %s is already a zoned bdev\n", base_bdev_name); 748 rc = -EEXIST; 749 goto zone_exist; 750 } 751 752 bdev_node = calloc(1, sizeof(struct bdev_zone_block)); 753 if (!bdev_node) { 754 rc = -ENOMEM; 755 SPDK_ERRLOG("could not allocate bdev_node\n"); 756 goto zone_exist; 757 } 758 759 bdev_node->base_desc = base_desc; 760 761 /* The base bdev that we're attaching to. */ 762 bdev_node->bdev.name = strdup(name->vbdev_name); 763 if (!bdev_node->bdev.name) { 764 rc = -ENOMEM; 765 SPDK_ERRLOG("could not allocate bdev_node name\n"); 766 goto strdup_failed; 767 } 768 769 zone_size = spdk_align64pow2(name->zone_capacity); 770 if (zone_size == 0) { 771 rc = -EINVAL; 772 SPDK_ERRLOG("invalid zone size\n"); 773 goto roundup_failed; 774 } 775 776 bdev_node->zone_shift = spdk_u64log2(zone_size); 777 bdev_node->num_zones = base_bdev->blockcnt / zone_size; 778 779 /* Align num_zones to optimal_open_zones */ 780 bdev_node->num_zones -= bdev_node->num_zones % name->optimal_open_zones; 781 bdev_node->zones = calloc(bdev_node->num_zones, sizeof(struct block_zone)); 782 if (!bdev_node->zones) { 783 rc = -ENOMEM; 784 SPDK_ERRLOG("could not allocate zones\n"); 785 goto calloc_failed; 786 } 787 788 bdev_node->bdev.product_name = "zone_block"; 789 790 /* Copy some properties from the underlying base bdev. */ 791 bdev_node->bdev.write_cache = base_bdev->write_cache; 792 bdev_node->bdev.required_alignment = base_bdev->required_alignment; 793 bdev_node->bdev.optimal_io_boundary = base_bdev->optimal_io_boundary; 794 795 bdev_node->bdev.blocklen = base_bdev->blocklen; 796 bdev_node->bdev.blockcnt = bdev_node->num_zones * zone_size; 797 798 if (bdev_node->num_zones * name->zone_capacity != base_bdev->blockcnt) { 799 SPDK_DEBUGLOG(vbdev_zone_block, 800 "Lost %" PRIu64 " blocks due to zone capacity and base bdev size misalignment\n", 801 base_bdev->blockcnt - bdev_node->num_zones * name->zone_capacity); 802 } 803 804 bdev_node->bdev.write_unit_size = base_bdev->write_unit_size; 805 806 bdev_node->bdev.md_interleave = base_bdev->md_interleave; 807 bdev_node->bdev.md_len = base_bdev->md_len; 808 bdev_node->bdev.dif_type = base_bdev->dif_type; 809 bdev_node->bdev.dif_is_head_of_md = base_bdev->dif_is_head_of_md; 810 bdev_node->bdev.dif_check_flags = base_bdev->dif_check_flags; 811 812 bdev_node->bdev.zoned = true; 813 bdev_node->bdev.ctxt = bdev_node; 814 bdev_node->bdev.fn_table = &zone_block_fn_table; 815 bdev_node->bdev.module = &bdev_zoned_if; 816 817 /* bdev specific info */ 818 bdev_node->bdev.zone_size = zone_size; 819 820 bdev_node->zone_capacity = name->zone_capacity; 821 bdev_node->bdev.optimal_open_zones = name->optimal_open_zones; 822 bdev_node->bdev.max_open_zones = 0; 823 rc = zone_block_init_zone_info(bdev_node); 824 if (rc) { 825 SPDK_ERRLOG("could not init zone info\n"); 826 goto zone_info_failed; 827 } 828 829 TAILQ_INSERT_TAIL(&g_bdev_nodes, bdev_node, link); 830 831 spdk_io_device_register(bdev_node, _zone_block_ch_create_cb, _zone_block_ch_destroy_cb, 832 sizeof(struct zone_block_io_channel), 833 name->vbdev_name); 834 835 /* Save the thread where the base device is opened */ 836 bdev_node->thread = spdk_get_thread(); 837 838 rc = spdk_bdev_module_claim_bdev(base_bdev, base_desc, bdev_node->bdev.module); 839 if (rc) { 840 SPDK_ERRLOG("could not claim bdev %s\n", base_bdev_name); 841 goto claim_failed; 842 } 843 844 rc = spdk_bdev_register(&bdev_node->bdev); 845 if (rc) { 846 SPDK_ERRLOG("could not register zoned bdev\n"); 847 goto register_failed; 848 } 849 } 850 851 return rc; 852 853 register_failed: 854 spdk_bdev_module_release_bdev(&bdev_node->bdev); 855 claim_failed: 856 TAILQ_REMOVE(&g_bdev_nodes, bdev_node, link); 857 spdk_io_device_unregister(bdev_node, NULL); 858 zone_info_failed: 859 free(bdev_node->zones); 860 calloc_failed: 861 roundup_failed: 862 free(bdev_node->bdev.name); 863 strdup_failed: 864 free(bdev_node); 865 zone_exist: 866 spdk_bdev_close(base_desc); 867 free_config: 868 zone_block_remove_config(name); 869 return rc; 870 } 871 872 int 873 vbdev_zone_block_create(const char *bdev_name, const char *vbdev_name, uint64_t zone_capacity, 874 uint64_t optimal_open_zones) 875 { 876 int rc = 0; 877 878 if (zone_capacity == 0) { 879 SPDK_ERRLOG("Zone capacity can't be 0\n"); 880 return -EINVAL; 881 } 882 883 if (optimal_open_zones == 0) { 884 SPDK_ERRLOG("Optimal open zones can't be 0\n"); 885 return -EINVAL; 886 } 887 888 /* Insert the bdev into our global name list even if it doesn't exist yet, 889 * it may show up soon... 890 */ 891 rc = zone_block_insert_name(bdev_name, vbdev_name, zone_capacity, optimal_open_zones); 892 if (rc) { 893 return rc; 894 } 895 896 rc = zone_block_register(bdev_name); 897 if (rc == -ENODEV) { 898 /* This is not an error, even though the bdev is not present at this time it may 899 * still show up later. 900 */ 901 rc = 0; 902 } 903 return rc; 904 } 905 906 void 907 vbdev_zone_block_delete(const char *name, spdk_bdev_unregister_cb cb_fn, void *cb_arg) 908 { 909 struct bdev_zone_block_config *name_node; 910 struct spdk_bdev *bdev = NULL; 911 912 bdev = spdk_bdev_get_by_name(name); 913 if (!bdev || bdev->module != &bdev_zoned_if) { 914 cb_fn(cb_arg, -ENODEV); 915 return; 916 } 917 918 TAILQ_FOREACH(name_node, &g_bdev_configs, link) { 919 if (strcmp(name_node->vbdev_name, bdev->name) == 0) { 920 zone_block_remove_config(name_node); 921 break; 922 } 923 } 924 925 spdk_bdev_unregister(bdev, cb_fn, cb_arg); 926 } 927 928 static void 929 zone_block_examine(struct spdk_bdev *bdev) 930 { 931 zone_block_register(bdev->name); 932 933 spdk_bdev_module_examine_done(&bdev_zoned_if); 934 } 935 936 SPDK_LOG_REGISTER_COMPONENT(vbdev_zone_block) 937