1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include "spdk/stdinc.h" 35 36 #include "vbdev_zone_block.h" 37 38 #include "spdk/config.h" 39 #include "spdk/nvme.h" 40 #include "spdk/bdev_zone.h" 41 42 #include "spdk/log.h" 43 44 static int zone_block_init(void); 45 static int zone_block_get_ctx_size(void); 46 static void zone_block_finish(void); 47 static int zone_block_config_json(struct spdk_json_write_ctx *w); 48 static void zone_block_examine(struct spdk_bdev *bdev); 49 50 static struct spdk_bdev_module bdev_zoned_if = { 51 .name = "bdev_zoned_block", 52 .module_init = zone_block_init, 53 .module_fini = zone_block_finish, 54 .config_text = NULL, 55 .config_json = zone_block_config_json, 56 .examine_config = zone_block_examine, 57 .get_ctx_size = zone_block_get_ctx_size, 58 }; 59 60 SPDK_BDEV_MODULE_REGISTER(bdev_zoned_block, &bdev_zoned_if) 61 62 /* List of block vbdev names and their base bdevs via configuration file. 63 * Used so we can parse the conf once at init and use this list in examine(). 64 */ 65 struct bdev_zone_block_config { 66 char *vbdev_name; 67 char *bdev_name; 68 uint64_t zone_capacity; 69 uint64_t optimal_open_zones; 70 TAILQ_ENTRY(bdev_zone_block_config) link; 71 }; 72 static TAILQ_HEAD(, bdev_zone_block_config) g_bdev_configs = TAILQ_HEAD_INITIALIZER(g_bdev_configs); 73 74 struct block_zone { 75 struct spdk_bdev_zone_info zone_info; 76 pthread_spinlock_t lock; 77 }; 78 79 /* List of block vbdevs and associated info for each. */ 80 struct bdev_zone_block { 81 struct spdk_bdev bdev; /* the block zoned bdev */ 82 struct spdk_bdev_desc *base_desc; /* its descriptor we get from open */ 83 struct block_zone *zones; /* array of zones */ 84 uint64_t num_zones; /* number of zones */ 85 uint64_t zone_capacity; /* zone capacity */ 86 uint64_t zone_shift; /* log2 of zone_size */ 87 TAILQ_ENTRY(bdev_zone_block) link; 88 struct spdk_thread *thread; /* thread where base device is opened */ 89 }; 90 static TAILQ_HEAD(, bdev_zone_block) g_bdev_nodes = TAILQ_HEAD_INITIALIZER(g_bdev_nodes); 91 92 struct zone_block_io_channel { 93 struct spdk_io_channel *base_ch; /* IO channel of base device */ 94 }; 95 96 struct zone_block_io { 97 /* vbdev to which IO was issued */ 98 struct bdev_zone_block *bdev_zone_block; 99 }; 100 101 static int 102 zone_block_init(void) 103 { 104 return 0; 105 } 106 107 static void 108 zone_block_remove_config(struct bdev_zone_block_config *name) 109 { 110 TAILQ_REMOVE(&g_bdev_configs, name, link); 111 free(name->bdev_name); 112 free(name->vbdev_name); 113 free(name); 114 } 115 116 static void 117 zone_block_finish(void) 118 { 119 struct bdev_zone_block_config *name; 120 121 while ((name = TAILQ_FIRST(&g_bdev_configs))) { 122 zone_block_remove_config(name); 123 } 124 } 125 126 static int 127 zone_block_get_ctx_size(void) 128 { 129 return sizeof(struct zone_block_io); 130 } 131 132 static int 133 zone_block_config_json(struct spdk_json_write_ctx *w) 134 { 135 struct bdev_zone_block *bdev_node; 136 struct spdk_bdev *base_bdev = NULL; 137 138 TAILQ_FOREACH(bdev_node, &g_bdev_nodes, link) { 139 base_bdev = spdk_bdev_desc_get_bdev(bdev_node->base_desc); 140 spdk_json_write_object_begin(w); 141 spdk_json_write_named_string(w, "method", "bdev_zone_block_create"); 142 spdk_json_write_named_object_begin(w, "params"); 143 spdk_json_write_named_string(w, "base_bdev", spdk_bdev_get_name(base_bdev)); 144 spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&bdev_node->bdev)); 145 spdk_json_write_named_uint64(w, "zone_capacity", bdev_node->zone_capacity); 146 spdk_json_write_named_uint64(w, "optimal_open_zones", bdev_node->bdev.optimal_open_zones); 147 spdk_json_write_object_end(w); 148 spdk_json_write_object_end(w); 149 } 150 151 return 0; 152 } 153 154 /* Callback for unregistering the IO device. */ 155 static void 156 _device_unregister_cb(void *io_device) 157 { 158 struct bdev_zone_block *bdev_node = io_device; 159 uint64_t i; 160 161 free(bdev_node->bdev.name); 162 for (i = 0; i < bdev_node->num_zones; i++) { 163 pthread_spin_destroy(&bdev_node->zones[i].lock); 164 } 165 free(bdev_node->zones); 166 free(bdev_node); 167 } 168 169 static void 170 _zone_block_destruct(void *ctx) 171 { 172 struct spdk_bdev_desc *desc = ctx; 173 174 spdk_bdev_close(desc); 175 } 176 177 static int 178 zone_block_destruct(void *ctx) 179 { 180 struct bdev_zone_block *bdev_node = (struct bdev_zone_block *)ctx; 181 182 TAILQ_REMOVE(&g_bdev_nodes, bdev_node, link); 183 184 /* Unclaim the underlying bdev. */ 185 spdk_bdev_module_release_bdev(spdk_bdev_desc_get_bdev(bdev_node->base_desc)); 186 187 /* Close the underlying bdev on its same opened thread. */ 188 if (bdev_node->thread && bdev_node->thread != spdk_get_thread()) { 189 spdk_thread_send_msg(bdev_node->thread, _zone_block_destruct, bdev_node->base_desc); 190 } else { 191 spdk_bdev_close(bdev_node->base_desc); 192 } 193 194 /* Unregister the io_device. */ 195 spdk_io_device_unregister(bdev_node, _device_unregister_cb); 196 197 return 0; 198 } 199 200 static struct block_zone * 201 zone_block_get_zone_containing_lba(struct bdev_zone_block *bdev_node, uint64_t lba) 202 { 203 size_t index = lba >> bdev_node->zone_shift; 204 205 if (index >= bdev_node->num_zones) { 206 return NULL; 207 } 208 209 return &bdev_node->zones[index]; 210 } 211 212 static struct block_zone * 213 zone_block_get_zone_by_slba(struct bdev_zone_block *bdev_node, uint64_t start_lba) 214 { 215 struct block_zone *zone = zone_block_get_zone_containing_lba(bdev_node, start_lba); 216 217 if (zone && zone->zone_info.zone_id == start_lba) { 218 return zone; 219 } else { 220 return NULL; 221 } 222 } 223 224 static int 225 zone_block_get_zone_info(struct bdev_zone_block *bdev_node, struct spdk_bdev_io *bdev_io) 226 { 227 struct block_zone *zone; 228 struct spdk_bdev_zone_info *zone_info = bdev_io->u.zone_mgmt.buf; 229 uint64_t zone_id = bdev_io->u.zone_mgmt.zone_id; 230 size_t i; 231 232 /* User can request info for more zones than exist, need to check both internal and user 233 * boundaries 234 */ 235 for (i = 0; i < bdev_io->u.zone_mgmt.num_zones; i++, zone_id += bdev_node->bdev.zone_size) { 236 zone = zone_block_get_zone_by_slba(bdev_node, zone_id); 237 if (!zone) { 238 return -EINVAL; 239 } 240 memcpy(&zone_info[i], &zone->zone_info, sizeof(*zone_info)); 241 } 242 243 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 244 return 0; 245 } 246 247 static int 248 zone_block_open_zone(struct block_zone *zone, struct spdk_bdev_io *bdev_io) 249 { 250 pthread_spin_lock(&zone->lock); 251 252 switch (zone->zone_info.state) { 253 case SPDK_BDEV_ZONE_STATE_EMPTY: 254 case SPDK_BDEV_ZONE_STATE_OPEN: 255 case SPDK_BDEV_ZONE_STATE_CLOSED: 256 zone->zone_info.state = SPDK_BDEV_ZONE_STATE_OPEN; 257 pthread_spin_unlock(&zone->lock); 258 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 259 return 0; 260 default: 261 pthread_spin_unlock(&zone->lock); 262 return -EINVAL; 263 } 264 } 265 266 static void 267 _zone_block_complete_unmap(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 268 { 269 struct spdk_bdev_io *orig_io = cb_arg; 270 int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED; 271 272 /* Complete the original IO and then free the one that we created here 273 * as a result of issuing an IO via submit_reqeust. 274 */ 275 spdk_bdev_io_complete(orig_io, status); 276 spdk_bdev_free_io(bdev_io); 277 } 278 279 static int 280 zone_block_reset_zone(struct bdev_zone_block *bdev_node, struct zone_block_io_channel *ch, 281 struct block_zone *zone, struct spdk_bdev_io *bdev_io) 282 { 283 pthread_spin_lock(&zone->lock); 284 285 switch (zone->zone_info.state) { 286 case SPDK_BDEV_ZONE_STATE_EMPTY: 287 pthread_spin_unlock(&zone->lock); 288 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 289 return 0; 290 case SPDK_BDEV_ZONE_STATE_OPEN: 291 case SPDK_BDEV_ZONE_STATE_FULL: 292 case SPDK_BDEV_ZONE_STATE_CLOSED: 293 zone->zone_info.state = SPDK_BDEV_ZONE_STATE_EMPTY; 294 zone->zone_info.write_pointer = zone->zone_info.zone_id; 295 pthread_spin_unlock(&zone->lock); 296 return spdk_bdev_unmap_blocks(bdev_node->base_desc, ch->base_ch, 297 zone->zone_info.zone_id, zone->zone_info.capacity, 298 _zone_block_complete_unmap, bdev_io); 299 default: 300 pthread_spin_unlock(&zone->lock); 301 return -EINVAL; 302 } 303 } 304 305 static int 306 zone_block_close_zone(struct block_zone *zone, struct spdk_bdev_io *bdev_io) 307 { 308 pthread_spin_lock(&zone->lock); 309 310 switch (zone->zone_info.state) { 311 case SPDK_BDEV_ZONE_STATE_OPEN: 312 case SPDK_BDEV_ZONE_STATE_CLOSED: 313 zone->zone_info.state = SPDK_BDEV_ZONE_STATE_CLOSED; 314 pthread_spin_unlock(&zone->lock); 315 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 316 return 0; 317 default: 318 pthread_spin_unlock(&zone->lock); 319 return -EINVAL; 320 } 321 } 322 323 static int 324 zone_block_finish_zone(struct block_zone *zone, struct spdk_bdev_io *bdev_io) 325 { 326 pthread_spin_lock(&zone->lock); 327 328 zone->zone_info.write_pointer = zone->zone_info.zone_id + zone->zone_info.capacity; 329 zone->zone_info.state = SPDK_BDEV_ZONE_STATE_FULL; 330 331 pthread_spin_unlock(&zone->lock); 332 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 333 return 0; 334 } 335 336 static int 337 zone_block_zone_management(struct bdev_zone_block *bdev_node, struct zone_block_io_channel *ch, 338 struct spdk_bdev_io *bdev_io) 339 { 340 struct block_zone *zone; 341 342 zone = zone_block_get_zone_by_slba(bdev_node, bdev_io->u.zone_mgmt.zone_id); 343 if (!zone) { 344 return -EINVAL; 345 } 346 347 switch (bdev_io->u.zone_mgmt.zone_action) { 348 case SPDK_BDEV_ZONE_RESET: 349 return zone_block_reset_zone(bdev_node, ch, zone, bdev_io); 350 case SPDK_BDEV_ZONE_OPEN: 351 return zone_block_open_zone(zone, bdev_io); 352 case SPDK_BDEV_ZONE_CLOSE: 353 return zone_block_close_zone(zone, bdev_io); 354 case SPDK_BDEV_ZONE_FINISH: 355 return zone_block_finish_zone(zone, bdev_io); 356 default: 357 return -EINVAL; 358 } 359 } 360 361 static void 362 _zone_block_complete_write(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 363 { 364 struct spdk_bdev_io *orig_io = cb_arg; 365 int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED; 366 367 if (success && orig_io->type == SPDK_BDEV_IO_TYPE_ZONE_APPEND) { 368 orig_io->u.bdev.offset_blocks = bdev_io->u.bdev.offset_blocks; 369 } 370 371 /* Complete the original IO and then free the one that we created here 372 * as a result of issuing an IO via submit_reqeust. 373 */ 374 spdk_bdev_io_complete(orig_io, status); 375 spdk_bdev_free_io(bdev_io); 376 } 377 378 static int 379 zone_block_write(struct bdev_zone_block *bdev_node, struct zone_block_io_channel *ch, 380 struct spdk_bdev_io *bdev_io) 381 { 382 struct block_zone *zone; 383 uint64_t len = bdev_io->u.bdev.num_blocks; 384 uint64_t lba = bdev_io->u.bdev.offset_blocks; 385 uint64_t num_blocks_left, wp; 386 int rc = 0; 387 bool is_append = bdev_io->type == SPDK_BDEV_IO_TYPE_ZONE_APPEND; 388 389 if (is_append) { 390 zone = zone_block_get_zone_by_slba(bdev_node, lba); 391 } else { 392 zone = zone_block_get_zone_containing_lba(bdev_node, lba); 393 } 394 if (!zone) { 395 SPDK_ERRLOG("Trying to write to invalid zone (lba 0x%lx)\n", lba); 396 return -EINVAL; 397 } 398 399 pthread_spin_lock(&zone->lock); 400 401 switch (zone->zone_info.state) { 402 case SPDK_BDEV_ZONE_STATE_OPEN: 403 case SPDK_BDEV_ZONE_STATE_EMPTY: 404 case SPDK_BDEV_ZONE_STATE_CLOSED: 405 zone->zone_info.state = SPDK_BDEV_ZONE_STATE_OPEN; 406 break; 407 default: 408 SPDK_ERRLOG("Trying to write to zone in invalid state %u\n", zone->zone_info.state); 409 rc = -EINVAL; 410 goto write_fail; 411 } 412 413 wp = zone->zone_info.write_pointer; 414 if (is_append) { 415 lba = wp; 416 } else { 417 if (lba != wp) { 418 SPDK_ERRLOG("Trying to write to zone with invalid address (lba 0x%lx, wp 0x%lx)\n", lba, wp); 419 rc = -EINVAL; 420 goto write_fail; 421 } 422 } 423 424 num_blocks_left = zone->zone_info.zone_id + zone->zone_info.capacity - wp; 425 if (len > num_blocks_left) { 426 SPDK_ERRLOG("Write exceeds zone capacity (lba 0x%" PRIu64 ", len 0x%lx, wp 0x%lx)\n", lba, len, wp); 427 rc = -EINVAL; 428 goto write_fail; 429 } 430 431 zone->zone_info.write_pointer += bdev_io->u.bdev.num_blocks; 432 assert(zone->zone_info.write_pointer <= zone->zone_info.zone_id + zone->zone_info.capacity); 433 if (zone->zone_info.write_pointer == zone->zone_info.zone_id + zone->zone_info.capacity) { 434 zone->zone_info.state = SPDK_BDEV_ZONE_STATE_FULL; 435 } 436 pthread_spin_unlock(&zone->lock); 437 438 if (bdev_io->u.bdev.md_buf == NULL) { 439 rc = spdk_bdev_writev_blocks(bdev_node->base_desc, ch->base_ch, bdev_io->u.bdev.iovs, 440 bdev_io->u.bdev.iovcnt, lba, 441 bdev_io->u.bdev.num_blocks, _zone_block_complete_write, 442 bdev_io); 443 } else { 444 rc = spdk_bdev_writev_blocks_with_md(bdev_node->base_desc, ch->base_ch, 445 bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, 446 bdev_io->u.bdev.md_buf, 447 lba, bdev_io->u.bdev.num_blocks, 448 _zone_block_complete_write, bdev_io); 449 } 450 451 return rc; 452 453 write_fail: 454 pthread_spin_unlock(&zone->lock); 455 return rc; 456 } 457 458 static void 459 _zone_block_complete_read(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 460 { 461 struct spdk_bdev_io *orig_io = cb_arg; 462 int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED; 463 464 /* Complete the original IO and then free the one that we created here 465 * as a result of issuing an IO via submit_reqeust. 466 */ 467 spdk_bdev_io_complete(orig_io, status); 468 spdk_bdev_free_io(bdev_io); 469 } 470 471 static int 472 zone_block_read(struct bdev_zone_block *bdev_node, struct zone_block_io_channel *ch, 473 struct spdk_bdev_io *bdev_io) 474 { 475 struct block_zone *zone; 476 uint64_t len = bdev_io->u.bdev.num_blocks; 477 uint64_t lba = bdev_io->u.bdev.offset_blocks; 478 int rc; 479 480 zone = zone_block_get_zone_containing_lba(bdev_node, lba); 481 if (!zone) { 482 SPDK_ERRLOG("Trying to read from invalid zone (lba 0x%lx)\n", lba); 483 return -EINVAL; 484 } 485 486 if ((lba + len) > (zone->zone_info.zone_id + zone->zone_info.capacity)) { 487 SPDK_ERRLOG("Read exceeds zone capacity (lba 0x%lx, len 0x%lx)\n", lba, len); 488 return -EINVAL; 489 } 490 491 if (bdev_io->u.bdev.md_buf == NULL) { 492 rc = spdk_bdev_readv_blocks(bdev_node->base_desc, ch->base_ch, bdev_io->u.bdev.iovs, 493 bdev_io->u.bdev.iovcnt, lba, 494 len, _zone_block_complete_read, 495 bdev_io); 496 } else { 497 rc = spdk_bdev_readv_blocks_with_md(bdev_node->base_desc, ch->base_ch, 498 bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, 499 bdev_io->u.bdev.md_buf, 500 lba, len, 501 _zone_block_complete_read, bdev_io); 502 } 503 504 return rc; 505 } 506 507 static void 508 zone_block_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 509 { 510 struct bdev_zone_block *bdev_node = SPDK_CONTAINEROF(bdev_io->bdev, struct bdev_zone_block, bdev); 511 struct zone_block_io_channel *dev_ch = spdk_io_channel_get_ctx(ch); 512 int rc = 0; 513 514 switch (bdev_io->type) { 515 case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO: 516 rc = zone_block_get_zone_info(bdev_node, bdev_io); 517 break; 518 case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT: 519 rc = zone_block_zone_management(bdev_node, dev_ch, bdev_io); 520 break; 521 case SPDK_BDEV_IO_TYPE_WRITE: 522 case SPDK_BDEV_IO_TYPE_ZONE_APPEND: 523 rc = zone_block_write(bdev_node, dev_ch, bdev_io); 524 break; 525 case SPDK_BDEV_IO_TYPE_READ: 526 rc = zone_block_read(bdev_node, dev_ch, bdev_io); 527 break; 528 default: 529 SPDK_ERRLOG("vbdev_block: unknown I/O type %u\n", bdev_io->type); 530 rc = -ENOTSUP; 531 break; 532 } 533 534 if (rc != 0) { 535 if (rc == -ENOMEM) { 536 SPDK_WARNLOG("ENOMEM, start to queue io for vbdev.\n"); 537 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM); 538 } else { 539 SPDK_ERRLOG("ERROR on bdev_io submission!\n"); 540 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 541 } 542 } 543 } 544 545 static bool 546 zone_block_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 547 { 548 switch (io_type) { 549 case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT: 550 case SPDK_BDEV_IO_TYPE_WRITE: 551 case SPDK_BDEV_IO_TYPE_READ: 552 case SPDK_BDEV_IO_TYPE_ZONE_APPEND: 553 return true; 554 default: 555 return false; 556 } 557 } 558 559 static struct spdk_io_channel * 560 zone_block_get_io_channel(void *ctx) 561 { 562 struct bdev_zone_block *bdev_node = (struct bdev_zone_block *)ctx; 563 564 return spdk_get_io_channel(bdev_node); 565 } 566 567 static int 568 zone_block_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) 569 { 570 struct bdev_zone_block *bdev_node = (struct bdev_zone_block *)ctx; 571 struct spdk_bdev *base_bdev = spdk_bdev_desc_get_bdev(bdev_node->base_desc); 572 573 spdk_json_write_name(w, "zoned_block"); 574 spdk_json_write_object_begin(w); 575 spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&bdev_node->bdev)); 576 spdk_json_write_named_string(w, "base_bdev", spdk_bdev_get_name(base_bdev)); 577 spdk_json_write_named_uint64(w, "zone_capacity", bdev_node->zone_capacity); 578 spdk_json_write_named_uint64(w, "optimal_open_zones", bdev_node->bdev.optimal_open_zones); 579 spdk_json_write_object_end(w); 580 581 return 0; 582 } 583 584 /* When we register our vbdev this is how we specify our entry points. */ 585 static const struct spdk_bdev_fn_table zone_block_fn_table = { 586 .destruct = zone_block_destruct, 587 .submit_request = zone_block_submit_request, 588 .io_type_supported = zone_block_io_type_supported, 589 .get_io_channel = zone_block_get_io_channel, 590 .dump_info_json = zone_block_dump_info_json, 591 }; 592 593 static void 594 zone_block_base_bdev_hotremove_cb(struct spdk_bdev *bdev_find) 595 { 596 struct bdev_zone_block *bdev_node, *tmp; 597 598 TAILQ_FOREACH_SAFE(bdev_node, &g_bdev_nodes, link, tmp) { 599 if (bdev_find == spdk_bdev_desc_get_bdev(bdev_node->base_desc)) { 600 spdk_bdev_unregister(&bdev_node->bdev, NULL, NULL); 601 } 602 } 603 } 604 605 static void 606 zone_block_base_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, 607 void *event_ctx) 608 { 609 switch (type) { 610 case SPDK_BDEV_EVENT_REMOVE: 611 zone_block_base_bdev_hotremove_cb(bdev); 612 break; 613 default: 614 SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type); 615 break; 616 } 617 } 618 619 static int 620 _zone_block_ch_create_cb(void *io_device, void *ctx_buf) 621 { 622 struct zone_block_io_channel *bdev_ch = ctx_buf; 623 struct bdev_zone_block *bdev_node = io_device; 624 625 bdev_ch->base_ch = spdk_bdev_get_io_channel(bdev_node->base_desc); 626 if (!bdev_ch->base_ch) { 627 return -ENOMEM; 628 } 629 630 return 0; 631 } 632 633 static void 634 _zone_block_ch_destroy_cb(void *io_device, void *ctx_buf) 635 { 636 struct zone_block_io_channel *bdev_ch = ctx_buf; 637 638 spdk_put_io_channel(bdev_ch->base_ch); 639 } 640 641 static int 642 zone_block_insert_name(const char *bdev_name, const char *vbdev_name, uint64_t zone_capacity, 643 uint64_t optimal_open_zones) 644 { 645 struct bdev_zone_block_config *name; 646 647 TAILQ_FOREACH(name, &g_bdev_configs, link) { 648 if (strcmp(vbdev_name, name->vbdev_name) == 0) { 649 SPDK_ERRLOG("block zoned bdev %s already exists\n", vbdev_name); 650 return -EEXIST; 651 } 652 if (strcmp(bdev_name, name->bdev_name) == 0) { 653 SPDK_ERRLOG("base bdev %s already claimed\n", bdev_name); 654 return -EEXIST; 655 } 656 } 657 658 name = calloc(1, sizeof(*name)); 659 if (!name) { 660 SPDK_ERRLOG("could not allocate bdev_names\n"); 661 return -ENOMEM; 662 } 663 664 name->bdev_name = strdup(bdev_name); 665 if (!name->bdev_name) { 666 SPDK_ERRLOG("could not allocate name->bdev_name\n"); 667 free(name); 668 return -ENOMEM; 669 } 670 671 name->vbdev_name = strdup(vbdev_name); 672 if (!name->vbdev_name) { 673 SPDK_ERRLOG("could not allocate name->vbdev_name\n"); 674 free(name->bdev_name); 675 free(name); 676 return -ENOMEM; 677 } 678 679 name->zone_capacity = zone_capacity; 680 name->optimal_open_zones = optimal_open_zones; 681 682 TAILQ_INSERT_TAIL(&g_bdev_configs, name, link); 683 684 return 0; 685 } 686 687 static int 688 zone_block_init_zone_info(struct bdev_zone_block *bdev_node) 689 { 690 size_t i; 691 struct block_zone *zone; 692 int rc = 0; 693 694 for (i = 0; i < bdev_node->num_zones; i++) { 695 zone = &bdev_node->zones[i]; 696 zone->zone_info.zone_id = bdev_node->bdev.zone_size * i; 697 zone->zone_info.capacity = bdev_node->zone_capacity; 698 zone->zone_info.write_pointer = zone->zone_info.zone_id + zone->zone_info.capacity; 699 zone->zone_info.state = SPDK_BDEV_ZONE_STATE_FULL; 700 if (pthread_spin_init(&zone->lock, PTHREAD_PROCESS_PRIVATE)) { 701 SPDK_ERRLOG("pthread_spin_init() failed\n"); 702 rc = -ENOMEM; 703 break; 704 } 705 } 706 707 if (rc) { 708 for (; i > 0; i--) { 709 pthread_spin_destroy(&bdev_node->zones[i - 1].lock); 710 } 711 } 712 713 return rc; 714 } 715 716 static int 717 zone_block_register(const char *base_bdev_name) 718 { 719 struct spdk_bdev_desc *base_desc; 720 struct spdk_bdev *base_bdev; 721 struct bdev_zone_block_config *name, *tmp; 722 struct bdev_zone_block *bdev_node; 723 uint64_t zone_size; 724 int rc = 0; 725 726 /* Check our list of names from config versus this bdev and if 727 * there's a match, create the bdev_node & bdev accordingly. 728 */ 729 TAILQ_FOREACH_SAFE(name, &g_bdev_configs, link, tmp) { 730 if (strcmp(name->bdev_name, base_bdev_name) != 0) { 731 continue; 732 } 733 734 rc = spdk_bdev_open_ext(base_bdev_name, true, zone_block_base_bdev_event_cb, 735 NULL, &base_desc); 736 if (rc == -ENODEV) { 737 return -ENODEV; 738 } else if (rc) { 739 SPDK_ERRLOG("could not open bdev %s\n", base_bdev_name); 740 goto free_config; 741 } 742 743 base_bdev = spdk_bdev_desc_get_bdev(base_desc); 744 745 if (spdk_bdev_is_zoned(base_bdev)) { 746 SPDK_ERRLOG("Base bdev %s is already a zoned bdev\n", base_bdev_name); 747 rc = -EEXIST; 748 goto zone_exist; 749 } 750 751 bdev_node = calloc(1, sizeof(struct bdev_zone_block)); 752 if (!bdev_node) { 753 rc = -ENOMEM; 754 SPDK_ERRLOG("could not allocate bdev_node\n"); 755 goto zone_exist; 756 } 757 758 bdev_node->base_desc = base_desc; 759 760 /* The base bdev that we're attaching to. */ 761 bdev_node->bdev.name = strdup(name->vbdev_name); 762 if (!bdev_node->bdev.name) { 763 rc = -ENOMEM; 764 SPDK_ERRLOG("could not allocate bdev_node name\n"); 765 goto strdup_failed; 766 } 767 768 zone_size = spdk_align64pow2(name->zone_capacity); 769 if (zone_size == 0) { 770 rc = -EINVAL; 771 SPDK_ERRLOG("invalid zone size\n"); 772 goto roundup_failed; 773 } 774 775 bdev_node->zone_shift = spdk_u64log2(zone_size); 776 bdev_node->num_zones = base_bdev->blockcnt / zone_size; 777 778 /* Align num_zones to optimal_open_zones */ 779 bdev_node->num_zones -= bdev_node->num_zones % name->optimal_open_zones; 780 bdev_node->zones = calloc(bdev_node->num_zones, sizeof(struct block_zone)); 781 if (!bdev_node->zones) { 782 rc = -ENOMEM; 783 SPDK_ERRLOG("could not allocate zones\n"); 784 goto calloc_failed; 785 } 786 787 bdev_node->bdev.product_name = "zone_block"; 788 789 /* Copy some properties from the underlying base bdev. */ 790 bdev_node->bdev.write_cache = base_bdev->write_cache; 791 bdev_node->bdev.required_alignment = base_bdev->required_alignment; 792 bdev_node->bdev.optimal_io_boundary = base_bdev->optimal_io_boundary; 793 794 bdev_node->bdev.blocklen = base_bdev->blocklen; 795 bdev_node->bdev.blockcnt = bdev_node->num_zones * zone_size; 796 797 if (bdev_node->num_zones * name->zone_capacity != base_bdev->blockcnt) { 798 SPDK_DEBUGLOG(vbdev_zone_block, 799 "Lost %lu blocks due to zone capacity and base bdev size misalignment\n", 800 base_bdev->blockcnt - bdev_node->num_zones * name->zone_capacity); 801 } 802 803 bdev_node->bdev.write_unit_size = base_bdev->write_unit_size; 804 805 bdev_node->bdev.md_interleave = base_bdev->md_interleave; 806 bdev_node->bdev.md_len = base_bdev->md_len; 807 bdev_node->bdev.dif_type = base_bdev->dif_type; 808 bdev_node->bdev.dif_is_head_of_md = base_bdev->dif_is_head_of_md; 809 bdev_node->bdev.dif_check_flags = base_bdev->dif_check_flags; 810 811 bdev_node->bdev.zoned = true; 812 bdev_node->bdev.ctxt = bdev_node; 813 bdev_node->bdev.fn_table = &zone_block_fn_table; 814 bdev_node->bdev.module = &bdev_zoned_if; 815 816 /* bdev specific info */ 817 bdev_node->bdev.zone_size = zone_size; 818 819 bdev_node->zone_capacity = name->zone_capacity; 820 bdev_node->bdev.optimal_open_zones = name->optimal_open_zones; 821 bdev_node->bdev.max_open_zones = 0; 822 rc = zone_block_init_zone_info(bdev_node); 823 if (rc) { 824 SPDK_ERRLOG("could not init zone info\n"); 825 goto zone_info_failed; 826 } 827 828 TAILQ_INSERT_TAIL(&g_bdev_nodes, bdev_node, link); 829 830 spdk_io_device_register(bdev_node, _zone_block_ch_create_cb, _zone_block_ch_destroy_cb, 831 sizeof(struct zone_block_io_channel), 832 name->vbdev_name); 833 834 /* Save the thread where the base device is opened */ 835 bdev_node->thread = spdk_get_thread(); 836 837 rc = spdk_bdev_module_claim_bdev(base_bdev, base_desc, bdev_node->bdev.module); 838 if (rc) { 839 SPDK_ERRLOG("could not claim bdev %s\n", base_bdev_name); 840 goto claim_failed; 841 } 842 843 rc = spdk_bdev_register(&bdev_node->bdev); 844 if (rc) { 845 SPDK_ERRLOG("could not register zoned bdev\n"); 846 goto register_failed; 847 } 848 } 849 850 return rc; 851 852 register_failed: 853 spdk_bdev_module_release_bdev(&bdev_node->bdev); 854 claim_failed: 855 TAILQ_REMOVE(&g_bdev_nodes, bdev_node, link); 856 spdk_io_device_unregister(bdev_node, NULL); 857 zone_info_failed: 858 free(bdev_node->zones); 859 calloc_failed: 860 roundup_failed: 861 free(bdev_node->bdev.name); 862 strdup_failed: 863 free(bdev_node); 864 zone_exist: 865 spdk_bdev_close(base_desc); 866 free_config: 867 zone_block_remove_config(name); 868 return rc; 869 } 870 871 int 872 vbdev_zone_block_create(const char *bdev_name, const char *vbdev_name, uint64_t zone_capacity, 873 uint64_t optimal_open_zones) 874 { 875 int rc = 0; 876 877 if (zone_capacity == 0) { 878 SPDK_ERRLOG("Zone capacity can't be 0\n"); 879 return -EINVAL; 880 } 881 882 if (optimal_open_zones == 0) { 883 SPDK_ERRLOG("Optimal open zones can't be 0\n"); 884 return -EINVAL; 885 } 886 887 /* Insert the bdev into our global name list even if it doesn't exist yet, 888 * it may show up soon... 889 */ 890 rc = zone_block_insert_name(bdev_name, vbdev_name, zone_capacity, optimal_open_zones); 891 if (rc) { 892 return rc; 893 } 894 895 rc = zone_block_register(bdev_name); 896 if (rc == -ENODEV) { 897 /* This is not an error, even though the bdev is not present at this time it may 898 * still show up later. 899 */ 900 rc = 0; 901 } 902 return rc; 903 } 904 905 void 906 vbdev_zone_block_delete(const char *name, spdk_bdev_unregister_cb cb_fn, void *cb_arg) 907 { 908 struct bdev_zone_block_config *name_node; 909 struct spdk_bdev *bdev = NULL; 910 911 bdev = spdk_bdev_get_by_name(name); 912 if (!bdev || bdev->module != &bdev_zoned_if) { 913 cb_fn(cb_arg, -ENODEV); 914 return; 915 } 916 917 TAILQ_FOREACH(name_node, &g_bdev_configs, link) { 918 if (strcmp(name_node->vbdev_name, bdev->name) == 0) { 919 zone_block_remove_config(name_node); 920 break; 921 } 922 } 923 924 spdk_bdev_unregister(bdev, cb_fn, cb_arg); 925 } 926 927 static void 928 zone_block_examine(struct spdk_bdev *bdev) 929 { 930 zone_block_register(bdev->name); 931 932 spdk_bdev_module_examine_done(&bdev_zoned_if); 933 } 934 935 SPDK_LOG_REGISTER_COMPONENT(vbdev_zone_block) 936