1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include "spdk/stdinc.h" 35 36 #include "vbdev_zone_block.h" 37 38 #include "spdk/config.h" 39 #include "spdk/nvme.h" 40 #include "spdk/bdev_zone.h" 41 42 #include "spdk/log.h" 43 44 static int zone_block_init(void); 45 static int zone_block_get_ctx_size(void); 46 static void zone_block_finish(void); 47 static int zone_block_config_json(struct spdk_json_write_ctx *w); 48 static void zone_block_examine(struct spdk_bdev *bdev); 49 50 static struct spdk_bdev_module bdev_zoned_if = { 51 .name = "bdev_zoned_block", 52 .module_init = zone_block_init, 53 .module_fini = zone_block_finish, 54 .config_text = NULL, 55 .config_json = zone_block_config_json, 56 .examine_config = zone_block_examine, 57 .get_ctx_size = zone_block_get_ctx_size, 58 }; 59 60 SPDK_BDEV_MODULE_REGISTER(bdev_zoned_block, &bdev_zoned_if) 61 62 /* List of block vbdev names and their base bdevs via configuration file. 63 * Used so we can parse the conf once at init and use this list in examine(). 64 */ 65 struct bdev_zone_block_config { 66 char *vbdev_name; 67 char *bdev_name; 68 uint64_t zone_capacity; 69 uint64_t optimal_open_zones; 70 TAILQ_ENTRY(bdev_zone_block_config) link; 71 }; 72 static TAILQ_HEAD(, bdev_zone_block_config) g_bdev_configs = TAILQ_HEAD_INITIALIZER(g_bdev_configs); 73 74 struct block_zone { 75 struct spdk_bdev_zone_info zone_info; 76 pthread_spinlock_t lock; 77 }; 78 79 /* List of block vbdevs and associated info for each. */ 80 struct bdev_zone_block { 81 struct spdk_bdev bdev; /* the block zoned bdev */ 82 struct spdk_bdev_desc *base_desc; /* its descriptor we get from open */ 83 struct block_zone *zones; /* array of zones */ 84 uint64_t num_zones; /* number of zones */ 85 uint64_t zone_capacity; /* zone capacity */ 86 uint64_t zone_shift; /* log2 of zone_size */ 87 TAILQ_ENTRY(bdev_zone_block) link; 88 struct spdk_thread *thread; /* thread where base device is opened */ 89 }; 90 static TAILQ_HEAD(, bdev_zone_block) g_bdev_nodes = TAILQ_HEAD_INITIALIZER(g_bdev_nodes); 91 92 struct zone_block_io_channel { 93 struct spdk_io_channel *base_ch; /* IO channel of base device */ 94 }; 95 96 struct zone_block_io { 97 /* vbdev to which IO was issued */ 98 struct bdev_zone_block *bdev_zone_block; 99 }; 100 101 static int 102 zone_block_init(void) 103 { 104 return 0; 105 } 106 107 static void 108 zone_block_remove_config(struct bdev_zone_block_config *name) 109 { 110 TAILQ_REMOVE(&g_bdev_configs, name, link); 111 free(name->bdev_name); 112 free(name->vbdev_name); 113 free(name); 114 } 115 116 static void 117 zone_block_finish(void) 118 { 119 struct bdev_zone_block_config *name; 120 121 while ((name = TAILQ_FIRST(&g_bdev_configs))) { 122 zone_block_remove_config(name); 123 } 124 } 125 126 static int 127 zone_block_get_ctx_size(void) 128 { 129 return sizeof(struct zone_block_io); 130 } 131 132 static int 133 zone_block_config_json(struct spdk_json_write_ctx *w) 134 { 135 struct bdev_zone_block *bdev_node; 136 struct spdk_bdev *base_bdev = NULL; 137 138 TAILQ_FOREACH(bdev_node, &g_bdev_nodes, link) { 139 base_bdev = spdk_bdev_desc_get_bdev(bdev_node->base_desc); 140 spdk_json_write_object_begin(w); 141 spdk_json_write_named_string(w, "method", "bdev_zone_block_create"); 142 spdk_json_write_named_object_begin(w, "params"); 143 spdk_json_write_named_string(w, "base_bdev", spdk_bdev_get_name(base_bdev)); 144 spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&bdev_node->bdev)); 145 spdk_json_write_named_uint64(w, "zone_capacity", bdev_node->zone_capacity); 146 spdk_json_write_named_uint64(w, "optimal_open_zones", bdev_node->bdev.optimal_open_zones); 147 spdk_json_write_object_end(w); 148 spdk_json_write_object_end(w); 149 } 150 151 return 0; 152 } 153 154 /* Callback for unregistering the IO device. */ 155 static void 156 _device_unregister_cb(void *io_device) 157 { 158 struct bdev_zone_block *bdev_node = io_device; 159 uint64_t i; 160 161 free(bdev_node->bdev.name); 162 for (i = 0; i < bdev_node->num_zones; i++) { 163 pthread_spin_destroy(&bdev_node->zones[i].lock); 164 } 165 free(bdev_node->zones); 166 free(bdev_node); 167 } 168 169 static void 170 _zone_block_destruct(void *ctx) 171 { 172 struct spdk_bdev_desc *desc = ctx; 173 174 spdk_bdev_close(desc); 175 } 176 177 static int 178 zone_block_destruct(void *ctx) 179 { 180 struct bdev_zone_block *bdev_node = (struct bdev_zone_block *)ctx; 181 182 TAILQ_REMOVE(&g_bdev_nodes, bdev_node, link); 183 184 /* Unclaim the underlying bdev. */ 185 spdk_bdev_module_release_bdev(spdk_bdev_desc_get_bdev(bdev_node->base_desc)); 186 187 /* Close the underlying bdev on its same opened thread. */ 188 if (bdev_node->thread && bdev_node->thread != spdk_get_thread()) { 189 spdk_thread_send_msg(bdev_node->thread, _zone_block_destruct, bdev_node->base_desc); 190 } else { 191 spdk_bdev_close(bdev_node->base_desc); 192 } 193 194 /* Unregister the io_device. */ 195 spdk_io_device_unregister(bdev_node, _device_unregister_cb); 196 197 return 0; 198 } 199 200 static struct block_zone * 201 zone_block_get_zone_containing_lba(struct bdev_zone_block *bdev_node, uint64_t lba) 202 { 203 size_t index = lba >> bdev_node->zone_shift; 204 205 if (index >= bdev_node->num_zones) { 206 return NULL; 207 } 208 209 return &bdev_node->zones[index]; 210 } 211 212 static struct block_zone * 213 zone_block_get_zone_by_slba(struct bdev_zone_block *bdev_node, uint64_t start_lba) 214 { 215 struct block_zone *zone = zone_block_get_zone_containing_lba(bdev_node, start_lba); 216 217 if (zone && zone->zone_info.zone_id == start_lba) { 218 return zone; 219 } else { 220 return NULL; 221 } 222 } 223 224 static int 225 zone_block_get_zone_info(struct bdev_zone_block *bdev_node, struct spdk_bdev_io *bdev_io) 226 { 227 struct block_zone *zone; 228 struct spdk_bdev_zone_info *zone_info = bdev_io->u.zone_mgmt.buf; 229 uint64_t zone_id = bdev_io->u.zone_mgmt.zone_id; 230 size_t i; 231 232 /* User can request info for more zones than exist, need to check both internal and user 233 * boundaries 234 */ 235 for (i = 0; i < bdev_io->u.zone_mgmt.num_zones; i++, zone_id += bdev_node->bdev.zone_size) { 236 zone = zone_block_get_zone_by_slba(bdev_node, zone_id); 237 if (!zone) { 238 return -EINVAL; 239 } 240 memcpy(&zone_info[i], &zone->zone_info, sizeof(*zone_info)); 241 } 242 243 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 244 return 0; 245 } 246 247 static int 248 zone_block_open_zone(struct block_zone *zone, struct spdk_bdev_io *bdev_io) 249 { 250 pthread_spin_lock(&zone->lock); 251 252 switch (zone->zone_info.state) { 253 case SPDK_BDEV_ZONE_STATE_EMPTY: 254 case SPDK_BDEV_ZONE_STATE_OPEN: 255 case SPDK_BDEV_ZONE_STATE_CLOSED: 256 zone->zone_info.state = SPDK_BDEV_ZONE_STATE_OPEN; 257 pthread_spin_unlock(&zone->lock); 258 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 259 return 0; 260 default: 261 pthread_spin_unlock(&zone->lock); 262 return -EINVAL; 263 } 264 } 265 266 static void 267 _zone_block_complete_unmap(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 268 { 269 struct spdk_bdev_io *orig_io = cb_arg; 270 int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED; 271 272 /* Complete the original IO and then free the one that we created here 273 * as a result of issuing an IO via submit_reqeust. 274 */ 275 spdk_bdev_io_complete(orig_io, status); 276 spdk_bdev_free_io(bdev_io); 277 } 278 279 static int 280 zone_block_reset_zone(struct bdev_zone_block *bdev_node, struct zone_block_io_channel *ch, 281 struct block_zone *zone, struct spdk_bdev_io *bdev_io) 282 { 283 pthread_spin_lock(&zone->lock); 284 285 switch (zone->zone_info.state) { 286 case SPDK_BDEV_ZONE_STATE_EMPTY: 287 pthread_spin_unlock(&zone->lock); 288 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 289 return 0; 290 case SPDK_BDEV_ZONE_STATE_OPEN: 291 case SPDK_BDEV_ZONE_STATE_FULL: 292 case SPDK_BDEV_ZONE_STATE_CLOSED: 293 zone->zone_info.state = SPDK_BDEV_ZONE_STATE_EMPTY; 294 zone->zone_info.write_pointer = zone->zone_info.zone_id; 295 pthread_spin_unlock(&zone->lock); 296 return spdk_bdev_unmap_blocks(bdev_node->base_desc, ch->base_ch, 297 zone->zone_info.zone_id, zone->zone_info.capacity, 298 _zone_block_complete_unmap, bdev_io); 299 default: 300 pthread_spin_unlock(&zone->lock); 301 return -EINVAL; 302 } 303 } 304 305 static int 306 zone_block_close_zone(struct block_zone *zone, struct spdk_bdev_io *bdev_io) 307 { 308 pthread_spin_lock(&zone->lock); 309 310 switch (zone->zone_info.state) { 311 case SPDK_BDEV_ZONE_STATE_OPEN: 312 case SPDK_BDEV_ZONE_STATE_CLOSED: 313 zone->zone_info.state = SPDK_BDEV_ZONE_STATE_CLOSED; 314 pthread_spin_unlock(&zone->lock); 315 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 316 return 0; 317 default: 318 pthread_spin_unlock(&zone->lock); 319 return -EINVAL; 320 } 321 } 322 323 static int 324 zone_block_finish_zone(struct block_zone *zone, struct spdk_bdev_io *bdev_io) 325 { 326 pthread_spin_lock(&zone->lock); 327 328 zone->zone_info.write_pointer = zone->zone_info.zone_id + zone->zone_info.capacity; 329 zone->zone_info.state = SPDK_BDEV_ZONE_STATE_FULL; 330 331 pthread_spin_unlock(&zone->lock); 332 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 333 return 0; 334 } 335 336 static int 337 zone_block_zone_management(struct bdev_zone_block *bdev_node, struct zone_block_io_channel *ch, 338 struct spdk_bdev_io *bdev_io) 339 { 340 struct block_zone *zone; 341 342 zone = zone_block_get_zone_by_slba(bdev_node, bdev_io->u.zone_mgmt.zone_id); 343 if (!zone) { 344 return -EINVAL; 345 } 346 347 switch (bdev_io->u.zone_mgmt.zone_action) { 348 case SPDK_BDEV_ZONE_RESET: 349 return zone_block_reset_zone(bdev_node, ch, zone, bdev_io); 350 case SPDK_BDEV_ZONE_OPEN: 351 return zone_block_open_zone(zone, bdev_io); 352 case SPDK_BDEV_ZONE_CLOSE: 353 return zone_block_close_zone(zone, bdev_io); 354 case SPDK_BDEV_ZONE_FINISH: 355 return zone_block_finish_zone(zone, bdev_io); 356 default: 357 return -EINVAL; 358 } 359 } 360 361 static void 362 _zone_block_complete_write(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 363 { 364 struct spdk_bdev_io *orig_io = cb_arg; 365 int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED; 366 367 if (success && orig_io->type == SPDK_BDEV_IO_TYPE_ZONE_APPEND) { 368 orig_io->u.bdev.offset_blocks = bdev_io->u.bdev.offset_blocks; 369 } 370 371 /* Complete the original IO and then free the one that we created here 372 * as a result of issuing an IO via submit_reqeust. 373 */ 374 spdk_bdev_io_complete(orig_io, status); 375 spdk_bdev_free_io(bdev_io); 376 } 377 378 static int 379 zone_block_write(struct bdev_zone_block *bdev_node, struct zone_block_io_channel *ch, 380 struct spdk_bdev_io *bdev_io) 381 { 382 struct block_zone *zone; 383 uint64_t len = bdev_io->u.bdev.num_blocks; 384 uint64_t lba = bdev_io->u.bdev.offset_blocks; 385 uint64_t num_blocks_left, wp; 386 int rc = 0; 387 bool is_append = bdev_io->type == SPDK_BDEV_IO_TYPE_ZONE_APPEND; 388 389 if (is_append) { 390 zone = zone_block_get_zone_by_slba(bdev_node, lba); 391 } else { 392 zone = zone_block_get_zone_containing_lba(bdev_node, lba); 393 } 394 if (!zone) { 395 SPDK_ERRLOG("Trying to write to invalid zone (lba 0x%lx)\n", lba); 396 return -EINVAL; 397 } 398 399 pthread_spin_lock(&zone->lock); 400 401 switch (zone->zone_info.state) { 402 case SPDK_BDEV_ZONE_STATE_OPEN: 403 case SPDK_BDEV_ZONE_STATE_EMPTY: 404 case SPDK_BDEV_ZONE_STATE_CLOSED: 405 zone->zone_info.state = SPDK_BDEV_ZONE_STATE_OPEN; 406 break; 407 default: 408 SPDK_ERRLOG("Trying to write to zone in invalid state %u\n", zone->zone_info.state); 409 rc = -EINVAL; 410 goto write_fail; 411 } 412 413 wp = zone->zone_info.write_pointer; 414 if (is_append) { 415 lba = wp; 416 } else { 417 if (lba != wp) { 418 SPDK_ERRLOG("Trying to write to zone with invalid address (lba 0x%lx, wp 0x%lx)\n", lba, wp); 419 rc = -EINVAL; 420 goto write_fail; 421 } 422 } 423 424 num_blocks_left = zone->zone_info.zone_id + zone->zone_info.capacity - wp; 425 if (len > num_blocks_left) { 426 SPDK_ERRLOG("Write exceeds zone capacity (lba 0x%" PRIu64 ", len 0x%lx, wp 0x%lx)\n", lba, len, wp); 427 rc = -EINVAL; 428 goto write_fail; 429 } 430 431 zone->zone_info.write_pointer += bdev_io->u.bdev.num_blocks; 432 assert(zone->zone_info.write_pointer <= zone->zone_info.zone_id + zone->zone_info.capacity); 433 if (zone->zone_info.write_pointer == zone->zone_info.zone_id + zone->zone_info.capacity) { 434 zone->zone_info.state = SPDK_BDEV_ZONE_STATE_FULL; 435 } 436 pthread_spin_unlock(&zone->lock); 437 438 if (bdev_io->u.bdev.md_buf == NULL) { 439 rc = spdk_bdev_writev_blocks(bdev_node->base_desc, ch->base_ch, bdev_io->u.bdev.iovs, 440 bdev_io->u.bdev.iovcnt, lba, 441 bdev_io->u.bdev.num_blocks, _zone_block_complete_write, 442 bdev_io); 443 } else { 444 rc = spdk_bdev_writev_blocks_with_md(bdev_node->base_desc, ch->base_ch, 445 bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, 446 bdev_io->u.bdev.md_buf, 447 lba, bdev_io->u.bdev.num_blocks, 448 _zone_block_complete_write, bdev_io); 449 } 450 451 return rc; 452 453 write_fail: 454 pthread_spin_unlock(&zone->lock); 455 return rc; 456 } 457 458 static void 459 _zone_block_complete_read(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 460 { 461 struct spdk_bdev_io *orig_io = cb_arg; 462 int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED; 463 464 /* Complete the original IO and then free the one that we created here 465 * as a result of issuing an IO via submit_reqeust. 466 */ 467 spdk_bdev_io_complete(orig_io, status); 468 spdk_bdev_free_io(bdev_io); 469 } 470 471 static int 472 zone_block_read(struct bdev_zone_block *bdev_node, struct zone_block_io_channel *ch, 473 struct spdk_bdev_io *bdev_io) 474 { 475 struct block_zone *zone; 476 uint64_t len = bdev_io->u.bdev.num_blocks; 477 uint64_t lba = bdev_io->u.bdev.offset_blocks; 478 int rc; 479 480 zone = zone_block_get_zone_containing_lba(bdev_node, lba); 481 if (!zone) { 482 SPDK_ERRLOG("Trying to read from invalid zone (lba 0x%lx)\n", lba); 483 return -EINVAL; 484 } 485 486 if ((lba + len) > (zone->zone_info.zone_id + zone->zone_info.capacity)) { 487 SPDK_ERRLOG("Read exceeds zone capacity (lba 0x%lx, len 0x%lx)\n", lba, len); 488 return -EINVAL; 489 } 490 491 if (bdev_io->u.bdev.md_buf == NULL) { 492 rc = spdk_bdev_readv_blocks(bdev_node->base_desc, ch->base_ch, bdev_io->u.bdev.iovs, 493 bdev_io->u.bdev.iovcnt, lba, 494 len, _zone_block_complete_read, 495 bdev_io); 496 } else { 497 rc = spdk_bdev_readv_blocks_with_md(bdev_node->base_desc, ch->base_ch, 498 bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, 499 bdev_io->u.bdev.md_buf, 500 lba, len, 501 _zone_block_complete_read, bdev_io); 502 } 503 504 return rc; 505 } 506 507 static void 508 zone_block_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 509 { 510 struct bdev_zone_block *bdev_node = SPDK_CONTAINEROF(bdev_io->bdev, struct bdev_zone_block, bdev); 511 struct zone_block_io_channel *dev_ch = spdk_io_channel_get_ctx(ch); 512 int rc = 0; 513 514 switch (bdev_io->type) { 515 case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO: 516 rc = zone_block_get_zone_info(bdev_node, bdev_io); 517 break; 518 case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT: 519 rc = zone_block_zone_management(bdev_node, dev_ch, bdev_io); 520 break; 521 case SPDK_BDEV_IO_TYPE_WRITE: 522 case SPDK_BDEV_IO_TYPE_ZONE_APPEND: 523 rc = zone_block_write(bdev_node, dev_ch, bdev_io); 524 break; 525 case SPDK_BDEV_IO_TYPE_READ: 526 rc = zone_block_read(bdev_node, dev_ch, bdev_io); 527 break; 528 default: 529 SPDK_ERRLOG("vbdev_block: unknown I/O type %u\n", bdev_io->type); 530 rc = -ENOTSUP; 531 break; 532 } 533 534 if (rc != 0) { 535 if (rc == -ENOMEM) { 536 SPDK_WARNLOG("ENOMEM, start to queue io for vbdev.\n"); 537 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM); 538 } else { 539 SPDK_ERRLOG("ERROR on bdev_io submission!\n"); 540 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 541 } 542 } 543 } 544 545 static bool 546 zone_block_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 547 { 548 switch (io_type) { 549 case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT: 550 case SPDK_BDEV_IO_TYPE_WRITE: 551 case SPDK_BDEV_IO_TYPE_READ: 552 case SPDK_BDEV_IO_TYPE_ZONE_APPEND: 553 return true; 554 default: 555 return false; 556 } 557 } 558 559 static struct spdk_io_channel * 560 zone_block_get_io_channel(void *ctx) 561 { 562 struct bdev_zone_block *bdev_node = (struct bdev_zone_block *)ctx; 563 564 return spdk_get_io_channel(bdev_node); 565 } 566 567 static int 568 zone_block_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) 569 { 570 struct bdev_zone_block *bdev_node = (struct bdev_zone_block *)ctx; 571 struct spdk_bdev *base_bdev = spdk_bdev_desc_get_bdev(bdev_node->base_desc); 572 573 spdk_json_write_name(w, "zoned_block"); 574 spdk_json_write_object_begin(w); 575 spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&bdev_node->bdev)); 576 spdk_json_write_named_string(w, "base_bdev", spdk_bdev_get_name(base_bdev)); 577 spdk_json_write_named_uint64(w, "zone_capacity", bdev_node->zone_capacity); 578 spdk_json_write_named_uint64(w, "optimal_open_zones", bdev_node->bdev.optimal_open_zones); 579 spdk_json_write_object_end(w); 580 581 return 0; 582 } 583 584 /* When we register our vbdev this is how we specify our entry points. */ 585 static const struct spdk_bdev_fn_table zone_block_fn_table = { 586 .destruct = zone_block_destruct, 587 .submit_request = zone_block_submit_request, 588 .io_type_supported = zone_block_io_type_supported, 589 .get_io_channel = zone_block_get_io_channel, 590 .dump_info_json = zone_block_dump_info_json, 591 }; 592 593 static void 594 zone_block_base_bdev_hotremove_cb(void *ctx) 595 { 596 struct bdev_zone_block *bdev_node, *tmp; 597 struct spdk_bdev *bdev_find = ctx; 598 599 TAILQ_FOREACH_SAFE(bdev_node, &g_bdev_nodes, link, tmp) { 600 if (bdev_find == spdk_bdev_desc_get_bdev(bdev_node->base_desc)) { 601 spdk_bdev_unregister(&bdev_node->bdev, NULL, NULL); 602 } 603 } 604 } 605 606 static int 607 _zone_block_ch_create_cb(void *io_device, void *ctx_buf) 608 { 609 struct zone_block_io_channel *bdev_ch = ctx_buf; 610 struct bdev_zone_block *bdev_node = io_device; 611 612 bdev_ch->base_ch = spdk_bdev_get_io_channel(bdev_node->base_desc); 613 if (!bdev_ch->base_ch) { 614 return -ENOMEM; 615 } 616 617 return 0; 618 } 619 620 static void 621 _zone_block_ch_destroy_cb(void *io_device, void *ctx_buf) 622 { 623 struct zone_block_io_channel *bdev_ch = ctx_buf; 624 625 spdk_put_io_channel(bdev_ch->base_ch); 626 } 627 628 static int 629 zone_block_insert_name(const char *bdev_name, const char *vbdev_name, uint64_t zone_capacity, 630 uint64_t optimal_open_zones) 631 { 632 struct bdev_zone_block_config *name; 633 634 TAILQ_FOREACH(name, &g_bdev_configs, link) { 635 if (strcmp(vbdev_name, name->vbdev_name) == 0) { 636 SPDK_ERRLOG("block zoned bdev %s already exists\n", vbdev_name); 637 return -EEXIST; 638 } 639 if (strcmp(bdev_name, name->bdev_name) == 0) { 640 SPDK_ERRLOG("base bdev %s already claimed\n", bdev_name); 641 return -EEXIST; 642 } 643 } 644 645 name = calloc(1, sizeof(*name)); 646 if (!name) { 647 SPDK_ERRLOG("could not allocate bdev_names\n"); 648 return -ENOMEM; 649 } 650 651 name->bdev_name = strdup(bdev_name); 652 if (!name->bdev_name) { 653 SPDK_ERRLOG("could not allocate name->bdev_name\n"); 654 free(name); 655 return -ENOMEM; 656 } 657 658 name->vbdev_name = strdup(vbdev_name); 659 if (!name->vbdev_name) { 660 SPDK_ERRLOG("could not allocate name->vbdev_name\n"); 661 free(name->bdev_name); 662 free(name); 663 return -ENOMEM; 664 } 665 666 name->zone_capacity = zone_capacity; 667 name->optimal_open_zones = optimal_open_zones; 668 669 TAILQ_INSERT_TAIL(&g_bdev_configs, name, link); 670 671 return 0; 672 } 673 674 static int 675 zone_block_init_zone_info(struct bdev_zone_block *bdev_node) 676 { 677 size_t i; 678 struct block_zone *zone; 679 int rc = 0; 680 681 for (i = 0; i < bdev_node->num_zones; i++) { 682 zone = &bdev_node->zones[i]; 683 zone->zone_info.zone_id = bdev_node->bdev.zone_size * i; 684 zone->zone_info.capacity = bdev_node->zone_capacity; 685 zone->zone_info.write_pointer = zone->zone_info.zone_id + zone->zone_info.capacity; 686 zone->zone_info.state = SPDK_BDEV_ZONE_STATE_FULL; 687 if (pthread_spin_init(&zone->lock, PTHREAD_PROCESS_PRIVATE)) { 688 SPDK_ERRLOG("pthread_spin_init() failed\n"); 689 rc = -ENOMEM; 690 break; 691 } 692 } 693 694 if (rc) { 695 for (; i > 0; i--) { 696 pthread_spin_destroy(&bdev_node->zones[i - 1].lock); 697 } 698 } 699 700 return rc; 701 } 702 703 static int 704 zone_block_register(struct spdk_bdev *base_bdev) 705 { 706 struct bdev_zone_block_config *name, *tmp; 707 struct bdev_zone_block *bdev_node; 708 uint64_t zone_size; 709 int rc = 0; 710 711 /* Check our list of names from config versus this bdev and if 712 * there's a match, create the bdev_node & bdev accordingly. 713 */ 714 TAILQ_FOREACH_SAFE(name, &g_bdev_configs, link, tmp) { 715 if (strcmp(name->bdev_name, base_bdev->name) != 0) { 716 continue; 717 } 718 719 if (spdk_bdev_is_zoned(base_bdev)) { 720 SPDK_ERRLOG("Base bdev %s is already a zoned bdev\n", base_bdev->name); 721 rc = -EEXIST; 722 goto free_config; 723 } 724 725 bdev_node = calloc(1, sizeof(struct bdev_zone_block)); 726 if (!bdev_node) { 727 rc = -ENOMEM; 728 SPDK_ERRLOG("could not allocate bdev_node\n"); 729 goto free_config; 730 } 731 732 /* The base bdev that we're attaching to. */ 733 bdev_node->bdev.name = strdup(name->vbdev_name); 734 if (!bdev_node->bdev.name) { 735 rc = -ENOMEM; 736 SPDK_ERRLOG("could not allocate bdev_node name\n"); 737 goto strdup_failed; 738 } 739 740 zone_size = spdk_align64pow2(name->zone_capacity); 741 if (zone_size == 0) { 742 rc = -EINVAL; 743 SPDK_ERRLOG("invalid zone size\n"); 744 goto roundup_failed; 745 } 746 747 bdev_node->zone_shift = spdk_u64log2(zone_size); 748 bdev_node->num_zones = base_bdev->blockcnt / zone_size; 749 750 /* Align num_zones to optimal_open_zones */ 751 bdev_node->num_zones -= bdev_node->num_zones % name->optimal_open_zones; 752 bdev_node->zones = calloc(bdev_node->num_zones, sizeof(struct block_zone)); 753 if (!bdev_node->zones) { 754 rc = -ENOMEM; 755 SPDK_ERRLOG("could not allocate zones\n"); 756 goto calloc_failed; 757 } 758 759 bdev_node->bdev.product_name = "zone_block"; 760 761 /* Copy some properties from the underlying base bdev. */ 762 bdev_node->bdev.write_cache = base_bdev->write_cache; 763 bdev_node->bdev.required_alignment = base_bdev->required_alignment; 764 bdev_node->bdev.optimal_io_boundary = base_bdev->optimal_io_boundary; 765 766 bdev_node->bdev.blocklen = base_bdev->blocklen; 767 bdev_node->bdev.blockcnt = bdev_node->num_zones * zone_size; 768 769 if (bdev_node->num_zones * name->zone_capacity != base_bdev->blockcnt) { 770 SPDK_DEBUGLOG(vbdev_zone_block, 771 "Lost %lu blocks due to zone capacity and base bdev size misalignment\n", 772 base_bdev->blockcnt - bdev_node->num_zones * name->zone_capacity); 773 } 774 775 bdev_node->bdev.write_unit_size = base_bdev->write_unit_size; 776 777 bdev_node->bdev.md_interleave = base_bdev->md_interleave; 778 bdev_node->bdev.md_len = base_bdev->md_len; 779 bdev_node->bdev.dif_type = base_bdev->dif_type; 780 bdev_node->bdev.dif_is_head_of_md = base_bdev->dif_is_head_of_md; 781 bdev_node->bdev.dif_check_flags = base_bdev->dif_check_flags; 782 783 bdev_node->bdev.zoned = true; 784 bdev_node->bdev.ctxt = bdev_node; 785 bdev_node->bdev.fn_table = &zone_block_fn_table; 786 bdev_node->bdev.module = &bdev_zoned_if; 787 788 /* bdev specific info */ 789 bdev_node->bdev.zone_size = zone_size; 790 791 bdev_node->zone_capacity = name->zone_capacity; 792 bdev_node->bdev.optimal_open_zones = name->optimal_open_zones; 793 bdev_node->bdev.max_open_zones = 0; 794 rc = zone_block_init_zone_info(bdev_node); 795 if (rc) { 796 SPDK_ERRLOG("could not init zone info\n"); 797 goto zone_info_failed; 798 } 799 800 TAILQ_INSERT_TAIL(&g_bdev_nodes, bdev_node, link); 801 802 spdk_io_device_register(bdev_node, _zone_block_ch_create_cb, _zone_block_ch_destroy_cb, 803 sizeof(struct zone_block_io_channel), 804 name->vbdev_name); 805 806 rc = spdk_bdev_open(base_bdev, true, zone_block_base_bdev_hotremove_cb, 807 base_bdev, &bdev_node->base_desc); 808 if (rc) { 809 SPDK_ERRLOG("could not open bdev %s\n", spdk_bdev_get_name(base_bdev)); 810 goto open_failed; 811 } 812 813 /* Save the thread where the base device is opened */ 814 bdev_node->thread = spdk_get_thread(); 815 816 rc = spdk_bdev_module_claim_bdev(base_bdev, bdev_node->base_desc, bdev_node->bdev.module); 817 if (rc) { 818 SPDK_ERRLOG("could not claim bdev %s\n", spdk_bdev_get_name(base_bdev)); 819 goto claim_failed; 820 } 821 822 rc = spdk_bdev_register(&bdev_node->bdev); 823 if (rc) { 824 SPDK_ERRLOG("could not register zoned bdev\n"); 825 goto register_failed; 826 } 827 } 828 829 return rc; 830 831 register_failed: 832 spdk_bdev_module_release_bdev(&bdev_node->bdev); 833 claim_failed: 834 spdk_bdev_close(bdev_node->base_desc); 835 open_failed: 836 TAILQ_REMOVE(&g_bdev_nodes, bdev_node, link); 837 spdk_io_device_unregister(bdev_node, NULL); 838 zone_info_failed: 839 free(bdev_node->zones); 840 calloc_failed: 841 roundup_failed: 842 free(bdev_node->bdev.name); 843 strdup_failed: 844 free(bdev_node); 845 free_config: 846 zone_block_remove_config(name); 847 return rc; 848 } 849 850 int 851 vbdev_zone_block_create(const char *bdev_name, const char *vbdev_name, uint64_t zone_capacity, 852 uint64_t optimal_open_zones) 853 { 854 struct spdk_bdev *bdev = NULL; 855 int rc = 0; 856 857 if (zone_capacity == 0) { 858 SPDK_ERRLOG("Zone capacity can't be 0\n"); 859 return -EINVAL; 860 } 861 862 if (optimal_open_zones == 0) { 863 SPDK_ERRLOG("Optimal open zones can't be 0\n"); 864 return -EINVAL; 865 } 866 867 /* Insert the bdev into our global name list even if it doesn't exist yet, 868 * it may show up soon... 869 */ 870 rc = zone_block_insert_name(bdev_name, vbdev_name, zone_capacity, optimal_open_zones); 871 if (rc) { 872 return rc; 873 } 874 875 bdev = spdk_bdev_get_by_name(bdev_name); 876 if (!bdev) { 877 /* This is not an error, even though the bdev is not present at this time it may 878 * still show up later. 879 */ 880 return 0; 881 } 882 883 return zone_block_register(bdev); 884 } 885 886 void 887 vbdev_zone_block_delete(const char *name, spdk_bdev_unregister_cb cb_fn, void *cb_arg) 888 { 889 struct bdev_zone_block_config *name_node; 890 struct spdk_bdev *bdev = NULL; 891 892 bdev = spdk_bdev_get_by_name(name); 893 if (!bdev || bdev->module != &bdev_zoned_if) { 894 cb_fn(cb_arg, -ENODEV); 895 return; 896 } 897 898 TAILQ_FOREACH(name_node, &g_bdev_configs, link) { 899 if (strcmp(name_node->vbdev_name, bdev->name) == 0) { 900 zone_block_remove_config(name_node); 901 break; 902 } 903 } 904 905 spdk_bdev_unregister(bdev, cb_fn, cb_arg); 906 } 907 908 static void 909 zone_block_examine(struct spdk_bdev *bdev) 910 { 911 zone_block_register(bdev); 912 913 spdk_bdev_module_examine_done(&bdev_zoned_if); 914 } 915 916 SPDK_LOG_REGISTER_COMPONENT(vbdev_zone_block) 917