/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2019 Intel Corporation.
 * Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES.
 * All rights reserved.
 */

/*
 * vbdev_zone_block: virtual bdev module that emulates a zoned block device
 * on top of a regular (non-zoned) base bdev. The base bdev's LBA space is
 * carved into fixed, power-of-2-sized zones; zone state and write pointers
 * are tracked purely in memory (each zone protected by its own spinlock)
 * and reads/writes are passed through to the base bdev.
 */

#include "spdk/stdinc.h"

#include "vbdev_zone_block.h"

#include "spdk/config.h"
#include "spdk/nvme.h"
#include "spdk/bdev_zone.h"

#include "spdk/log.h"

/* This namespace UUID was generated using uuid_generate() method. */
#define BDEV_ZONE_BLOCK_NAMESPACE_UUID "5f3f485a-d6bb-4443-9de7-023683b77389"

static int zone_block_init(void);
static int zone_block_get_ctx_size(void);
static void zone_block_finish(void);
static int zone_block_config_json(struct spdk_json_write_ctx *w);
static void zone_block_examine(struct spdk_bdev *bdev);

static struct spdk_bdev_module bdev_zoned_if = {
	.name = "bdev_zoned_block",
	.module_init = zone_block_init,
	.module_fini = zone_block_finish,
	.config_json = zone_block_config_json,
	.examine_config = zone_block_examine,
	.get_ctx_size = zone_block_get_ctx_size,
};

SPDK_BDEV_MODULE_REGISTER(bdev_zoned_block, &bdev_zoned_if)

/* List of block vbdev names and their base bdevs via configuration file.
 * Used so we can parse the conf once at init and use this list in examine().
 */
struct bdev_zone_block_config {
	char *vbdev_name;
	char *bdev_name;
	uint64_t zone_capacity;
	uint64_t optimal_open_zones;
	TAILQ_ENTRY(bdev_zone_block_config) link;
};
static TAILQ_HEAD(, bdev_zone_block_config) g_bdev_configs = TAILQ_HEAD_INITIALIZER(g_bdev_configs);

/* Per-zone in-memory state; the lock serializes state/write-pointer updates. */
struct block_zone {
	struct spdk_bdev_zone_info zone_info;
	pthread_spinlock_t lock;
};

/* List of block vbdevs and associated info for each.
 */
struct bdev_zone_block {
	struct spdk_bdev bdev;    /* the block zoned bdev */
	struct spdk_bdev_desc *base_desc; /* its descriptor we get from open */
	struct block_zone *zones; /* array of zones */
	uint64_t num_zones;      /* number of zones */
	uint64_t zone_capacity;  /* zone capacity */
	uint64_t zone_shift;     /* log2 of zone_size */
	TAILQ_ENTRY(bdev_zone_block) link;
	struct spdk_thread *thread; /* thread where base device is opened */
};
static TAILQ_HEAD(, bdev_zone_block) g_bdev_nodes = TAILQ_HEAD_INITIALIZER(g_bdev_nodes);

struct zone_block_io_channel {
	struct spdk_io_channel *base_ch; /* IO channel of base device */
};

struct zone_block_io {
	/* vbdev to which IO was issued */
	struct bdev_zone_block *bdev_zone_block;
};

/* Module init: nothing to set up; configs arrive later via RPC. */
static int
zone_block_init(void)
{
	return 0;
}

/* Remove one config entry from the global list and free its strings. */
static void
zone_block_remove_config(struct bdev_zone_block_config *name)
{
	TAILQ_REMOVE(&g_bdev_configs, name, link);
	free(name->bdev_name);
	free(name->vbdev_name);
	free(name);
}

/* Module teardown: drain the config list (vbdev nodes are torn down by
 * the bdev layer via destruct).
 */
static void
zone_block_finish(void)
{
	struct bdev_zone_block_config *name;

	while ((name = TAILQ_FIRST(&g_bdev_configs))) {
		zone_block_remove_config(name);
	}
}

/* Per-IO context size the bdev layer reserves for this module. */
static int
zone_block_get_ctx_size(void)
{
	return sizeof(struct zone_block_io);
}

/* Emit one bdev_zone_block_create RPC call per live vbdev so the current
 * configuration can be replayed from a saved JSON config.
 */
static int
zone_block_config_json(struct spdk_json_write_ctx *w)
{
	struct bdev_zone_block *bdev_node;
	struct spdk_bdev *base_bdev = NULL;

	TAILQ_FOREACH(bdev_node, &g_bdev_nodes, link) {
		base_bdev = spdk_bdev_desc_get_bdev(bdev_node->base_desc);
		spdk_json_write_object_begin(w);
		spdk_json_write_named_string(w, "method", "bdev_zone_block_create");
		spdk_json_write_named_object_begin(w, "params");
		spdk_json_write_named_string(w, "base_bdev", spdk_bdev_get_name(base_bdev));
		spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&bdev_node->bdev));
		spdk_json_write_named_uint64(w, "zone_capacity", bdev_node->zone_capacity);
		spdk_json_write_named_uint64(w, "optimal_open_zones", bdev_node->bdev.optimal_open_zones);
		spdk_json_write_object_end(w);
		spdk_json_write_object_end(w);
	}

	return 0;
}

/* Callback for unregistering the IO device. */
static void
_device_unregister_cb(void *io_device)
{
	struct bdev_zone_block *bdev_node = io_device;
	uint64_t i;

	/* Done with this bdev_node. Free it and the zone array with its locks. */
	free(bdev_node->bdev.name);
	for (i = 0; i < bdev_node->num_zones; i++) {
		pthread_spin_destroy(&bdev_node->zones[i].lock);
	}
	free(bdev_node->zones);
	free(bdev_node);
}

/* Message handler: close the base descriptor on the thread that opened it. */
static void
_zone_block_destruct(void *ctx)
{
	struct spdk_bdev_desc *desc = ctx;

	spdk_bdev_close(desc);
}

/* bdev fn_table destruct: detach from the base bdev and schedule teardown.
 * Final frees happen in _device_unregister_cb once all channels are gone.
 */
static int
zone_block_destruct(void *ctx)
{
	struct bdev_zone_block *bdev_node = (struct bdev_zone_block *)ctx;

	TAILQ_REMOVE(&g_bdev_nodes, bdev_node, link);

	/* Unclaim the underlying bdev. */
	spdk_bdev_module_release_bdev(spdk_bdev_desc_get_bdev(bdev_node->base_desc));

	/* Close the underlying bdev on its same opened thread. */
	if (bdev_node->thread && bdev_node->thread != spdk_get_thread()) {
		spdk_thread_send_msg(bdev_node->thread, _zone_block_destruct, bdev_node->base_desc);
	} else {
		spdk_bdev_close(bdev_node->base_desc);
	}

	/* Unregister the io_device.
	 */
	spdk_io_device_unregister(bdev_node, _device_unregister_cb);

	return 0;
}

/* Map an LBA to its zone via the zone-size shift; NULL if out of range. */
static struct block_zone *
zone_block_get_zone_containing_lba(struct bdev_zone_block *bdev_node, uint64_t lba)
{
	size_t index = lba >> bdev_node->zone_shift;

	if (index >= bdev_node->num_zones) {
		return NULL;
	}

	return &bdev_node->zones[index];
}

/* Like the above, but the LBA must be exactly a zone start (slba). */
static struct block_zone *
zone_block_get_zone_by_slba(struct bdev_zone_block *bdev_node, uint64_t start_lba)
{
	struct block_zone *zone = zone_block_get_zone_containing_lba(bdev_node, start_lba);

	if (zone && zone->zone_info.zone_id == start_lba) {
		return zone;
	} else {
		return NULL;
	}
}

/* GET_ZONE_INFO: copy per-zone info for num_zones zones starting at zone_id.
 * NOTE(review): zone_info is read without taking zone->lock, so a copy may
 * be torn against a concurrent state/write-pointer update — confirm whether
 * this is an accepted race.
 */
static int
zone_block_get_zone_info(struct bdev_zone_block *bdev_node, struct spdk_bdev_io *bdev_io)
{
	struct block_zone *zone;
	struct spdk_bdev_zone_info *zone_info = bdev_io->u.zone_mgmt.buf;
	uint64_t zone_id = bdev_io->u.zone_mgmt.zone_id;
	size_t i;

	/* User can request info for more zones than exist, need to check both internal and user
	 * boundaries
	 */
	for (i = 0; i < bdev_io->u.zone_mgmt.num_zones; i++, zone_id += bdev_node->bdev.zone_size) {
		zone = zone_block_get_zone_by_slba(bdev_node, zone_id);
		if (!zone) {
			return -EINVAL;
		}
		memcpy(&zone_info[i], &zone->zone_info, sizeof(*zone_info));
	}

	spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
	return 0;
}

/* ZONE_OPEN: EMPTY/OPEN/CLOSED -> OPEN (pure state change, completes
 * immediately); any other state is rejected with -EINVAL.
 */
static int
zone_block_open_zone(struct block_zone *zone, struct spdk_bdev_io *bdev_io)
{
	pthread_spin_lock(&zone->lock);

	switch (zone->zone_info.state) {
	case SPDK_BDEV_ZONE_STATE_EMPTY:
	case SPDK_BDEV_ZONE_STATE_OPEN:
	case SPDK_BDEV_ZONE_STATE_CLOSED:
		zone->zone_info.state = SPDK_BDEV_ZONE_STATE_OPEN;
		pthread_spin_unlock(&zone->lock);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
		return 0;
	default:
		pthread_spin_unlock(&zone->lock);
		return -EINVAL;
	}
}

/* Completion for the unmap issued by zone reset: propagate status to the
 * original zone-management IO.
 */
static void
_zone_block_complete_unmap(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_bdev_io *orig_io = cb_arg;
	int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;

	/* Complete the original IO and then free the one that we created here
	 * as a result of issuing an IO via submit_request.
	 */
	spdk_bdev_io_complete(orig_io, status);
	spdk_bdev_free_io(bdev_io);
}

/* ZONE_RESET: rewind the write pointer to the zone start and, as a
 * best-effort cleanup, unmap the zone's blocks on the base bdev.
 */
static int
zone_block_reset_zone(struct bdev_zone_block *bdev_node, struct zone_block_io_channel *ch,
		      struct block_zone *zone, struct spdk_bdev_io *bdev_io)
{
	pthread_spin_lock(&zone->lock);

	switch (zone->zone_info.state) {
	case SPDK_BDEV_ZONE_STATE_EMPTY:
		/* Resetting an already-empty zone is a no-op. */
		pthread_spin_unlock(&zone->lock);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
		return 0;
	case SPDK_BDEV_ZONE_STATE_OPEN:
	case SPDK_BDEV_ZONE_STATE_FULL:
	case SPDK_BDEV_ZONE_STATE_CLOSED:
		zone->zone_info.state = SPDK_BDEV_ZONE_STATE_EMPTY;
		zone->zone_info.write_pointer = zone->zone_info.zone_id;
		pthread_spin_unlock(&zone->lock);

		/* The unmap isn't necessary, so if the base bdev doesn't support it, we're done */
		if (!spdk_bdev_io_type_supported(spdk_bdev_desc_get_bdev(bdev_node->base_desc),
						 SPDK_BDEV_IO_TYPE_UNMAP)) {
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
			return 0;
		}

		return spdk_bdev_unmap_blocks(bdev_node->base_desc, ch->base_ch,
					      zone->zone_info.zone_id, zone->zone_info.capacity,
					      _zone_block_complete_unmap, bdev_io);
	default:
		pthread_spin_unlock(&zone->lock);
		return -EINVAL;
	}
}

/* ZONE_CLOSE: OPEN/CLOSED -> CLOSED; other states are rejected. */
static int
zone_block_close_zone(struct block_zone *zone, struct spdk_bdev_io *bdev_io)
{
	pthread_spin_lock(&zone->lock);

	switch (zone->zone_info.state) {
	case SPDK_BDEV_ZONE_STATE_OPEN:
	case SPDK_BDEV_ZONE_STATE_CLOSED:
		zone->zone_info.state = SPDK_BDEV_ZONE_STATE_CLOSED;
		pthread_spin_unlock(&zone->lock);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
		return 0;
	default:
		pthread_spin_unlock(&zone->lock);
		return -EINVAL;
	}
}

/* ZONE_FINISH: force the write pointer to the end of the zone and mark FULL.
 * NOTE(review): unlike open/close/reset, no current-state validation is done
 * here — any state (including EMPTY) is finished unconditionally; confirm
 * this is intended.
 */
static int
zone_block_finish_zone(struct block_zone *zone, struct spdk_bdev_io *bdev_io)
{
	pthread_spin_lock(&zone->lock);

	zone->zone_info.write_pointer = zone->zone_info.zone_id + zone->zone_info.capacity;
	zone->zone_info.state = SPDK_BDEV_ZONE_STATE_FULL;

	pthread_spin_unlock(&zone->lock);
	spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
	return 0;
}

/* Dispatch a zone-management IO to the matching action handler. The target
 * must be an exact zone start LBA.
 */
static int
zone_block_zone_management(struct bdev_zone_block *bdev_node, struct zone_block_io_channel *ch,
			   struct spdk_bdev_io *bdev_io)
{
	struct block_zone *zone;

	zone = zone_block_get_zone_by_slba(bdev_node, bdev_io->u.zone_mgmt.zone_id);
	if (!zone) {
		return -EINVAL;
	}

	switch (bdev_io->u.zone_mgmt.zone_action) {
	case SPDK_BDEV_ZONE_RESET:
		return zone_block_reset_zone(bdev_node, ch, zone, bdev_io);
	case SPDK_BDEV_ZONE_OPEN:
		return zone_block_open_zone(zone, bdev_io);
	case SPDK_BDEV_ZONE_CLOSE:
		return zone_block_close_zone(zone, bdev_io);
	case SPDK_BDEV_ZONE_FINISH:
		return zone_block_finish_zone(zone, bdev_io);
	default:
		return -EINVAL;
	}
}

/* Completion for pass-through writes; for ZONE_APPEND, report back the LBA
 * actually written (the write pointer chosen in zone_block_write).
 */
static void
_zone_block_complete_write(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_bdev_io *orig_io = cb_arg;
	int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;

	if (success && orig_io->type == SPDK_BDEV_IO_TYPE_ZONE_APPEND) {
		orig_io->u.bdev.offset_blocks = bdev_io->u.bdev.offset_blocks;
	}

	/* Complete the original IO and then free the one that we created here
	 * as a result of issuing an IO via submit_request.
	 */
	spdk_bdev_io_complete(orig_io, status);
	spdk_bdev_free_io(bdev_io);
}

/* Handle WRITE and ZONE_APPEND: validate zone state and write pointer,
 * advance the write pointer under the zone lock, then pass the data through
 * to the base bdev at the resolved LBA.
 * NOTE(review): the write pointer is advanced before the base write is
 * issued; if spdk_bdev_writev_blocks_with_md fails (or the write later
 * fails), the pointer is not rolled back — confirm this is acceptable.
 */
static int
zone_block_write(struct bdev_zone_block *bdev_node, struct zone_block_io_channel *ch,
		 struct spdk_bdev_io *bdev_io)
{
	struct block_zone *zone;
	uint64_t len = bdev_io->u.bdev.num_blocks;
	uint64_t lba = bdev_io->u.bdev.offset_blocks;
	uint64_t num_blocks_left, wp;
	int rc = 0;
	bool is_append = bdev_io->type == SPDK_BDEV_IO_TYPE_ZONE_APPEND;

	/* An append targets a zone start; a plain write may land anywhere
	 * inside a zone (but must match the write pointer, checked below). */
	if (is_append) {
		zone = zone_block_get_zone_by_slba(bdev_node, lba);
	} else {
		zone = zone_block_get_zone_containing_lba(bdev_node, lba);
	}
	if (!zone) {
		SPDK_ERRLOG("Trying to write to invalid zone (lba 0x%" PRIx64 ")\n", lba);
		return -EINVAL;
	}

	pthread_spin_lock(&zone->lock);

	switch (zone->zone_info.state) {
	case SPDK_BDEV_ZONE_STATE_OPEN:
	case SPDK_BDEV_ZONE_STATE_EMPTY:
	case SPDK_BDEV_ZONE_STATE_CLOSED:
		/* Writing implicitly opens the zone. */
		zone->zone_info.state = SPDK_BDEV_ZONE_STATE_OPEN;
		break;
	default:
		SPDK_ERRLOG("Trying to write to zone in invalid state %u\n", zone->zone_info.state);
		rc = -EINVAL;
		goto write_fail;
	}

	wp = zone->zone_info.write_pointer;
	if (is_append) {
		/* Appends always land at the current write pointer. */
		lba = wp;
	} else {
		if (lba != wp) {
			SPDK_ERRLOG("Trying to write to zone with invalid address (lba 0x%" PRIx64 ", wp 0x%" PRIx64 ")\n",
				    lba, wp);
			rc = -EINVAL;
			goto write_fail;
		}
	}

	num_blocks_left = zone->zone_info.zone_id + zone->zone_info.capacity - wp;
	if (len > num_blocks_left) {
		SPDK_ERRLOG("Write exceeds zone capacity (lba 0x%" PRIx64 ", len 0x%" PRIx64 ", wp 0x%" PRIx64
			    ")\n", lba, len, wp);
		rc = -EINVAL;
		goto write_fail;
	}

	zone->zone_info.write_pointer += bdev_io->u.bdev.num_blocks;
	assert(zone->zone_info.write_pointer <= zone->zone_info.zone_id + zone->zone_info.capacity);
	if (zone->zone_info.write_pointer == zone->zone_info.zone_id + zone->zone_info.capacity) {
		zone->zone_info.state = SPDK_BDEV_ZONE_STATE_FULL;
	}
	pthread_spin_unlock(&zone->lock);

	rc = spdk_bdev_writev_blocks_with_md(bdev_node->base_desc, ch->base_ch,
					     bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
					     bdev_io->u.bdev.md_buf,
					     lba, bdev_io->u.bdev.num_blocks,
					     _zone_block_complete_write, bdev_io);

	return rc;

write_fail:
	pthread_spin_unlock(&zone->lock);
	return rc;
}

/* Completion for pass-through reads: propagate status to the original IO. */
static void
_zone_block_complete_read(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_bdev_io *orig_io = cb_arg;
	int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;

	/* Complete the original IO and then free the one that we created here
	 * as a result of issuing an IO via submit_request.
	 */
	spdk_bdev_io_complete(orig_io, status);
	spdk_bdev_free_io(bdev_io);
}

/* Handle READ: reject reads crossing past the zone's capacity, otherwise
 * pass straight through to the base bdev (no zone-state check needed).
 */
static int
zone_block_read(struct bdev_zone_block *bdev_node, struct zone_block_io_channel *ch,
		struct spdk_bdev_io *bdev_io)
{
	struct block_zone *zone;
	uint64_t len = bdev_io->u.bdev.num_blocks;
	uint64_t lba = bdev_io->u.bdev.offset_blocks;
	int rc;

	zone = zone_block_get_zone_containing_lba(bdev_node, lba);
	if (!zone) {
		SPDK_ERRLOG("Trying to read from invalid zone (lba 0x%" PRIx64 ")\n", lba);
		return -EINVAL;
	}

	if ((lba + len) > (zone->zone_info.zone_id + zone->zone_info.capacity)) {
		SPDK_ERRLOG("Read exceeds zone capacity (lba 0x%" PRIx64 ", len 0x%" PRIx64 ")\n", lba, len);
		return -EINVAL;
	}

	rc = spdk_bdev_readv_blocks_with_md(bdev_node->base_desc, ch->base_ch,
					    bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
					    bdev_io->u.bdev.md_buf,
					    lba, len,
					    _zone_block_complete_read, bdev_io);

	return rc;
}

/* bdev fn_table submit_request: route IO by type; map handler errors to
 * NOMEM (retryable) or FAILED completions.
 */
static void
zone_block_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	struct bdev_zone_block *bdev_node = SPDK_CONTAINEROF(bdev_io->bdev, struct bdev_zone_block, bdev);
	struct zone_block_io_channel *dev_ch = spdk_io_channel_get_ctx(ch);
	int rc = 0;

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
		rc = zone_block_get_zone_info(bdev_node, bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
		rc = zone_block_zone_management(bdev_node, dev_ch, bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
	case SPDK_BDEV_IO_TYPE_ZONE_APPEND:
		rc = zone_block_write(bdev_node, dev_ch, bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_READ:
		rc = zone_block_read(bdev_node, dev_ch, bdev_io);
		break;
	default:
		SPDK_ERRLOG("vbdev_block: unknown I/O type %u\n", bdev_io->type);
		rc = -ENOTSUP;
		break;
	}

	if (rc != 0) {
		if (rc == -ENOMEM) {
			SPDK_WARNLOG("ENOMEM, start to queue io for vbdev.\n");
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
		} else {
			SPDK_ERRLOG("ERROR on bdev_io submission!\n");
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		}
	}
}

/* bdev fn_table io_type_supported.
 * NOTE(review): GET_ZONE_INFO is handled in submit_request but not listed
 * here — confirm whether the generic bdev layer covers it for zoned bdevs.
 */
static bool
zone_block_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
{
	switch (io_type) {
	case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
	case SPDK_BDEV_IO_TYPE_WRITE:
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_ZONE_APPEND:
		return true;
	default:
		return false;
	}
}

/* bdev fn_table get_io_channel: channels are keyed off the bdev_node
 * io_device registered in zone_block_register.
 */
static struct spdk_io_channel *
zone_block_get_io_channel(void *ctx)
{
	struct bdev_zone_block *bdev_node = (struct bdev_zone_block *)ctx;

	return spdk_get_io_channel(bdev_node);
}

/* bdev fn_table dump_info_json: human-readable info for this vbdev. */
static int
zone_block_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
{
	struct bdev_zone_block *bdev_node = (struct bdev_zone_block *)ctx;
	struct spdk_bdev *base_bdev = spdk_bdev_desc_get_bdev(bdev_node->base_desc);

	spdk_json_write_name(w, "zoned_block");
	spdk_json_write_object_begin(w);
	spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&bdev_node->bdev));
	spdk_json_write_named_string(w, "base_bdev", spdk_bdev_get_name(base_bdev));
	spdk_json_write_named_uint64(w, "zone_capacity", bdev_node->zone_capacity);
	spdk_json_write_named_uint64(w, "optimal_open_zones", bdev_node->bdev.optimal_open_zones);
	spdk_json_write_object_end(w);

	return 0;
}

/* When we register our vbdev this is how we specify our entry points. */
static const struct spdk_bdev_fn_table zone_block_fn_table = {
	.destruct		= zone_block_destruct,
	.submit_request		= zone_block_submit_request,
	.io_type_supported	= zone_block_io_type_supported,
	.get_io_channel		= zone_block_get_io_channel,
	.dump_info_json		= zone_block_dump_info_json,
};

/* Base bdev hot-removed: unregister every vbdev built on top of it. */
static void
zone_block_base_bdev_hotremove_cb(struct spdk_bdev *bdev_find)
{
	struct bdev_zone_block *bdev_node, *tmp;

	TAILQ_FOREACH_SAFE(bdev_node, &g_bdev_nodes, link, tmp) {
		if (bdev_find == spdk_bdev_desc_get_bdev(bdev_node->base_desc)) {
			spdk_bdev_unregister(&bdev_node->bdev, NULL, NULL);
		}
	}
}

/* Event callback registered with spdk_bdev_open_ext for the base bdev. */
static void
zone_block_base_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev,
			      void *event_ctx)
{
	switch (type) {
	case SPDK_BDEV_EVENT_REMOVE:
		zone_block_base_bdev_hotremove_cb(bdev);
		break;
	default:
		SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type);
		break;
	}
}

/* io_device channel create: grab an IO channel on the base bdev. */
static int
_zone_block_ch_create_cb(void *io_device, void *ctx_buf)
{
	struct zone_block_io_channel *bdev_ch = ctx_buf;
	struct bdev_zone_block *bdev_node = io_device;

	bdev_ch->base_ch = spdk_bdev_get_io_channel(bdev_node->base_desc);
	if (!bdev_ch->base_ch) {
		return -ENOMEM;
	}

	return 0;
}

/* io_device channel destroy: release the base bdev's IO channel. */
static void
_zone_block_ch_destroy_cb(void *io_device, void *ctx_buf)
{
	struct zone_block_io_channel *bdev_ch = ctx_buf;

	spdk_put_io_channel(bdev_ch->base_ch);
}

/* Record a (base bdev, vbdev) pairing in the config list, rejecting
 * duplicate vbdev names and base bdevs that are already claimed by a
 * previous config entry. Returns 0, -EEXIST, or -ENOMEM.
 */
static int
zone_block_insert_name(const char *bdev_name, const char *vbdev_name, uint64_t zone_capacity,
		       uint64_t optimal_open_zones)
{
	struct bdev_zone_block_config *name;

	TAILQ_FOREACH(name, &g_bdev_configs, link) {
		if (strcmp(vbdev_name, name->vbdev_name) == 0) {
			SPDK_ERRLOG("block zoned bdev %s already exists\n", vbdev_name);
			return -EEXIST;
		}
		if (strcmp(bdev_name, name->bdev_name) == 0) {
			SPDK_ERRLOG("base bdev %s already claimed\n", bdev_name);
			return -EEXIST;
		}
	}

	name = calloc(1, sizeof(*name));
	if (!name) {
		SPDK_ERRLOG("could not allocate bdev_names\n");
		return -ENOMEM;
	}

	name->bdev_name = strdup(bdev_name);
	if (!name->bdev_name) {
		SPDK_ERRLOG("could not allocate name->bdev_name\n");
		free(name);
		return -ENOMEM;
	}

	name->vbdev_name = strdup(vbdev_name);
	if (!name->vbdev_name) {
		SPDK_ERRLOG("could not allocate name->vbdev_name\n");
		free(name->bdev_name);
		free(name);
		return -ENOMEM;
	}

	name->zone_capacity = zone_capacity;
	name->optimal_open_zones = optimal_open_zones;

	TAILQ_INSERT_TAIL(&g_bdev_configs, name, link);

	return 0;
}

/* Initialize every zone as FULL with its write pointer at capacity (the
 * base bdev may already hold data, so zones start non-writable until reset)
 * and create the per-zone spinlocks. On spinlock-init failure, destroys the
 * locks created so far and returns -ENOMEM.
 */
static int
zone_block_init_zone_info(struct bdev_zone_block *bdev_node)
{
	size_t i;
	struct block_zone *zone;
	int rc = 0;

	for (i = 0; i < bdev_node->num_zones; i++) {
		zone = &bdev_node->zones[i];
		zone->zone_info.zone_id = bdev_node->bdev.zone_size * i;
		zone->zone_info.capacity = bdev_node->zone_capacity;
		zone->zone_info.write_pointer = zone->zone_info.zone_id + zone->zone_info.capacity;
		zone->zone_info.state = SPDK_BDEV_ZONE_STATE_FULL;
		zone->zone_info.type = SPDK_BDEV_ZONE_TYPE_SEQWR;
		if (pthread_spin_init(&zone->lock, PTHREAD_PROCESS_PRIVATE)) {
			SPDK_ERRLOG("pthread_spin_init() failed\n");
			rc = -ENOMEM;
			break;
		}
	}

	if (rc) {
		/* Unwind only the locks that were successfully initialized. */
		for (; i > 0; i--) {
			pthread_spin_destroy(&bdev_node->zones[i - 1].lock);
		}
	}

	return rc;
}

/* If base_bdev_name matches a stored config entry, open and claim the base
 * bdev and build/register the zoned vbdev on top of it. Zone size is the
 * zone capacity rounded up to a power of 2 (so LBA->zone mapping is a
 * shift); trailing base-bdev blocks that don't fill a zone are dropped.
 * Returns 0 on success (or no matching config), -ENODEV if the base bdev
 * is not present yet, or a negative errno on failure.
 */
static int
zone_block_register(const char *base_bdev_name)
{
	struct spdk_bdev_desc *base_desc;
	struct spdk_bdev *base_bdev;
	struct bdev_zone_block_config *name, *tmp;
	struct bdev_zone_block *bdev_node;
	struct spdk_uuid ns_uuid;
	uint64_t zone_size;
	int rc = 0;

	spdk_uuid_parse(&ns_uuid, BDEV_ZONE_BLOCK_NAMESPACE_UUID);

	/* Check our list of names from config versus this bdev and if
	 * there's a match, create the bdev_node & bdev accordingly.
	 */
	TAILQ_FOREACH_SAFE(name, &g_bdev_configs, link, tmp) {
		if (strcmp(name->bdev_name, base_bdev_name) != 0) {
			continue;
		}

		rc = spdk_bdev_open_ext(base_bdev_name, true, zone_block_base_bdev_event_cb,
					NULL, &base_desc);
		if (rc == -ENODEV) {
			/* Not an error: the base bdev may show up later (examine path). */
			return -ENODEV;
		} else if (rc) {
			SPDK_ERRLOG("could not open bdev %s\n", base_bdev_name);
			goto free_config;
		}

		base_bdev = spdk_bdev_desc_get_bdev(base_desc);

		if (spdk_bdev_is_zoned(base_bdev)) {
			SPDK_ERRLOG("Base bdev %s is already a zoned bdev\n", base_bdev_name);
			rc = -EEXIST;
			goto zone_exist;
		}

		bdev_node = calloc(1, sizeof(struct bdev_zone_block));
		if (!bdev_node) {
			rc = -ENOMEM;
			SPDK_ERRLOG("could not allocate bdev_node\n");
			goto zone_exist;
		}

		bdev_node->base_desc = base_desc;

		/* The base bdev that we're attaching to.
		 */
		bdev_node->bdev.name = strdup(name->vbdev_name);
		if (!bdev_node->bdev.name) {
			rc = -ENOMEM;
			SPDK_ERRLOG("could not allocate bdev_node name\n");
			goto strdup_failed;
		}

		/* Round the zone size up to a power of 2 so zone lookup is a shift. */
		zone_size = spdk_align64pow2(name->zone_capacity);
		if (zone_size == 0) {
			rc = -EINVAL;
			SPDK_ERRLOG("invalid zone size\n");
			goto roundup_failed;
		}

		bdev_node->zone_shift = spdk_u64log2(zone_size);
		bdev_node->num_zones = base_bdev->blockcnt / zone_size;

		bdev_node->zones = calloc(bdev_node->num_zones, sizeof(struct block_zone));
		if (!bdev_node->zones) {
			rc = -ENOMEM;
			SPDK_ERRLOG("could not allocate zones\n");
			goto calloc_failed;
		}

		bdev_node->bdev.product_name = "zone_block";

		/* Copy some properties from the underlying base bdev. */
		bdev_node->bdev.write_cache = base_bdev->write_cache;
		bdev_node->bdev.required_alignment = base_bdev->required_alignment;
		bdev_node->bdev.optimal_io_boundary = base_bdev->optimal_io_boundary;

		bdev_node->bdev.blocklen = base_bdev->blocklen;
		bdev_node->bdev.blockcnt = bdev_node->num_zones * zone_size;

		if (bdev_node->num_zones * name->zone_capacity != base_bdev->blockcnt) {
			SPDK_DEBUGLOG(vbdev_zone_block,
				      "Lost %" PRIu64 " blocks due to zone capacity and base bdev size misalignment\n",
				      base_bdev->blockcnt - bdev_node->num_zones * name->zone_capacity);
		}

		bdev_node->bdev.write_unit_size = base_bdev->write_unit_size;

		bdev_node->bdev.md_interleave = base_bdev->md_interleave;
		bdev_node->bdev.md_len = base_bdev->md_len;
		bdev_node->bdev.dif_type = base_bdev->dif_type;
		bdev_node->bdev.dif_is_head_of_md = base_bdev->dif_is_head_of_md;
		bdev_node->bdev.dif_check_flags = base_bdev->dif_check_flags;
		bdev_node->bdev.dif_pi_format = base_bdev->dif_pi_format;

		bdev_node->bdev.zoned = true;
		bdev_node->bdev.ctxt = bdev_node;
		bdev_node->bdev.fn_table = &zone_block_fn_table;
		bdev_node->bdev.module = &bdev_zoned_if;

		/* Generate UUID based on namespace UUID + base bdev UUID. */
		rc = spdk_uuid_generate_sha1(&bdev_node->bdev.uuid, &ns_uuid,
					     (const char *)&base_bdev->uuid, sizeof(struct spdk_uuid));
		if (rc) {
			SPDK_ERRLOG("Unable to generate new UUID for zone block bdev\n");
			goto uuid_generation_failed;
		}

		/* bdev specific info */
		bdev_node->bdev.zone_size = zone_size;

		bdev_node->zone_capacity = name->zone_capacity;
		bdev_node->bdev.optimal_open_zones = name->optimal_open_zones;
		bdev_node->bdev.max_open_zones = 0;
		rc = zone_block_init_zone_info(bdev_node);
		if (rc) {
			SPDK_ERRLOG("could not init zone info\n");
			goto zone_info_failed;
		}

		TAILQ_INSERT_TAIL(&g_bdev_nodes, bdev_node, link);

		spdk_io_device_register(bdev_node, _zone_block_ch_create_cb, _zone_block_ch_destroy_cb,
					sizeof(struct zone_block_io_channel),
					name->vbdev_name);

		/* Save the thread where the base device is opened */
		bdev_node->thread = spdk_get_thread();

		rc = spdk_bdev_module_claim_bdev(base_bdev, base_desc, bdev_node->bdev.module);
		if (rc) {
			SPDK_ERRLOG("could not claim bdev %s\n", base_bdev_name);
			goto claim_failed;
		}

		rc = spdk_bdev_register(&bdev_node->bdev);
		if (rc) {
			SPDK_ERRLOG("could not register zoned bdev\n");
			goto register_failed;
		}
	}

	return rc;

register_failed:
	/* NOTE(review): the claim above was taken on base_bdev, but this
	 * releases &bdev_node->bdev (the vbdev, which was never claimed).
	 * Compare zone_block_destruct, which releases the base bdev —
	 * confirm this should be spdk_bdev_module_release_bdev(base_bdev).
	 */
	spdk_bdev_module_release_bdev(&bdev_node->bdev);
claim_failed:
	TAILQ_REMOVE(&g_bdev_nodes, bdev_node, link);
	spdk_io_device_unregister(bdev_node, NULL);
zone_info_failed:
uuid_generation_failed:
	free(bdev_node->zones);
calloc_failed:
roundup_failed:
	free(bdev_node->bdev.name);
strdup_failed:
	free(bdev_node);
zone_exist:
	spdk_bdev_close(base_desc);
free_config:
	zone_block_remove_config(name);
	return rc;
}

/* RPC entry point: validate parameters, store the config, and create the
 * vbdev immediately if the base bdev already exists. A missing base bdev
 * (-ENODEV) is not an error — creation is deferred to examine().
 */
int
vbdev_zone_block_create(const char *bdev_name, const char *vbdev_name, uint64_t zone_capacity,
			uint64_t optimal_open_zones)
{
	int rc = 0;

	if (zone_capacity == 0) {
		SPDK_ERRLOG("Zone capacity can't be 0\n");
		return -EINVAL;
	}

	if (optimal_open_zones == 0) {
		SPDK_ERRLOG("Optimal open zones can't be 0\n");
		return -EINVAL;
	}

	/* Insert the bdev into our global name list even if it doesn't exist yet,
	 * it may show up soon...
	 */
	rc = zone_block_insert_name(bdev_name, vbdev_name, zone_capacity, optimal_open_zones);
	if (rc) {
		return rc;
	}

	rc = zone_block_register(bdev_name);
	if (rc == -ENODEV) {
		/* This is not an error, even though the bdev is not present at this time it may
		 * still show up later.
		 */
		rc = 0;
	}
	return rc;
}

/* RPC entry point: unregister the named vbdev and drop its config entry so
 * it won't be recreated by examine(); errors are reported via cb_fn.
 */
void
vbdev_zone_block_delete(const char *name, spdk_bdev_unregister_cb cb_fn, void *cb_arg)
{
	struct bdev_zone_block_config *name_node;
	int rc;

	rc = spdk_bdev_unregister_by_name(name, &bdev_zoned_if, cb_fn, cb_arg);
	if (rc == 0) {
		TAILQ_FOREACH(name_node, &g_bdev_configs, link) {
			if (strcmp(name_node->vbdev_name, name) == 0) {
				zone_block_remove_config(name_node);
				break;
			}
		}
	} else {
		cb_fn(cb_arg, rc);
	}
}

/* examine_config hook: try to build any configured vbdev on a newly
 * appearing base bdev. The register return code is intentionally ignored
 * (failures were already logged); examine_done must always be called.
 */
static void
zone_block_examine(struct spdk_bdev *bdev)
{
	zone_block_register(bdev->name);

	spdk_bdev_module_examine_done(&bdev_zoned_if);
}

SPDK_LOG_REGISTER_COMPONENT(vbdev_zone_block)