/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2019 Intel Corporation.
 * Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES.
 * All rights reserved.
 */

#include "spdk/stdinc.h"

#include "vbdev_zone_block.h"

#include "spdk/config.h"
#include "spdk/nvme.h"
#include "spdk/bdev_zone.h"

#include "spdk/log.h"

static int zone_block_init(void);
static int zone_block_get_ctx_size(void);
static void zone_block_finish(void);
static int zone_block_config_json(struct spdk_json_write_ctx *w);
static void zone_block_examine(struct spdk_bdev *bdev);

static struct spdk_bdev_module bdev_zoned_if = {
	.name = "bdev_zoned_block",
	.module_init = zone_block_init,
	.module_fini = zone_block_finish,
	.config_json = zone_block_config_json,
	.examine_config = zone_block_examine,
	.get_ctx_size = zone_block_get_ctx_size,
};

SPDK_BDEV_MODULE_REGISTER(bdev_zoned_block, &bdev_zoned_if)

/* List of block vbdev names and their base bdevs via configuration file.
 * Used so we can parse the conf once at init and use this list in examine().
 */
struct bdev_zone_block_config {
	char *vbdev_name;
	char *bdev_name;
	uint64_t zone_capacity;
	uint64_t optimal_open_zones;
	TAILQ_ENTRY(bdev_zone_block_config) link;
};
static TAILQ_HEAD(, bdev_zone_block_config) g_bdev_configs = TAILQ_HEAD_INITIALIZER(g_bdev_configs);

/* Per-zone state; the spinlock serializes state/write-pointer updates
 * across channels since zone state is shared, not per-channel.
 */
struct block_zone {
	struct spdk_bdev_zone_info zone_info;
	pthread_spinlock_t lock;
};

/* List of block vbdevs and associated info for each.
 */
struct bdev_zone_block {
	struct spdk_bdev bdev;    /* the block zoned bdev */
	struct spdk_bdev_desc *base_desc; /* its descriptor we get from open */
	struct block_zone *zones; /* array of zones */
	uint64_t num_zones;      /* number of zones */
	uint64_t zone_capacity;  /* zone capacity */
	uint64_t zone_shift;     /* log2 of zone_size */
	TAILQ_ENTRY(bdev_zone_block) link;
	struct spdk_thread *thread; /* thread where base device is opened */
};
static TAILQ_HEAD(, bdev_zone_block) g_bdev_nodes = TAILQ_HEAD_INITIALIZER(g_bdev_nodes);

struct zone_block_io_channel {
	struct spdk_io_channel *base_ch; /* IO channel of base device */
};

struct zone_block_io {
	/* vbdev to which IO was issued */
	struct bdev_zone_block *bdev_zone_block;
};

/* Module init hook: nothing to set up; configs arrive later via RPC. */
static int
zone_block_init(void)
{
	return 0;
}

/* Remove one config entry from the global list and free its strings. */
static void
zone_block_remove_config(struct bdev_zone_block_config *name)
{
	TAILQ_REMOVE(&g_bdev_configs, name, link);
	free(name->bdev_name);
	free(name->vbdev_name);
	free(name);
}

/* Module fini hook: drain any config entries whose base bdev never appeared. */
static void
zone_block_finish(void)
{
	struct bdev_zone_block_config *name;

	while ((name = TAILQ_FIRST(&g_bdev_configs))) {
		zone_block_remove_config(name);
	}
}

/* Size of the per-IO context the bdev layer allocates for us. */
static int
zone_block_get_ctx_size(void)
{
	return sizeof(struct zone_block_io);
}

/* Emit one bdev_zone_block_create RPC object per live vbdev so the current
 * configuration can be saved and replayed.
 */
static int
zone_block_config_json(struct spdk_json_write_ctx *w)
{
	struct bdev_zone_block *bdev_node;
	struct spdk_bdev *base_bdev = NULL;

	TAILQ_FOREACH(bdev_node, &g_bdev_nodes, link) {
		base_bdev = spdk_bdev_desc_get_bdev(bdev_node->base_desc);
		spdk_json_write_object_begin(w);
		spdk_json_write_named_string(w, "method", "bdev_zone_block_create");
		spdk_json_write_named_object_begin(w, "params");
		spdk_json_write_named_string(w, "base_bdev", spdk_bdev_get_name(base_bdev));
		spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&bdev_node->bdev));
		spdk_json_write_named_uint64(w, "zone_capacity", bdev_node->zone_capacity);
		spdk_json_write_named_uint64(w, "optimal_open_zones", bdev_node->bdev.optimal_open_zones);
		spdk_json_write_object_end(w);
		spdk_json_write_object_end(w);
	}

	return 0;
}

/* Callback for unregistering the IO device. */
static void
_device_unregister_cb(void *io_device)
{
	struct bdev_zone_block *bdev_node = io_device;
	uint64_t i;

	free(bdev_node->bdev.name);
	for (i = 0; i < bdev_node->num_zones; i++) {
		pthread_spin_destroy(&bdev_node->zones[i].lock);
	}
	free(bdev_node->zones);
	free(bdev_node);
}

/* Message sent to the opening thread: close the base descriptor there. */
static void
_zone_block_destruct(void *ctx)
{
	struct spdk_bdev_desc *desc = ctx;

	spdk_bdev_close(desc);
}

/* bdev fn_table destruct: release the base bdev claim, close the descriptor
 * on the thread that opened it, and unregister the io_device (final free
 * happens in _device_unregister_cb).
 */
static int
zone_block_destruct(void *ctx)
{
	struct bdev_zone_block *bdev_node = (struct bdev_zone_block *)ctx;

	TAILQ_REMOVE(&g_bdev_nodes, bdev_node, link);

	/* Unclaim the underlying bdev. */
	spdk_bdev_module_release_bdev(spdk_bdev_desc_get_bdev(bdev_node->base_desc));

	/* Close the underlying bdev on its same opened thread. */
	if (bdev_node->thread && bdev_node->thread != spdk_get_thread()) {
		spdk_thread_send_msg(bdev_node->thread, _zone_block_destruct, bdev_node->base_desc);
	} else {
		spdk_bdev_close(bdev_node->base_desc);
	}

	/* Unregister the io_device.
	 */
	spdk_io_device_unregister(bdev_node, _device_unregister_cb);

	return 0;
}

/* Map an LBA to its zone by shifting by log2(zone_size); NULL if out of range. */
static struct block_zone *
zone_block_get_zone_containing_lba(struct bdev_zone_block *bdev_node, uint64_t lba)
{
	size_t index = lba >> bdev_node->zone_shift;

	if (index >= bdev_node->num_zones) {
		return NULL;
	}

	return &bdev_node->zones[index];
}

/* Like the above, but the LBA must be exactly a zone start (zone_id). */
static struct block_zone *
zone_block_get_zone_by_slba(struct bdev_zone_block *bdev_node, uint64_t start_lba)
{
	struct block_zone *zone = zone_block_get_zone_containing_lba(bdev_node, start_lba);

	if (zone && zone->zone_info.zone_id == start_lba) {
		return zone;
	} else {
		return NULL;
	}
}

/* GET_ZONE_INFO: copy spdk_bdev_zone_info for num_zones consecutive zones
 * starting at zone_id into the caller's buffer.
 */
static int
zone_block_get_zone_info(struct bdev_zone_block *bdev_node, struct spdk_bdev_io *bdev_io)
{
	struct block_zone *zone;
	struct spdk_bdev_zone_info *zone_info = bdev_io->u.zone_mgmt.buf;
	uint64_t zone_id = bdev_io->u.zone_mgmt.zone_id;
	size_t i;

	/* User can request info for more zones than exist, need to check both internal and user
	 * boundaries
	 */
	for (i = 0; i < bdev_io->u.zone_mgmt.num_zones; i++, zone_id += bdev_node->bdev.zone_size) {
		zone = zone_block_get_zone_by_slba(bdev_node, zone_id);
		if (!zone) {
			return -EINVAL;
		}
		memcpy(&zone_info[i], &zone->zone_info, sizeof(*zone_info));
	}

	spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
	return 0;
}

/* ZONE_OPEN: valid from EMPTY/OPEN/CLOSED; any other state is an error. */
static int
zone_block_open_zone(struct block_zone *zone, struct spdk_bdev_io *bdev_io)
{
	pthread_spin_lock(&zone->lock);

	switch (zone->zone_info.state) {
	case SPDK_BDEV_ZONE_STATE_EMPTY:
	case SPDK_BDEV_ZONE_STATE_OPEN:
	case SPDK_BDEV_ZONE_STATE_CLOSED:
		zone->zone_info.state = SPDK_BDEV_ZONE_STATE_OPEN;
		pthread_spin_unlock(&zone->lock);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
		return 0;
	default:
		pthread_spin_unlock(&zone->lock);
		return -EINVAL;
	}
}

/* Completion for the unmap issued by zone reset. */
static void
_zone_block_complete_unmap(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_bdev_io *orig_io = cb_arg;
	int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;

	/* Complete the original IO and then free the one that we created here
	 * as a result of issuing an IO via submit_request.
	 */
	spdk_bdev_io_complete(orig_io, status);
	spdk_bdev_free_io(bdev_io);
}

/* ZONE_RESET: rewind the write pointer to the zone start, mark EMPTY, and
 * best-effort unmap the zone's blocks on the base bdev.
 */
static int
zone_block_reset_zone(struct bdev_zone_block *bdev_node, struct zone_block_io_channel *ch,
		      struct block_zone *zone, struct spdk_bdev_io *bdev_io)
{
	pthread_spin_lock(&zone->lock);

	switch (zone->zone_info.state) {
	case SPDK_BDEV_ZONE_STATE_EMPTY:
		/* Already empty: nothing to do. */
		pthread_spin_unlock(&zone->lock);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
		return 0;
	case SPDK_BDEV_ZONE_STATE_OPEN:
	case SPDK_BDEV_ZONE_STATE_FULL:
	case SPDK_BDEV_ZONE_STATE_CLOSED:
		zone->zone_info.state = SPDK_BDEV_ZONE_STATE_EMPTY;
		zone->zone_info.write_pointer = zone->zone_info.zone_id;
		pthread_spin_unlock(&zone->lock);

		/* The unmap isn't necessary, so if the base bdev doesn't support it, we're done */
		if (!spdk_bdev_io_type_supported(spdk_bdev_desc_get_bdev(bdev_node->base_desc),
						 SPDK_BDEV_IO_TYPE_UNMAP)) {
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
			return 0;
		}

		return spdk_bdev_unmap_blocks(bdev_node->base_desc, ch->base_ch,
					      zone->zone_info.zone_id, zone->zone_info.capacity,
					      _zone_block_complete_unmap, bdev_io);
	default:
		pthread_spin_unlock(&zone->lock);
		return -EINVAL;
	}
}

/* ZONE_CLOSE: valid from OPEN or CLOSED only. */
static int
zone_block_close_zone(struct block_zone *zone, struct spdk_bdev_io *bdev_io)
{
	pthread_spin_lock(&zone->lock);

	switch (zone->zone_info.state) {
	case SPDK_BDEV_ZONE_STATE_OPEN:
	case SPDK_BDEV_ZONE_STATE_CLOSED:
		zone->zone_info.state = SPDK_BDEV_ZONE_STATE_CLOSED;
		pthread_spin_unlock(&zone->lock);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
		return 0;
	default:
		pthread_spin_unlock(&zone->lock);
		return -EINVAL;
	}
}

/* ZONE_FINISH: move the write pointer to the end of the zone's capacity and
 * mark the zone FULL, from any state.
 */
static int
zone_block_finish_zone(struct block_zone *zone, struct spdk_bdev_io *bdev_io)
{
	pthread_spin_lock(&zone->lock);

	zone->zone_info.write_pointer = zone->zone_info.zone_id + zone->zone_info.capacity;
	zone->zone_info.state = SPDK_BDEV_ZONE_STATE_FULL;

	pthread_spin_unlock(&zone->lock);
	spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
	return 0;
}

/* Dispatch a ZONE_MANAGEMENT IO to the matching action handler; the target
 * zone_id must be an exact zone start.
 */
static int
zone_block_zone_management(struct bdev_zone_block *bdev_node, struct zone_block_io_channel *ch,
			   struct spdk_bdev_io *bdev_io)
{
	struct block_zone *zone;

	zone = zone_block_get_zone_by_slba(bdev_node, bdev_io->u.zone_mgmt.zone_id);
	if (!zone) {
		return -EINVAL;
	}

	switch (bdev_io->u.zone_mgmt.zone_action) {
	case SPDK_BDEV_ZONE_RESET:
		return zone_block_reset_zone(bdev_node, ch, zone, bdev_io);
	case SPDK_BDEV_ZONE_OPEN:
		return zone_block_open_zone(zone, bdev_io);
	case SPDK_BDEV_ZONE_CLOSE:
		return zone_block_close_zone(zone, bdev_io);
	case SPDK_BDEV_ZONE_FINISH:
		return zone_block_finish_zone(zone, bdev_io);
	default:
		return -EINVAL;
	}
}

/* Completion for writes/appends against the base bdev. For appends, report
 * the LBA actually written back to the caller via offset_blocks.
 */
static void
_zone_block_complete_write(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_bdev_io *orig_io = cb_arg;
	int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;

	if (success && orig_io->type == SPDK_BDEV_IO_TYPE_ZONE_APPEND) {
		orig_io->u.bdev.offset_blocks = bdev_io->u.bdev.offset_blocks;
	}

	/* Complete the original IO and then free the one that we created here
	 * as a result of issuing an IO via submit_request.
	 */
	spdk_bdev_io_complete(orig_io, status);
	spdk_bdev_free_io(bdev_io);
}

/* Handle WRITE and ZONE_APPEND. Writes must land exactly on the zone's write
 * pointer; appends target a zone start and are redirected to the current
 * write pointer. The zone transitions to OPEN (or FULL when the capacity is
 * reached) and the data is passed through to the base bdev.
 */
static int
zone_block_write(struct bdev_zone_block *bdev_node, struct zone_block_io_channel *ch,
		 struct spdk_bdev_io *bdev_io)
{
	struct block_zone *zone;
	uint64_t len = bdev_io->u.bdev.num_blocks;
	uint64_t lba = bdev_io->u.bdev.offset_blocks;
	uint64_t num_blocks_left, wp;
	int rc = 0;
	bool is_append = bdev_io->type == SPDK_BDEV_IO_TYPE_ZONE_APPEND;

	if (is_append) {
		/* Appends address the zone by its start LBA. */
		zone = zone_block_get_zone_by_slba(bdev_node, lba);
	} else {
		zone = zone_block_get_zone_containing_lba(bdev_node, lba);
	}
	if (!zone) {
		SPDK_ERRLOG("Trying to write to invalid zone (lba 0x%" PRIx64 ")\n", lba);
		return -EINVAL;
	}

	pthread_spin_lock(&zone->lock);

	switch (zone->zone_info.state) {
	case SPDK_BDEV_ZONE_STATE_OPEN:
	case SPDK_BDEV_ZONE_STATE_EMPTY:
	case SPDK_BDEV_ZONE_STATE_CLOSED:
		zone->zone_info.state = SPDK_BDEV_ZONE_STATE_OPEN;
		break;
	default:
		SPDK_ERRLOG("Trying to write to zone in invalid state %u\n", zone->zone_info.state);
		rc = -EINVAL;
		goto write_fail;
	}

	wp = zone->zone_info.write_pointer;
	if (is_append) {
		lba = wp;
	} else {
		if (lba != wp) {
			SPDK_ERRLOG("Trying to write to zone with invalid address (lba 0x%" PRIx64 ", wp 0x%" PRIx64 ")\n",
				    lba, wp);
			rc = -EINVAL;
			goto write_fail;
		}
	}

	num_blocks_left = zone->zone_info.zone_id + zone->zone_info.capacity - wp;
	if (len > num_blocks_left) {
		SPDK_ERRLOG("Write exceeds zone capacity (lba 0x%" PRIx64 ", len 0x%" PRIx64 ", wp 0x%" PRIx64
			    ")\n", lba, len, wp);
		rc = -EINVAL;
		goto write_fail;
	}

	/* NOTE(review): the write pointer is advanced before the base-bdev IO is
	 * submitted; if submission fails the pointer stays advanced — verify this
	 * matches the intended retry semantics upstream.
	 */
	zone->zone_info.write_pointer += bdev_io->u.bdev.num_blocks;
	assert(zone->zone_info.write_pointer <= zone->zone_info.zone_id + zone->zone_info.capacity);
	if (zone->zone_info.write_pointer == zone->zone_info.zone_id + zone->zone_info.capacity) {
		zone->zone_info.state = SPDK_BDEV_ZONE_STATE_FULL;
	}
	pthread_spin_unlock(&zone->lock);

	rc = spdk_bdev_writev_blocks_with_md(bdev_node->base_desc, ch->base_ch,
					     bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
					     bdev_io->u.bdev.md_buf,
					     lba, bdev_io->u.bdev.num_blocks,
					     _zone_block_complete_write, bdev_io);

	return rc;

write_fail:
	pthread_spin_unlock(&zone->lock);
	return rc;
}

/* Completion for reads against the base bdev. */
static void
_zone_block_complete_read(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_bdev_io *orig_io = cb_arg;
	int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;

	/* Complete the original IO and then free the one that we created here
	 * as a result of issuing an IO via submit_request.
	 */
	spdk_bdev_io_complete(orig_io, status);
	spdk_bdev_free_io(bdev_io);
}

/* Handle READ: validate the range stays within one zone's capacity, then
 * pass through to the base bdev at the same LBA.
 */
static int
zone_block_read(struct bdev_zone_block *bdev_node, struct zone_block_io_channel *ch,
		struct spdk_bdev_io *bdev_io)
{
	struct block_zone *zone;
	uint64_t len = bdev_io->u.bdev.num_blocks;
	uint64_t lba = bdev_io->u.bdev.offset_blocks;
	int rc;

	zone = zone_block_get_zone_containing_lba(bdev_node, lba);
	if (!zone) {
		SPDK_ERRLOG("Trying to read from invalid zone (lba 0x%" PRIx64 ")\n", lba);
		return -EINVAL;
	}

	if ((lba + len) > (zone->zone_info.zone_id + zone->zone_info.capacity)) {
		SPDK_ERRLOG("Read exceeds zone capacity (lba 0x%" PRIx64 ", len 0x%" PRIx64 ")\n", lba, len);
		return -EINVAL;
	}

	rc = spdk_bdev_readv_blocks_with_md(bdev_node->base_desc, ch->base_ch,
					    bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
					    bdev_io->u.bdev.md_buf,
					    lba, len,
					    _zone_block_complete_read, bdev_io);

	return rc;
}

/* bdev fn_table submit_request: route the IO by type; on handler error,
 * complete the IO with NOMEM (for queueing/retry) or FAILED.
 */
static void
zone_block_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	struct bdev_zone_block *bdev_node = SPDK_CONTAINEROF(bdev_io->bdev, struct bdev_zone_block, bdev);
	struct zone_block_io_channel *dev_ch = spdk_io_channel_get_ctx(ch);
	int rc = 0;

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
		rc = zone_block_get_zone_info(bdev_node, bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
		rc = zone_block_zone_management(bdev_node, dev_ch, bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
	case SPDK_BDEV_IO_TYPE_ZONE_APPEND:
		rc = zone_block_write(bdev_node, dev_ch, bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_READ:
		rc = zone_block_read(bdev_node, dev_ch, bdev_io);
		break;
	default:
		SPDK_ERRLOG("vbdev_block: unknown I/O type %u\n", bdev_io->type);
		rc = -ENOTSUP;
		break;
	}

	if (rc != 0) {
		if (rc == -ENOMEM) {
			SPDK_WARNLOG("ENOMEM, start to queue io for vbdev.\n");
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
		} else {
			SPDK_ERRLOG("ERROR on bdev_io submission!\n");
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		}
	}
}

/* bdev fn_table io_type_supported: only the types submit_request handles.
 * GET_ZONE_INFO is intentionally not advertised here.
 */
static bool
zone_block_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
{
	switch (io_type) {
	case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
	case SPDK_BDEV_IO_TYPE_WRITE:
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_ZONE_APPEND:
		return true;
	default:
		return false;
	}
}

/* bdev fn_table get_io_channel: our io_device key is the bdev_node itself. */
static struct spdk_io_channel *
zone_block_get_io_channel(void *ctx)
{
	struct bdev_zone_block *bdev_node = (struct bdev_zone_block *)ctx;

	return spdk_get_io_channel(bdev_node);
}

/* bdev fn_table dump_info_json: human-readable info for bdev_get_bdevs. */
static int
zone_block_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
{
	struct bdev_zone_block *bdev_node = (struct bdev_zone_block *)ctx;
	struct spdk_bdev *base_bdev = spdk_bdev_desc_get_bdev(bdev_node->base_desc);

	spdk_json_write_name(w, "zoned_block");
	spdk_json_write_object_begin(w);
	spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&bdev_node->bdev));
	spdk_json_write_named_string(w, "base_bdev", spdk_bdev_get_name(base_bdev));
	spdk_json_write_named_uint64(w, "zone_capacity", bdev_node->zone_capacity);
	spdk_json_write_named_uint64(w, "optimal_open_zones", bdev_node->bdev.optimal_open_zones);
	spdk_json_write_object_end(w);

	return 0;
}

/* When we register our vbdev this is how we specify our entry points. */
static const struct spdk_bdev_fn_table zone_block_fn_table = {
	.destruct		= zone_block_destruct,
	.submit_request		= zone_block_submit_request,
	.io_type_supported	= zone_block_io_type_supported,
	.get_io_channel		= zone_block_get_io_channel,
	.dump_info_json		= zone_block_dump_info_json,
};

/* Unregister every vbdev built on a base bdev that is being hot-removed. */
static void
zone_block_base_bdev_hotremove_cb(struct spdk_bdev *bdev_find)
{
	struct bdev_zone_block *bdev_node, *tmp;

	TAILQ_FOREACH_SAFE(bdev_node, &g_bdev_nodes, link, tmp) {
		if (bdev_find == spdk_bdev_desc_get_bdev(bdev_node->base_desc)) {
			spdk_bdev_unregister(&bdev_node->bdev, NULL, NULL);
		}
	}
}

/* Event callback registered with spdk_bdev_open_ext for the base bdev. */
static void
zone_block_base_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev,
			      void *event_ctx)
{
	switch (type) {
	case SPDK_BDEV_EVENT_REMOVE:
		zone_block_base_bdev_hotremove_cb(bdev);
		break;
	default:
		SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type);
		break;
	}
}

/* Per-channel create: grab an IO channel on the base bdev. */
static int
_zone_block_ch_create_cb(void *io_device, void *ctx_buf)
{
	struct zone_block_io_channel *bdev_ch = ctx_buf;
	struct bdev_zone_block *bdev_node = io_device;

	bdev_ch->base_ch = spdk_bdev_get_io_channel(bdev_node->base_desc);
	if (!bdev_ch->base_ch) {
		return -ENOMEM;
	}

	return 0;
}

/* Per-channel destroy: drop the base bdev's IO channel. */
static void
_zone_block_ch_destroy_cb(void *io_device, void *ctx_buf)
{
	struct zone_block_io_channel *bdev_ch = ctx_buf;

	spdk_put_io_channel(bdev_ch->base_ch);
}

/* Record a (base bdev, vbdev) pairing in the config list, rejecting
 * duplicate vbdev names and already-claimed base bdevs.
 */
static int
zone_block_insert_name(const char *bdev_name, const char *vbdev_name, uint64_t zone_capacity,
		       uint64_t optimal_open_zones)
{
	struct bdev_zone_block_config *name;

	TAILQ_FOREACH(name, &g_bdev_configs, link) {
		if (strcmp(vbdev_name, name->vbdev_name) == 0) {
			SPDK_ERRLOG("block zoned bdev %s already exists\n", vbdev_name);
			return -EEXIST;
		}
		if (strcmp(bdev_name, name->bdev_name) == 0) {
			SPDK_ERRLOG("base bdev %s already claimed\n", bdev_name);
			return -EEXIST;
		}
	}

	name = calloc(1, sizeof(*name));
	if (!name) {
		SPDK_ERRLOG("could not allocate bdev_names\n");
		return -ENOMEM;
	}

	name->bdev_name = strdup(bdev_name);
	if (!name->bdev_name) {
		SPDK_ERRLOG("could not allocate name->bdev_name\n");
		free(name);
		return -ENOMEM;
	}

	name->vbdev_name = strdup(vbdev_name);
	if (!name->vbdev_name) {
		SPDK_ERRLOG("could not allocate name->vbdev_name\n");
		free(name->bdev_name);
		free(name);
		return -ENOMEM;
	}

	name->zone_capacity = zone_capacity;
	name->optimal_open_zones = optimal_open_zones;

	TAILQ_INSERT_TAIL(&g_bdev_configs, name, link);

	return 0;
}

/* Initialize every zone as FULL with the write pointer at capacity and a
 * fresh spinlock; on spinlock init failure, destroy the locks created so far.
 */
static int
zone_block_init_zone_info(struct bdev_zone_block *bdev_node)
{
	size_t i;
	struct block_zone *zone;
	int rc = 0;

	for (i = 0; i < bdev_node->num_zones; i++) {
		zone = &bdev_node->zones[i];
		zone->zone_info.zone_id = bdev_node->bdev.zone_size * i;
		zone->zone_info.capacity = bdev_node->zone_capacity;
		zone->zone_info.write_pointer = zone->zone_info.zone_id + zone->zone_info.capacity;
		zone->zone_info.state = SPDK_BDEV_ZONE_STATE_FULL;
		zone->zone_info.type = SPDK_BDEV_ZONE_TYPE_SEQWR;
		if (pthread_spin_init(&zone->lock, PTHREAD_PROCESS_PRIVATE)) {
			SPDK_ERRLOG("pthread_spin_init() failed\n");
			rc = -ENOMEM;
			break;
		}
	}

	if (rc) {
		/* Unwind: locks [0, i-1] were initialized; lock i failed and is skipped. */
		for (; i > 0; i--) {
			pthread_spin_destroy(&bdev_node->zones[i - 1].lock);
		}
	}

	return rc;
}

/* Build and register the zoned vbdev on top of base_bdev_name if a matching
 * config entry exists. Zone size is zone_capacity rounded up to a power of
 * two so LBA->zone mapping can use a shift. Returns -ENODEV (untouched
 * config) when the base bdev is not present yet; other errors tear down
 * partial state via the goto chain and drop the config entry.
 */
static int
zone_block_register(const char *base_bdev_name)
{
	struct spdk_bdev_desc *base_desc;
	struct spdk_bdev *base_bdev;
	struct bdev_zone_block_config *name, *tmp;
	struct bdev_zone_block *bdev_node;
	uint64_t zone_size;
	int rc = 0;

	/* Check our list of names from config versus this bdev and if
	 * there's a match, create the bdev_node & bdev accordingly.
	 */
	TAILQ_FOREACH_SAFE(name, &g_bdev_configs, link, tmp) {
		if (strcmp(name->bdev_name, base_bdev_name) != 0) {
			continue;
		}

		rc = spdk_bdev_open_ext(base_bdev_name, true, zone_block_base_bdev_event_cb,
					NULL, &base_desc);
		if (rc == -ENODEV) {
			/* Base bdev not here yet; keep the config entry for examine(). */
			return -ENODEV;
		} else if (rc) {
			SPDK_ERRLOG("could not open bdev %s\n", base_bdev_name);
			goto free_config;
		}

		base_bdev = spdk_bdev_desc_get_bdev(base_desc);

		if (spdk_bdev_is_zoned(base_bdev)) {
			SPDK_ERRLOG("Base bdev %s is already a zoned bdev\n", base_bdev_name);
			rc = -EEXIST;
			goto zone_exist;
		}

		bdev_node = calloc(1, sizeof(struct bdev_zone_block));
		if (!bdev_node) {
			rc = -ENOMEM;
			SPDK_ERRLOG("could not allocate bdev_node\n");
			goto zone_exist;
		}

		bdev_node->base_desc = base_desc;

		/* The base bdev that we're attaching to.
		 */
		bdev_node->bdev.name = strdup(name->vbdev_name);
		if (!bdev_node->bdev.name) {
			rc = -ENOMEM;
			SPDK_ERRLOG("could not allocate bdev_node name\n");
			goto strdup_failed;
		}

		zone_size = spdk_align64pow2(name->zone_capacity);
		if (zone_size == 0) {
			rc = -EINVAL;
			SPDK_ERRLOG("invalid zone size\n");
			goto roundup_failed;
		}

		bdev_node->zone_shift = spdk_u64log2(zone_size);
		bdev_node->num_zones = base_bdev->blockcnt / zone_size;

		bdev_node->zones = calloc(bdev_node->num_zones, sizeof(struct block_zone));
		if (!bdev_node->zones) {
			rc = -ENOMEM;
			SPDK_ERRLOG("could not allocate zones\n");
			goto calloc_failed;
		}

		bdev_node->bdev.product_name = "zone_block";

		/* Copy some properties from the underlying base bdev. */
		bdev_node->bdev.write_cache = base_bdev->write_cache;
		bdev_node->bdev.required_alignment = base_bdev->required_alignment;
		bdev_node->bdev.optimal_io_boundary = base_bdev->optimal_io_boundary;

		bdev_node->bdev.blocklen = base_bdev->blocklen;
		bdev_node->bdev.blockcnt = bdev_node->num_zones * zone_size;

		if (bdev_node->num_zones * name->zone_capacity != base_bdev->blockcnt) {
			SPDK_DEBUGLOG(vbdev_zone_block,
				      "Lost %" PRIu64 " blocks due to zone capacity and base bdev size misalignment\n",
				      base_bdev->blockcnt - bdev_node->num_zones * name->zone_capacity);
		}

		bdev_node->bdev.write_unit_size = base_bdev->write_unit_size;

		bdev_node->bdev.md_interleave = base_bdev->md_interleave;
		bdev_node->bdev.md_len = base_bdev->md_len;
		bdev_node->bdev.dif_type = base_bdev->dif_type;
		bdev_node->bdev.dif_is_head_of_md = base_bdev->dif_is_head_of_md;
		bdev_node->bdev.dif_check_flags = base_bdev->dif_check_flags;

		bdev_node->bdev.zoned = true;
		bdev_node->bdev.ctxt = bdev_node;
		bdev_node->bdev.fn_table = &zone_block_fn_table;
		bdev_node->bdev.module = &bdev_zoned_if;

		/* bdev specific info */
		bdev_node->bdev.zone_size = zone_size;

		bdev_node->zone_capacity = name->zone_capacity;
		bdev_node->bdev.optimal_open_zones = name->optimal_open_zones;
		bdev_node->bdev.max_open_zones = 0;
		rc = zone_block_init_zone_info(bdev_node);
		if (rc) {
			SPDK_ERRLOG("could not init zone info\n");
			goto zone_info_failed;
		}

		TAILQ_INSERT_TAIL(&g_bdev_nodes, bdev_node, link);

		spdk_io_device_register(bdev_node, _zone_block_ch_create_cb, _zone_block_ch_destroy_cb,
					sizeof(struct zone_block_io_channel),
					name->vbdev_name);

		/* Save the thread where the base device is opened */
		bdev_node->thread = spdk_get_thread();

		rc = spdk_bdev_module_claim_bdev(base_bdev, base_desc, bdev_node->bdev.module);
		if (rc) {
			SPDK_ERRLOG("could not claim bdev %s\n", base_bdev_name);
			goto claim_failed;
		}

		rc = spdk_bdev_register(&bdev_node->bdev);
		if (rc) {
			SPDK_ERRLOG("could not register zoned bdev\n");
			goto register_failed;
		}
	}

	return rc;

register_failed:
	spdk_bdev_module_release_bdev(&bdev_node->bdev);
claim_failed:
	TAILQ_REMOVE(&g_bdev_nodes, bdev_node, link);
	spdk_io_device_unregister(bdev_node, NULL);
zone_info_failed:
	free(bdev_node->zones);
calloc_failed:
roundup_failed:
	free(bdev_node->bdev.name);
strdup_failed:
	free(bdev_node);
zone_exist:
	spdk_bdev_close(base_desc);
free_config:
	zone_block_remove_config(name);
	return rc;
}

/* Public RPC entry point: validate parameters, record the config, and
 * register immediately if the base bdev already exists. -ENODEV from
 * registration is not an error: creation is deferred until examine().
 */
int
vbdev_zone_block_create(const char *bdev_name, const char *vbdev_name, uint64_t zone_capacity,
			uint64_t optimal_open_zones)
{
	int rc = 0;

	if (zone_capacity == 0) {
		SPDK_ERRLOG("Zone capacity can't be 0\n");
		return -EINVAL;
	}

	if (optimal_open_zones == 0) {
		SPDK_ERRLOG("Optimal open zones can't be 0\n");
		return -EINVAL;
	}

	/* Insert the bdev into our global name list even if it doesn't exist yet,
	 * it may show up soon...
	 */
	rc = zone_block_insert_name(bdev_name, vbdev_name, zone_capacity, optimal_open_zones);
	if (rc) {
		return rc;
	}

	rc = zone_block_register(bdev_name);
	if (rc == -ENODEV) {
		/* This is not an error, even though the bdev is not present at this time it may
		 * still show up later.
		 */
		rc = 0;
	}
	return rc;
}

/* Public RPC entry point: unregister the named vbdev and, on success, drop
 * its config entry; on failure report the error through the callback.
 */
void
vbdev_zone_block_delete(const char *name, spdk_bdev_unregister_cb cb_fn, void *cb_arg)
{
	struct bdev_zone_block_config *name_node;
	int rc;

	rc = spdk_bdev_unregister_by_name(name, &bdev_zoned_if, cb_fn, cb_arg);
	if (rc == 0) {
		TAILQ_FOREACH(name_node, &g_bdev_configs, link) {
			if (strcmp(name_node->vbdev_name, name) == 0) {
				zone_block_remove_config(name_node);
				break;
			}
		}
	} else {
		cb_fn(cb_arg, rc);
	}
}

/* examine_config hook: try to build any vbdev configured on this bdev.
 * The return value of zone_block_register is intentionally ignored here;
 * errors were already logged and examine must always be completed.
 */
static void
zone_block_examine(struct spdk_bdev *bdev)
{
	zone_block_register(bdev->name);

	spdk_bdev_module_examine_done(&bdev_zoned_if);
}

SPDK_LOG_REGISTER_COMPONENT(vbdev_zone_block)