/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) Intel Corporation.
 * All rights reserved.
 */

#include "spdk/stdinc.h"

#include "vbdev_zone_block.h"

#include "spdk/config.h"
#include "spdk/nvme.h"
#include "spdk/bdev_zone.h"

#include "spdk/log.h"

static int zone_block_init(void);
static int zone_block_get_ctx_size(void);
static void zone_block_finish(void);
static int zone_block_config_json(struct spdk_json_write_ctx *w);
static void zone_block_examine(struct spdk_bdev *bdev);

/* Module interface: hooks this vbdev module into the SPDK bdev layer. */
static struct spdk_bdev_module bdev_zoned_if = {
	.name = "bdev_zoned_block",
	.module_init = zone_block_init,
	.module_fini = zone_block_finish,
	.config_json = zone_block_config_json,
	.examine_config = zone_block_examine,
	.get_ctx_size = zone_block_get_ctx_size,
};

SPDK_BDEV_MODULE_REGISTER(bdev_zoned_block, &bdev_zoned_if)

/* List of block vbdev names and their base bdevs via configuration file.
 * Used so we can parse the conf once at init and use this list in examine().
 */
struct bdev_zone_block_config {
	char *vbdev_name;
	char *bdev_name;
	uint64_t zone_capacity;
	uint64_t optimal_open_zones;
	TAILQ_ENTRY(bdev_zone_block_config) link;
};
static TAILQ_HEAD(, bdev_zone_block_config) g_bdev_configs = TAILQ_HEAD_INITIALIZER(g_bdev_configs);

/* Per-zone state; the spinlock serializes state/write-pointer updates. */
struct block_zone {
	struct spdk_bdev_zone_info zone_info;
	pthread_spinlock_t lock;
};

/* List of block vbdevs and associated info for each.
 */
struct bdev_zone_block {
	struct spdk_bdev bdev;			/* the block zoned bdev */
	struct spdk_bdev_desc *base_desc;	/* its descriptor we get from open */
	struct block_zone *zones;		/* array of zones */
	uint64_t num_zones;			/* number of zones */
	uint64_t zone_capacity;			/* zone capacity */
	uint64_t zone_shift;			/* log2 of zone_size */
	TAILQ_ENTRY(bdev_zone_block) link;
	struct spdk_thread *thread;		/* thread where base device is opened */
};
static TAILQ_HEAD(, bdev_zone_block) g_bdev_nodes = TAILQ_HEAD_INITIALIZER(g_bdev_nodes);

/* Per-channel context: holds the IO channel of the base device. */
struct zone_block_io_channel {
	struct spdk_io_channel *base_ch;	/* IO channel of base device */
};

/* Per-IO context the bdev layer allocates for each spdk_bdev_io. */
struct zone_block_io {
	/* vbdev to which IO was issued */
	struct bdev_zone_block *bdev_zone_block;
};

/* Module init: nothing to do; configs arrive later via RPC/conf. */
static int
zone_block_init(void)
{
	return 0;
}

/* Remove one config entry from the global list and free its strings. */
static void
zone_block_remove_config(struct bdev_zone_block_config *name)
{
	TAILQ_REMOVE(&g_bdev_configs, name, link);
	free(name->bdev_name);
	free(name->vbdev_name);
	free(name);
}

/* Module teardown: drop all remaining config entries. */
static void
zone_block_finish(void)
{
	struct bdev_zone_block_config *name;

	while ((name = TAILQ_FIRST(&g_bdev_configs))) {
		zone_block_remove_config(name);
	}
}

/* Size of the per-IO context the bdev layer should allocate for us. */
static int
zone_block_get_ctx_size(void)
{
	return sizeof(struct zone_block_io);
}

/* Emit a bdev_zone_block_create RPC entry for every active vbdev. */
static int
zone_block_config_json(struct spdk_json_write_ctx *w)
{
	struct bdev_zone_block *bdev_node;
	struct spdk_bdev *base_bdev = NULL;

	TAILQ_FOREACH(bdev_node, &g_bdev_nodes, link) {
		base_bdev = spdk_bdev_desc_get_bdev(bdev_node->base_desc);
		spdk_json_write_object_begin(w);
		spdk_json_write_named_string(w, "method", "bdev_zone_block_create");
		spdk_json_write_named_object_begin(w, "params");
		spdk_json_write_named_string(w, "base_bdev", spdk_bdev_get_name(base_bdev));
		spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&bdev_node->bdev));
		spdk_json_write_named_uint64(w,
					     "zone_capacity", bdev_node->zone_capacity);
		spdk_json_write_named_uint64(w, "optimal_open_zones", bdev_node->bdev.optimal_open_zones);
		spdk_json_write_object_end(w);
		spdk_json_write_object_end(w);
	}

	return 0;
}

/* Callback for unregistering the IO device. */
static void
_device_unregister_cb(void *io_device)
{
	struct bdev_zone_block *bdev_node = io_device;
	uint64_t i;

	/* Final teardown: release the name, per-zone locks, zone array and node. */
	free(bdev_node->bdev.name);
	for (i = 0; i < bdev_node->num_zones; i++) {
		pthread_spin_destroy(&bdev_node->zones[i].lock);
	}
	free(bdev_node->zones);
	free(bdev_node);
}

/* Thread-hop helper: close the base descriptor on its opening thread. */
static void
_zone_block_destruct(void *ctx)
{
	struct spdk_bdev_desc *desc = ctx;

	spdk_bdev_close(desc);
}

/* fn_table .destruct: unwind the claim, descriptor and io_device. */
static int
zone_block_destruct(void *ctx)
{
	struct bdev_zone_block *bdev_node = (struct bdev_zone_block *)ctx;

	TAILQ_REMOVE(&g_bdev_nodes, bdev_node, link);

	/* Unclaim the underlying bdev. */
	spdk_bdev_module_release_bdev(spdk_bdev_desc_get_bdev(bdev_node->base_desc));

	/* Close the underlying bdev on its same opened thread. */
	if (bdev_node->thread && bdev_node->thread != spdk_get_thread()) {
		spdk_thread_send_msg(bdev_node->thread, _zone_block_destruct, bdev_node->base_desc);
	} else {
		spdk_bdev_close(bdev_node->base_desc);
	}

	/* Unregister the io_device.
	 */
	spdk_io_device_unregister(bdev_node, _device_unregister_cb);

	return 0;
}

/* Map an LBA to the zone containing it, or NULL if out of range. */
static struct block_zone *
zone_block_get_zone_containing_lba(struct bdev_zone_block *bdev_node, uint64_t lba)
{
	size_t index = lba >> bdev_node->zone_shift;

	if (index >= bdev_node->num_zones) {
		return NULL;
	}

	return &bdev_node->zones[index];
}

/* Like the above, but requires the LBA to be exactly a zone start LBA. */
static struct block_zone *
zone_block_get_zone_by_slba(struct bdev_zone_block *bdev_node, uint64_t start_lba)
{
	struct block_zone *zone = zone_block_get_zone_containing_lba(bdev_node, start_lba);

	if (zone && zone->zone_info.zone_id == start_lba) {
		return zone;
	} else {
		return NULL;
	}
}

/* GET_ZONE_INFO: copy zone_info for the requested consecutive zones. */
static int
zone_block_get_zone_info(struct bdev_zone_block *bdev_node, struct spdk_bdev_io *bdev_io)
{
	struct block_zone *zone;
	struct spdk_bdev_zone_info *zone_info = bdev_io->u.zone_mgmt.buf;
	uint64_t zone_id = bdev_io->u.zone_mgmt.zone_id;
	size_t i;

	/* User can request info for more zones than exist, need to check both internal and user
	 * boundaries
	 */
	for (i = 0; i < bdev_io->u.zone_mgmt.num_zones; i++, zone_id += bdev_node->bdev.zone_size) {
		zone = zone_block_get_zone_by_slba(bdev_node, zone_id);
		if (!zone) {
			return -EINVAL;
		}
		memcpy(&zone_info[i], &zone->zone_info, sizeof(*zone_info));
	}

	spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
	return 0;
}

/* ZONE_OPEN: legal from EMPTY, OPEN (no-op) or CLOSED state. */
static int
zone_block_open_zone(struct block_zone *zone, struct spdk_bdev_io *bdev_io)
{
	pthread_spin_lock(&zone->lock);

	switch (zone->zone_info.state) {
	case SPDK_BDEV_ZONE_STATE_EMPTY:
	case SPDK_BDEV_ZONE_STATE_OPEN:
	case SPDK_BDEV_ZONE_STATE_CLOSED:
		zone->zone_info.state = SPDK_BDEV_ZONE_STATE_OPEN;
		pthread_spin_unlock(&zone->lock);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
		return 0;
	default:
		pthread_spin_unlock(&zone->lock);
		return -EINVAL;
	}
}

/* Completion for the unmap issued by a zone reset. */
static void
_zone_block_complete_unmap(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_bdev_io *orig_io = cb_arg;
	int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;

	/* Complete the original IO and then free the one that we created here
	 * as a result of issuing an IO via submit_request.
	 */
	spdk_bdev_io_complete(orig_io, status);
	spdk_bdev_free_io(bdev_io);
}

/* ZONE_RESET: rewind the write pointer to zone start and, when the base
 * bdev supports it, unmap the zone's blocks (best effort).
 */
static int
zone_block_reset_zone(struct bdev_zone_block *bdev_node, struct zone_block_io_channel *ch,
		      struct block_zone *zone, struct spdk_bdev_io *bdev_io)
{
	pthread_spin_lock(&zone->lock);

	switch (zone->zone_info.state) {
	case SPDK_BDEV_ZONE_STATE_EMPTY:
		/* Already empty: nothing to do. */
		pthread_spin_unlock(&zone->lock);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
		return 0;
	case SPDK_BDEV_ZONE_STATE_OPEN:
	case SPDK_BDEV_ZONE_STATE_FULL:
	case SPDK_BDEV_ZONE_STATE_CLOSED:
		zone->zone_info.state = SPDK_BDEV_ZONE_STATE_EMPTY;
		zone->zone_info.write_pointer = zone->zone_info.zone_id;
		pthread_spin_unlock(&zone->lock);

		/* The unmap isn't necessary, so if the base bdev doesn't support it, we're done */
		if (!spdk_bdev_io_type_supported(spdk_bdev_desc_get_bdev(bdev_node->base_desc),
						 SPDK_BDEV_IO_TYPE_UNMAP)) {
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
			return 0;
		}

		return spdk_bdev_unmap_blocks(bdev_node->base_desc, ch->base_ch,
					      zone->zone_info.zone_id, zone->zone_info.capacity,
					      _zone_block_complete_unmap, bdev_io);
	default:
		pthread_spin_unlock(&zone->lock);
		return -EINVAL;
	}
}

/* ZONE_CLOSE: legal from OPEN or CLOSED (no-op) state. */
static int
zone_block_close_zone(struct block_zone *zone, struct spdk_bdev_io *bdev_io)
{
	pthread_spin_lock(&zone->lock);

	switch (zone->zone_info.state) {
	case SPDK_BDEV_ZONE_STATE_OPEN:
	case
	     SPDK_BDEV_ZONE_STATE_CLOSED:
		zone->zone_info.state = SPDK_BDEV_ZONE_STATE_CLOSED;
		pthread_spin_unlock(&zone->lock);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
		return 0;
	default:
		pthread_spin_unlock(&zone->lock);
		return -EINVAL;
	}
}

/* ZONE_FINISH: advance the write pointer to zone end and mark the zone FULL. */
static int
zone_block_finish_zone(struct block_zone *zone, struct spdk_bdev_io *bdev_io)
{
	pthread_spin_lock(&zone->lock);

	zone->zone_info.write_pointer = zone->zone_info.zone_id + zone->zone_info.capacity;
	zone->zone_info.state = SPDK_BDEV_ZONE_STATE_FULL;

	pthread_spin_unlock(&zone->lock);
	spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
	return 0;
}

/* Dispatch a zone management action to the matching handler.
 * The zone_id must be exactly a zone start LBA.
 */
static int
zone_block_zone_management(struct bdev_zone_block *bdev_node, struct zone_block_io_channel *ch,
			   struct spdk_bdev_io *bdev_io)
{
	struct block_zone *zone;

	zone = zone_block_get_zone_by_slba(bdev_node, bdev_io->u.zone_mgmt.zone_id);
	if (!zone) {
		return -EINVAL;
	}

	switch (bdev_io->u.zone_mgmt.zone_action) {
	case SPDK_BDEV_ZONE_RESET:
		return zone_block_reset_zone(bdev_node, ch, zone, bdev_io);
	case SPDK_BDEV_ZONE_OPEN:
		return zone_block_open_zone(zone, bdev_io);
	case SPDK_BDEV_ZONE_CLOSE:
		return zone_block_close_zone(zone, bdev_io);
	case SPDK_BDEV_ZONE_FINISH:
		return zone_block_finish_zone(zone, bdev_io);
	default:
		return -EINVAL;
	}
}

/* Completion for writes/appends forwarded to the base bdev. */
static void
_zone_block_complete_write(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_bdev_io *orig_io = cb_arg;
	int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;

	if (success && orig_io->type == SPDK_BDEV_IO_TYPE_ZONE_APPEND) {
		/* Report back the LBA the append actually landed on. */
		orig_io->u.bdev.offset_blocks = bdev_io->u.bdev.offset_blocks;
	}

	/* Complete the original IO and then free the one that we created here
	 * as a result of issuing an IO via submit_request.
	 */
	spdk_bdev_io_complete(orig_io, status);
	spdk_bdev_free_io(bdev_io);
}

/* WRITE / ZONE_APPEND: validate against zone state and write pointer,
 * advance the write pointer, then forward the IO to the base bdev.
 */
static int
zone_block_write(struct bdev_zone_block *bdev_node, struct zone_block_io_channel *ch,
		 struct spdk_bdev_io *bdev_io)
{
	struct block_zone *zone;
	uint64_t len = bdev_io->u.bdev.num_blocks;
	uint64_t lba = bdev_io->u.bdev.offset_blocks;
	uint64_t num_blocks_left, wp;
	int rc = 0;
	bool is_append = bdev_io->type == SPDK_BDEV_IO_TYPE_ZONE_APPEND;

	if (is_append) {
		/* Appends address the zone by its start LBA. */
		zone = zone_block_get_zone_by_slba(bdev_node, lba);
	} else {
		zone = zone_block_get_zone_containing_lba(bdev_node, lba);
	}
	if (!zone) {
		SPDK_ERRLOG("Trying to write to invalid zone (lba 0x%" PRIx64 ")\n", lba);
		return -EINVAL;
	}

	pthread_spin_lock(&zone->lock);

	switch (zone->zone_info.state) {
	case SPDK_BDEV_ZONE_STATE_OPEN:
	case SPDK_BDEV_ZONE_STATE_EMPTY:
	case SPDK_BDEV_ZONE_STATE_CLOSED:
		zone->zone_info.state = SPDK_BDEV_ZONE_STATE_OPEN;
		break;
	default:
		SPDK_ERRLOG("Trying to write to zone in invalid state %u\n", zone->zone_info.state);
		rc = -EINVAL;
		goto write_fail;
	}

	wp = zone->zone_info.write_pointer;
	if (is_append) {
		/* The append lands at the current write pointer. */
		lba = wp;
	} else {
		/* Sequential-write-required: the LBA must match the write pointer. */
		if (lba != wp) {
			SPDK_ERRLOG("Trying to write to zone with invalid address (lba 0x%" PRIx64 ", wp 0x%" PRIx64 ")\n",
				    lba, wp);
			rc = -EINVAL;
			goto write_fail;
		}
	}

	num_blocks_left = zone->zone_info.zone_id + zone->zone_info.capacity - wp;
	if (len > num_blocks_left) {
		SPDK_ERRLOG("Write exceeds zone capacity (lba 0x%" PRIx64 ", len 0x%" PRIx64 ", wp 0x%" PRIx64
			    ")\n", lba, len, wp);
		rc = -EINVAL;
		goto write_fail;
	}

	zone->zone_info.write_pointer += bdev_io->u.bdev.num_blocks;
	assert(zone->zone_info.write_pointer <= zone->zone_info.zone_id + zone->zone_info.capacity);
	if (zone->zone_info.write_pointer == zone->zone_info.zone_id + zone->zone_info.capacity)
	{
		zone->zone_info.state = SPDK_BDEV_ZONE_STATE_FULL;
	}
	pthread_spin_unlock(&zone->lock);

	if (bdev_io->u.bdev.md_buf == NULL) {
		rc = spdk_bdev_writev_blocks(bdev_node->base_desc, ch->base_ch, bdev_io->u.bdev.iovs,
					     bdev_io->u.bdev.iovcnt, lba,
					     bdev_io->u.bdev.num_blocks, _zone_block_complete_write,
					     bdev_io);
	} else {
		rc = spdk_bdev_writev_blocks_with_md(bdev_node->base_desc, ch->base_ch,
						     bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
						     bdev_io->u.bdev.md_buf,
						     lba, bdev_io->u.bdev.num_blocks,
						     _zone_block_complete_write, bdev_io);
	}

	return rc;

write_fail:
	pthread_spin_unlock(&zone->lock);
	return rc;
}

/* Completion for reads forwarded to the base bdev. */
static void
_zone_block_complete_read(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_bdev_io *orig_io = cb_arg;
	int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;

	/* Complete the original IO and then free the one that we created here
	 * as a result of issuing an IO via submit_request.
	 */
	spdk_bdev_io_complete(orig_io, status);
	spdk_bdev_free_io(bdev_io);
}

/* READ: bound-check against the zone's capacity, then forward to the base bdev. */
static int
zone_block_read(struct bdev_zone_block *bdev_node, struct zone_block_io_channel *ch,
		struct spdk_bdev_io *bdev_io)
{
	struct block_zone *zone;
	uint64_t len = bdev_io->u.bdev.num_blocks;
	uint64_t lba = bdev_io->u.bdev.offset_blocks;
	int rc;

	zone = zone_block_get_zone_containing_lba(bdev_node, lba);
	if (!zone) {
		SPDK_ERRLOG("Trying to read from invalid zone (lba 0x%" PRIx64 ")\n", lba);
		return -EINVAL;
	}

	/* A read must not cross past the end of the zone's capacity. */
	if ((lba + len) > (zone->zone_info.zone_id + zone->zone_info.capacity)) {
		SPDK_ERRLOG("Read exceeds zone capacity (lba 0x%" PRIx64 ", len 0x%" PRIx64 ")\n", lba, len);
		return -EINVAL;
	}

	if (bdev_io->u.bdev.md_buf == NULL) {
		rc = spdk_bdev_readv_blocks(bdev_node->base_desc, ch->base_ch, bdev_io->u.bdev.iovs,
					    bdev_io->u.bdev.iovcnt, lba,
					    len, _zone_block_complete_read,
					    bdev_io);
	} else {
		rc = spdk_bdev_readv_blocks_with_md(bdev_node->base_desc, ch->base_ch,
						    bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
						    bdev_io->u.bdev.md_buf,
						    lba, len,
						    _zone_block_complete_read, bdev_io);
	}

	return rc;
}

/* fn_table .submit_request: entry point for all IO issued to this vbdev. */
static void
zone_block_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	struct bdev_zone_block *bdev_node = SPDK_CONTAINEROF(bdev_io->bdev, struct bdev_zone_block, bdev);
	struct zone_block_io_channel *dev_ch = spdk_io_channel_get_ctx(ch);
	int rc = 0;

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
		rc = zone_block_get_zone_info(bdev_node, bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
		rc = zone_block_zone_management(bdev_node, dev_ch, bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
	case SPDK_BDEV_IO_TYPE_ZONE_APPEND:
		rc = zone_block_write(bdev_node, dev_ch, bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_READ:
		rc =
		     zone_block_read(bdev_node, dev_ch, bdev_io);
		break;
	default:
		SPDK_ERRLOG("vbdev_block: unknown I/O type %u\n", bdev_io->type);
		rc = -ENOTSUP;
		break;
	}

	/* Handlers that fail synchronously return non-zero; map to IO status. */
	if (rc != 0) {
		if (rc == -ENOMEM) {
			SPDK_WARNLOG("ENOMEM, start to queue io for vbdev.\n");
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
		} else {
			SPDK_ERRLOG("ERROR on bdev_io submission!\n");
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		}
	}
}

/* Advertise the IO types this vbdev handles. */
static bool
zone_block_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
{
	switch (io_type) {
	case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
	case SPDK_BDEV_IO_TYPE_WRITE:
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_ZONE_APPEND:
		return true;
	default:
		return false;
	}
}

/* fn_table .get_io_channel: the vbdev node is the io_device key. */
static struct spdk_io_channel *
zone_block_get_io_channel(void *ctx)
{
	struct bdev_zone_block *bdev_node = (struct bdev_zone_block *)ctx;

	return spdk_get_io_channel(bdev_node);
}

/* fn_table .dump_info_json: vbdev-specific info for bdev queries. */
static int
zone_block_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
{
	struct bdev_zone_block *bdev_node = (struct bdev_zone_block *)ctx;
	struct spdk_bdev *base_bdev = spdk_bdev_desc_get_bdev(bdev_node->base_desc);

	spdk_json_write_name(w, "zoned_block");
	spdk_json_write_object_begin(w);
	spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&bdev_node->bdev));
	spdk_json_write_named_string(w, "base_bdev", spdk_bdev_get_name(base_bdev));
	spdk_json_write_named_uint64(w, "zone_capacity", bdev_node->zone_capacity);
	spdk_json_write_named_uint64(w, "optimal_open_zones", bdev_node->bdev.optimal_open_zones);
	spdk_json_write_object_end(w);

	return 0;
}

/* When we register our vbdev this is how we specify our entry points.
 */
static const struct spdk_bdev_fn_table zone_block_fn_table = {
	.destruct		= zone_block_destruct,
	.submit_request		= zone_block_submit_request,
	.io_type_supported	= zone_block_io_type_supported,
	.get_io_channel		= zone_block_get_io_channel,
	.dump_info_json		= zone_block_dump_info_json,
};

/* Hot-remove: unregister every vbdev built on top of the removed base bdev. */
static void
zone_block_base_bdev_hotremove_cb(struct spdk_bdev *bdev_find)
{
	struct bdev_zone_block *bdev_node, *tmp;

	TAILQ_FOREACH_SAFE(bdev_node, &g_bdev_nodes, link, tmp) {
		if (bdev_find == spdk_bdev_desc_get_bdev(bdev_node->base_desc)) {
			spdk_bdev_unregister(&bdev_node->bdev, NULL, NULL);
		}
	}
}

/* Event callback registered with spdk_bdev_open_ext() for the base bdev. */
static void
zone_block_base_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev,
			      void *event_ctx)
{
	switch (type) {
	case SPDK_BDEV_EVENT_REMOVE:
		zone_block_base_bdev_hotremove_cb(bdev);
		break;
	default:
		SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type);
		break;
	}
}

/* Per-channel create: grab an IO channel on the base device. */
static int
_zone_block_ch_create_cb(void *io_device, void *ctx_buf)
{
	struct zone_block_io_channel *bdev_ch = ctx_buf;
	struct bdev_zone_block *bdev_node = io_device;

	bdev_ch->base_ch = spdk_bdev_get_io_channel(bdev_node->base_desc);
	if (!bdev_ch->base_ch) {
		return -ENOMEM;
	}

	return 0;
}

/* Per-channel destroy: drop the base device's IO channel. */
static void
_zone_block_ch_destroy_cb(void *io_device, void *ctx_buf)
{
	struct zone_block_io_channel *bdev_ch = ctx_buf;

	spdk_put_io_channel(bdev_ch->base_ch);
}

/* Record a (base bdev, vbdev) config pair; rejects duplicates of either name. */
static int
zone_block_insert_name(const char *bdev_name, const char *vbdev_name, uint64_t zone_capacity,
		       uint64_t optimal_open_zones)
{
	struct bdev_zone_block_config *name;

	TAILQ_FOREACH(name, &g_bdev_configs, link) {
		if (strcmp(vbdev_name, name->vbdev_name) == 0) {
			SPDK_ERRLOG("block zoned bdev %s already exists\n", vbdev_name);
			return -EEXIST;
		}
		if (strcmp(bdev_name, name->bdev_name) == 0) {
			SPDK_ERRLOG("base bdev %s already claimed\n", bdev_name);
			return -EEXIST;
		}
	}

	name = calloc(1, sizeof(*name));
	if (!name) {
		SPDK_ERRLOG("could not allocate bdev_names\n");
		return -ENOMEM;
	}

	name->bdev_name = strdup(bdev_name);
	if (!name->bdev_name) {
		SPDK_ERRLOG("could not allocate name->bdev_name\n");
		free(name);
		return -ENOMEM;
	}

	name->vbdev_name = strdup(vbdev_name);
	if (!name->vbdev_name) {
		SPDK_ERRLOG("could not allocate name->vbdev_name\n");
		free(name->bdev_name);
		free(name);
		return -ENOMEM;
	}

	name->zone_capacity = zone_capacity;
	name->optimal_open_zones = optimal_open_zones;

	TAILQ_INSERT_TAIL(&g_bdev_configs, name, link);

	return 0;
}

/* Initialize per-zone state; every zone starts FULL with the write pointer
 * at the end of its capacity.
 */
static int
zone_block_init_zone_info(struct bdev_zone_block *bdev_node)
{
	size_t i;
	struct block_zone *zone;
	int rc = 0;

	for (i = 0; i < bdev_node->num_zones; i++) {
		zone = &bdev_node->zones[i];
		zone->zone_info.zone_id = bdev_node->bdev.zone_size * i;
		zone->zone_info.capacity = bdev_node->zone_capacity;
		zone->zone_info.write_pointer = zone->zone_info.zone_id + zone->zone_info.capacity;
		zone->zone_info.state = SPDK_BDEV_ZONE_STATE_FULL;
		zone->zone_info.type = SPDK_BDEV_ZONE_TYPE_SEQWR;
		if (pthread_spin_init(&zone->lock, PTHREAD_PROCESS_PRIVATE)) {
			SPDK_ERRLOG("pthread_spin_init() failed\n");
			rc = -ENOMEM;
			break;
		}
	}

	if (rc) {
		/* Unwind the spinlocks initialized so far. */
		for (; i > 0; i--) {
			pthread_spin_destroy(&bdev_node->zones[i - 1].lock);
		}
	}

	return rc;
}

/* Build and register a zoned vbdev for base_bdev_name if a config entry matches. */
static int
zone_block_register(const char *base_bdev_name)
{
	struct spdk_bdev_desc *base_desc;
	struct spdk_bdev *base_bdev;
	struct bdev_zone_block_config *name, *tmp;
	struct bdev_zone_block *bdev_node;
	uint64_t zone_size;
	int rc = 0;

	/* Check our list of names from config versus this bdev and if
	 * there's a match, create the
	 * bdev_node & bdev accordingly.
	 */
	TAILQ_FOREACH_SAFE(name, &g_bdev_configs, link, tmp) {
		if (strcmp(name->bdev_name, base_bdev_name) != 0) {
			continue;
		}

		rc = spdk_bdev_open_ext(base_bdev_name, true, zone_block_base_bdev_event_cb,
					NULL, &base_desc);
		if (rc == -ENODEV) {
			/* Base bdev not present yet; the caller treats this as retryable. */
			return -ENODEV;
		} else if (rc) {
			SPDK_ERRLOG("could not open bdev %s\n", base_bdev_name);
			goto free_config;
		}

		base_bdev = spdk_bdev_desc_get_bdev(base_desc);

		if (spdk_bdev_is_zoned(base_bdev)) {
			SPDK_ERRLOG("Base bdev %s is already a zoned bdev\n", base_bdev_name);
			rc = -EEXIST;
			goto zone_exist;
		}

		bdev_node = calloc(1, sizeof(struct bdev_zone_block));
		if (!bdev_node) {
			rc = -ENOMEM;
			SPDK_ERRLOG("could not allocate bdev_node\n");
			goto zone_exist;
		}

		bdev_node->base_desc = base_desc;

		/* The base bdev that we're attaching to. */
		bdev_node->bdev.name = strdup(name->vbdev_name);
		if (!bdev_node->bdev.name) {
			rc = -ENOMEM;
			SPDK_ERRLOG("could not allocate bdev_node name\n");
			goto strdup_failed;
		}

		/* Round zone size up to a power of two so LBA->zone lookup is a shift. */
		zone_size = spdk_align64pow2(name->zone_capacity);
		if (zone_size == 0) {
			rc = -EINVAL;
			SPDK_ERRLOG("invalid zone size\n");
			goto roundup_failed;
		}

		bdev_node->zone_shift = spdk_u64log2(zone_size);
		bdev_node->num_zones = base_bdev->blockcnt / zone_size;

		bdev_node->zones = calloc(bdev_node->num_zones, sizeof(struct block_zone));
		if (!bdev_node->zones) {
			rc = -ENOMEM;
			SPDK_ERRLOG("could not allocate zones\n");
			goto calloc_failed;
		}

		bdev_node->bdev.product_name = "zone_block";

		/* Copy some properties from the underlying base bdev.
*/ 770 bdev_node->bdev.write_cache = base_bdev->write_cache; 771 bdev_node->bdev.required_alignment = base_bdev->required_alignment; 772 bdev_node->bdev.optimal_io_boundary = base_bdev->optimal_io_boundary; 773 774 bdev_node->bdev.blocklen = base_bdev->blocklen; 775 bdev_node->bdev.blockcnt = bdev_node->num_zones * zone_size; 776 777 if (bdev_node->num_zones * name->zone_capacity != base_bdev->blockcnt) { 778 SPDK_DEBUGLOG(vbdev_zone_block, 779 "Lost %" PRIu64 " blocks due to zone capacity and base bdev size misalignment\n", 780 base_bdev->blockcnt - bdev_node->num_zones * name->zone_capacity); 781 } 782 783 bdev_node->bdev.write_unit_size = base_bdev->write_unit_size; 784 785 bdev_node->bdev.md_interleave = base_bdev->md_interleave; 786 bdev_node->bdev.md_len = base_bdev->md_len; 787 bdev_node->bdev.dif_type = base_bdev->dif_type; 788 bdev_node->bdev.dif_is_head_of_md = base_bdev->dif_is_head_of_md; 789 bdev_node->bdev.dif_check_flags = base_bdev->dif_check_flags; 790 791 bdev_node->bdev.zoned = true; 792 bdev_node->bdev.ctxt = bdev_node; 793 bdev_node->bdev.fn_table = &zone_block_fn_table; 794 bdev_node->bdev.module = &bdev_zoned_if; 795 796 /* bdev specific info */ 797 bdev_node->bdev.zone_size = zone_size; 798 799 bdev_node->zone_capacity = name->zone_capacity; 800 bdev_node->bdev.optimal_open_zones = name->optimal_open_zones; 801 bdev_node->bdev.max_open_zones = 0; 802 rc = zone_block_init_zone_info(bdev_node); 803 if (rc) { 804 SPDK_ERRLOG("could not init zone info\n"); 805 goto zone_info_failed; 806 } 807 808 TAILQ_INSERT_TAIL(&g_bdev_nodes, bdev_node, link); 809 810 spdk_io_device_register(bdev_node, _zone_block_ch_create_cb, _zone_block_ch_destroy_cb, 811 sizeof(struct zone_block_io_channel), 812 name->vbdev_name); 813 814 /* Save the thread where the base device is opened */ 815 bdev_node->thread = spdk_get_thread(); 816 817 rc = spdk_bdev_module_claim_bdev(base_bdev, base_desc, bdev_node->bdev.module); 818 if (rc) { 819 SPDK_ERRLOG("could not claim bdev 
%s\n", base_bdev_name); 820 goto claim_failed; 821 } 822 823 rc = spdk_bdev_register(&bdev_node->bdev); 824 if (rc) { 825 SPDK_ERRLOG("could not register zoned bdev\n"); 826 goto register_failed; 827 } 828 } 829 830 return rc; 831 832 register_failed: 833 spdk_bdev_module_release_bdev(&bdev_node->bdev); 834 claim_failed: 835 TAILQ_REMOVE(&g_bdev_nodes, bdev_node, link); 836 spdk_io_device_unregister(bdev_node, NULL); 837 zone_info_failed: 838 free(bdev_node->zones); 839 calloc_failed: 840 roundup_failed: 841 free(bdev_node->bdev.name); 842 strdup_failed: 843 free(bdev_node); 844 zone_exist: 845 spdk_bdev_close(base_desc); 846 free_config: 847 zone_block_remove_config(name); 848 return rc; 849 } 850 851 int 852 vbdev_zone_block_create(const char *bdev_name, const char *vbdev_name, uint64_t zone_capacity, 853 uint64_t optimal_open_zones) 854 { 855 int rc = 0; 856 857 if (zone_capacity == 0) { 858 SPDK_ERRLOG("Zone capacity can't be 0\n"); 859 return -EINVAL; 860 } 861 862 if (optimal_open_zones == 0) { 863 SPDK_ERRLOG("Optimal open zones can't be 0\n"); 864 return -EINVAL; 865 } 866 867 /* Insert the bdev into our global name list even if it doesn't exist yet, 868 * it may show up soon... 869 */ 870 rc = zone_block_insert_name(bdev_name, vbdev_name, zone_capacity, optimal_open_zones); 871 if (rc) { 872 return rc; 873 } 874 875 rc = zone_block_register(bdev_name); 876 if (rc == -ENODEV) { 877 /* This is not an error, even though the bdev is not present at this time it may 878 * still show up later. 
879 */ 880 rc = 0; 881 } 882 return rc; 883 } 884 885 void 886 vbdev_zone_block_delete(const char *name, spdk_bdev_unregister_cb cb_fn, void *cb_arg) 887 { 888 struct bdev_zone_block_config *name_node; 889 int rc; 890 891 rc = spdk_bdev_unregister_by_name(name, &bdev_zoned_if, cb_fn, cb_arg); 892 if (rc == 0) { 893 TAILQ_FOREACH(name_node, &g_bdev_configs, link) { 894 if (strcmp(name_node->vbdev_name, name) == 0) { 895 zone_block_remove_config(name_node); 896 break; 897 } 898 } 899 } else { 900 cb_fn(cb_arg, rc); 901 } 902 } 903 904 static void 905 zone_block_examine(struct spdk_bdev *bdev) 906 { 907 zone_block_register(bdev->name); 908 909 spdk_bdev_module_examine_done(&bdev_zoned_if); 910 } 911 912 SPDK_LOG_REGISTER_COMPONENT(vbdev_zone_block) 913