1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2018 Intel Corporation. 3 * All rights reserved. 4 * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 5 */ 6 7 #include "bdev_raid.h" 8 #include "spdk/env.h" 9 #include "spdk/thread.h" 10 #include "spdk/log.h" 11 #include "spdk/string.h" 12 #include "spdk/util.h" 13 #include "spdk/json.h" 14 #include "spdk/likely.h" 15 16 #define RAID_OFFSET_BLOCKS_INVALID UINT64_MAX 17 #define RAID_BDEV_PROCESS_MAX_QD 16 18 19 #define RAID_BDEV_PROCESS_WINDOW_SIZE_KB_DEFAULT 1024 20 21 static bool g_shutdown_started = false; 22 23 /* List of all raid bdevs */ 24 struct raid_all_tailq g_raid_bdev_list = TAILQ_HEAD_INITIALIZER(g_raid_bdev_list); 25 26 static TAILQ_HEAD(, raid_bdev_module) g_raid_modules = TAILQ_HEAD_INITIALIZER(g_raid_modules); 27 28 /* 29 * raid_bdev_io_channel is the context of spdk_io_channel for raid bdev device. It 30 * contains the relationship of raid bdev io channel with base bdev io channels. 31 */ 32 struct raid_bdev_io_channel { 33 /* Array of IO channels of base bdevs */ 34 struct spdk_io_channel **base_channel; 35 36 /* Private raid module IO channel */ 37 struct spdk_io_channel *module_channel; 38 39 /* Background process data */ 40 struct { 41 uint64_t offset; 42 struct spdk_io_channel *target_ch; 43 struct raid_bdev_io_channel *ch_processed; 44 } process; 45 }; 46 47 enum raid_bdev_process_state { 48 RAID_PROCESS_STATE_INIT, 49 RAID_PROCESS_STATE_RUNNING, 50 RAID_PROCESS_STATE_STOPPING, 51 RAID_PROCESS_STATE_STOPPED, 52 }; 53 54 struct raid_bdev_process { 55 struct raid_bdev *raid_bdev; 56 enum raid_process_type type; 57 enum raid_bdev_process_state state; 58 struct spdk_thread *thread; 59 struct raid_bdev_io_channel *raid_ch; 60 TAILQ_HEAD(, raid_bdev_process_request) requests; 61 uint64_t max_window_size; 62 uint64_t window_size; 63 uint64_t window_remaining; 64 int window_status; 65 uint64_t window_offset; 66 bool window_range_locked; 67 struct raid_base_bdev_info *target; 68 int status; 69 TAILQ_HEAD(, raid_process_finish_action) finish_actions; 70 }; 71 72 struct raid_process_finish_action { 73 spdk_msg_fn cb; 74 void *cb_ctx; 75 TAILQ_ENTRY(raid_process_finish_action) link; 76 }; 77 78 static struct spdk_raid_bdev_opts g_opts = { 79 .process_window_size_kb = RAID_BDEV_PROCESS_WINDOW_SIZE_KB_DEFAULT, 80 }; 81 82 void 83 raid_bdev_get_opts(struct spdk_raid_bdev_opts *opts) 84 { 85 *opts = g_opts; 86 } 87 88 int 89 raid_bdev_set_opts(const struct spdk_raid_bdev_opts *opts) 90 { 91 if (opts->process_window_size_kb == 0) { 92 return -EINVAL; 93 } 94 95 g_opts = *opts; 96 97 return 0; 98 } 99 100 static struct raid_bdev_module * 101 raid_bdev_module_find(enum raid_level level) 102 { 103 struct raid_bdev_module *raid_module; 104 105 TAILQ_FOREACH(raid_module, &g_raid_modules, link) { 106 if (raid_module->level == level) { 107 return raid_module; 108 } 109 } 110 111 return NULL; 112 } 113 114 void 115 raid_bdev_module_list_add(struct raid_bdev_module *raid_module) 116 { 117 if (raid_bdev_module_find(raid_module->level) != NULL) { 118 SPDK_ERRLOG("module for raid level '%s' already registered.\n", 119 raid_bdev_level_to_str(raid_module->level)); 120 assert(false); 121 } else { 122 TAILQ_INSERT_TAIL(&g_raid_modules, raid_module, link); 123 } 124 } 125 126 struct spdk_io_channel * 127 raid_bdev_channel_get_base_channel(struct raid_bdev_io_channel *raid_ch, uint8_t idx) 128 { 129 return raid_ch->base_channel[idx]; 130 } 131 132 void * 133 raid_bdev_channel_get_module_ctx(struct raid_bdev_io_channel *raid_ch) 134 { 135 assert(raid_ch->module_channel != NULL); 136 137 return spdk_io_channel_get_ctx(raid_ch->module_channel); 138 } 139 140 struct raid_base_bdev_info * 141 raid_bdev_channel_get_base_info(struct raid_bdev_io_channel *raid_ch, struct spdk_bdev *base_bdev) 142 { 143 struct spdk_io_channel *ch = spdk_io_channel_from_ctx(raid_ch); 144 struct raid_bdev *raid_bdev = spdk_io_channel_get_io_device(ch); 145 uint8_t i; 146 147 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 148 struct raid_base_bdev_info *base_info = &raid_bdev->base_bdev_info[i]; 149 150 if (base_info->is_configured && 151 spdk_bdev_desc_get_bdev(base_info->desc) == base_bdev) { 152 return base_info; 153 } 154 } 155 156 return NULL; 157 } 158 159 /* Function declarations */ 160 static void raid_bdev_examine(struct spdk_bdev *bdev); 161 static int raid_bdev_init(void); 162 static void raid_bdev_deconfigure(struct raid_bdev *raid_bdev, 163 raid_bdev_destruct_cb cb_fn, void *cb_arg); 164 165 static void 166 raid_bdev_ch_process_cleanup(struct raid_bdev_io_channel *raid_ch) 167 { 168 raid_ch->process.offset = RAID_OFFSET_BLOCKS_INVALID; 169 170 if (raid_ch->process.target_ch != NULL) { 171 spdk_put_io_channel(raid_ch->process.target_ch); 172 raid_ch->process.target_ch = NULL; 173 } 174 175 if (raid_ch->process.ch_processed != NULL) { 176 free(raid_ch->process.ch_processed->base_channel); 177 free(raid_ch->process.ch_processed); 178 raid_ch->process.ch_processed = NULL; 179 } 180 } 181 182 static int 183 raid_bdev_ch_process_setup(struct raid_bdev_io_channel *raid_ch, struct raid_bdev_process *process) 184 { 185 struct raid_bdev *raid_bdev = process->raid_bdev; 186 struct raid_bdev_io_channel *raid_ch_processed; 187 struct raid_base_bdev_info *base_info; 188 189 raid_ch->process.offset = process->window_offset; 190 191 /* In the future we may have other types of processes which don't use a target bdev, 192 * like data scrubbing or strip size migration. Until then, expect that there always is 193 * a process target. */ 194 assert(process->target != NULL); 195 196 raid_ch->process.target_ch = spdk_bdev_get_io_channel(process->target->desc); 197 if (raid_ch->process.target_ch == NULL) { 198 goto err; 199 } 200 201 raid_ch_processed = calloc(1, sizeof(*raid_ch_processed)); 202 if (raid_ch_processed == NULL) { 203 goto err; 204 } 205 raid_ch->process.ch_processed = raid_ch_processed; 206 207 raid_ch_processed->base_channel = calloc(raid_bdev->num_base_bdevs, 208 sizeof(*raid_ch_processed->base_channel)); 209 if (raid_ch_processed->base_channel == NULL) { 210 goto err; 211 } 212 213 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 214 uint8_t slot = raid_bdev_base_bdev_slot(base_info); 215 216 if (base_info != process->target) { 217 raid_ch_processed->base_channel[slot] = raid_ch->base_channel[slot]; 218 } else { 219 raid_ch_processed->base_channel[slot] = raid_ch->process.target_ch; 220 } 221 } 222 223 raid_ch_processed->module_channel = raid_ch->module_channel; 224 raid_ch_processed->process.offset = RAID_OFFSET_BLOCKS_INVALID; 225 226 return 0; 227 err: 228 raid_bdev_ch_process_cleanup(raid_ch); 229 return -ENOMEM; 230 } 231 232 /* 233 * brief: 234 * raid_bdev_create_cb function is a cb function for raid bdev which creates the 235 * hierarchy from raid bdev to base bdev io channels. It will be called per core 236 * params: 237 * io_device - pointer to raid bdev io device represented by raid_bdev 238 * ctx_buf - pointer to context buffer for raid bdev io channel 239 * returns: 240 * 0 - success 241 * non zero - failure 242 */ 243 static int 244 raid_bdev_create_cb(void *io_device, void *ctx_buf) 245 { 246 struct raid_bdev *raid_bdev = io_device; 247 struct raid_bdev_io_channel *raid_ch = ctx_buf; 248 uint8_t i; 249 int ret = -ENOMEM; 250 251 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_create_cb, %p\n", raid_ch); 252 253 assert(raid_bdev != NULL); 254 assert(raid_bdev->state == RAID_BDEV_STATE_ONLINE); 255 256 raid_ch->base_channel = calloc(raid_bdev->num_base_bdevs, sizeof(struct spdk_io_channel *)); 257 if (!raid_ch->base_channel) { 258 SPDK_ERRLOG("Unable to allocate base bdevs io channel\n"); 259 return -ENOMEM; 260 } 261 262 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 263 /* 264 * Get the spdk_io_channel for all the base bdevs. This is used during 265 * split logic to send the respective child bdev ios to respective base 266 * bdev io channel. 267 * Skip missing base bdevs and the process target, which should also be treated as 268 * missing until the process completes. 269 */ 270 if (raid_bdev->base_bdev_info[i].is_configured == false || 271 raid_bdev->base_bdev_info[i].is_process_target == true) { 272 continue; 273 } 274 raid_ch->base_channel[i] = spdk_bdev_get_io_channel( 275 raid_bdev->base_bdev_info[i].desc); 276 if (!raid_ch->base_channel[i]) { 277 SPDK_ERRLOG("Unable to create io channel for base bdev\n"); 278 goto err; 279 } 280 } 281 282 if (raid_bdev->module->get_io_channel) { 283 raid_ch->module_channel = raid_bdev->module->get_io_channel(raid_bdev); 284 if (!raid_ch->module_channel) { 285 SPDK_ERRLOG("Unable to create io channel for raid module\n"); 286 goto err; 287 } 288 } 289 290 if (raid_bdev->process != NULL) { 291 ret = raid_bdev_ch_process_setup(raid_ch, raid_bdev->process); 292 if (ret != 0) { 293 SPDK_ERRLOG("Failed to setup process io channel\n"); 294 goto err; 295 } 296 } else { 297 raid_ch->process.offset = RAID_OFFSET_BLOCKS_INVALID; 298 } 299 300 return 0; 301 err: 302 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 303 if (raid_ch->base_channel[i] != NULL) { 304 spdk_put_io_channel(raid_ch->base_channel[i]); 305 } 306 } 307 free(raid_ch->base_channel); 308 309 raid_bdev_ch_process_cleanup(raid_ch); 310 311 return ret; 312 } 313 314 /* 315 * brief: 316 * raid_bdev_destroy_cb function is a cb function for raid bdev which deletes the 317 * hierarchy from raid bdev to base bdev io channels. It will be called per core 318 * params: 319 * io_device - pointer to raid bdev io device represented by raid_bdev 320 * ctx_buf - pointer to context buffer for raid bdev io channel 321 * returns: 322 * none 323 */ 324 static void 325 raid_bdev_destroy_cb(void *io_device, void *ctx_buf) 326 { 327 struct raid_bdev *raid_bdev = io_device; 328 struct raid_bdev_io_channel *raid_ch = ctx_buf; 329 uint8_t i; 330 331 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_destroy_cb\n"); 332 333 assert(raid_ch != NULL); 334 assert(raid_ch->base_channel); 335 336 if (raid_ch->module_channel) { 337 spdk_put_io_channel(raid_ch->module_channel); 338 } 339 340 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 341 /* Free base bdev channels */ 342 if (raid_ch->base_channel[i] != NULL) { 343 spdk_put_io_channel(raid_ch->base_channel[i]); 344 } 345 } 346 free(raid_ch->base_channel); 347 raid_ch->base_channel = NULL; 348 349 raid_bdev_ch_process_cleanup(raid_ch); 350 } 351 352 /* 353 * brief: 354 * raid_bdev_cleanup is used to cleanup raid_bdev related data 355 * structures. 356 * params: 357 * raid_bdev - pointer to raid_bdev 358 * returns: 359 * none 360 */ 361 static void 362 raid_bdev_cleanup(struct raid_bdev *raid_bdev) 363 { 364 struct raid_base_bdev_info *base_info; 365 366 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_cleanup, %p name %s, state %s\n", 367 raid_bdev, raid_bdev->bdev.name, raid_bdev_state_to_str(raid_bdev->state)); 368 assert(raid_bdev->state != RAID_BDEV_STATE_ONLINE); 369 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 370 371 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 372 assert(base_info->desc == NULL); 373 free(base_info->name); 374 } 375 376 TAILQ_REMOVE(&g_raid_bdev_list, raid_bdev, global_link); 377 } 378 379 static void 380 raid_bdev_free(struct raid_bdev *raid_bdev) 381 { 382 raid_bdev_free_superblock(raid_bdev); 383 free(raid_bdev->base_bdev_info); 384 free(raid_bdev->bdev.name); 385 free(raid_bdev); 386 } 387 388 static void 389 raid_bdev_cleanup_and_free(struct raid_bdev *raid_bdev) 390 { 391 raid_bdev_cleanup(raid_bdev); 392 raid_bdev_free(raid_bdev); 393 } 394 395 static void 396 raid_bdev_deconfigure_base_bdev(struct raid_base_bdev_info *base_info) 397 { 398 struct raid_bdev *raid_bdev = base_info->raid_bdev; 399 400 assert(base_info->is_configured); 401 assert(raid_bdev->num_base_bdevs_discovered); 402 raid_bdev->num_base_bdevs_discovered--; 403 base_info->is_configured = false; 404 base_info->is_process_target = false; 405 } 406 407 /* 408 * brief: 409 * free resource of base bdev for raid bdev 410 * params: 411 * base_info - raid base bdev info 412 * returns: 413 * none 414 */ 415 static void 416 raid_bdev_free_base_bdev_resource(struct raid_base_bdev_info *base_info) 417 { 418 struct raid_bdev *raid_bdev = base_info->raid_bdev; 419 420 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 421 422 free(base_info->name); 423 base_info->name = NULL; 424 if (raid_bdev->state != RAID_BDEV_STATE_CONFIGURING) { 425 spdk_uuid_set_null(&base_info->uuid); 426 } 427 base_info->is_failed = false; 428 429 if (base_info->desc == NULL) { 430 return; 431 } 432 433 spdk_bdev_module_release_bdev(spdk_bdev_desc_get_bdev(base_info->desc)); 434 spdk_bdev_close(base_info->desc); 435 base_info->desc = NULL; 436 spdk_put_io_channel(base_info->app_thread_ch); 437 base_info->app_thread_ch = NULL; 438 439 if (base_info->is_configured) { 440 raid_bdev_deconfigure_base_bdev(base_info); 441 } 442 } 443 444 static void 445 raid_bdev_io_device_unregister_cb(void *io_device) 446 { 447 struct raid_bdev *raid_bdev = io_device; 448 449 if (raid_bdev->num_base_bdevs_discovered == 0) { 450 /* Free raid_bdev when there are no base bdevs left */ 451 SPDK_DEBUGLOG(bdev_raid, "raid bdev base bdevs is 0, going to free all in destruct\n"); 452 raid_bdev_cleanup(raid_bdev); 453 spdk_bdev_destruct_done(&raid_bdev->bdev, 0); 454 raid_bdev_free(raid_bdev); 455 } else { 456 spdk_bdev_destruct_done(&raid_bdev->bdev, 0); 457 } 458 } 459 460 void 461 raid_bdev_module_stop_done(struct raid_bdev *raid_bdev) 462 { 463 if (raid_bdev->state != RAID_BDEV_STATE_CONFIGURING) { 464 spdk_io_device_unregister(raid_bdev, raid_bdev_io_device_unregister_cb); 465 } 466 } 467 468 static void 469 _raid_bdev_destruct(void *ctxt) 470 { 471 struct raid_bdev *raid_bdev = ctxt; 472 struct raid_base_bdev_info *base_info; 473 474 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_destruct\n"); 475 476 assert(raid_bdev->process == NULL); 477 478 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 479 /* 480 * Close all base bdev descriptors for which call has come from below 481 * layers. Also close the descriptors if we have started shutdown. 482 */ 483 if (g_shutdown_started || base_info->remove_scheduled == true) { 484 raid_bdev_free_base_bdev_resource(base_info); 485 } 486 } 487 488 if (g_shutdown_started) { 489 raid_bdev->state = RAID_BDEV_STATE_OFFLINE; 490 } 491 492 if (raid_bdev->module->stop != NULL) { 493 if (raid_bdev->module->stop(raid_bdev) == false) { 494 return; 495 } 496 } 497 498 raid_bdev_module_stop_done(raid_bdev); 499 } 500 501 static int 502 raid_bdev_destruct(void *ctx) 503 { 504 spdk_thread_exec_msg(spdk_thread_get_app_thread(), _raid_bdev_destruct, ctx); 505 506 return 1; 507 } 508 509 int 510 raid_bdev_remap_dix_reftag(void *md_buf, uint64_t num_blocks, 511 struct spdk_bdev *bdev, uint32_t remapped_offset) 512 { 513 struct spdk_dif_ctx dif_ctx; 514 struct spdk_dif_error err_blk = {}; 515 int rc; 516 struct spdk_dif_ctx_init_ext_opts dif_opts; 517 struct iovec md_iov = { 518 .iov_base = md_buf, 519 .iov_len = num_blocks * bdev->md_len, 520 }; 521 522 if (md_buf == NULL) { 523 return 0; 524 } 525 526 dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format); 527 dif_opts.dif_pi_format = SPDK_DIF_PI_FORMAT_16; 528 rc = spdk_dif_ctx_init(&dif_ctx, 529 bdev->blocklen, bdev->md_len, bdev->md_interleave, 530 bdev->dif_is_head_of_md, bdev->dif_type, 531 SPDK_DIF_FLAGS_REFTAG_CHECK, 532 0, 0, 0, 0, 0, &dif_opts); 533 if (rc != 0) { 534 SPDK_ERRLOG("Initialization of DIF context failed\n"); 535 return rc; 536 } 537 538 spdk_dif_ctx_set_remapped_init_ref_tag(&dif_ctx, remapped_offset); 539 540 rc = spdk_dix_remap_ref_tag(&md_iov, num_blocks, &dif_ctx, &err_blk, false); 541 if (rc != 0) { 542 SPDK_ERRLOG("Remapping reference tag failed. type=%d, offset=%d" 543 PRIu32 "\n", err_blk.err_type, err_blk.err_offset); 544 } 545 546 return rc; 547 } 548 549 int 550 raid_bdev_verify_dix_reftag(struct iovec *iovs, int iovcnt, void *md_buf, 551 uint64_t num_blocks, struct spdk_bdev *bdev, uint32_t offset_blocks) 552 { 553 struct spdk_dif_ctx dif_ctx; 554 struct spdk_dif_error err_blk = {}; 555 int rc; 556 struct spdk_dif_ctx_init_ext_opts dif_opts; 557 struct iovec md_iov = { 558 .iov_base = md_buf, 559 .iov_len = num_blocks * bdev->md_len, 560 }; 561 562 if (md_buf == NULL) { 563 return 0; 564 } 565 566 dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format); 567 dif_opts.dif_pi_format = SPDK_DIF_PI_FORMAT_16; 568 rc = spdk_dif_ctx_init(&dif_ctx, 569 bdev->blocklen, bdev->md_len, bdev->md_interleave, 570 bdev->dif_is_head_of_md, bdev->dif_type, 571 SPDK_DIF_FLAGS_REFTAG_CHECK, 572 offset_blocks, 0, 0, 0, 0, &dif_opts); 573 if (rc != 0) { 574 SPDK_ERRLOG("Initialization of DIF context failed\n"); 575 return rc; 576 } 577 578 rc = spdk_dix_verify(iovs, iovcnt, &md_iov, num_blocks, &dif_ctx, &err_blk); 579 if (rc != 0) { 580 SPDK_ERRLOG("Reference tag check failed. type=%d, offset=%d" 581 PRIu32 "\n", err_blk.err_type, err_blk.err_offset); 582 } 583 584 return rc; 585 } 586 587 void 588 raid_bdev_io_complete(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status) 589 { 590 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); 591 int rc; 592 593 if (raid_io->split.offset != RAID_OFFSET_BLOCKS_INVALID) { 594 struct iovec *split_iov = raid_io->split.iov; 595 const struct iovec *split_iov_orig = &raid_io->split.iov_copy; 596 597 /* 598 * Non-zero offset here means that this is the completion of the first part of the 599 * split I/O (the higher LBAs). Then, we submit the second part and set offset to 0. 600 */ 601 if (raid_io->split.offset != 0) { 602 raid_io->offset_blocks = bdev_io->u.bdev.offset_blocks; 603 raid_io->md_buf = bdev_io->u.bdev.md_buf; 604 605 if (status == SPDK_BDEV_IO_STATUS_SUCCESS) { 606 raid_io->num_blocks = raid_io->split.offset; 607 raid_io->iovcnt = raid_io->iovs - bdev_io->u.bdev.iovs; 608 raid_io->iovs = bdev_io->u.bdev.iovs; 609 if (split_iov != NULL) { 610 raid_io->iovcnt++; 611 split_iov->iov_len = split_iov->iov_base - split_iov_orig->iov_base; 612 split_iov->iov_base = split_iov_orig->iov_base; 613 } 614 615 raid_io->split.offset = 0; 616 raid_io->base_bdev_io_submitted = 0; 617 raid_io->raid_ch = raid_io->raid_ch->process.ch_processed; 618 619 raid_io->raid_bdev->module->submit_rw_request(raid_io); 620 return; 621 } 622 } 623 624 raid_io->num_blocks = bdev_io->u.bdev.num_blocks; 625 raid_io->iovcnt = bdev_io->u.bdev.iovcnt; 626 raid_io->iovs = bdev_io->u.bdev.iovs; 627 if (split_iov != NULL) { 628 *split_iov = *split_iov_orig; 629 } 630 } 631 632 if (spdk_unlikely(raid_io->completion_cb != NULL)) { 633 raid_io->completion_cb(raid_io, status); 634 } else { 635 if (spdk_unlikely(bdev_io->type == SPDK_BDEV_IO_TYPE_READ && 636 spdk_bdev_get_dif_type(bdev_io->bdev) != SPDK_DIF_DISABLE && 637 bdev_io->bdev->dif_check_flags & SPDK_DIF_FLAGS_REFTAG_CHECK && 638 status == SPDK_BDEV_IO_STATUS_SUCCESS)) { 639 640 rc = raid_bdev_remap_dix_reftag(bdev_io->u.bdev.md_buf, 641 bdev_io->u.bdev.num_blocks, bdev_io->bdev, 642 bdev_io->u.bdev.offset_blocks); 643 if (rc != 0) { 644 status = SPDK_BDEV_IO_STATUS_FAILED; 645 } 646 } 647 spdk_bdev_io_complete(bdev_io, status); 648 } 649 } 650 651 /* 652 * brief: 653 * raid_bdev_io_complete_part - signal the completion of a part of the expected 654 * base bdev IOs and complete the raid_io if this is the final expected IO. 655 * The caller should first set raid_io->base_bdev_io_remaining. This function 656 * will decrement this counter by the value of the 'completed' parameter and 657 * complete the raid_io if the counter reaches 0. The caller is free to 658 * interpret the 'base_bdev_io_remaining' and 'completed' values as needed, 659 * it can represent e.g. blocks or IOs. 660 * params: 661 * raid_io - pointer to raid_bdev_io 662 * completed - the part of the raid_io that has been completed 663 * status - status of the base IO 664 * returns: 665 * true - if the raid_io is completed 666 * false - otherwise 667 */ 668 bool 669 raid_bdev_io_complete_part(struct raid_bdev_io *raid_io, uint64_t completed, 670 enum spdk_bdev_io_status status) 671 { 672 assert(raid_io->base_bdev_io_remaining >= completed); 673 raid_io->base_bdev_io_remaining -= completed; 674 675 if (status != raid_io->base_bdev_io_status_default) { 676 raid_io->base_bdev_io_status = status; 677 } 678 679 if (raid_io->base_bdev_io_remaining == 0) { 680 raid_bdev_io_complete(raid_io, raid_io->base_bdev_io_status); 681 return true; 682 } else { 683 return false; 684 } 685 } 686 687 /* 688 * brief: 689 * raid_bdev_queue_io_wait function processes the IO which failed to submit. 690 * It will try to queue the IOs after storing the context to bdev wait queue logic. 691 * params: 692 * raid_io - pointer to raid_bdev_io 693 * bdev - the block device that the IO is submitted to 694 * ch - io channel 695 * cb_fn - callback when the spdk_bdev_io for bdev becomes available 696 * returns: 697 * none 698 */ 699 void 700 raid_bdev_queue_io_wait(struct raid_bdev_io *raid_io, struct spdk_bdev *bdev, 701 struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn) 702 { 703 raid_io->waitq_entry.bdev = bdev; 704 raid_io->waitq_entry.cb_fn = cb_fn; 705 raid_io->waitq_entry.cb_arg = raid_io; 706 spdk_bdev_queue_io_wait(bdev, ch, &raid_io->waitq_entry); 707 } 708 709 static void 710 raid_base_bdev_reset_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 711 { 712 struct raid_bdev_io *raid_io = cb_arg; 713 714 spdk_bdev_free_io(bdev_io); 715 716 raid_bdev_io_complete_part(raid_io, 1, success ? 717 SPDK_BDEV_IO_STATUS_SUCCESS : 718 SPDK_BDEV_IO_STATUS_FAILED); 719 } 720 721 static void raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io); 722 723 static void 724 _raid_bdev_submit_reset_request(void *_raid_io) 725 { 726 struct raid_bdev_io *raid_io = _raid_io; 727 728 raid_bdev_submit_reset_request(raid_io); 729 } 730 731 /* 732 * brief: 733 * raid_bdev_submit_reset_request function submits reset requests 734 * to member disks; it will submit as many as possible unless a reset fails with -ENOMEM, in 735 * which case it will queue it for later submission 736 * params: 737 * raid_io 738 * returns: 739 * none 740 */ 741 static void 742 raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io) 743 { 744 struct raid_bdev *raid_bdev; 745 int ret; 746 uint8_t i; 747 struct raid_base_bdev_info *base_info; 748 struct spdk_io_channel *base_ch; 749 750 raid_bdev = raid_io->raid_bdev; 751 752 if (raid_io->base_bdev_io_remaining == 0) { 753 raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs; 754 } 755 756 for (i = raid_io->base_bdev_io_submitted; i < raid_bdev->num_base_bdevs; i++) { 757 base_info = &raid_bdev->base_bdev_info[i]; 758 base_ch = raid_io->raid_ch->base_channel[i]; 759 if (base_ch == NULL) { 760 raid_io->base_bdev_io_submitted++; 761 raid_bdev_io_complete_part(raid_io, 1, SPDK_BDEV_IO_STATUS_SUCCESS); 762 continue; 763 } 764 ret = spdk_bdev_reset(base_info->desc, base_ch, 765 raid_base_bdev_reset_complete, raid_io); 766 if (ret == 0) { 767 raid_io->base_bdev_io_submitted++; 768 } else if (ret == -ENOMEM) { 769 raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), 770 base_ch, _raid_bdev_submit_reset_request); 771 return; 772 } else { 773 SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); 774 assert(false); 775 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 776 return; 777 } 778 } 779 } 780 781 static void 782 raid_bdev_io_split(struct raid_bdev_io *raid_io, uint64_t split_offset) 783 { 784 struct raid_bdev *raid_bdev = raid_io->raid_bdev; 785 size_t iov_offset = split_offset * raid_bdev->bdev.blocklen; 786 int i; 787 788 assert(split_offset != 0); 789 assert(raid_io->split.offset == RAID_OFFSET_BLOCKS_INVALID); 790 raid_io->split.offset = split_offset; 791 792 raid_io->offset_blocks += split_offset; 793 raid_io->num_blocks -= split_offset; 794 if (raid_io->md_buf != NULL) { 795 raid_io->md_buf += (split_offset * raid_bdev->bdev.md_len); 796 } 797 798 for (i = 0; i < raid_io->iovcnt; i++) { 799 struct iovec *iov = &raid_io->iovs[i]; 800 801 if (iov_offset < iov->iov_len) { 802 if (iov_offset == 0) { 803 raid_io->split.iov = NULL; 804 } else { 805 raid_io->split.iov = iov; 806 raid_io->split.iov_copy = *iov; 807 iov->iov_base += iov_offset; 808 iov->iov_len -= iov_offset; 809 } 810 raid_io->iovs += i; 811 raid_io->iovcnt -= i; 812 break; 813 } 814 815 iov_offset -= iov->iov_len; 816 } 817 } 818 819 static void 820 raid_bdev_submit_rw_request(struct raid_bdev_io *raid_io) 821 { 822 struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch; 823 824 if (raid_ch->process.offset != RAID_OFFSET_BLOCKS_INVALID) { 825 uint64_t offset_begin = raid_io->offset_blocks; 826 uint64_t offset_end = offset_begin + raid_io->num_blocks; 827 828 if (offset_end > raid_ch->process.offset) { 829 if (offset_begin < raid_ch->process.offset) { 830 /* 831 * If the I/O spans both the processed and unprocessed ranges, 832 * split it and first handle the unprocessed part. After it 833 * completes, the rest will be handled. 834 * This situation occurs when the process thread is not active 835 * or is waiting for the process window range to be locked 836 * (quiesced). When a window is being processed, such I/Os will be 837 * deferred by the bdev layer until the window is unlocked. 838 */ 839 SPDK_DEBUGLOG(bdev_raid, "split: process_offset: %lu offset_begin: %lu offset_end: %lu\n", 840 raid_ch->process.offset, offset_begin, offset_end); 841 raid_bdev_io_split(raid_io, raid_ch->process.offset - offset_begin); 842 } 843 } else { 844 /* Use the child channel, which corresponds to the already processed range */ 845 raid_io->raid_ch = raid_ch->process.ch_processed; 846 } 847 } 848 849 raid_io->raid_bdev->module->submit_rw_request(raid_io); 850 } 851 852 /* 853 * brief: 854 * Callback function to spdk_bdev_io_get_buf. 855 * params: 856 * ch - pointer to raid bdev io channel 857 * bdev_io - pointer to parent bdev_io on raid bdev device 858 * success - True if buffer is allocated or false otherwise. 859 * returns: 860 * none 861 */ 862 static void 863 raid_bdev_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, 864 bool success) 865 { 866 struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 867 868 if (!success) { 869 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 870 return; 871 } 872 873 raid_bdev_submit_rw_request(raid_io); 874 } 875 876 void 877 raid_bdev_io_init(struct raid_bdev_io *raid_io, struct raid_bdev_io_channel *raid_ch, 878 enum spdk_bdev_io_type type, uint64_t offset_blocks, 879 uint64_t num_blocks, struct iovec *iovs, int iovcnt, void *md_buf, 880 struct spdk_memory_domain *memory_domain, void *memory_domain_ctx) 881 { 882 struct spdk_io_channel *ch = spdk_io_channel_from_ctx(raid_ch); 883 struct raid_bdev *raid_bdev = spdk_io_channel_get_io_device(ch); 884 885 raid_io->type = type; 886 raid_io->offset_blocks = offset_blocks; 887 raid_io->num_blocks = num_blocks; 888 raid_io->iovs = iovs; 889 raid_io->iovcnt = iovcnt; 890 raid_io->memory_domain = memory_domain; 891 raid_io->memory_domain_ctx = memory_domain_ctx; 892 raid_io->md_buf = md_buf; 893 894 raid_io->raid_bdev = raid_bdev; 895 raid_io->raid_ch = raid_ch; 896 raid_io->base_bdev_io_remaining = 0; 897 raid_io->base_bdev_io_submitted = 0; 898 raid_io->completion_cb = NULL; 899 raid_io->split.offset = RAID_OFFSET_BLOCKS_INVALID; 900 901 raid_bdev_io_set_default_status(raid_io, SPDK_BDEV_IO_STATUS_SUCCESS); 902 } 903 904 /* 905 * brief: 906 * raid_bdev_submit_request function is the submit_request function pointer of 907 * raid bdev function table. This is used to submit the io on raid_bdev to below 908 * layers. 909 * params: 910 * ch - pointer to raid bdev io channel 911 * bdev_io - pointer to parent bdev_io on raid bdev device 912 * returns: 913 * none 914 */ 915 static void 916 raid_bdev_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 917 { 918 struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 919 920 raid_bdev_io_init(raid_io, spdk_io_channel_get_ctx(ch), bdev_io->type, 921 bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks, 922 bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, bdev_io->u.bdev.md_buf, 923 bdev_io->u.bdev.memory_domain, bdev_io->u.bdev.memory_domain_ctx); 924 925 switch (bdev_io->type) { 926 case SPDK_BDEV_IO_TYPE_READ: 927 spdk_bdev_io_get_buf(bdev_io, raid_bdev_get_buf_cb, 928 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); 929 break; 930 case SPDK_BDEV_IO_TYPE_WRITE: 931 raid_bdev_submit_rw_request(raid_io); 932 break; 933 934 case SPDK_BDEV_IO_TYPE_RESET: 935 raid_bdev_submit_reset_request(raid_io); 936 break; 937 938 case SPDK_BDEV_IO_TYPE_FLUSH: 939 case SPDK_BDEV_IO_TYPE_UNMAP: 940 if (raid_io->raid_bdev->process != NULL) { 941 /* TODO: rebuild support */ 942 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 943 return; 944 } 945 raid_io->raid_bdev->module->submit_null_payload_request(raid_io); 946 break; 947 948 default: 949 SPDK_ERRLOG("submit request, invalid io type %u\n", bdev_io->type); 950 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 951 break; 952 } 953 } 954 955 /* 956 * brief: 957 * _raid_bdev_io_type_supported checks whether io_type is supported in 958 * all base bdev modules of raid bdev module. If anyone among the base_bdevs 959 * doesn't support, the raid device doesn't supports. 960 * 961 * params: 962 * raid_bdev - pointer to raid bdev context 963 * io_type - io type 964 * returns: 965 * true - io_type is supported 966 * false - io_type is not supported 967 */ 968 inline static bool 969 _raid_bdev_io_type_supported(struct raid_bdev *raid_bdev, enum spdk_bdev_io_type io_type) 970 { 971 struct raid_base_bdev_info *base_info; 972 973 if (io_type == SPDK_BDEV_IO_TYPE_FLUSH || 974 io_type == SPDK_BDEV_IO_TYPE_UNMAP) { 975 if (raid_bdev->module->submit_null_payload_request == NULL) { 976 return false; 977 } 978 } 979 980 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 981 if (base_info->desc == NULL) { 982 continue; 983 } 984 985 if (spdk_bdev_io_type_supported(spdk_bdev_desc_get_bdev(base_info->desc), io_type) == false) { 986 return false; 987 } 988 } 989 990 return true; 991 } 992 993 /* 994 * brief: 995 * raid_bdev_io_type_supported is the io_supported function for bdev function 996 * table which returns whether the particular io type is supported or not by 997 * raid bdev module 998 * params: 999 * ctx - pointer to raid bdev context 1000 * type - io type 1001 * returns: 1002 * true - io_type is supported 1003 * false - io_type is not supported 1004 */ 1005 static bool 1006 raid_bdev_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 1007 { 1008 switch (io_type) { 1009 case SPDK_BDEV_IO_TYPE_READ: 1010 case SPDK_BDEV_IO_TYPE_WRITE: 1011 return true; 1012 1013 case SPDK_BDEV_IO_TYPE_FLUSH: 1014 case SPDK_BDEV_IO_TYPE_RESET: 1015 case SPDK_BDEV_IO_TYPE_UNMAP: 1016 return _raid_bdev_io_type_supported(ctx, io_type); 1017 1018 default: 1019 return false; 1020 } 1021 1022 return false; 1023 } 1024 1025 /* 1026 * brief: 1027 * raid_bdev_get_io_channel is the get_io_channel function table pointer for 1028 * raid bdev. This is used to return the io channel for this raid bdev 1029 * params: 1030 * ctxt - pointer to raid_bdev 1031 * returns: 1032 * pointer to io channel for raid bdev 1033 */ 1034 static struct spdk_io_channel * 1035 raid_bdev_get_io_channel(void *ctxt) 1036 { 1037 struct raid_bdev *raid_bdev = ctxt; 1038 1039 return spdk_get_io_channel(raid_bdev); 1040 } 1041 1042 void 1043 raid_bdev_write_info_json(struct raid_bdev *raid_bdev, struct spdk_json_write_ctx *w) 1044 { 1045 struct raid_base_bdev_info *base_info; 1046 1047 assert(raid_bdev != NULL); 1048 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 1049 1050 spdk_json_write_named_uuid(w, "uuid", &raid_bdev->bdev.uuid); 1051 spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb); 1052 spdk_json_write_named_string(w, "state", raid_bdev_state_to_str(raid_bdev->state)); 1053 spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level)); 1054 spdk_json_write_named_bool(w, "superblock", raid_bdev->superblock_enabled); 1055 spdk_json_write_named_uint32(w, "num_base_bdevs", raid_bdev->num_base_bdevs); 1056 spdk_json_write_named_uint32(w, "num_base_bdevs_discovered", raid_bdev->num_base_bdevs_discovered); 1057 spdk_json_write_named_uint32(w, "num_base_bdevs_operational", 1058 raid_bdev->num_base_bdevs_operational); 1059 if (raid_bdev->process) { 1060 struct raid_bdev_process *process = raid_bdev->process; 1061 uint64_t offset = process->window_offset; 1062 1063 spdk_json_write_named_object_begin(w, "process"); 1064 spdk_json_write_name(w, "type"); 1065 spdk_json_write_string(w, raid_bdev_process_to_str(process->type)); 1066 spdk_json_write_named_string(w, "target", process->target->name); 1067 spdk_json_write_named_object_begin(w, "progress"); 1068 spdk_json_write_named_uint64(w, "blocks", offset); 1069 spdk_json_write_named_uint32(w, "percent", offset * 100.0 / raid_bdev->bdev.blockcnt); 1070 spdk_json_write_object_end(w); 1071 spdk_json_write_object_end(w); 1072 } 1073 spdk_json_write_name(w, "base_bdevs_list"); 1074 spdk_json_write_array_begin(w); 1075 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1076 spdk_json_write_object_begin(w); 1077 spdk_json_write_name(w, "name"); 1078 if (base_info->name) { 1079 spdk_json_write_string(w, base_info->name); 1080 } else { 1081 spdk_json_write_null(w); 1082 } 1083 spdk_json_write_named_uuid(w, "uuid", &base_info->uuid); 1084 spdk_json_write_named_bool(w, "is_configured", base_info->is_configured); 1085 spdk_json_write_named_uint64(w, "data_offset", base_info->data_offset); 1086 spdk_json_write_named_uint64(w, "data_size", base_info->data_size); 1087 spdk_json_write_object_end(w); 1088 } 1089 spdk_json_write_array_end(w); 1090 } 1091 1092 /* 1093 * brief: 1094 * raid_bdev_dump_info_json is the function table pointer for raid bdev 1095 * params: 1096 * ctx - pointer to raid_bdev 1097 * w - pointer to json context 1098 * returns: 1099 * 0 - success 1100 * non zero - failure 1101 */ 1102 static int 1103 raid_bdev_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) 1104 { 1105 struct raid_bdev *raid_bdev = ctx; 1106 1107 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_dump_config_json\n"); 1108 1109 /* Dump the raid bdev configuration related information */ 1110 spdk_json_write_named_object_begin(w, "raid"); 1111 raid_bdev_write_info_json(raid_bdev, w); 1112 spdk_json_write_object_end(w); 1113 1114 return 0; 1115 } 1116 1117 /* 1118 * brief: 1119 * raid_bdev_write_config_json is the function table pointer for raid bdev 1120 * params: 1121 * bdev - pointer to spdk_bdev 1122 * w - pointer to json context 1123 * returns: 1124 * none 1125 */ 1126 static void 1127 raid_bdev_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 1128 { 1129 struct raid_bdev *raid_bdev = bdev->ctxt; 1130 struct raid_base_bdev_info *base_info; 1131 1132 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 1133 1134 if (raid_bdev->superblock_enabled) { 1135 /* raid bdev configuration is stored in the superblock */ 1136 return; 1137 } 1138 1139 spdk_json_write_object_begin(w); 1140 1141 spdk_json_write_named_string(w, "method", "bdev_raid_create"); 1142 1143 spdk_json_write_named_object_begin(w, "params"); 1144 spdk_json_write_named_string(w, "name", bdev->name); 1145 spdk_json_write_named_uuid(w, "uuid", &raid_bdev->bdev.uuid); 1146 if (raid_bdev->strip_size_kb != 0) { 1147 spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb); 1148 } 1149 spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level)); 1150 1151 spdk_json_write_named_array_begin(w, "base_bdevs"); 1152 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1153 if (base_info->name) { 1154 spdk_json_write_string(w, base_info->name); 1155 } else { 1156 char str[32]; 1157 1158 snprintf(str, sizeof(str), "removed_base_bdev_%u", raid_bdev_base_bdev_slot(base_info)); 1159 spdk_json_write_string(w, str); 1160 } 1161 } 1162 spdk_json_write_array_end(w); 1163 spdk_json_write_object_end(w); 1164 1165 spdk_json_write_object_end(w); 1166 } 1167 1168 static int 1169 raid_bdev_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int array_size) 1170 { 1171 struct raid_bdev *raid_bdev = ctx; 1172 struct raid_base_bdev_info *base_info; 1173 int domains_count = 0, rc = 0; 1174 1175 if (raid_bdev->module->memory_domains_supported == false) { 1176 return 0; 1177 } 1178 1179 /* First loop to get the number of memory domains */ 1180 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1181 if (base_info->is_configured == false) { 1182 continue; 1183 } 1184 rc = spdk_bdev_get_memory_domains(spdk_bdev_desc_get_bdev(base_info->desc), NULL, 0); 1185 if (rc < 0) { 1186 return rc; 1187 } 1188 domains_count += rc; 1189 } 1190 1191 if (!domains || array_size < domains_count) { 1192 return domains_count; 1193 } 1194 1195 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1196 if (base_info->is_configured == false) { 1197 continue; 1198 } 1199 rc = spdk_bdev_get_memory_domains(spdk_bdev_desc_get_bdev(base_info->desc), domains, array_size); 1200 if (rc < 0) { 1201 return rc; 1202 } 1203 domains += rc; 1204 array_size -= rc; 1205 } 1206 1207 return domains_count; 1208 } 1209 1210 /* g_raid_bdev_fn_table is the function table for raid bdev */ 1211 static const struct spdk_bdev_fn_table g_raid_bdev_fn_table = { 1212 .destruct = raid_bdev_destruct, 1213 .submit_request = raid_bdev_submit_request, 1214 .io_type_supported = raid_bdev_io_type_supported, 1215 .get_io_channel = raid_bdev_get_io_channel, 1216 .dump_info_json = raid_bdev_dump_info_json, 1217 .write_config_json = raid_bdev_write_config_json, 1218 .get_memory_domains = raid_bdev_get_memory_domains, 1219 }; 1220 1221 struct raid_bdev * 1222 raid_bdev_find_by_name(const char *name) 1223 { 1224 struct raid_bdev *raid_bdev; 1225 1226 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { 1227 if (strcmp(raid_bdev->bdev.name, name) == 0) { 1228 return raid_bdev; 1229 } 1230 } 1231 1232 return NULL; 1233 } 1234 1235 static struct raid_bdev * 1236 raid_bdev_find_by_uuid(const struct spdk_uuid *uuid) 1237 { 1238 struct raid_bdev *raid_bdev; 1239 1240 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { 1241 if (spdk_uuid_compare(&raid_bdev->bdev.uuid, uuid) == 0) { 1242 return raid_bdev; 1243 } 1244 } 1245 1246 return NULL; 1247 } 1248 1249 static struct { 1250 const char *name; 1251 enum raid_level value; 1252 } g_raid_level_names[] = { 1253 { "raid0", RAID0 }, 1254 { "0", RAID0 }, 1255 { "raid1", RAID1 }, 1256 { "1", RAID1 }, 1257 { "raid5f", RAID5F }, 1258 { "5f", RAID5F }, 1259 { "concat", CONCAT }, 1260 { } 1261 }; 1262 1263 const char *g_raid_state_names[] = { 1264 [RAID_BDEV_STATE_ONLINE] = "online", 1265 [RAID_BDEV_STATE_CONFIGURING] = "configuring", 1266 [RAID_BDEV_STATE_OFFLINE] = "offline", 1267 [RAID_BDEV_STATE_MAX] = NULL 1268 }; 1269 1270 static const char *g_raid_process_type_names[] = { 1271 [RAID_PROCESS_NONE] = "none", 1272 [RAID_PROCESS_REBUILD] = "rebuild", 1273 [RAID_PROCESS_MAX] = NULL 1274 }; 1275 1276 /* We have to use the typedef in the function declaration to appease astyle. */ 1277 typedef enum raid_level raid_level_t; 1278 typedef enum raid_bdev_state raid_bdev_state_t; 1279 1280 raid_level_t 1281 raid_bdev_str_to_level(const char *str) 1282 { 1283 unsigned int i; 1284 1285 assert(str != NULL); 1286 1287 for (i = 0; g_raid_level_names[i].name != NULL; i++) { 1288 if (strcasecmp(g_raid_level_names[i].name, str) == 0) { 1289 return g_raid_level_names[i].value; 1290 } 1291 } 1292 1293 return INVALID_RAID_LEVEL; 1294 } 1295 1296 const char * 1297 raid_bdev_level_to_str(enum raid_level level) 1298 { 1299 unsigned int i; 1300 1301 for (i = 0; g_raid_level_names[i].name != NULL; i++) { 1302 if (g_raid_level_names[i].value == level) { 1303 return g_raid_level_names[i].name; 1304 } 1305 } 1306 1307 return ""; 1308 } 1309 1310 raid_bdev_state_t 1311 raid_bdev_str_to_state(const char *str) 1312 { 1313 unsigned int i; 1314 1315 assert(str != NULL); 1316 1317 for (i = 0; i < RAID_BDEV_STATE_MAX; i++) { 1318 if (strcasecmp(g_raid_state_names[i], str) == 0) { 1319 break; 1320 } 1321 } 1322 1323 return i; 1324 } 1325 1326 const char * 1327 raid_bdev_state_to_str(enum raid_bdev_state state) 1328 { 1329 if (state >= RAID_BDEV_STATE_MAX) { 1330 return ""; 1331 } 1332 1333 return g_raid_state_names[state]; 1334 } 1335 1336 const char * 1337 raid_bdev_process_to_str(enum raid_process_type value) 1338 { 1339 if (value >= RAID_PROCESS_MAX) { 1340 return ""; 1341 } 1342 1343 return g_raid_process_type_names[value]; 1344 } 1345 1346 /* 1347 * brief: 1348 * raid_bdev_fini_start is called when bdev layer is starting the 1349 * shutdown process 1350 * params: 1351 * none 1352 * returns: 1353 * none 1354 */ 1355 static void 1356 raid_bdev_fini_start(void) 1357 { 1358 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_fini_start\n"); 1359 g_shutdown_started = true; 1360 } 1361 1362 /* 1363 * brief: 1364 * raid_bdev_exit is called on raid bdev module exit time by bdev layer 1365 * params: 1366 * none 1367 * returns: 1368 * none 1369 */ 1370 static void 1371 raid_bdev_exit(void) 1372 { 1373 struct raid_bdev *raid_bdev, *tmp; 1374 1375 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_exit\n"); 1376 1377 TAILQ_FOREACH_SAFE(raid_bdev, &g_raid_bdev_list, global_link, tmp) { 1378 raid_bdev_cleanup_and_free(raid_bdev); 1379 } 1380 } 1381 1382 static void 1383 raid_bdev_opts_config_json(struct spdk_json_write_ctx *w) 1384 { 1385 spdk_json_write_object_begin(w); 1386 1387 spdk_json_write_named_string(w, "method", "bdev_raid_set_options"); 1388 1389 spdk_json_write_named_object_begin(w, "params"); 1390 spdk_json_write_named_uint32(w, "process_window_size_kb", g_opts.process_window_size_kb); 1391 spdk_json_write_object_end(w); 1392 1393 spdk_json_write_object_end(w); 1394 } 1395 1396 static int 1397 raid_bdev_config_json(struct spdk_json_write_ctx *w) 1398 { 1399 raid_bdev_opts_config_json(w); 1400 1401 return 0; 1402 } 1403 1404 /* 1405 * brief: 1406 * raid_bdev_get_ctx_size is used to return the context size of bdev_io for raid 1407 * module 1408 * params: 1409 * none 1410 * returns: 1411 * size of spdk_bdev_io context for raid 1412 */ 1413 static int 1414 raid_bdev_get_ctx_size(void) 1415 { 1416 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_get_ctx_size\n"); 1417 return sizeof(struct raid_bdev_io); 1418 } 1419 1420 static struct spdk_bdev_module g_raid_if = { 1421 .name = "raid", 1422 .module_init = raid_bdev_init, 1423 .fini_start = raid_bdev_fini_start, 1424 .module_fini = raid_bdev_exit, 1425 .config_json = raid_bdev_config_json, 1426 .get_ctx_size = raid_bdev_get_ctx_size, 1427 .examine_disk = raid_bdev_examine, 1428 .async_init = false, 1429 .async_fini = false, 1430 }; 1431 SPDK_BDEV_MODULE_REGISTER(raid, &g_raid_if) 1432 1433 /* 1434 * brief: 1435 * raid_bdev_init is the initialization function for raid bdev module 1436 * params: 1437 * none 1438 * returns: 1439 * 0 - success 1440 * non zero - failure 1441 */ 1442 static int 1443 raid_bdev_init(void) 1444 { 1445 return 0; 1446 } 1447 1448 static int 1449 _raid_bdev_create(const char *name, uint32_t strip_size, uint8_t num_base_bdevs, 1450 enum raid_level level, bool superblock_enabled, const struct spdk_uuid *uuid, 1451 struct raid_bdev **raid_bdev_out) 1452 { 1453 struct raid_bdev *raid_bdev; 1454 struct spdk_bdev *raid_bdev_gen; 1455 struct raid_bdev_module *module; 1456 struct raid_base_bdev_info *base_info; 1457 uint8_t min_operational; 1458 1459 if (strnlen(name, RAID_BDEV_SB_NAME_SIZE) == RAID_BDEV_SB_NAME_SIZE) { 1460 SPDK_ERRLOG("Raid bdev name '%s' exceeds %d characters\n", name, RAID_BDEV_SB_NAME_SIZE - 1); 1461 return -EINVAL; 1462 } 1463 1464 if (raid_bdev_find_by_name(name) != NULL) { 1465 SPDK_ERRLOG("Duplicate raid bdev name found: %s\n", name); 1466 return -EEXIST; 1467 } 1468 1469 if (level == RAID1) { 1470 if (strip_size != 0) { 1471 SPDK_ERRLOG("Strip size is not supported by raid1\n"); 1472 return -EINVAL; 1473 } 1474 } else if (spdk_u32_is_pow2(strip_size) == false) { 1475 SPDK_ERRLOG("Invalid strip size %" PRIu32 "\n", strip_size); 1476 return -EINVAL; 1477 } 1478 1479 module = raid_bdev_module_find(level); 1480 if (module == NULL) { 1481 SPDK_ERRLOG("Unsupported raid level '%d'\n", level); 1482 return -EINVAL; 1483 } 1484 1485 assert(module->base_bdevs_min != 0); 1486 if (num_base_bdevs < module->base_bdevs_min) { 1487 SPDK_ERRLOG("At least %u base devices required for %s\n", 1488 module->base_bdevs_min, 1489 raid_bdev_level_to_str(level)); 1490 return -EINVAL; 1491 } 1492 1493 switch (module->base_bdevs_constraint.type) { 1494 case CONSTRAINT_MAX_BASE_BDEVS_REMOVED: 1495 min_operational = num_base_bdevs - module->base_bdevs_constraint.value; 1496 break; 1497 case CONSTRAINT_MIN_BASE_BDEVS_OPERATIONAL: 1498 min_operational = module->base_bdevs_constraint.value; 1499 break; 1500 case CONSTRAINT_UNSET: 1501 if (module->base_bdevs_constraint.value != 0) { 1502 SPDK_ERRLOG("Unexpected constraint value '%u' provided for raid bdev '%s'.\n", 1503 (uint8_t)module->base_bdevs_constraint.value, name); 1504 return -EINVAL; 1505 } 1506 min_operational = num_base_bdevs; 1507 break; 1508 default: 1509 SPDK_ERRLOG("Unrecognised constraint type '%u' in module for raid level '%s'.\n", 1510 (uint8_t)module->base_bdevs_constraint.type, 1511 raid_bdev_level_to_str(module->level)); 1512 return -EINVAL; 1513 }; 1514 1515 if (min_operational == 0 || min_operational > num_base_bdevs) { 1516 SPDK_ERRLOG("Wrong constraint value for raid level '%s'.\n", 1517 raid_bdev_level_to_str(module->level)); 1518 return -EINVAL; 1519 } 1520 1521 raid_bdev = calloc(1, sizeof(*raid_bdev)); 1522 if (!raid_bdev) { 1523 SPDK_ERRLOG("Unable to allocate memory for raid bdev\n"); 1524 return -ENOMEM; 1525 } 1526 1527 raid_bdev->module = module; 1528 raid_bdev->num_base_bdevs = num_base_bdevs; 1529 raid_bdev->base_bdev_info = calloc(raid_bdev->num_base_bdevs, 1530 sizeof(struct raid_base_bdev_info)); 1531 if (!raid_bdev->base_bdev_info) { 1532 SPDK_ERRLOG("Unable able to allocate base bdev info\n"); 1533 raid_bdev_free(raid_bdev); 1534 return -ENOMEM; 1535 } 1536 1537 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1538 base_info->raid_bdev = raid_bdev; 1539 } 1540 1541 /* strip_size_kb is from the rpc param. strip_size is in blocks and used 1542 * internally and set later. 1543 */ 1544 raid_bdev->strip_size = 0; 1545 raid_bdev->strip_size_kb = strip_size; 1546 raid_bdev->state = RAID_BDEV_STATE_CONFIGURING; 1547 raid_bdev->level = level; 1548 raid_bdev->min_base_bdevs_operational = min_operational; 1549 raid_bdev->superblock_enabled = superblock_enabled; 1550 1551 raid_bdev_gen = &raid_bdev->bdev; 1552 1553 raid_bdev_gen->name = strdup(name); 1554 if (!raid_bdev_gen->name) { 1555 SPDK_ERRLOG("Unable to allocate name for raid\n"); 1556 raid_bdev_free(raid_bdev); 1557 return -ENOMEM; 1558 } 1559 1560 raid_bdev_gen->product_name = "Raid Volume"; 1561 raid_bdev_gen->ctxt = raid_bdev; 1562 raid_bdev_gen->fn_table = &g_raid_bdev_fn_table; 1563 raid_bdev_gen->module = &g_raid_if; 1564 raid_bdev_gen->write_cache = 0; 1565 spdk_uuid_copy(&raid_bdev_gen->uuid, uuid); 1566 1567 TAILQ_INSERT_TAIL(&g_raid_bdev_list, raid_bdev, global_link); 1568 1569 *raid_bdev_out = raid_bdev; 1570 1571 return 0; 1572 } 1573 1574 /* 1575 * brief: 1576 * raid_bdev_create allocates raid bdev based on passed configuration 1577 * params: 1578 * name - name for raid bdev 1579 * strip_size - strip size in KB 1580 * num_base_bdevs - number of base bdevs 1581 * level - raid level 1582 * superblock_enabled - true if raid should have superblock 1583 * uuid - uuid to set for the bdev 1584 * raid_bdev_out - the created raid bdev 1585 * returns: 1586 * 0 - success 1587 * non zero - failure 1588 */ 1589 int 1590 raid_bdev_create(const char *name, uint32_t strip_size, uint8_t num_base_bdevs, 1591 enum raid_level level, bool superblock_enabled, const struct spdk_uuid *uuid, 1592 struct raid_bdev **raid_bdev_out) 1593 { 1594 struct raid_bdev *raid_bdev; 1595 int rc; 1596 1597 assert(uuid != NULL); 1598 1599 rc = _raid_bdev_create(name, strip_size, num_base_bdevs, level, superblock_enabled, uuid, 1600 &raid_bdev); 1601 if (rc != 0) { 1602 return rc; 1603 } 1604 1605 if (superblock_enabled && spdk_uuid_is_null(uuid)) { 1606 /* we need to have the uuid to store in the superblock before the bdev is registered */ 1607 spdk_uuid_generate(&raid_bdev->bdev.uuid); 1608 } 1609 1610 raid_bdev->num_base_bdevs_operational = num_base_bdevs; 1611 1612 *raid_bdev_out = raid_bdev; 1613 1614 return 0; 1615 } 1616 1617 static void 1618 _raid_bdev_unregistering_cont(void *ctx) 1619 { 1620 struct raid_bdev *raid_bdev = ctx; 1621 1622 spdk_bdev_close(raid_bdev->self_desc); 1623 raid_bdev->self_desc = NULL; 1624 } 1625 1626 static void 1627 raid_bdev_unregistering_cont(void *ctx) 1628 { 1629 spdk_thread_exec_msg(spdk_thread_get_app_thread(), _raid_bdev_unregistering_cont, ctx); 1630 } 1631 1632 static int 1633 raid_bdev_process_add_finish_action(struct raid_bdev_process *process, spdk_msg_fn cb, void *cb_ctx) 1634 { 1635 struct raid_process_finish_action *finish_action; 1636 1637 assert(spdk_get_thread() == process->thread); 1638 assert(process->state < RAID_PROCESS_STATE_STOPPED); 1639 1640 finish_action = calloc(1, sizeof(*finish_action)); 1641 if (finish_action == NULL) { 1642 return -ENOMEM; 1643 } 1644 1645 finish_action->cb = cb; 1646 finish_action->cb_ctx = cb_ctx; 1647 1648 TAILQ_INSERT_TAIL(&process->finish_actions, finish_action, link); 1649 1650 return 0; 1651 } 1652 1653 static void 1654 raid_bdev_unregistering_stop_process(void *ctx) 1655 { 1656 struct raid_bdev_process *process = ctx; 1657 struct raid_bdev *raid_bdev = process->raid_bdev; 1658 int rc; 1659 1660 process->state = RAID_PROCESS_STATE_STOPPING; 1661 if (process->status == 0) { 1662 process->status = -ECANCELED; 1663 } 1664 1665 rc = raid_bdev_process_add_finish_action(process, raid_bdev_unregistering_cont, raid_bdev); 1666 if (rc != 0) { 1667 SPDK_ERRLOG("Failed to add raid bdev '%s' process finish action: %s\n", 1668 raid_bdev->bdev.name, spdk_strerror(-rc)); 1669 } 1670 } 1671 1672 static void 1673 raid_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx) 1674 { 1675 struct raid_bdev *raid_bdev = event_ctx; 1676 1677 if (type == SPDK_BDEV_EVENT_REMOVE) { 1678 if (raid_bdev->process != NULL) { 1679 spdk_thread_send_msg(raid_bdev->process->thread, raid_bdev_unregistering_stop_process, 1680 raid_bdev->process); 1681 } else { 1682 raid_bdev_unregistering_cont(raid_bdev); 1683 } 1684 } 1685 } 1686 1687 static void 1688 raid_bdev_configure_cont(struct raid_bdev *raid_bdev) 1689 { 1690 struct spdk_bdev *raid_bdev_gen = &raid_bdev->bdev; 1691 int rc; 1692 1693 raid_bdev->state = RAID_BDEV_STATE_ONLINE; 1694 SPDK_DEBUGLOG(bdev_raid, "io device register %p\n", raid_bdev); 1695 SPDK_DEBUGLOG(bdev_raid, "blockcnt %" PRIu64 ", blocklen %u\n", 1696 raid_bdev_gen->blockcnt, raid_bdev_gen->blocklen); 1697 spdk_io_device_register(raid_bdev, raid_bdev_create_cb, raid_bdev_destroy_cb, 1698 sizeof(struct raid_bdev_io_channel), 1699 raid_bdev_gen->name); 1700 rc = spdk_bdev_register(raid_bdev_gen); 1701 if (rc != 0) { 1702 SPDK_ERRLOG("Failed to register raid bdev '%s': %s\n", 1703 raid_bdev_gen->name, spdk_strerror(-rc)); 1704 goto err; 1705 } 1706 1707 /* 1708 * Open the bdev internally to delay unregistering if we need to stop a background process 1709 * first. The process may still need to unquiesce a range but it will fail because the 1710 * bdev's internal.spinlock is destroyed by the time the destruct callback is reached. 1711 * During application shutdown, bdevs automatically get unregistered by the bdev layer 1712 * so this is the only way currently to do this correctly. 1713 * TODO: try to handle this correctly in bdev layer instead. 1714 */ 1715 rc = spdk_bdev_open_ext(raid_bdev_gen->name, false, raid_bdev_event_cb, raid_bdev, 1716 &raid_bdev->self_desc); 1717 if (rc != 0) { 1718 SPDK_ERRLOG("Failed to open raid bdev '%s': %s\n", 1719 raid_bdev_gen->name, spdk_strerror(-rc)); 1720 spdk_bdev_unregister(raid_bdev_gen, NULL, NULL); 1721 goto err; 1722 } 1723 1724 SPDK_DEBUGLOG(bdev_raid, "raid bdev generic %p\n", raid_bdev_gen); 1725 SPDK_DEBUGLOG(bdev_raid, "raid bdev is created with name %s, raid_bdev %p\n", 1726 raid_bdev_gen->name, raid_bdev); 1727 return; 1728 err: 1729 if (raid_bdev->module->stop != NULL) { 1730 raid_bdev->module->stop(raid_bdev); 1731 } 1732 spdk_io_device_unregister(raid_bdev, NULL); 1733 raid_bdev->state = RAID_BDEV_STATE_CONFIGURING; 1734 } 1735 1736 static void 1737 raid_bdev_configure_write_sb_cb(int status, struct raid_bdev *raid_bdev, void *ctx) 1738 { 1739 if (status == 0) { 1740 raid_bdev_configure_cont(raid_bdev); 1741 } else { 1742 SPDK_ERRLOG("Failed to write raid bdev '%s' superblock: %s\n", 1743 raid_bdev->bdev.name, spdk_strerror(-status)); 1744 if (raid_bdev->module->stop != NULL) { 1745 raid_bdev->module->stop(raid_bdev); 1746 } 1747 } 1748 } 1749 1750 /* 1751 * brief: 1752 * If raid bdev config is complete, then only register the raid bdev to 1753 * bdev layer and remove this raid bdev from configuring list and 1754 * insert the raid bdev to configured list 1755 * params: 1756 * raid_bdev - pointer to raid bdev 1757 * returns: 1758 * 0 - success 1759 * non zero - failure 1760 */ 1761 static int 1762 raid_bdev_configure(struct raid_bdev *raid_bdev) 1763 { 1764 uint32_t data_block_size = spdk_bdev_get_data_block_size(&raid_bdev->bdev); 1765 int rc; 1766 1767 assert(raid_bdev->state == RAID_BDEV_STATE_CONFIGURING); 1768 assert(raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs_operational); 1769 assert(raid_bdev->bdev.blocklen > 0); 1770 1771 /* The strip_size_kb is read in from user in KB. Convert to blocks here for 1772 * internal use. 1773 */ 1774 raid_bdev->strip_size = (raid_bdev->strip_size_kb * 1024) / data_block_size; 1775 if (raid_bdev->strip_size == 0 && raid_bdev->level != RAID1) { 1776 SPDK_ERRLOG("Strip size cannot be smaller than the device block size\n"); 1777 return -EINVAL; 1778 } 1779 raid_bdev->strip_size_shift = spdk_u32log2(raid_bdev->strip_size); 1780 1781 rc = raid_bdev->module->start(raid_bdev); 1782 if (rc != 0) { 1783 SPDK_ERRLOG("raid module startup callback failed\n"); 1784 return rc; 1785 } 1786 1787 if (raid_bdev->superblock_enabled) { 1788 if (raid_bdev->sb == NULL) { 1789 rc = raid_bdev_alloc_superblock(raid_bdev, data_block_size); 1790 if (rc == 0) { 1791 raid_bdev_init_superblock(raid_bdev); 1792 } 1793 } else { 1794 assert(spdk_uuid_compare(&raid_bdev->sb->uuid, &raid_bdev->bdev.uuid) == 0); 1795 if (raid_bdev->sb->block_size != data_block_size) { 1796 SPDK_ERRLOG("blocklen does not match value in superblock\n"); 1797 rc = -EINVAL; 1798 } 1799 if (raid_bdev->sb->raid_size != raid_bdev->bdev.blockcnt) { 1800 SPDK_ERRLOG("blockcnt does not match value in superblock\n"); 1801 rc = -EINVAL; 1802 } 1803 } 1804 1805 if (rc != 0) { 1806 if (raid_bdev->module->stop != NULL) { 1807 raid_bdev->module->stop(raid_bdev); 1808 } 1809 return rc; 1810 } 1811 1812 raid_bdev_write_superblock(raid_bdev, raid_bdev_configure_write_sb_cb, NULL); 1813 } else { 1814 raid_bdev_configure_cont(raid_bdev); 1815 } 1816 1817 return 0; 1818 } 1819 1820 /* 1821 * brief: 1822 * If raid bdev is online and registered, change the bdev state to 1823 * configuring and unregister this raid device. Queue this raid device 1824 * in configuring list 1825 * params: 1826 * raid_bdev - pointer to raid bdev 1827 * cb_fn - callback function 1828 * cb_arg - argument to callback function 1829 * returns: 1830 * none 1831 */ 1832 static void 1833 raid_bdev_deconfigure(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn, 1834 void *cb_arg) 1835 { 1836 if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) { 1837 if (cb_fn) { 1838 cb_fn(cb_arg, 0); 1839 } 1840 return; 1841 } 1842 1843 raid_bdev->state = RAID_BDEV_STATE_OFFLINE; 1844 SPDK_DEBUGLOG(bdev_raid, "raid bdev state changing from online to offline\n"); 1845 1846 spdk_bdev_unregister(&raid_bdev->bdev, cb_fn, cb_arg); 1847 } 1848 1849 /* 1850 * brief: 1851 * raid_bdev_find_base_info_by_bdev function finds the base bdev info by bdev. 1852 * params: 1853 * base_bdev - pointer to base bdev 1854 * returns: 1855 * base bdev info if found, otherwise NULL. 1856 */ 1857 static struct raid_base_bdev_info * 1858 raid_bdev_find_base_info_by_bdev(struct spdk_bdev *base_bdev) 1859 { 1860 struct raid_bdev *raid_bdev; 1861 struct raid_base_bdev_info *base_info; 1862 1863 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { 1864 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1865 if (base_info->desc != NULL && 1866 spdk_bdev_desc_get_bdev(base_info->desc) == base_bdev) { 1867 return base_info; 1868 } 1869 } 1870 } 1871 1872 return NULL; 1873 } 1874 1875 static void 1876 raid_bdev_remove_base_bdev_done(struct raid_base_bdev_info *base_info, int status) 1877 { 1878 struct raid_bdev *raid_bdev = base_info->raid_bdev; 1879 1880 assert(base_info->remove_scheduled); 1881 base_info->remove_scheduled = false; 1882 1883 if (status == 0) { 1884 raid_bdev->num_base_bdevs_operational--; 1885 if (raid_bdev->num_base_bdevs_operational < raid_bdev->min_base_bdevs_operational) { 1886 /* There is not enough base bdevs to keep the raid bdev operational. */ 1887 raid_bdev_deconfigure(raid_bdev, base_info->remove_cb, base_info->remove_cb_ctx); 1888 return; 1889 } 1890 } 1891 1892 if (base_info->remove_cb != NULL) { 1893 base_info->remove_cb(base_info->remove_cb_ctx, status); 1894 } 1895 } 1896 1897 static void 1898 raid_bdev_remove_base_bdev_on_unquiesced(void *ctx, int status) 1899 { 1900 struct raid_base_bdev_info *base_info = ctx; 1901 struct raid_bdev *raid_bdev = base_info->raid_bdev; 1902 1903 if (status != 0) { 1904 SPDK_ERRLOG("Failed to unquiesce raid bdev %s: %s\n", 1905 raid_bdev->bdev.name, spdk_strerror(-status)); 1906 } 1907 1908 raid_bdev_remove_base_bdev_done(base_info, status); 1909 } 1910 1911 static void 1912 raid_bdev_channel_remove_base_bdev(struct spdk_io_channel_iter *i) 1913 { 1914 struct raid_base_bdev_info *base_info = spdk_io_channel_iter_get_ctx(i); 1915 struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); 1916 struct raid_bdev_io_channel *raid_ch = spdk_io_channel_get_ctx(ch); 1917 uint8_t idx = raid_bdev_base_bdev_slot(base_info); 1918 1919 SPDK_DEBUGLOG(bdev_raid, "slot: %u raid_ch: %p\n", idx, raid_ch); 1920 1921 if (raid_ch->base_channel[idx] != NULL) { 1922 spdk_put_io_channel(raid_ch->base_channel[idx]); 1923 raid_ch->base_channel[idx] = NULL; 1924 } 1925 1926 if (raid_ch->process.ch_processed != NULL) { 1927 raid_ch->process.ch_processed->base_channel[idx] = NULL; 1928 } 1929 1930 spdk_for_each_channel_continue(i, 0); 1931 } 1932 1933 static void 1934 raid_bdev_channels_remove_base_bdev_done(struct spdk_io_channel_iter *i, int status) 1935 { 1936 struct raid_base_bdev_info *base_info = spdk_io_channel_iter_get_ctx(i); 1937 struct raid_bdev *raid_bdev = base_info->raid_bdev; 1938 1939 raid_bdev_free_base_bdev_resource(base_info); 1940 1941 spdk_bdev_unquiesce(&raid_bdev->bdev, &g_raid_if, raid_bdev_remove_base_bdev_on_unquiesced, 1942 base_info); 1943 } 1944 1945 static void 1946 raid_bdev_remove_base_bdev_cont(struct raid_base_bdev_info *base_info) 1947 { 1948 raid_bdev_deconfigure_base_bdev(base_info); 1949 1950 spdk_for_each_channel(base_info->raid_bdev, raid_bdev_channel_remove_base_bdev, base_info, 1951 raid_bdev_channels_remove_base_bdev_done); 1952 } 1953 1954 static void 1955 raid_bdev_remove_base_bdev_write_sb_cb(int status, struct raid_bdev *raid_bdev, void *ctx) 1956 { 1957 struct raid_base_bdev_info *base_info = ctx; 1958 1959 if (status != 0) { 1960 SPDK_ERRLOG("Failed to write raid bdev '%s' superblock: %s\n", 1961 raid_bdev->bdev.name, spdk_strerror(-status)); 1962 raid_bdev_remove_base_bdev_done(base_info, status); 1963 return; 1964 } 1965 1966 raid_bdev_remove_base_bdev_cont(base_info); 1967 } 1968 1969 static void 1970 raid_bdev_remove_base_bdev_on_quiesced(void *ctx, int status) 1971 { 1972 struct raid_base_bdev_info *base_info = ctx; 1973 struct raid_bdev *raid_bdev = base_info->raid_bdev; 1974 1975 if (status != 0) { 1976 SPDK_ERRLOG("Failed to quiesce raid bdev %s: %s\n", 1977 raid_bdev->bdev.name, spdk_strerror(-status)); 1978 raid_bdev_remove_base_bdev_done(base_info, status); 1979 return; 1980 } 1981 1982 if (raid_bdev->sb) { 1983 struct raid_bdev_superblock *sb = raid_bdev->sb; 1984 uint8_t slot = raid_bdev_base_bdev_slot(base_info); 1985 uint8_t i; 1986 1987 for (i = 0; i < sb->base_bdevs_size; i++) { 1988 struct raid_bdev_sb_base_bdev *sb_base_bdev = &sb->base_bdevs[i]; 1989 1990 if (sb_base_bdev->state == RAID_SB_BASE_BDEV_CONFIGURED && 1991 sb_base_bdev->slot == slot) { 1992 if (base_info->is_failed) { 1993 sb_base_bdev->state = RAID_SB_BASE_BDEV_FAILED; 1994 } else { 1995 sb_base_bdev->state = RAID_SB_BASE_BDEV_MISSING; 1996 } 1997 1998 raid_bdev_write_superblock(raid_bdev, raid_bdev_remove_base_bdev_write_sb_cb, base_info); 1999 return; 2000 } 2001 } 2002 } 2003 2004 raid_bdev_remove_base_bdev_cont(base_info); 2005 } 2006 2007 static int 2008 raid_bdev_remove_base_bdev_quiesce(struct raid_base_bdev_info *base_info) 2009 { 2010 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 2011 2012 return spdk_bdev_quiesce(&base_info->raid_bdev->bdev, &g_raid_if, 2013 raid_bdev_remove_base_bdev_on_quiesced, base_info); 2014 } 2015 2016 struct raid_bdev_process_base_bdev_remove_ctx { 2017 struct raid_bdev_process *process; 2018 struct raid_base_bdev_info *base_info; 2019 uint8_t num_base_bdevs_operational; 2020 }; 2021 2022 static void 2023 _raid_bdev_process_base_bdev_remove_cont(void *ctx) 2024 { 2025 struct raid_base_bdev_info *base_info = ctx; 2026 int ret; 2027 2028 ret = raid_bdev_remove_base_bdev_quiesce(base_info); 2029 if (ret != 0) { 2030 raid_bdev_remove_base_bdev_done(base_info, ret); 2031 } 2032 } 2033 2034 static void 2035 raid_bdev_process_base_bdev_remove_cont(void *_ctx) 2036 { 2037 struct raid_bdev_process_base_bdev_remove_ctx *ctx = _ctx; 2038 struct raid_base_bdev_info *base_info = ctx->base_info; 2039 2040 free(ctx); 2041 2042 spdk_thread_send_msg(spdk_thread_get_app_thread(), _raid_bdev_process_base_bdev_remove_cont, 2043 base_info); 2044 } 2045 2046 static void 2047 _raid_bdev_process_base_bdev_remove(void *_ctx) 2048 { 2049 struct raid_bdev_process_base_bdev_remove_ctx *ctx = _ctx; 2050 struct raid_bdev_process *process = ctx->process; 2051 int ret; 2052 2053 if (ctx->base_info != process->target && 2054 ctx->num_base_bdevs_operational > process->raid_bdev->min_base_bdevs_operational) { 2055 /* process doesn't need to be stopped */ 2056 raid_bdev_process_base_bdev_remove_cont(ctx); 2057 return; 2058 } 2059 2060 assert(process->state > RAID_PROCESS_STATE_INIT && 2061 process->state < RAID_PROCESS_STATE_STOPPED); 2062 2063 ret = raid_bdev_process_add_finish_action(process, raid_bdev_process_base_bdev_remove_cont, ctx); 2064 if (ret != 0) { 2065 raid_bdev_remove_base_bdev_done(ctx->base_info, ret); 2066 free(ctx); 2067 return; 2068 } 2069 2070 process->state = RAID_PROCESS_STATE_STOPPING; 2071 2072 if (process->status == 0) { 2073 process->status = -ENODEV; 2074 } 2075 } 2076 2077 static int 2078 raid_bdev_process_base_bdev_remove(struct raid_bdev_process *process, 2079 struct raid_base_bdev_info *base_info) 2080 { 2081 struct raid_bdev_process_base_bdev_remove_ctx *ctx; 2082 2083 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 2084 2085 ctx = calloc(1, sizeof(*ctx)); 2086 if (ctx == NULL) { 2087 return -ENOMEM; 2088 } 2089 2090 /* 2091 * We have to send the process and num_base_bdevs_operational in the message ctx 2092 * because the process thread should not access raid_bdev's properties. Particularly, 2093 * raid_bdev->process may be cleared by the time the message is handled, but ctx->process 2094 * will still be valid until the process is fully stopped. 2095 */ 2096 ctx->base_info = base_info; 2097 ctx->process = process; 2098 /* 2099 * raid_bdev->num_base_bdevs_operational can't be used here because it is decremented 2100 * after the removal and more than one base bdev may be removed at the same time 2101 */ 2102 RAID_FOR_EACH_BASE_BDEV(process->raid_bdev, base_info) { 2103 if (base_info->is_configured && !base_info->remove_scheduled) { 2104 ctx->num_base_bdevs_operational++; 2105 } 2106 } 2107 2108 spdk_thread_send_msg(process->thread, _raid_bdev_process_base_bdev_remove, ctx); 2109 2110 return 0; 2111 } 2112 2113 static int 2114 _raid_bdev_remove_base_bdev(struct raid_base_bdev_info *base_info, 2115 raid_base_bdev_cb cb_fn, void *cb_ctx) 2116 { 2117 struct raid_bdev *raid_bdev = base_info->raid_bdev; 2118 int ret = 0; 2119 2120 SPDK_DEBUGLOG(bdev_raid, "%s\n", base_info->name); 2121 2122 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 2123 2124 if (base_info->remove_scheduled || !base_info->is_configured) { 2125 return -ENODEV; 2126 } 2127 2128 assert(base_info->desc); 2129 base_info->remove_scheduled = true; 2130 2131 if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) { 2132 /* 2133 * As raid bdev is not registered yet or already unregistered, 2134 * so cleanup should be done here itself. 2135 * 2136 * Removing a base bdev at this stage does not change the number of operational 2137 * base bdevs, only the number of discovered base bdevs. 2138 */ 2139 raid_bdev_free_base_bdev_resource(base_info); 2140 base_info->remove_scheduled = false; 2141 if (raid_bdev->num_base_bdevs_discovered == 0 && 2142 raid_bdev->state == RAID_BDEV_STATE_OFFLINE) { 2143 /* There is no base bdev for this raid, so free the raid device. */ 2144 raid_bdev_cleanup_and_free(raid_bdev); 2145 } 2146 if (cb_fn != NULL) { 2147 cb_fn(cb_ctx, 0); 2148 } 2149 } else if (raid_bdev->min_base_bdevs_operational == raid_bdev->num_base_bdevs) { 2150 /* This raid bdev does not tolerate removing a base bdev. */ 2151 raid_bdev->num_base_bdevs_operational--; 2152 raid_bdev_deconfigure(raid_bdev, cb_fn, cb_ctx); 2153 } else { 2154 base_info->remove_cb = cb_fn; 2155 base_info->remove_cb_ctx = cb_ctx; 2156 2157 if (raid_bdev->process != NULL) { 2158 ret = raid_bdev_process_base_bdev_remove(raid_bdev->process, base_info); 2159 } else { 2160 ret = raid_bdev_remove_base_bdev_quiesce(base_info); 2161 } 2162 2163 if (ret != 0) { 2164 base_info->remove_scheduled = false; 2165 } 2166 } 2167 2168 return ret; 2169 } 2170 2171 /* 2172 * brief: 2173 * raid_bdev_remove_base_bdev function is called by below layers when base_bdev 2174 * is removed. This function checks if this base bdev is part of any raid bdev 2175 * or not. If yes, it takes necessary action on that particular raid bdev. 2176 * params: 2177 * base_bdev - pointer to base bdev which got removed 2178 * cb_fn - callback function 2179 * cb_arg - argument to callback function 2180 * returns: 2181 * 0 - success 2182 * non zero - failure 2183 */ 2184 int 2185 raid_bdev_remove_base_bdev(struct spdk_bdev *base_bdev, raid_base_bdev_cb cb_fn, void *cb_ctx) 2186 { 2187 struct raid_base_bdev_info *base_info; 2188 2189 /* Find the raid_bdev which has claimed this base_bdev */ 2190 base_info = raid_bdev_find_base_info_by_bdev(base_bdev); 2191 if (!base_info) { 2192 SPDK_ERRLOG("bdev to remove '%s' not found\n", base_bdev->name); 2193 return -ENODEV; 2194 } 2195 2196 return _raid_bdev_remove_base_bdev(base_info, cb_fn, cb_ctx); 2197 } 2198 2199 static void 2200 raid_bdev_fail_base_remove_cb(void *ctx, int status) 2201 { 2202 struct raid_base_bdev_info *base_info = ctx; 2203 2204 if (status != 0) { 2205 SPDK_WARNLOG("Failed to remove base bdev %s\n", base_info->name); 2206 base_info->is_failed = false; 2207 } 2208 } 2209 2210 static void 2211 _raid_bdev_fail_base_bdev(void *ctx) 2212 { 2213 struct raid_base_bdev_info *base_info = ctx; 2214 int rc; 2215 2216 if (base_info->is_failed) { 2217 return; 2218 } 2219 base_info->is_failed = true; 2220 2221 SPDK_NOTICELOG("Failing base bdev in slot %d ('%s') of raid bdev '%s'\n", 2222 raid_bdev_base_bdev_slot(base_info), base_info->name, base_info->raid_bdev->bdev.name); 2223 2224 rc = _raid_bdev_remove_base_bdev(base_info, raid_bdev_fail_base_remove_cb, base_info); 2225 if (rc != 0) { 2226 raid_bdev_fail_base_remove_cb(base_info, rc); 2227 } 2228 } 2229 2230 void 2231 raid_bdev_fail_base_bdev(struct raid_base_bdev_info *base_info) 2232 { 2233 spdk_thread_exec_msg(spdk_thread_get_app_thread(), _raid_bdev_fail_base_bdev, base_info); 2234 } 2235 2236 static void 2237 raid_bdev_resize_write_sb_cb(int status, struct raid_bdev *raid_bdev, void *ctx) 2238 { 2239 if (status != 0) { 2240 SPDK_ERRLOG("Failed to write raid bdev '%s' superblock after resizing the bdev: %s\n", 2241 raid_bdev->bdev.name, spdk_strerror(-status)); 2242 } 2243 } 2244 2245 /* 2246 * brief: 2247 * raid_bdev_resize_base_bdev function is called by below layers when base_bdev 2248 * is resized. This function checks if the smallest size of the base_bdevs is changed. 2249 * If yes, call module handler to resize the raid_bdev if implemented. 2250 * params: 2251 * base_bdev - pointer to base bdev which got resized. 2252 * returns: 2253 * none 2254 */ 2255 static void 2256 raid_bdev_resize_base_bdev(struct spdk_bdev *base_bdev) 2257 { 2258 struct raid_bdev *raid_bdev; 2259 struct raid_base_bdev_info *base_info; 2260 uint64_t blockcnt_old; 2261 2262 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_resize_base_bdev\n"); 2263 2264 base_info = raid_bdev_find_base_info_by_bdev(base_bdev); 2265 2266 /* Find the raid_bdev which has claimed this base_bdev */ 2267 if (!base_info) { 2268 SPDK_ERRLOG("raid_bdev whose base_bdev '%s' not found\n", base_bdev->name); 2269 return; 2270 } 2271 raid_bdev = base_info->raid_bdev; 2272 2273 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 2274 2275 SPDK_NOTICELOG("base_bdev '%s' was resized: old size %" PRIu64 ", new size %" PRIu64 "\n", 2276 base_bdev->name, base_info->blockcnt, base_bdev->blockcnt); 2277 2278 base_info->blockcnt = base_bdev->blockcnt; 2279 2280 if (!raid_bdev->module->resize) { 2281 return; 2282 } 2283 2284 blockcnt_old = raid_bdev->bdev.blockcnt; 2285 if (raid_bdev->module->resize(raid_bdev) == false) { 2286 return; 2287 } 2288 2289 SPDK_NOTICELOG("raid bdev '%s': block count was changed from %" PRIu64 " to %" PRIu64 "\n", 2290 raid_bdev->bdev.name, blockcnt_old, raid_bdev->bdev.blockcnt); 2291 2292 if (raid_bdev->superblock_enabled) { 2293 struct raid_bdev_superblock *sb = raid_bdev->sb; 2294 uint8_t i; 2295 2296 for (i = 0; i < sb->base_bdevs_size; i++) { 2297 struct raid_bdev_sb_base_bdev *sb_base_bdev = &sb->base_bdevs[i]; 2298 2299 if (sb_base_bdev->state == RAID_SB_BASE_BDEV_CONFIGURED) { 2300 base_info = &raid_bdev->base_bdev_info[sb_base_bdev->slot]; 2301 sb_base_bdev->data_size = base_info->data_size; 2302 } 2303 } 2304 sb->raid_size = raid_bdev->bdev.blockcnt; 2305 raid_bdev_write_superblock(raid_bdev, raid_bdev_resize_write_sb_cb, NULL); 2306 } 2307 } 2308 2309 /* 2310 * brief: 2311 * raid_bdev_event_base_bdev function is called by below layers when base_bdev 2312 * triggers asynchronous event. 2313 * params: 2314 * type - event details. 2315 * bdev - bdev that triggered event. 2316 * event_ctx - context for event. 2317 * returns: 2318 * none 2319 */ 2320 static void 2321 raid_bdev_event_base_bdev(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, 2322 void *event_ctx) 2323 { 2324 int rc; 2325 2326 switch (type) { 2327 case SPDK_BDEV_EVENT_REMOVE: 2328 rc = raid_bdev_remove_base_bdev(bdev, NULL, NULL); 2329 if (rc != 0) { 2330 SPDK_ERRLOG("Failed to remove base bdev %s: %s\n", 2331 spdk_bdev_get_name(bdev), spdk_strerror(-rc)); 2332 } 2333 break; 2334 case SPDK_BDEV_EVENT_RESIZE: 2335 raid_bdev_resize_base_bdev(bdev); 2336 break; 2337 default: 2338 SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type); 2339 break; 2340 } 2341 } 2342 2343 /* 2344 * brief: 2345 * Deletes the specified raid bdev 2346 * params: 2347 * raid_bdev - pointer to raid bdev 2348 * cb_fn - callback function 2349 * cb_arg - argument to callback function 2350 */ 2351 void 2352 raid_bdev_delete(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn, void *cb_arg) 2353 { 2354 struct raid_base_bdev_info *base_info; 2355 2356 SPDK_DEBUGLOG(bdev_raid, "delete raid bdev: %s\n", raid_bdev->bdev.name); 2357 2358 if (raid_bdev->destroy_started) { 2359 SPDK_DEBUGLOG(bdev_raid, "destroying raid bdev %s is already started\n", 2360 raid_bdev->bdev.name); 2361 if (cb_fn) { 2362 cb_fn(cb_arg, -EALREADY); 2363 } 2364 return; 2365 } 2366 2367 raid_bdev->destroy_started = true; 2368 2369 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 2370 base_info->remove_scheduled = true; 2371 2372 if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) { 2373 /* 2374 * As raid bdev is not registered yet or already unregistered, 2375 * so cleanup should be done here itself. 2376 */ 2377 raid_bdev_free_base_bdev_resource(base_info); 2378 } 2379 } 2380 2381 if (raid_bdev->num_base_bdevs_discovered == 0) { 2382 /* There is no base bdev for this raid, so free the raid device. */ 2383 raid_bdev_cleanup_and_free(raid_bdev); 2384 if (cb_fn) { 2385 cb_fn(cb_arg, 0); 2386 } 2387 } else { 2388 raid_bdev_deconfigure(raid_bdev, cb_fn, cb_arg); 2389 } 2390 } 2391 2392 static void 2393 raid_bdev_process_finish_write_sb_cb(int status, struct raid_bdev *raid_bdev, void *ctx) 2394 { 2395 if (status != 0) { 2396 SPDK_ERRLOG("Failed to write raid bdev '%s' superblock after background process finished: %s\n", 2397 raid_bdev->bdev.name, spdk_strerror(-status)); 2398 } 2399 } 2400 2401 static void 2402 raid_bdev_process_finish_write_sb(void *ctx) 2403 { 2404 struct raid_bdev *raid_bdev = ctx; 2405 struct raid_bdev_superblock *sb = raid_bdev->sb; 2406 struct raid_bdev_sb_base_bdev *sb_base_bdev; 2407 struct raid_base_bdev_info *base_info; 2408 uint8_t i; 2409 2410 for (i = 0; i < sb->base_bdevs_size; i++) { 2411 sb_base_bdev = &sb->base_bdevs[i]; 2412 2413 if (sb_base_bdev->state != RAID_SB_BASE_BDEV_CONFIGURED && 2414 sb_base_bdev->slot < raid_bdev->num_base_bdevs) { 2415 base_info = &raid_bdev->base_bdev_info[sb_base_bdev->slot]; 2416 if (base_info->is_configured) { 2417 sb_base_bdev->state = RAID_SB_BASE_BDEV_CONFIGURED; 2418 spdk_uuid_copy(&sb_base_bdev->uuid, &base_info->uuid); 2419 } 2420 } 2421 } 2422 2423 raid_bdev_write_superblock(raid_bdev, raid_bdev_process_finish_write_sb_cb, NULL); 2424 } 2425 2426 static void raid_bdev_process_free(struct raid_bdev_process *process); 2427 2428 static void 2429 _raid_bdev_process_finish_done(void *ctx) 2430 { 2431 struct raid_bdev_process *process = ctx; 2432 struct raid_process_finish_action *finish_action; 2433 2434 while ((finish_action = TAILQ_FIRST(&process->finish_actions)) != NULL) { 2435 TAILQ_REMOVE(&process->finish_actions, finish_action, link); 2436 finish_action->cb(finish_action->cb_ctx); 2437 free(finish_action); 2438 } 2439 2440 raid_bdev_process_free(process); 2441 2442 spdk_thread_exit(spdk_get_thread()); 2443 } 2444 2445 static void 2446 raid_bdev_process_finish_target_removed(void *ctx, int status) 2447 { 2448 struct raid_bdev_process *process = ctx; 2449 2450 if (status != 0) { 2451 SPDK_ERRLOG("Failed to remove target bdev: %s\n", spdk_strerror(-status)); 2452 } 2453 2454 spdk_thread_send_msg(process->thread, _raid_bdev_process_finish_done, process); 2455 } 2456 2457 static void 2458 raid_bdev_process_finish_unquiesced(void *ctx, int status) 2459 { 2460 struct raid_bdev_process *process = ctx; 2461 2462 if (status != 0) { 2463 SPDK_ERRLOG("Failed to unquiesce bdev: %s\n", spdk_strerror(-status)); 2464 } 2465 2466 if (process->status != 0) { 2467 status = _raid_bdev_remove_base_bdev(process->target, raid_bdev_process_finish_target_removed, 2468 process); 2469 if (status != 0) { 2470 raid_bdev_process_finish_target_removed(process, status); 2471 } 2472 return; 2473 } 2474 2475 spdk_thread_send_msg(process->thread, _raid_bdev_process_finish_done, process); 2476 } 2477 2478 static void 2479 raid_bdev_process_finish_unquiesce(void *ctx) 2480 { 2481 struct raid_bdev_process *process = ctx; 2482 int rc; 2483 2484 rc = spdk_bdev_unquiesce(&process->raid_bdev->bdev, &g_raid_if, 2485 raid_bdev_process_finish_unquiesced, process); 2486 if (rc != 0) { 2487 raid_bdev_process_finish_unquiesced(process, rc); 2488 } 2489 } 2490 2491 static void 2492 raid_bdev_process_finish_done(void *ctx) 2493 { 2494 struct raid_bdev_process *process = ctx; 2495 struct raid_bdev *raid_bdev = process->raid_bdev; 2496 2497 if (process->raid_ch != NULL) { 2498 spdk_put_io_channel(spdk_io_channel_from_ctx(process->raid_ch)); 2499 } 2500 2501 process->state = RAID_PROCESS_STATE_STOPPED; 2502 2503 if (process->status == 0) { 2504 SPDK_NOTICELOG("Finished %s on raid bdev %s\n", 2505 raid_bdev_process_to_str(process->type), 2506 raid_bdev->bdev.name); 2507 if (raid_bdev->superblock_enabled) { 2508 spdk_thread_send_msg(spdk_thread_get_app_thread(), 2509 raid_bdev_process_finish_write_sb, 2510 raid_bdev); 2511 } 2512 } else { 2513 SPDK_WARNLOG("Finished %s on raid bdev %s: %s\n", 2514 raid_bdev_process_to_str(process->type), 2515 raid_bdev->bdev.name, 2516 spdk_strerror(-process->status)); 2517 } 2518 2519 spdk_thread_send_msg(spdk_thread_get_app_thread(), raid_bdev_process_finish_unquiesce, 2520 process); 2521 } 2522 2523 static void 2524 __raid_bdev_process_finish(struct spdk_io_channel_iter *i, int status) 2525 { 2526 struct raid_bdev_process *process = spdk_io_channel_iter_get_ctx(i); 2527 2528 spdk_thread_send_msg(process->thread, raid_bdev_process_finish_done, process); 2529 } 2530 2531 static void 2532 raid_bdev_channel_process_finish(struct spdk_io_channel_iter *i) 2533 { 2534 struct raid_bdev_process *process = spdk_io_channel_iter_get_ctx(i); 2535 struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); 2536 struct raid_bdev_io_channel *raid_ch = spdk_io_channel_get_ctx(ch); 2537 2538 if (process->status == 0) { 2539 uint8_t slot = raid_bdev_base_bdev_slot(process->target); 2540 2541 raid_ch->base_channel[slot] = raid_ch->process.target_ch; 2542 raid_ch->process.target_ch = NULL; 2543 } 2544 2545 raid_bdev_ch_process_cleanup(raid_ch); 2546 2547 spdk_for_each_channel_continue(i, 0); 2548 } 2549 2550 static void 2551 raid_bdev_process_finish_quiesced(void *ctx, int status) 2552 { 2553 struct raid_bdev_process *process = ctx; 2554 struct raid_bdev *raid_bdev = process->raid_bdev; 2555 2556 if (status != 0) { 2557 SPDK_ERRLOG("Failed to quiesce bdev: %s\n", spdk_strerror(-status)); 2558 return; 2559 } 2560 2561 raid_bdev->process = NULL; 2562 process->target->is_process_target = false; 2563 2564 spdk_for_each_channel(process->raid_bdev, raid_bdev_channel_process_finish, process, 2565 __raid_bdev_process_finish); 2566 } 2567 2568 static void 2569 _raid_bdev_process_finish(void *ctx) 2570 { 2571 struct raid_bdev_process *process = ctx; 2572 int rc; 2573 2574 rc = spdk_bdev_quiesce(&process->raid_bdev->bdev, &g_raid_if, 2575 raid_bdev_process_finish_quiesced, process); 2576 if (rc != 0) { 2577 raid_bdev_process_finish_quiesced(ctx, rc); 2578 } 2579 } 2580 2581 static void 2582 raid_bdev_process_do_finish(struct raid_bdev_process *process) 2583 { 2584 spdk_thread_send_msg(spdk_thread_get_app_thread(), _raid_bdev_process_finish, process); 2585 } 2586 2587 static void raid_bdev_process_unlock_window_range(struct raid_bdev_process *process); 2588 static void raid_bdev_process_thread_run(struct raid_bdev_process *process); 2589 2590 static void 2591 raid_bdev_process_finish(struct raid_bdev_process *process, int status) 2592 { 2593 assert(spdk_get_thread() == process->thread); 2594 2595 if (process->status == 0) { 2596 process->status = status; 2597 } 2598 2599 if (process->state >= RAID_PROCESS_STATE_STOPPING) { 2600 return; 2601 } 2602 2603 assert(process->state == RAID_PROCESS_STATE_RUNNING); 2604 process->state = RAID_PROCESS_STATE_STOPPING; 2605 2606 if (process->window_range_locked) { 2607 raid_bdev_process_unlock_window_range(process); 2608 } else { 2609 raid_bdev_process_thread_run(process); 2610 } 2611 } 2612 2613 static void 2614 raid_bdev_process_window_range_unlocked(void *ctx, int status) 2615 { 2616 struct raid_bdev_process *process = ctx; 2617 2618 if (status != 0) { 2619 SPDK_ERRLOG("Failed to unlock LBA range: %s\n", spdk_strerror(-status)); 2620 raid_bdev_process_finish(process, status); 2621 return; 2622 } 2623 2624 process->window_range_locked = false; 2625 process->window_offset += process->window_size; 2626 2627 raid_bdev_process_thread_run(process); 2628 } 2629 2630 static void 2631 raid_bdev_process_unlock_window_range(struct raid_bdev_process *process) 2632 { 2633 int rc; 2634 2635 assert(process->window_range_locked == true); 2636 2637 rc = spdk_bdev_unquiesce_range(&process->raid_bdev->bdev, &g_raid_if, 2638 process->window_offset, process->max_window_size, 2639 raid_bdev_process_window_range_unlocked, process); 2640 if (rc != 0) { 2641 raid_bdev_process_window_range_unlocked(process, rc); 2642 } 2643 } 2644 2645 static void 2646 raid_bdev_process_channels_update_done(struct spdk_io_channel_iter *i, int status) 2647 { 2648 struct raid_bdev_process *process = spdk_io_channel_iter_get_ctx(i); 2649 2650 raid_bdev_process_unlock_window_range(process); 2651 } 2652 2653 static void 2654 raid_bdev_process_channel_update(struct spdk_io_channel_iter *i) 2655 { 2656 struct raid_bdev_process *process = spdk_io_channel_iter_get_ctx(i); 2657 struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); 2658 struct raid_bdev_io_channel *raid_ch = spdk_io_channel_get_ctx(ch); 2659 2660 raid_ch->process.offset = process->window_offset + process->window_size; 2661 2662 spdk_for_each_channel_continue(i, 0); 2663 } 2664 2665 void 2666 raid_bdev_process_request_complete(struct raid_bdev_process_request *process_req, int status) 2667 { 2668 struct raid_bdev_process *process = process_req->process; 2669 2670 TAILQ_INSERT_TAIL(&process->requests, process_req, link); 2671 2672 assert(spdk_get_thread() == process->thread); 2673 assert(process->window_remaining >= process_req->num_blocks); 2674 2675 if (status != 0) { 2676 process->window_status = status; 2677 } 2678 2679 process->window_remaining -= process_req->num_blocks; 2680 if (process->window_remaining == 0) { 2681 if (process->window_status != 0) { 2682 raid_bdev_process_finish(process, process->window_status); 2683 return; 2684 } 2685 2686 spdk_for_each_channel(process->raid_bdev, raid_bdev_process_channel_update, process, 2687 raid_bdev_process_channels_update_done); 2688 } 2689 } 2690 2691 static int 2692 raid_bdev_submit_process_request(struct raid_bdev_process *process, uint64_t offset_blocks, 2693 uint32_t num_blocks) 2694 { 2695 struct raid_bdev *raid_bdev = process->raid_bdev; 2696 struct raid_bdev_process_request *process_req; 2697 int ret; 2698 2699 process_req = TAILQ_FIRST(&process->requests); 2700 if (process_req == NULL) { 2701 assert(process->window_remaining > 0); 2702 return 0; 2703 } 2704 2705 process_req->target = process->target; 2706 process_req->target_ch = process->raid_ch->process.target_ch; 2707 process_req->offset_blocks = offset_blocks; 2708 process_req->num_blocks = num_blocks; 2709 process_req->iov.iov_len = num_blocks * raid_bdev->bdev.blocklen; 2710 2711 ret = raid_bdev->module->submit_process_request(process_req, process->raid_ch); 2712 if (ret <= 0) { 2713 if (ret < 0) { 2714 SPDK_ERRLOG("Failed to submit process request on %s: %s\n", 2715 raid_bdev->bdev.name, spdk_strerror(-ret)); 2716 process->window_status = ret; 2717 } 2718 return ret; 2719 } 2720 2721 process_req->num_blocks = ret; 2722 TAILQ_REMOVE(&process->requests, process_req, link); 2723 2724 return ret; 2725 } 2726 2727 static void 2728 _raid_bdev_process_thread_run(struct raid_bdev_process *process) 2729 { 2730 struct raid_bdev *raid_bdev = process->raid_bdev; 2731 uint64_t offset = process->window_offset; 2732 const uint64_t offset_end = spdk_min(offset + process->max_window_size, raid_bdev->bdev.blockcnt); 2733 int ret; 2734 2735 while (offset < offset_end) { 2736 ret = raid_bdev_submit_process_request(process, offset, offset_end - offset); 2737 if (ret <= 0) { 2738 break; 2739 } 2740 2741 process->window_remaining += ret; 2742 offset += ret; 2743 } 2744 2745 if (process->window_remaining > 0) { 2746 process->window_size = process->window_remaining; 2747 } else { 2748 raid_bdev_process_finish(process, process->window_status); 2749 } 2750 } 2751 2752 static void 2753 raid_bdev_process_window_range_locked(void *ctx, int status) 2754 { 2755 struct raid_bdev_process *process = ctx; 2756 2757 if (status != 0) { 2758 SPDK_ERRLOG("Failed to lock LBA range: %s\n", spdk_strerror(-status)); 2759 raid_bdev_process_finish(process, status); 2760 return; 2761 } 2762 2763 process->window_range_locked = true; 2764 2765 if (process->state == RAID_PROCESS_STATE_STOPPING) { 2766 raid_bdev_process_unlock_window_range(process); 2767 return; 2768 } 2769 2770 _raid_bdev_process_thread_run(process); 2771 } 2772 2773 static void 2774 raid_bdev_process_thread_run(struct raid_bdev_process *process) 2775 { 2776 struct raid_bdev *raid_bdev = process->raid_bdev; 2777 int rc; 2778 2779 assert(spdk_get_thread() == process->thread); 2780 assert(process->window_remaining == 0); 2781 assert(process->window_range_locked == false); 2782 2783 if (process->state == RAID_PROCESS_STATE_STOPPING) { 2784 raid_bdev_process_do_finish(process); 2785 return; 2786 } 2787 2788 if (process->window_offset == raid_bdev->bdev.blockcnt) { 2789 SPDK_DEBUGLOG(bdev_raid, "process completed on %s\n", raid_bdev->bdev.name); 2790 raid_bdev_process_finish(process, 0); 2791 return; 2792 } 2793 2794 process->max_window_size = spdk_min(raid_bdev->bdev.blockcnt - process->window_offset, 2795 process->max_window_size); 2796 2797 rc = spdk_bdev_quiesce_range(&raid_bdev->bdev, &g_raid_if, 2798 process->window_offset, process->max_window_size, 2799 raid_bdev_process_window_range_locked, process); 2800 if (rc != 0) { 2801 raid_bdev_process_window_range_locked(process, rc); 2802 } 2803 } 2804 2805 static void 2806 raid_bdev_process_thread_init(void *ctx) 2807 { 2808 struct raid_bdev_process *process = ctx; 2809 struct raid_bdev *raid_bdev = process->raid_bdev; 2810 struct spdk_io_channel *ch; 2811 2812 process->thread = spdk_get_thread(); 2813 2814 ch = spdk_get_io_channel(raid_bdev); 2815 if (ch == NULL) { 2816 process->status = -ENOMEM; 2817 raid_bdev_process_do_finish(process); 2818 return; 2819 } 2820 2821 process->raid_ch = spdk_io_channel_get_ctx(ch); 2822 process->state = RAID_PROCESS_STATE_RUNNING; 2823 2824 SPDK_NOTICELOG("Started %s on raid bdev %s\n", 2825 raid_bdev_process_to_str(process->type), raid_bdev->bdev.name); 2826 2827 raid_bdev_process_thread_run(process); 2828 } 2829 2830 static void 2831 raid_bdev_channels_abort_start_process_done(struct spdk_io_channel_iter *i, int status) 2832 { 2833 struct raid_bdev_process *process = spdk_io_channel_iter_get_ctx(i); 2834 2835 _raid_bdev_remove_base_bdev(process->target, NULL, NULL); 2836 raid_bdev_process_free(process); 2837 2838 /* TODO: update sb */ 2839 } 2840 2841 static void 2842 raid_bdev_channel_abort_start_process(struct spdk_io_channel_iter *i) 2843 { 2844 struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); 2845 struct raid_bdev_io_channel *raid_ch = spdk_io_channel_get_ctx(ch); 2846 2847 raid_bdev_ch_process_cleanup(raid_ch); 2848 2849 spdk_for_each_channel_continue(i, 0); 2850 } 2851 2852 static void 2853 raid_bdev_channels_start_process_done(struct spdk_io_channel_iter *i, int status) 2854 { 2855 struct raid_bdev_process *process = spdk_io_channel_iter_get_ctx(i); 2856 struct raid_bdev *raid_bdev = process->raid_bdev; 2857 struct spdk_thread *thread; 2858 char thread_name[RAID_BDEV_SB_NAME_SIZE + 16]; 2859 2860 if (status == 0 && 2861 (process->target->remove_scheduled || !process->target->is_configured || 2862 raid_bdev->num_base_bdevs_operational <= raid_bdev->min_base_bdevs_operational)) { 2863 /* a base bdev was removed before we got here */ 2864 status = -ENODEV; 2865 } 2866 2867 if (status != 0) { 2868 SPDK_ERRLOG("Failed to start %s on %s: %s\n", 2869 raid_bdev_process_to_str(process->type), raid_bdev->bdev.name, 2870 spdk_strerror(-status)); 2871 goto err; 2872 } 2873 2874 snprintf(thread_name, sizeof(thread_name), "%s_%s", 2875 raid_bdev->bdev.name, raid_bdev_process_to_str(process->type)); 2876 2877 thread = spdk_thread_create(thread_name, NULL); 2878 if (thread == NULL) { 2879 SPDK_ERRLOG("Failed to create %s thread for %s\n", 2880 raid_bdev_process_to_str(process->type), raid_bdev->bdev.name); 2881 goto err; 2882 } 2883 2884 raid_bdev->process = process; 2885 2886 spdk_thread_send_msg(thread, raid_bdev_process_thread_init, process); 2887 2888 return; 2889 err: 2890 spdk_for_each_channel(process->raid_bdev, raid_bdev_channel_abort_start_process, process, 2891 raid_bdev_channels_abort_start_process_done); 2892 } 2893 2894 static void 2895 raid_bdev_channel_start_process(struct spdk_io_channel_iter *i) 2896 { 2897 struct raid_bdev_process *process = spdk_io_channel_iter_get_ctx(i); 2898 struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); 2899 struct raid_bdev_io_channel *raid_ch = spdk_io_channel_get_ctx(ch); 2900 int rc; 2901 2902 rc = raid_bdev_ch_process_setup(raid_ch, process); 2903 2904 spdk_for_each_channel_continue(i, rc); 2905 } 2906 2907 static void 2908 raid_bdev_process_start(struct raid_bdev_process *process) 2909 { 2910 struct raid_bdev *raid_bdev = process->raid_bdev; 2911 2912 assert(raid_bdev->module->submit_process_request != NULL); 2913 2914 spdk_for_each_channel(raid_bdev, raid_bdev_channel_start_process, process, 2915 raid_bdev_channels_start_process_done); 2916 } 2917 2918 static void 2919 raid_bdev_process_request_free(struct raid_bdev_process_request *process_req) 2920 { 2921 spdk_dma_free(process_req->iov.iov_base); 2922 spdk_dma_free(process_req->md_buf); 2923 free(process_req); 2924 } 2925 2926 static struct raid_bdev_process_request * 2927 raid_bdev_process_alloc_request(struct raid_bdev_process *process) 2928 { 2929 struct raid_bdev *raid_bdev = process->raid_bdev; 2930 struct raid_bdev_process_request *process_req; 2931 2932 process_req = calloc(1, sizeof(*process_req)); 2933 if (process_req == NULL) { 2934 return NULL; 2935 } 2936 2937 process_req->process = process; 2938 process_req->iov.iov_len = process->max_window_size * raid_bdev->bdev.blocklen; 2939 process_req->iov.iov_base = spdk_dma_malloc(process_req->iov.iov_len, 4096, 0); 2940 if (process_req->iov.iov_base == NULL) { 2941 free(process_req); 2942 return NULL; 2943 } 2944 if (spdk_bdev_is_md_separate(&raid_bdev->bdev)) { 2945 process_req->md_buf = spdk_dma_malloc(process->max_window_size * raid_bdev->bdev.md_len, 4096, 0); 2946 if (process_req->md_buf == NULL) { 2947 raid_bdev_process_request_free(process_req); 2948 return NULL; 2949 } 2950 } 2951 2952 return process_req; 2953 } 2954 2955 static void 2956 raid_bdev_process_free(struct raid_bdev_process *process) 2957 { 2958 struct raid_bdev_process_request *process_req; 2959 2960 while ((process_req = TAILQ_FIRST(&process->requests)) != NULL) { 2961 TAILQ_REMOVE(&process->requests, process_req, link); 2962 raid_bdev_process_request_free(process_req); 2963 } 2964 2965 free(process); 2966 } 2967 2968 static struct raid_bdev_process * 2969 raid_bdev_process_alloc(struct raid_bdev *raid_bdev, enum raid_process_type type, 2970 struct raid_base_bdev_info *target) 2971 { 2972 struct raid_bdev_process *process; 2973 struct raid_bdev_process_request *process_req; 2974 int i; 2975 2976 process = calloc(1, sizeof(*process)); 2977 if (process == NULL) { 2978 return NULL; 2979 } 2980 2981 process->raid_bdev = raid_bdev; 2982 process->type = type; 2983 process->target = target; 2984 process->max_window_size = spdk_max(spdk_divide_round_up(g_opts.process_window_size_kb * 1024UL, 2985 spdk_bdev_get_data_block_size(&raid_bdev->bdev)), 2986 raid_bdev->bdev.write_unit_size); 2987 TAILQ_INIT(&process->requests); 2988 TAILQ_INIT(&process->finish_actions); 2989 2990 for (i = 0; i < RAID_BDEV_PROCESS_MAX_QD; i++) { 2991 process_req = raid_bdev_process_alloc_request(process); 2992 if (process_req == NULL) { 2993 raid_bdev_process_free(process); 2994 return NULL; 2995 } 2996 2997 TAILQ_INSERT_TAIL(&process->requests, process_req, link); 2998 } 2999 3000 return process; 3001 } 3002 3003 static int 3004 raid_bdev_start_rebuild(struct raid_base_bdev_info *target) 3005 { 3006 struct raid_bdev_process *process; 3007 3008 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 3009 3010 process = raid_bdev_process_alloc(target->raid_bdev, RAID_PROCESS_REBUILD, target); 3011 if (process == NULL) { 3012 return -ENOMEM; 3013 } 3014 3015 raid_bdev_process_start(process); 3016 3017 return 0; 3018 } 3019 3020 static void raid_bdev_configure_base_bdev_cont(struct raid_base_bdev_info *base_info); 3021 3022 static void 3023 _raid_bdev_configure_base_bdev_cont(struct spdk_io_channel_iter *i, int status) 3024 { 3025 struct raid_base_bdev_info *base_info = spdk_io_channel_iter_get_ctx(i); 3026 3027 raid_bdev_configure_base_bdev_cont(base_info); 3028 } 3029 3030 static void 3031 raid_bdev_ch_sync(struct spdk_io_channel_iter *i) 3032 { 3033 spdk_for_each_channel_continue(i, 0); 3034 } 3035 3036 static void 3037 raid_bdev_configure_base_bdev_cont(struct raid_base_bdev_info *base_info) 3038 { 3039 struct raid_bdev *raid_bdev = base_info->raid_bdev; 3040 int rc; 3041 3042 if (raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs_operational && 3043 base_info->is_process_target == false) { 3044 /* TODO: defer if rebuild in progress on another base bdev */ 3045 assert(raid_bdev->process == NULL); 3046 assert(raid_bdev->state == RAID_BDEV_STATE_ONLINE); 3047 base_info->is_process_target = true; 3048 /* To assure is_process_target is set before is_configured when checked in raid_bdev_create_cb() */ 3049 spdk_for_each_channel(raid_bdev, raid_bdev_ch_sync, base_info, _raid_bdev_configure_base_bdev_cont); 3050 return; 3051 } 3052 3053 base_info->is_configured = true; 3054 3055 raid_bdev->num_base_bdevs_discovered++; 3056 assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs); 3057 assert(raid_bdev->num_base_bdevs_operational <= raid_bdev->num_base_bdevs); 3058 assert(raid_bdev->num_base_bdevs_operational >= raid_bdev->min_base_bdevs_operational); 3059 3060 /* 3061 * Configure the raid bdev when the number of discovered base bdevs reaches the number 3062 * of base bdevs we know to be operational members of the array. Usually this is equal 3063 * to the total number of base bdevs (num_base_bdevs) but can be less - when the array is 3064 * degraded. 3065 */ 3066 if (raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs_operational) { 3067 rc = raid_bdev_configure(raid_bdev); 3068 if (rc != 0) { 3069 SPDK_ERRLOG("Failed to configure raid bdev: %s\n", spdk_strerror(-rc)); 3070 } 3071 } else if (base_info->is_process_target) { 3072 raid_bdev->num_base_bdevs_operational++; 3073 rc = raid_bdev_start_rebuild(base_info); 3074 if (rc != 0) { 3075 SPDK_ERRLOG("Failed to start rebuild: %s\n", spdk_strerror(-rc)); 3076 _raid_bdev_remove_base_bdev(base_info, NULL, NULL); 3077 } 3078 } else { 3079 rc = 0; 3080 } 3081 3082 if (base_info->configure_cb != NULL) { 3083 base_info->configure_cb(base_info->configure_cb_ctx, rc); 3084 } 3085 } 3086 3087 static void raid_bdev_examine_sb(const struct raid_bdev_superblock *sb, struct spdk_bdev *bdev, 3088 raid_base_bdev_cb cb_fn, void *cb_ctx); 3089 3090 static void 3091 raid_bdev_configure_base_bdev_check_sb_cb(const struct raid_bdev_superblock *sb, int status, 3092 void *ctx) 3093 { 3094 struct raid_base_bdev_info *base_info = ctx; 3095 3096 switch (status) { 3097 case 0: 3098 /* valid superblock found */ 3099 if (spdk_uuid_compare(&base_info->raid_bdev->bdev.uuid, &sb->uuid) == 0) { 3100 struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(base_info->desc); 3101 3102 raid_bdev_free_base_bdev_resource(base_info); 3103 raid_bdev_examine_sb(sb, bdev, base_info->configure_cb, base_info->configure_cb_ctx); 3104 return; 3105 } 3106 SPDK_ERRLOG("Superblock of a different raid bdev found on bdev %s\n", base_info->name); 3107 status = -EEXIST; 3108 raid_bdev_free_base_bdev_resource(base_info); 3109 break; 3110 case -EINVAL: 3111 /* no valid superblock */ 3112 raid_bdev_configure_base_bdev_cont(base_info); 3113 return; 3114 default: 3115 SPDK_ERRLOG("Failed to examine bdev %s: %s\n", 3116 base_info->name, spdk_strerror(-status)); 3117 break; 3118 } 3119 3120 if (base_info->configure_cb != NULL) { 3121 base_info->configure_cb(base_info->configure_cb_ctx, status); 3122 } 3123 } 3124 3125 static int 3126 raid_bdev_configure_base_bdev(struct raid_base_bdev_info *base_info, bool existing, 3127 raid_base_bdev_cb cb_fn, void *cb_ctx) 3128 { 3129 struct raid_bdev *raid_bdev = base_info->raid_bdev; 3130 struct spdk_bdev_desc *desc; 3131 struct spdk_bdev *bdev; 3132 const struct spdk_uuid *bdev_uuid; 3133 int rc; 3134 3135 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 3136 assert(base_info->desc == NULL); 3137 3138 /* 3139 * Base bdev can be added by name or uuid. Here we assure both properties are set and valid 3140 * before claiming the bdev. 3141 */ 3142 3143 if (!spdk_uuid_is_null(&base_info->uuid)) { 3144 char uuid_str[SPDK_UUID_STRING_LEN]; 3145 const char *bdev_name; 3146 3147 spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &base_info->uuid); 3148 3149 /* UUID of a bdev is registered as its alias */ 3150 bdev = spdk_bdev_get_by_name(uuid_str); 3151 if (bdev == NULL) { 3152 return -ENODEV; 3153 } 3154 3155 bdev_name = spdk_bdev_get_name(bdev); 3156 3157 if (base_info->name == NULL) { 3158 assert(existing == true); 3159 base_info->name = strdup(bdev_name); 3160 if (base_info->name == NULL) { 3161 return -ENOMEM; 3162 } 3163 } else if (strcmp(base_info->name, bdev_name) != 0) { 3164 SPDK_ERRLOG("Name mismatch for base bdev '%s' - expected '%s'\n", 3165 bdev_name, base_info->name); 3166 return -EINVAL; 3167 } 3168 } 3169 3170 assert(base_info->name != NULL); 3171 3172 rc = spdk_bdev_open_ext(base_info->name, true, raid_bdev_event_base_bdev, NULL, &desc); 3173 if (rc != 0) { 3174 if (rc != -ENODEV) { 3175 SPDK_ERRLOG("Unable to create desc on bdev '%s'\n", base_info->name); 3176 } 3177 return rc; 3178 } 3179 3180 bdev = spdk_bdev_desc_get_bdev(desc); 3181 bdev_uuid = spdk_bdev_get_uuid(bdev); 3182 3183 if (spdk_uuid_is_null(&base_info->uuid)) { 3184 spdk_uuid_copy(&base_info->uuid, bdev_uuid); 3185 } else if (spdk_uuid_compare(&base_info->uuid, bdev_uuid) != 0) { 3186 SPDK_ERRLOG("UUID mismatch for base bdev '%s'\n", base_info->name); 3187 spdk_bdev_close(desc); 3188 return -EINVAL; 3189 } 3190 3191 rc = spdk_bdev_module_claim_bdev(bdev, NULL, &g_raid_if); 3192 if (rc != 0) { 3193 SPDK_ERRLOG("Unable to claim this bdev as it is already claimed\n"); 3194 spdk_bdev_close(desc); 3195 return rc; 3196 } 3197 3198 SPDK_DEBUGLOG(bdev_raid, "bdev %s is claimed\n", bdev->name); 3199 3200 base_info->app_thread_ch = spdk_bdev_get_io_channel(desc); 3201 if (base_info->app_thread_ch == NULL) { 3202 SPDK_ERRLOG("Failed to get io channel\n"); 3203 spdk_bdev_module_release_bdev(bdev); 3204 spdk_bdev_close(desc); 3205 return -ENOMEM; 3206 } 3207 3208 base_info->desc = desc; 3209 base_info->blockcnt = bdev->blockcnt; 3210 3211 if (raid_bdev->superblock_enabled) { 3212 uint64_t data_offset; 3213 3214 if (base_info->data_offset == 0) { 3215 assert((RAID_BDEV_MIN_DATA_OFFSET_SIZE % spdk_bdev_get_data_block_size(bdev)) == 0); 3216 data_offset = RAID_BDEV_MIN_DATA_OFFSET_SIZE / spdk_bdev_get_data_block_size(bdev); 3217 } else { 3218 data_offset = base_info->data_offset; 3219 } 3220 3221 if (bdev->optimal_io_boundary != 0) { 3222 data_offset = spdk_divide_round_up(data_offset, 3223 bdev->optimal_io_boundary) * bdev->optimal_io_boundary; 3224 if (base_info->data_offset != 0 && base_info->data_offset != data_offset) { 3225 SPDK_WARNLOG("Data offset %lu on bdev '%s' is different than optimal value %lu\n", 3226 base_info->data_offset, base_info->name, data_offset); 3227 data_offset = base_info->data_offset; 3228 } 3229 } 3230 3231 base_info->data_offset = data_offset; 3232 } 3233 3234 if (base_info->data_offset >= bdev->blockcnt) { 3235 SPDK_ERRLOG("Data offset %lu exceeds base bdev capacity %lu on bdev '%s'\n", 3236 base_info->data_offset, bdev->blockcnt, base_info->name); 3237 rc = -EINVAL; 3238 goto out; 3239 } 3240 3241 if (base_info->data_size == 0) { 3242 base_info->data_size = bdev->blockcnt - base_info->data_offset; 3243 } else if (base_info->data_offset + base_info->data_size > bdev->blockcnt) { 3244 SPDK_ERRLOG("Data offset and size exceeds base bdev capacity %lu on bdev '%s'\n", 3245 bdev->blockcnt, base_info->name); 3246 rc = -EINVAL; 3247 goto out; 3248 } 3249 3250 if (!raid_bdev->module->dif_supported && spdk_bdev_get_dif_type(bdev) != SPDK_DIF_DISABLE) { 3251 SPDK_ERRLOG("Base bdev '%s' has DIF or DIX enabled - unsupported RAID configuration\n", 3252 bdev->name); 3253 rc = -EINVAL; 3254 goto out; 3255 } 3256 3257 /* 3258 * Set the raid bdev properties if this is the first base bdev configured, 3259 * otherwise - verify. Assumption is that all the base bdevs for any raid bdev should 3260 * have the same blocklen and metadata format. 3261 */ 3262 if (raid_bdev->bdev.blocklen == 0) { 3263 raid_bdev->bdev.blocklen = bdev->blocklen; 3264 raid_bdev->bdev.md_len = spdk_bdev_get_md_size(bdev); 3265 raid_bdev->bdev.md_interleave = spdk_bdev_is_md_interleaved(bdev); 3266 raid_bdev->bdev.dif_type = spdk_bdev_get_dif_type(bdev); 3267 raid_bdev->bdev.dif_check_flags = bdev->dif_check_flags; 3268 raid_bdev->bdev.dif_is_head_of_md = spdk_bdev_is_dif_head_of_md(bdev); 3269 } else { 3270 if (raid_bdev->bdev.blocklen != bdev->blocklen) { 3271 SPDK_ERRLOG("Raid bdev '%s' blocklen %u differs from base bdev '%s' blocklen %u\n", 3272 raid_bdev->bdev.name, raid_bdev->bdev.blocklen, bdev->name, bdev->blocklen); 3273 rc = -EINVAL; 3274 goto out; 3275 } 3276 3277 if (raid_bdev->bdev.md_len != spdk_bdev_get_md_size(bdev) || 3278 raid_bdev->bdev.md_interleave != spdk_bdev_is_md_interleaved(bdev) || 3279 raid_bdev->bdev.dif_type != spdk_bdev_get_dif_type(bdev) || 3280 raid_bdev->bdev.dif_check_flags != bdev->dif_check_flags || 3281 raid_bdev->bdev.dif_is_head_of_md != spdk_bdev_is_dif_head_of_md(bdev)) { 3282 SPDK_ERRLOG("Raid bdev '%s' has different metadata format than base bdev '%s'\n", 3283 raid_bdev->bdev.name, bdev->name); 3284 rc = -EINVAL; 3285 goto out; 3286 } 3287 } 3288 3289 base_info->configure_cb = cb_fn; 3290 base_info->configure_cb_ctx = cb_ctx; 3291 3292 if (existing) { 3293 raid_bdev_configure_base_bdev_cont(base_info); 3294 } else { 3295 /* check for existing superblock when using a new bdev */ 3296 rc = raid_bdev_load_base_bdev_superblock(desc, base_info->app_thread_ch, 3297 raid_bdev_configure_base_bdev_check_sb_cb, base_info); 3298 if (rc) { 3299 SPDK_ERRLOG("Failed to read bdev %s superblock: %s\n", 3300 bdev->name, spdk_strerror(-rc)); 3301 } 3302 } 3303 out: 3304 if (rc != 0) { 3305 raid_bdev_free_base_bdev_resource(base_info); 3306 } 3307 return rc; 3308 } 3309 3310 int 3311 raid_bdev_add_base_bdev(struct raid_bdev *raid_bdev, const char *name, 3312 raid_base_bdev_cb cb_fn, void *cb_ctx) 3313 { 3314 struct raid_base_bdev_info *base_info = NULL, *iter; 3315 int rc; 3316 3317 assert(name != NULL); 3318 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 3319 3320 if (raid_bdev->process != NULL) { 3321 SPDK_ERRLOG("raid bdev '%s' is in process\n", 3322 raid_bdev->bdev.name); 3323 return -EPERM; 3324 } 3325 3326 if (raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) { 3327 struct spdk_bdev *bdev = spdk_bdev_get_by_name(name); 3328 3329 if (bdev != NULL) { 3330 RAID_FOR_EACH_BASE_BDEV(raid_bdev, iter) { 3331 if (iter->name == NULL && 3332 spdk_uuid_compare(&bdev->uuid, &iter->uuid) == 0) { 3333 base_info = iter; 3334 break; 3335 } 3336 } 3337 } 3338 } 3339 3340 if (base_info == NULL || raid_bdev->state == RAID_BDEV_STATE_ONLINE) { 3341 RAID_FOR_EACH_BASE_BDEV(raid_bdev, iter) { 3342 if (iter->name == NULL && spdk_uuid_is_null(&iter->uuid)) { 3343 base_info = iter; 3344 break; 3345 } 3346 } 3347 } 3348 3349 if (base_info == NULL) { 3350 SPDK_ERRLOG("no empty slot found in raid bdev '%s' for new base bdev '%s'\n", 3351 raid_bdev->bdev.name, name); 3352 return -EINVAL; 3353 } 3354 3355 assert(base_info->is_configured == false); 3356 3357 if (raid_bdev->state == RAID_BDEV_STATE_ONLINE) { 3358 assert(base_info->data_size != 0); 3359 assert(base_info->desc == NULL); 3360 } 3361 3362 base_info->name = strdup(name); 3363 if (base_info->name == NULL) { 3364 return -ENOMEM; 3365 } 3366 3367 rc = raid_bdev_configure_base_bdev(base_info, false, cb_fn, cb_ctx); 3368 if (rc != 0 && (rc != -ENODEV || raid_bdev->state != RAID_BDEV_STATE_CONFIGURING)) { 3369 SPDK_ERRLOG("base bdev '%s' configure failed: %s\n", name, spdk_strerror(-rc)); 3370 free(base_info->name); 3371 base_info->name = NULL; 3372 } 3373 3374 return rc; 3375 } 3376 3377 static int 3378 raid_bdev_create_from_sb(const struct raid_bdev_superblock *sb, struct raid_bdev **raid_bdev_out) 3379 { 3380 struct raid_bdev *raid_bdev; 3381 uint8_t i; 3382 int rc; 3383 3384 rc = _raid_bdev_create(sb->name, (sb->strip_size * sb->block_size) / 1024, sb->num_base_bdevs, 3385 sb->level, true, &sb->uuid, &raid_bdev); 3386 if (rc != 0) { 3387 return rc; 3388 } 3389 3390 rc = raid_bdev_alloc_superblock(raid_bdev, sb->block_size); 3391 if (rc != 0) { 3392 raid_bdev_free(raid_bdev); 3393 return rc; 3394 } 3395 3396 assert(sb->length <= RAID_BDEV_SB_MAX_LENGTH); 3397 memcpy(raid_bdev->sb, sb, sb->length); 3398 3399 for (i = 0; i < sb->base_bdevs_size; i++) { 3400 const struct raid_bdev_sb_base_bdev *sb_base_bdev = &sb->base_bdevs[i]; 3401 struct raid_base_bdev_info *base_info = &raid_bdev->base_bdev_info[sb_base_bdev->slot]; 3402 3403 if (sb_base_bdev->state == RAID_SB_BASE_BDEV_CONFIGURED) { 3404 spdk_uuid_copy(&base_info->uuid, &sb_base_bdev->uuid); 3405 raid_bdev->num_base_bdevs_operational++; 3406 } 3407 3408 base_info->data_offset = sb_base_bdev->data_offset; 3409 base_info->data_size = sb_base_bdev->data_size; 3410 } 3411 3412 *raid_bdev_out = raid_bdev; 3413 return 0; 3414 } 3415 3416 static void 3417 raid_bdev_examine_no_sb(struct spdk_bdev *bdev) 3418 { 3419 struct raid_bdev *raid_bdev; 3420 struct raid_base_bdev_info *base_info; 3421 3422 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { 3423 if (raid_bdev->state != RAID_BDEV_STATE_CONFIGURING || raid_bdev->sb != NULL) { 3424 continue; 3425 } 3426 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 3427 if (base_info->desc == NULL && 3428 ((base_info->name != NULL && strcmp(bdev->name, base_info->name) == 0) || 3429 spdk_uuid_compare(&base_info->uuid, &bdev->uuid) == 0)) { 3430 raid_bdev_configure_base_bdev(base_info, true, NULL, NULL); 3431 break; 3432 } 3433 } 3434 } 3435 } 3436 3437 struct raid_bdev_examine_others_ctx { 3438 struct spdk_uuid raid_bdev_uuid; 3439 uint8_t current_base_bdev_idx; 3440 raid_base_bdev_cb cb_fn; 3441 void *cb_ctx; 3442 }; 3443 3444 static void 3445 raid_bdev_examine_others_done(void *_ctx, int status) 3446 { 3447 struct raid_bdev_examine_others_ctx *ctx = _ctx; 3448 3449 if (ctx->cb_fn != NULL) { 3450 ctx->cb_fn(ctx->cb_ctx, status); 3451 } 3452 free(ctx); 3453 } 3454 3455 typedef void (*raid_bdev_examine_load_sb_cb)(struct spdk_bdev *bdev, 3456 const struct raid_bdev_superblock *sb, int status, void *ctx); 3457 static int raid_bdev_examine_load_sb(const char *bdev_name, raid_bdev_examine_load_sb_cb cb, 3458 void *cb_ctx); 3459 static void raid_bdev_examine_sb(const struct raid_bdev_superblock *sb, struct spdk_bdev *bdev, 3460 raid_base_bdev_cb cb_fn, void *cb_ctx); 3461 static void raid_bdev_examine_others(void *_ctx, int status); 3462 3463 static void 3464 raid_bdev_examine_others_load_cb(struct spdk_bdev *bdev, const struct raid_bdev_superblock *sb, 3465 int status, void *_ctx) 3466 { 3467 struct raid_bdev_examine_others_ctx *ctx = _ctx; 3468 3469 if (status != 0) { 3470 raid_bdev_examine_others_done(ctx, status); 3471 return; 3472 } 3473 3474 raid_bdev_examine_sb(sb, bdev, raid_bdev_examine_others, ctx); 3475 } 3476 3477 static void 3478 raid_bdev_examine_others(void *_ctx, int status) 3479 { 3480 struct raid_bdev_examine_others_ctx *ctx = _ctx; 3481 struct raid_bdev *raid_bdev; 3482 struct raid_base_bdev_info *base_info; 3483 char uuid_str[SPDK_UUID_STRING_LEN]; 3484 3485 if (status != 0) { 3486 goto out; 3487 } 3488 3489 raid_bdev = raid_bdev_find_by_uuid(&ctx->raid_bdev_uuid); 3490 if (raid_bdev == NULL) { 3491 status = -ENODEV; 3492 goto out; 3493 } 3494 3495 for (base_info = &raid_bdev->base_bdev_info[ctx->current_base_bdev_idx]; 3496 base_info < &raid_bdev->base_bdev_info[raid_bdev->num_base_bdevs]; 3497 base_info++) { 3498 if (base_info->is_configured || spdk_uuid_is_null(&base_info->uuid)) { 3499 continue; 3500 } 3501 3502 spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &base_info->uuid); 3503 3504 if (spdk_bdev_get_by_name(uuid_str) == NULL) { 3505 continue; 3506 } 3507 3508 ctx->current_base_bdev_idx = raid_bdev_base_bdev_slot(base_info); 3509 3510 status = raid_bdev_examine_load_sb(uuid_str, raid_bdev_examine_others_load_cb, ctx); 3511 if (status != 0) { 3512 continue; 3513 } 3514 return; 3515 } 3516 out: 3517 raid_bdev_examine_others_done(ctx, status); 3518 } 3519 3520 static void 3521 raid_bdev_examine_sb(const struct raid_bdev_superblock *sb, struct spdk_bdev *bdev, 3522 raid_base_bdev_cb cb_fn, void *cb_ctx) 3523 { 3524 const struct raid_bdev_sb_base_bdev *sb_base_bdev = NULL; 3525 struct raid_bdev *raid_bdev; 3526 struct raid_base_bdev_info *iter, *base_info; 3527 uint8_t i; 3528 int rc; 3529 3530 if (sb->block_size != spdk_bdev_get_data_block_size(bdev)) { 3531 SPDK_WARNLOG("Bdev %s block size (%u) does not match the value in superblock (%u)\n", 3532 bdev->name, sb->block_size, spdk_bdev_get_data_block_size(bdev)); 3533 rc = -EINVAL; 3534 goto out; 3535 } 3536 3537 if (spdk_uuid_is_null(&sb->uuid)) { 3538 SPDK_WARNLOG("NULL raid bdev UUID in superblock on bdev %s\n", bdev->name); 3539 rc = -EINVAL; 3540 goto out; 3541 } 3542 3543 raid_bdev = raid_bdev_find_by_uuid(&sb->uuid); 3544 3545 if (raid_bdev) { 3546 if (sb->seq_number > raid_bdev->sb->seq_number) { 3547 SPDK_DEBUGLOG(bdev_raid, 3548 "raid superblock seq_number on bdev %s (%lu) greater than existing raid bdev %s (%lu)\n", 3549 bdev->name, sb->seq_number, raid_bdev->bdev.name, raid_bdev->sb->seq_number); 3550 3551 if (raid_bdev->state != RAID_BDEV_STATE_CONFIGURING) { 3552 SPDK_WARNLOG("Newer version of raid bdev %s superblock found on bdev %s but raid bdev is not in configuring state.\n", 3553 raid_bdev->bdev.name, bdev->name); 3554 rc = -EBUSY; 3555 goto out; 3556 } 3557 3558 /* remove and then recreate the raid bdev using the newer superblock */ 3559 raid_bdev_delete(raid_bdev, NULL, NULL); 3560 raid_bdev = NULL; 3561 } else if (sb->seq_number < raid_bdev->sb->seq_number) { 3562 SPDK_DEBUGLOG(bdev_raid, 3563 "raid superblock seq_number on bdev %s (%lu) smaller than existing raid bdev %s (%lu)\n", 3564 bdev->name, sb->seq_number, raid_bdev->bdev.name, raid_bdev->sb->seq_number); 3565 /* use the current raid bdev superblock */ 3566 sb = raid_bdev->sb; 3567 } 3568 } 3569 3570 for (i = 0; i < sb->base_bdevs_size; i++) { 3571 sb_base_bdev = &sb->base_bdevs[i]; 3572 3573 assert(spdk_uuid_is_null(&sb_base_bdev->uuid) == false); 3574 3575 if (spdk_uuid_compare(&sb_base_bdev->uuid, spdk_bdev_get_uuid(bdev)) == 0) { 3576 break; 3577 } 3578 } 3579 3580 if (i == sb->base_bdevs_size) { 3581 SPDK_DEBUGLOG(bdev_raid, "raid superblock does not contain this bdev's uuid\n"); 3582 rc = -EINVAL; 3583 goto out; 3584 } 3585 3586 if (!raid_bdev) { 3587 struct raid_bdev_examine_others_ctx *ctx; 3588 3589 ctx = calloc(1, sizeof(*ctx)); 3590 if (ctx == NULL) { 3591 rc = -ENOMEM; 3592 goto out; 3593 } 3594 3595 rc = raid_bdev_create_from_sb(sb, &raid_bdev); 3596 if (rc != 0) { 3597 SPDK_ERRLOG("Failed to create raid bdev %s: %s\n", 3598 sb->name, spdk_strerror(-rc)); 3599 free(ctx); 3600 goto out; 3601 } 3602 3603 /* after this base bdev is configured, examine other base bdevs that may be present */ 3604 spdk_uuid_copy(&ctx->raid_bdev_uuid, &sb->uuid); 3605 ctx->cb_fn = cb_fn; 3606 ctx->cb_ctx = cb_ctx; 3607 3608 cb_fn = raid_bdev_examine_others; 3609 cb_ctx = ctx; 3610 } 3611 3612 if (raid_bdev->state == RAID_BDEV_STATE_ONLINE) { 3613 assert(sb_base_bdev->slot < raid_bdev->num_base_bdevs); 3614 base_info = &raid_bdev->base_bdev_info[sb_base_bdev->slot]; 3615 assert(base_info->is_configured == false); 3616 assert(sb_base_bdev->state == RAID_SB_BASE_BDEV_MISSING || 3617 sb_base_bdev->state == RAID_SB_BASE_BDEV_FAILED); 3618 assert(spdk_uuid_is_null(&base_info->uuid)); 3619 spdk_uuid_copy(&base_info->uuid, &sb_base_bdev->uuid); 3620 SPDK_NOTICELOG("Re-adding bdev %s to raid bdev %s.\n", bdev->name, raid_bdev->bdev.name); 3621 rc = raid_bdev_configure_base_bdev(base_info, true, cb_fn, cb_ctx); 3622 if (rc != 0) { 3623 SPDK_ERRLOG("Failed to configure bdev %s as base bdev of raid %s: %s\n", 3624 bdev->name, raid_bdev->bdev.name, spdk_strerror(-rc)); 3625 } 3626 goto out; 3627 } 3628 3629 if (sb_base_bdev->state != RAID_SB_BASE_BDEV_CONFIGURED) { 3630 SPDK_NOTICELOG("Bdev %s is not an active member of raid bdev %s. Ignoring.\n", 3631 bdev->name, raid_bdev->bdev.name); 3632 rc = -EINVAL; 3633 goto out; 3634 } 3635 3636 base_info = NULL; 3637 RAID_FOR_EACH_BASE_BDEV(raid_bdev, iter) { 3638 if (spdk_uuid_compare(&iter->uuid, spdk_bdev_get_uuid(bdev)) == 0) { 3639 base_info = iter; 3640 break; 3641 } 3642 } 3643 3644 if (base_info == NULL) { 3645 SPDK_ERRLOG("Bdev %s is not a member of raid bdev %s\n", 3646 bdev->name, raid_bdev->bdev.name); 3647 rc = -EINVAL; 3648 goto out; 3649 } 3650 3651 rc = raid_bdev_configure_base_bdev(base_info, true, cb_fn, cb_ctx); 3652 if (rc != 0) { 3653 SPDK_ERRLOG("Failed to configure bdev %s as base bdev of raid %s: %s\n", 3654 bdev->name, raid_bdev->bdev.name, spdk_strerror(-rc)); 3655 } 3656 out: 3657 if (rc != 0 && cb_fn != 0) { 3658 cb_fn(cb_ctx, rc); 3659 } 3660 } 3661 3662 struct raid_bdev_examine_ctx { 3663 struct spdk_bdev_desc *desc; 3664 struct spdk_io_channel *ch; 3665 raid_bdev_examine_load_sb_cb cb; 3666 void *cb_ctx; 3667 }; 3668 3669 static void 3670 raid_bdev_examine_ctx_free(struct raid_bdev_examine_ctx *ctx) 3671 { 3672 if (!ctx) { 3673 return; 3674 } 3675 3676 if (ctx->ch) { 3677 spdk_put_io_channel(ctx->ch); 3678 } 3679 3680 if (ctx->desc) { 3681 spdk_bdev_close(ctx->desc); 3682 } 3683 3684 free(ctx); 3685 } 3686 3687 static void 3688 raid_bdev_examine_load_sb_done(const struct raid_bdev_superblock *sb, int status, void *_ctx) 3689 { 3690 struct raid_bdev_examine_ctx *ctx = _ctx; 3691 struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(ctx->desc); 3692 3693 ctx->cb(bdev, sb, status, ctx->cb_ctx); 3694 3695 raid_bdev_examine_ctx_free(ctx); 3696 } 3697 3698 static void 3699 raid_bdev_examine_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx) 3700 { 3701 } 3702 3703 static int 3704 raid_bdev_examine_load_sb(const char *bdev_name, raid_bdev_examine_load_sb_cb cb, void *cb_ctx) 3705 { 3706 struct raid_bdev_examine_ctx *ctx; 3707 int rc; 3708 3709 assert(cb != NULL); 3710 3711 ctx = calloc(1, sizeof(*ctx)); 3712 if (!ctx) { 3713 return -ENOMEM; 3714 } 3715 3716 rc = spdk_bdev_open_ext(bdev_name, false, raid_bdev_examine_event_cb, NULL, &ctx->desc); 3717 if (rc) { 3718 SPDK_ERRLOG("Failed to open bdev %s: %s\n", bdev_name, spdk_strerror(-rc)); 3719 goto err; 3720 } 3721 3722 ctx->ch = spdk_bdev_get_io_channel(ctx->desc); 3723 if (!ctx->ch) { 3724 SPDK_ERRLOG("Failed to get io channel for bdev %s\n", bdev_name); 3725 rc = -ENOMEM; 3726 goto err; 3727 } 3728 3729 ctx->cb = cb; 3730 ctx->cb_ctx = cb_ctx; 3731 3732 rc = raid_bdev_load_base_bdev_superblock(ctx->desc, ctx->ch, raid_bdev_examine_load_sb_done, ctx); 3733 if (rc) { 3734 SPDK_ERRLOG("Failed to read bdev %s superblock: %s\n", 3735 bdev_name, spdk_strerror(-rc)); 3736 goto err; 3737 } 3738 3739 return 0; 3740 err: 3741 raid_bdev_examine_ctx_free(ctx); 3742 return rc; 3743 } 3744 3745 static void 3746 raid_bdev_examine_cont(struct spdk_bdev *bdev, const struct raid_bdev_superblock *sb, int status, 3747 void *ctx) 3748 { 3749 switch (status) { 3750 case 0: 3751 /* valid superblock found */ 3752 SPDK_DEBUGLOG(bdev_raid, "raid superblock found on bdev %s\n", bdev->name); 3753 raid_bdev_examine_sb(sb, bdev, NULL, NULL); 3754 break; 3755 case -EINVAL: 3756 /* no valid superblock, check if it can be claimed anyway */ 3757 raid_bdev_examine_no_sb(bdev); 3758 break; 3759 default: 3760 SPDK_ERRLOG("Failed to examine bdev %s: %s\n", 3761 bdev->name, spdk_strerror(-status)); 3762 break; 3763 } 3764 3765 spdk_bdev_module_examine_done(&g_raid_if); 3766 } 3767 3768 /* 3769 * brief: 3770 * raid_bdev_examine function is the examine function call by the below layers 3771 * like bdev_nvme layer. This function will check if this base bdev can be 3772 * claimed by this raid bdev or not. 3773 * params: 3774 * bdev - pointer to base bdev 3775 * returns: 3776 * none 3777 */ 3778 static void 3779 raid_bdev_examine(struct spdk_bdev *bdev) 3780 { 3781 int rc; 3782 3783 if (raid_bdev_find_base_info_by_bdev(bdev) != NULL) { 3784 goto done; 3785 } 3786 3787 if (spdk_bdev_get_dif_type(bdev) != SPDK_DIF_DISABLE) { 3788 raid_bdev_examine_no_sb(bdev); 3789 goto done; 3790 } 3791 3792 rc = raid_bdev_examine_load_sb(bdev->name, raid_bdev_examine_cont, NULL); 3793 if (rc != 0) { 3794 SPDK_ERRLOG("Failed to examine bdev %s: %s\n", 3795 bdev->name, spdk_strerror(-rc)); 3796 goto done; 3797 } 3798 3799 return; 3800 done: 3801 spdk_bdev_module_examine_done(&g_raid_if); 3802 } 3803 3804 /* Log component for bdev raid bdev module */ 3805 SPDK_LOG_REGISTER_COMPONENT(bdev_raid) 3806