1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2018 Intel Corporation. 3 * All rights reserved. 4 * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 5 */ 6 7 #include "bdev_raid.h" 8 #include "spdk/env.h" 9 #include "spdk/thread.h" 10 #include "spdk/log.h" 11 #include "spdk/string.h" 12 #include "spdk/util.h" 13 #include "spdk/json.h" 14 #include "spdk/likely.h" 15 #include "spdk/trace.h" 16 #include "spdk_internal/trace_defs.h" 17 18 #define RAID_OFFSET_BLOCKS_INVALID UINT64_MAX 19 #define RAID_BDEV_PROCESS_MAX_QD 16 20 21 #define RAID_BDEV_PROCESS_WINDOW_SIZE_KB_DEFAULT 1024 22 #define RAID_BDEV_PROCESS_MAX_BANDWIDTH_MB_SEC_DEFAULT 0 23 24 static bool g_shutdown_started = false; 25 26 /* List of all raid bdevs */ 27 struct raid_all_tailq g_raid_bdev_list = TAILQ_HEAD_INITIALIZER(g_raid_bdev_list); 28 29 static TAILQ_HEAD(, raid_bdev_module) g_raid_modules = TAILQ_HEAD_INITIALIZER(g_raid_modules); 30 31 /* 32 * raid_bdev_io_channel is the context of spdk_io_channel for raid bdev device. It 33 * contains the relationship of raid bdev io channel with base bdev io channels. 
34 */ 35 struct raid_bdev_io_channel { 36 /* Array of IO channels of base bdevs */ 37 struct spdk_io_channel **base_channel; 38 39 /* Private raid module IO channel */ 40 struct spdk_io_channel *module_channel; 41 42 /* Background process data */ 43 struct { 44 uint64_t offset; 45 struct spdk_io_channel *target_ch; 46 struct raid_bdev_io_channel *ch_processed; 47 } process; 48 }; 49 50 enum raid_bdev_process_state { 51 RAID_PROCESS_STATE_INIT, 52 RAID_PROCESS_STATE_RUNNING, 53 RAID_PROCESS_STATE_STOPPING, 54 RAID_PROCESS_STATE_STOPPED, 55 }; 56 57 struct raid_process_qos { 58 bool enable_qos; 59 uint64_t last_tsc; 60 double bytes_per_tsc; 61 double bytes_available; 62 double bytes_max; 63 struct spdk_poller *process_continue_poller; 64 }; 65 66 struct raid_bdev_process { 67 struct raid_bdev *raid_bdev; 68 enum raid_process_type type; 69 enum raid_bdev_process_state state; 70 struct spdk_thread *thread; 71 struct raid_bdev_io_channel *raid_ch; 72 TAILQ_HEAD(, raid_bdev_process_request) requests; 73 uint64_t max_window_size; 74 uint64_t window_size; 75 uint64_t window_remaining; 76 int window_status; 77 uint64_t window_offset; 78 bool window_range_locked; 79 struct raid_base_bdev_info *target; 80 int status; 81 TAILQ_HEAD(, raid_process_finish_action) finish_actions; 82 struct raid_process_qos qos; 83 }; 84 85 struct raid_process_finish_action { 86 spdk_msg_fn cb; 87 void *cb_ctx; 88 TAILQ_ENTRY(raid_process_finish_action) link; 89 }; 90 91 static struct spdk_raid_bdev_opts g_opts = { 92 .process_window_size_kb = RAID_BDEV_PROCESS_WINDOW_SIZE_KB_DEFAULT, 93 .process_max_bandwidth_mb_sec = RAID_BDEV_PROCESS_MAX_BANDWIDTH_MB_SEC_DEFAULT, 94 }; 95 96 void 97 raid_bdev_get_opts(struct spdk_raid_bdev_opts *opts) 98 { 99 *opts = g_opts; 100 } 101 102 int 103 raid_bdev_set_opts(const struct spdk_raid_bdev_opts *opts) 104 { 105 if (opts->process_window_size_kb == 0) { 106 return -EINVAL; 107 } 108 109 g_opts = *opts; 110 111 return 0; 112 } 113 114 static struct 
raid_bdev_module * 115 raid_bdev_module_find(enum raid_level level) 116 { 117 struct raid_bdev_module *raid_module; 118 119 TAILQ_FOREACH(raid_module, &g_raid_modules, link) { 120 if (raid_module->level == level) { 121 return raid_module; 122 } 123 } 124 125 return NULL; 126 } 127 128 void 129 raid_bdev_module_list_add(struct raid_bdev_module *raid_module) 130 { 131 if (raid_bdev_module_find(raid_module->level) != NULL) { 132 SPDK_ERRLOG("module for raid level '%s' already registered.\n", 133 raid_bdev_level_to_str(raid_module->level)); 134 assert(false); 135 } else { 136 TAILQ_INSERT_TAIL(&g_raid_modules, raid_module, link); 137 } 138 } 139 140 struct spdk_io_channel * 141 raid_bdev_channel_get_base_channel(struct raid_bdev_io_channel *raid_ch, uint8_t idx) 142 { 143 return raid_ch->base_channel[idx]; 144 } 145 146 void * 147 raid_bdev_channel_get_module_ctx(struct raid_bdev_io_channel *raid_ch) 148 { 149 assert(raid_ch->module_channel != NULL); 150 151 return spdk_io_channel_get_ctx(raid_ch->module_channel); 152 } 153 154 struct raid_base_bdev_info * 155 raid_bdev_channel_get_base_info(struct raid_bdev_io_channel *raid_ch, struct spdk_bdev *base_bdev) 156 { 157 struct spdk_io_channel *ch = spdk_io_channel_from_ctx(raid_ch); 158 struct raid_bdev *raid_bdev = spdk_io_channel_get_io_device(ch); 159 uint8_t i; 160 161 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 162 struct raid_base_bdev_info *base_info = &raid_bdev->base_bdev_info[i]; 163 164 if (base_info->is_configured && 165 spdk_bdev_desc_get_bdev(base_info->desc) == base_bdev) { 166 return base_info; 167 } 168 } 169 170 return NULL; 171 } 172 173 /* Function declarations */ 174 static void raid_bdev_examine(struct spdk_bdev *bdev); 175 static int raid_bdev_init(void); 176 static void raid_bdev_deconfigure(struct raid_bdev *raid_bdev, 177 raid_bdev_destruct_cb cb_fn, void *cb_arg); 178 179 static void 180 raid_bdev_ch_process_cleanup(struct raid_bdev_io_channel *raid_ch) 181 { 182 raid_ch->process.offset = 
RAID_OFFSET_BLOCKS_INVALID; 183 184 if (raid_ch->process.target_ch != NULL) { 185 spdk_put_io_channel(raid_ch->process.target_ch); 186 raid_ch->process.target_ch = NULL; 187 } 188 189 if (raid_ch->process.ch_processed != NULL) { 190 free(raid_ch->process.ch_processed->base_channel); 191 free(raid_ch->process.ch_processed); 192 raid_ch->process.ch_processed = NULL; 193 } 194 } 195 196 static int 197 raid_bdev_ch_process_setup(struct raid_bdev_io_channel *raid_ch, struct raid_bdev_process *process) 198 { 199 struct raid_bdev *raid_bdev = process->raid_bdev; 200 struct raid_bdev_io_channel *raid_ch_processed; 201 struct raid_base_bdev_info *base_info; 202 203 raid_ch->process.offset = process->window_offset; 204 205 /* In the future we may have other types of processes which don't use a target bdev, 206 * like data scrubbing or strip size migration. Until then, expect that there always is 207 * a process target. */ 208 assert(process->target != NULL); 209 210 raid_ch->process.target_ch = spdk_bdev_get_io_channel(process->target->desc); 211 if (raid_ch->process.target_ch == NULL) { 212 goto err; 213 } 214 215 raid_ch_processed = calloc(1, sizeof(*raid_ch_processed)); 216 if (raid_ch_processed == NULL) { 217 goto err; 218 } 219 raid_ch->process.ch_processed = raid_ch_processed; 220 221 raid_ch_processed->base_channel = calloc(raid_bdev->num_base_bdevs, 222 sizeof(*raid_ch_processed->base_channel)); 223 if (raid_ch_processed->base_channel == NULL) { 224 goto err; 225 } 226 227 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 228 uint8_t slot = raid_bdev_base_bdev_slot(base_info); 229 230 if (base_info != process->target) { 231 raid_ch_processed->base_channel[slot] = raid_ch->base_channel[slot]; 232 } else { 233 raid_ch_processed->base_channel[slot] = raid_ch->process.target_ch; 234 } 235 } 236 237 raid_ch_processed->module_channel = raid_ch->module_channel; 238 raid_ch_processed->process.offset = RAID_OFFSET_BLOCKS_INVALID; 239 240 return 0; 241 err: 242 
raid_bdev_ch_process_cleanup(raid_ch); 243 return -ENOMEM; 244 } 245 246 /* 247 * brief: 248 * raid_bdev_create_cb function is a cb function for raid bdev which creates the 249 * hierarchy from raid bdev to base bdev io channels. It will be called per core 250 * params: 251 * io_device - pointer to raid bdev io device represented by raid_bdev 252 * ctx_buf - pointer to context buffer for raid bdev io channel 253 * returns: 254 * 0 - success 255 * non zero - failure 256 */ 257 static int 258 raid_bdev_create_cb(void *io_device, void *ctx_buf) 259 { 260 struct raid_bdev *raid_bdev = io_device; 261 struct raid_bdev_io_channel *raid_ch = ctx_buf; 262 uint8_t i; 263 int ret = -ENOMEM; 264 265 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_create_cb, %p\n", raid_ch); 266 267 assert(raid_bdev != NULL); 268 assert(raid_bdev->state == RAID_BDEV_STATE_ONLINE); 269 270 raid_ch->base_channel = calloc(raid_bdev->num_base_bdevs, sizeof(struct spdk_io_channel *)); 271 if (!raid_ch->base_channel) { 272 SPDK_ERRLOG("Unable to allocate base bdevs io channel\n"); 273 return -ENOMEM; 274 } 275 276 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 277 /* 278 * Get the spdk_io_channel for all the base bdevs. This is used during 279 * split logic to send the respective child bdev ios to respective base 280 * bdev io channel. 281 * Skip missing base bdevs and the process target, which should also be treated as 282 * missing until the process completes. 
283 */ 284 if (raid_bdev->base_bdev_info[i].is_configured == false || 285 raid_bdev->base_bdev_info[i].is_process_target == true) { 286 continue; 287 } 288 raid_ch->base_channel[i] = spdk_bdev_get_io_channel( 289 raid_bdev->base_bdev_info[i].desc); 290 if (!raid_ch->base_channel[i]) { 291 SPDK_ERRLOG("Unable to create io channel for base bdev\n"); 292 goto err; 293 } 294 } 295 296 if (raid_bdev->module->get_io_channel) { 297 raid_ch->module_channel = raid_bdev->module->get_io_channel(raid_bdev); 298 if (!raid_ch->module_channel) { 299 SPDK_ERRLOG("Unable to create io channel for raid module\n"); 300 goto err; 301 } 302 } 303 304 if (raid_bdev->process != NULL) { 305 ret = raid_bdev_ch_process_setup(raid_ch, raid_bdev->process); 306 if (ret != 0) { 307 SPDK_ERRLOG("Failed to setup process io channel\n"); 308 goto err; 309 } 310 } else { 311 raid_ch->process.offset = RAID_OFFSET_BLOCKS_INVALID; 312 } 313 314 return 0; 315 err: 316 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 317 if (raid_ch->base_channel[i] != NULL) { 318 spdk_put_io_channel(raid_ch->base_channel[i]); 319 } 320 } 321 free(raid_ch->base_channel); 322 323 raid_bdev_ch_process_cleanup(raid_ch); 324 325 return ret; 326 } 327 328 /* 329 * brief: 330 * raid_bdev_destroy_cb function is a cb function for raid bdev which deletes the 331 * hierarchy from raid bdev to base bdev io channels. 
It will be called per core 332 * params: 333 * io_device - pointer to raid bdev io device represented by raid_bdev 334 * ctx_buf - pointer to context buffer for raid bdev io channel 335 * returns: 336 * none 337 */ 338 static void 339 raid_bdev_destroy_cb(void *io_device, void *ctx_buf) 340 { 341 struct raid_bdev *raid_bdev = io_device; 342 struct raid_bdev_io_channel *raid_ch = ctx_buf; 343 uint8_t i; 344 345 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_destroy_cb\n"); 346 347 assert(raid_ch != NULL); 348 assert(raid_ch->base_channel); 349 350 if (raid_ch->module_channel) { 351 spdk_put_io_channel(raid_ch->module_channel); 352 } 353 354 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 355 /* Free base bdev channels */ 356 if (raid_ch->base_channel[i] != NULL) { 357 spdk_put_io_channel(raid_ch->base_channel[i]); 358 } 359 } 360 free(raid_ch->base_channel); 361 raid_ch->base_channel = NULL; 362 363 raid_bdev_ch_process_cleanup(raid_ch); 364 } 365 366 /* 367 * brief: 368 * raid_bdev_cleanup is used to cleanup raid_bdev related data 369 * structures. 
370 * params: 371 * raid_bdev - pointer to raid_bdev 372 * returns: 373 * none 374 */ 375 static void 376 raid_bdev_cleanup(struct raid_bdev *raid_bdev) 377 { 378 struct raid_base_bdev_info *base_info; 379 380 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_cleanup, %p name %s, state %s\n", 381 raid_bdev, raid_bdev->bdev.name, raid_bdev_state_to_str(raid_bdev->state)); 382 assert(raid_bdev->state != RAID_BDEV_STATE_ONLINE); 383 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 384 385 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 386 assert(base_info->desc == NULL); 387 free(base_info->name); 388 } 389 390 TAILQ_REMOVE(&g_raid_bdev_list, raid_bdev, global_link); 391 } 392 393 static void 394 raid_bdev_free(struct raid_bdev *raid_bdev) 395 { 396 raid_bdev_free_superblock(raid_bdev); 397 free(raid_bdev->base_bdev_info); 398 free(raid_bdev->bdev.name); 399 free(raid_bdev); 400 } 401 402 static void 403 raid_bdev_cleanup_and_free(struct raid_bdev *raid_bdev) 404 { 405 raid_bdev_cleanup(raid_bdev); 406 raid_bdev_free(raid_bdev); 407 } 408 409 static void 410 raid_bdev_deconfigure_base_bdev(struct raid_base_bdev_info *base_info) 411 { 412 struct raid_bdev *raid_bdev = base_info->raid_bdev; 413 414 assert(base_info->is_configured); 415 assert(raid_bdev->num_base_bdevs_discovered); 416 raid_bdev->num_base_bdevs_discovered--; 417 base_info->is_configured = false; 418 base_info->is_process_target = false; 419 } 420 421 /* 422 * brief: 423 * free resource of base bdev for raid bdev 424 * params: 425 * base_info - raid base bdev info 426 * returns: 427 * none 428 */ 429 static void 430 raid_bdev_free_base_bdev_resource(struct raid_base_bdev_info *base_info) 431 { 432 struct raid_bdev *raid_bdev = base_info->raid_bdev; 433 434 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 435 assert(base_info->configure_cb == NULL); 436 437 free(base_info->name); 438 base_info->name = NULL; 439 if (raid_bdev->state != RAID_BDEV_STATE_CONFIGURING) { 440 
spdk_uuid_set_null(&base_info->uuid); 441 } 442 base_info->is_failed = false; 443 444 /* clear `data_offset` to allow it to be recalculated during configuration */ 445 base_info->data_offset = 0; 446 447 if (base_info->desc == NULL) { 448 return; 449 } 450 451 spdk_bdev_module_release_bdev(spdk_bdev_desc_get_bdev(base_info->desc)); 452 spdk_bdev_close(base_info->desc); 453 base_info->desc = NULL; 454 spdk_put_io_channel(base_info->app_thread_ch); 455 base_info->app_thread_ch = NULL; 456 457 if (base_info->is_configured) { 458 raid_bdev_deconfigure_base_bdev(base_info); 459 } 460 } 461 462 static void 463 raid_bdev_io_device_unregister_cb(void *io_device) 464 { 465 struct raid_bdev *raid_bdev = io_device; 466 467 if (raid_bdev->num_base_bdevs_discovered == 0) { 468 /* Free raid_bdev when there are no base bdevs left */ 469 SPDK_DEBUGLOG(bdev_raid, "raid bdev base bdevs is 0, going to free all in destruct\n"); 470 raid_bdev_cleanup(raid_bdev); 471 spdk_bdev_destruct_done(&raid_bdev->bdev, 0); 472 raid_bdev_free(raid_bdev); 473 } else { 474 spdk_bdev_destruct_done(&raid_bdev->bdev, 0); 475 } 476 } 477 478 void 479 raid_bdev_module_stop_done(struct raid_bdev *raid_bdev) 480 { 481 if (raid_bdev->state != RAID_BDEV_STATE_CONFIGURING) { 482 spdk_io_device_unregister(raid_bdev, raid_bdev_io_device_unregister_cb); 483 } 484 } 485 486 static void 487 _raid_bdev_destruct(void *ctxt) 488 { 489 struct raid_bdev *raid_bdev = ctxt; 490 struct raid_base_bdev_info *base_info; 491 492 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_destruct\n"); 493 494 assert(raid_bdev->process == NULL); 495 496 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 497 /* 498 * Close all base bdev descriptors for which call has come from below 499 * layers. Also close the descriptors if we have started shutdown. 
500 */ 501 if (g_shutdown_started || base_info->remove_scheduled == true) { 502 raid_bdev_free_base_bdev_resource(base_info); 503 } 504 } 505 506 if (g_shutdown_started) { 507 raid_bdev->state = RAID_BDEV_STATE_OFFLINE; 508 } 509 510 if (raid_bdev->module->stop != NULL) { 511 if (raid_bdev->module->stop(raid_bdev) == false) { 512 return; 513 } 514 } 515 516 raid_bdev_module_stop_done(raid_bdev); 517 } 518 519 static int 520 raid_bdev_destruct(void *ctx) 521 { 522 spdk_thread_exec_msg(spdk_thread_get_app_thread(), _raid_bdev_destruct, ctx); 523 524 return 1; 525 } 526 527 int 528 raid_bdev_remap_dix_reftag(void *md_buf, uint64_t num_blocks, 529 struct spdk_bdev *bdev, uint32_t remapped_offset) 530 { 531 struct spdk_dif_ctx dif_ctx; 532 struct spdk_dif_error err_blk = {}; 533 int rc; 534 struct spdk_dif_ctx_init_ext_opts dif_opts; 535 struct iovec md_iov = { 536 .iov_base = md_buf, 537 .iov_len = num_blocks * bdev->md_len, 538 }; 539 540 if (md_buf == NULL) { 541 return 0; 542 } 543 544 dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format); 545 dif_opts.dif_pi_format = bdev->dif_pi_format; 546 rc = spdk_dif_ctx_init(&dif_ctx, 547 bdev->blocklen, bdev->md_len, bdev->md_interleave, 548 bdev->dif_is_head_of_md, bdev->dif_type, 549 SPDK_DIF_FLAGS_REFTAG_CHECK, 550 0, 0, 0, 0, 0, &dif_opts); 551 if (rc != 0) { 552 SPDK_ERRLOG("Initialization of DIF context failed\n"); 553 return rc; 554 } 555 556 spdk_dif_ctx_set_remapped_init_ref_tag(&dif_ctx, remapped_offset); 557 558 rc = spdk_dix_remap_ref_tag(&md_iov, num_blocks, &dif_ctx, &err_blk, false); 559 if (rc != 0) { 560 SPDK_ERRLOG("Remapping reference tag failed. 
type=%d, offset=%d" 561 PRIu32 "\n", err_blk.err_type, err_blk.err_offset); 562 } 563 564 return rc; 565 } 566 567 int 568 raid_bdev_verify_dix_reftag(struct iovec *iovs, int iovcnt, void *md_buf, 569 uint64_t num_blocks, struct spdk_bdev *bdev, uint32_t offset_blocks) 570 { 571 struct spdk_dif_ctx dif_ctx; 572 struct spdk_dif_error err_blk = {}; 573 int rc; 574 struct spdk_dif_ctx_init_ext_opts dif_opts; 575 struct iovec md_iov = { 576 .iov_base = md_buf, 577 .iov_len = num_blocks * bdev->md_len, 578 }; 579 580 if (md_buf == NULL) { 581 return 0; 582 } 583 584 dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format); 585 dif_opts.dif_pi_format = bdev->dif_pi_format; 586 rc = spdk_dif_ctx_init(&dif_ctx, 587 bdev->blocklen, bdev->md_len, bdev->md_interleave, 588 bdev->dif_is_head_of_md, bdev->dif_type, 589 SPDK_DIF_FLAGS_REFTAG_CHECK, 590 offset_blocks, 0, 0, 0, 0, &dif_opts); 591 if (rc != 0) { 592 SPDK_ERRLOG("Initialization of DIF context failed\n"); 593 return rc; 594 } 595 596 rc = spdk_dix_verify(iovs, iovcnt, &md_iov, num_blocks, &dif_ctx, &err_blk); 597 if (rc != 0) { 598 SPDK_ERRLOG("Reference tag check failed. type=%d, offset=%d" 599 PRIu32 "\n", err_blk.err_type, err_blk.err_offset); 600 } 601 602 return rc; 603 } 604 605 void 606 raid_bdev_io_complete(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status) 607 { 608 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); 609 int rc; 610 611 spdk_trace_record(TRACE_BDEV_RAID_IO_DONE, 0, 0, (uintptr_t)raid_io, (uintptr_t)bdev_io); 612 613 if (raid_io->split.offset != RAID_OFFSET_BLOCKS_INVALID) { 614 struct iovec *split_iov = raid_io->split.iov; 615 const struct iovec *split_iov_orig = &raid_io->split.iov_copy; 616 617 /* 618 * Non-zero offset here means that this is the completion of the first part of the 619 * split I/O (the higher LBAs). Then, we submit the second part and set offset to 0. 
		 */
		if (raid_io->split.offset != 0) {
			/* Restore the offset/md_buf of the original (unsplit) request */
			raid_io->offset_blocks = bdev_io->u.bdev.offset_blocks;
			raid_io->md_buf = bdev_io->u.bdev.md_buf;

			if (status == SPDK_BDEV_IO_STATUS_SUCCESS) {
				raid_io->num_blocks = raid_io->split.offset;
				/* raid_io->iovs was advanced past the consumed iovs at split
				 * time; the pointer difference is the iov count of the first
				 * (lower-LBA) part. */
				raid_io->iovcnt = raid_io->iovs - bdev_io->u.bdev.iovs;
				raid_io->iovs = bdev_io->u.bdev.iovs;
				if (split_iov != NULL) {
					/* The boundary iov was shared by both parts - shrink it
					 * back to the portion that belongs to the first part. */
					raid_io->iovcnt++;
					split_iov->iov_len = split_iov->iov_base - split_iov_orig->iov_base;
					split_iov->iov_base = split_iov_orig->iov_base;
				}

				raid_io->split.offset = 0;
				raid_io->base_bdev_io_submitted = 0;
				/* The remaining (lower-LBA) range is already processed - use
				 * the processed channel for it. */
				raid_io->raid_ch = raid_io->raid_ch->process.ch_processed;

				raid_io->raid_bdev->module->submit_rw_request(raid_io);
				return;
			}
		}

		/* Completion of the whole split I/O (or failure of the first part):
		 * restore all original parameters before completing upward. */
		raid_io->num_blocks = bdev_io->u.bdev.num_blocks;
		raid_io->iovcnt = bdev_io->u.bdev.iovcnt;
		raid_io->iovs = bdev_io->u.bdev.iovs;
		if (split_iov != NULL) {
			*split_iov = *split_iov_orig;
		}
	}

	if (spdk_unlikely(raid_io->completion_cb != NULL)) {
		raid_io->completion_cb(raid_io, status);
	} else {
		/* For successful reads with DIF reftag checking enabled, remap the
		 * reference tags back to the raid bdev's offset before completion. */
		if (spdk_unlikely(bdev_io->type == SPDK_BDEV_IO_TYPE_READ &&
				  spdk_bdev_get_dif_type(bdev_io->bdev) != SPDK_DIF_DISABLE &&
				  bdev_io->bdev->dif_check_flags & SPDK_DIF_FLAGS_REFTAG_CHECK &&
				  status == SPDK_BDEV_IO_STATUS_SUCCESS)) {

			rc = raid_bdev_remap_dix_reftag(bdev_io->u.bdev.md_buf,
							bdev_io->u.bdev.num_blocks, bdev_io->bdev,
							bdev_io->u.bdev.offset_blocks);
			if (rc != 0) {
				status = SPDK_BDEV_IO_STATUS_FAILED;
			}
		}
		spdk_bdev_io_complete(bdev_io, status);
	}
}

/*
 * brief:
 * raid_bdev_io_complete_part - signal the completion of a part of the expected
 * base bdev IOs and complete the raid_io if this is the final expected IO.
 * The caller should first set raid_io->base_bdev_io_remaining. This function
 * will decrement this counter by the value of the 'completed' parameter and
 * complete the raid_io if the counter reaches 0. The caller is free to
 * interpret the 'base_bdev_io_remaining' and 'completed' values as needed,
 * it can represent e.g. blocks or IOs.
 * params:
 * raid_io - pointer to raid_bdev_io
 * completed - the part of the raid_io that has been completed
 * status - status of the base IO
 * returns:
 * true - if the raid_io is completed
 * false - otherwise
 */
bool
raid_bdev_io_complete_part(struct raid_bdev_io *raid_io, uint64_t completed,
			   enum spdk_bdev_io_status status)
{
	assert(raid_io->base_bdev_io_remaining >= completed);
	raid_io->base_bdev_io_remaining -= completed;

	/* Any status differing from the default "sticks" to the raid_io, so a
	 * single failed base IO fails the whole raid_io. */
	if (status != raid_io->base_bdev_io_status_default) {
		raid_io->base_bdev_io_status = status;
	}

	if (raid_io->base_bdev_io_remaining == 0) {
		raid_bdev_io_complete(raid_io, raid_io->base_bdev_io_status);
		return true;
	} else {
		return false;
	}
}

/*
 * brief:
 * raid_bdev_queue_io_wait function processes the IO which failed to submit.
 * It will try to queue the IOs after storing the context to bdev wait queue logic.
 * params:
 * raid_io - pointer to raid_bdev_io
 * bdev - the block device that the IO is submitted to
 * ch - io channel
 * cb_fn - callback when the spdk_bdev_io for bdev becomes available
 * returns:
 * none
 */
void
raid_bdev_queue_io_wait(struct raid_bdev_io *raid_io, struct spdk_bdev *bdev,
			struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn)
{
	raid_io->waitq_entry.bdev = bdev;
	raid_io->waitq_entry.cb_fn = cb_fn;
	raid_io->waitq_entry.cb_arg = raid_io;
	spdk_bdev_queue_io_wait(bdev, ch, &raid_io->waitq_entry);
}

/* Completion callback for a reset submitted to one base bdev; counts it
 * against the raid_io's remaining base IOs. */
static void
raid_base_bdev_reset_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct raid_bdev_io *raid_io = cb_arg;

	spdk_bdev_free_io(bdev_io);

	raid_bdev_io_complete_part(raid_io, 1, success ?
				   SPDK_BDEV_IO_STATUS_SUCCESS :
				   SPDK_BDEV_IO_STATUS_FAILED);
}

static void raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io);

/* Retry entry point used when a reset was queued on ENOMEM. */
static void
_raid_bdev_submit_reset_request(void *_raid_io)
{
	struct raid_bdev_io *raid_io = _raid_io;

	raid_bdev_submit_reset_request(raid_io);
}

/*
 * brief:
 * raid_bdev_submit_reset_request function submits reset requests
 * to member disks; it will submit as many as possible unless a reset fails with -ENOMEM, in
 * which case it will queue it for later submission
 * params:
 * raid_io
 * returns:
 * none
 */
static void
raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io)
{
	struct raid_bdev *raid_bdev;
	int ret;
	uint8_t i;
	struct raid_base_bdev_info *base_info;
	struct spdk_io_channel *base_ch;

	raid_bdev = raid_io->raid_bdev;

	/* Only initialize the counter on first entry; on ENOMEM retry the
	 * remaining count is preserved. */
	if (raid_io->base_bdev_io_remaining == 0) {
		raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs;
	}

	/* Resume from where the previous attempt stopped */
	for (i = raid_io->base_bdev_io_submitted; i < raid_bdev->num_base_bdevs; i++) {
		base_info = &raid_bdev->base_bdev_info[i];
		base_ch = raid_io->raid_ch->base_channel[i];
		if (base_ch == NULL) {
			/* Missing base bdev - count it as trivially completed */
			raid_io->base_bdev_io_submitted++;
			raid_bdev_io_complete_part(raid_io, 1, SPDK_BDEV_IO_STATUS_SUCCESS);
			continue;
		}
		ret = spdk_bdev_reset(base_info->desc, base_ch,
				      raid_base_bdev_reset_complete, raid_io);
		if (ret == 0) {
			raid_io->base_bdev_io_submitted++;
		} else if (ret == -ENOMEM) {
			/* Out of spdk_bdev_io - retry when one becomes available */
			raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
						base_ch, _raid_bdev_submit_reset_request);
			return;
		} else {
			SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n");
			assert(false);
			raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
			return;
		}
	}
}

/* Split raid_io at 'split_offset' blocks: the raid_io is adjusted to cover the
 * upper part [split_offset, end) and enough state is saved in raid_io->split
 * to restore and submit the lower part on completion (see
 * raid_bdev_io_complete). */
static void
raid_bdev_io_split(struct raid_bdev_io *raid_io, uint64_t split_offset)
{
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	size_t iov_offset = split_offset * raid_bdev->bdev.blocklen;
	int i;

	assert(split_offset != 0);
	assert(raid_io->split.offset == RAID_OFFSET_BLOCKS_INVALID);
	raid_io->split.offset = split_offset;

	raid_io->offset_blocks += split_offset;
	raid_io->num_blocks -= split_offset;
	if (raid_io->md_buf != NULL) {
		raid_io->md_buf += (split_offset * raid_bdev->bdev.md_len);
	}

	for (i = 0; i < raid_io->iovcnt; i++) {
		struct iovec *iov = &raid_io->iovs[i];

		if (iov_offset < iov->iov_len) {
			if (iov_offset == 0) {
				/* Split falls exactly on an iov boundary - no iov is shared */
				raid_io->split.iov = NULL;
			} else {
				/* This iov straddles the split point - save a copy and
				 * advance it to the upper part. */
				raid_io->split.iov = iov;
				raid_io->split.iov_copy = *iov;
				iov->iov_base += iov_offset;
				iov->iov_len -= iov_offset;
			}
			raid_io->iovs += i;
			raid_io->iovcnt -= i;
			break;
		}

		iov_offset -= iov->iov_len;
	}
}

static void
raid_bdev_submit_rw_request(struct raid_bdev_io *raid_io)
{
	struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch;

	if (raid_ch->process.offset !=
RAID_OFFSET_BLOCKS_INVALID) { 845 uint64_t offset_begin = raid_io->offset_blocks; 846 uint64_t offset_end = offset_begin + raid_io->num_blocks; 847 848 if (offset_end > raid_ch->process.offset) { 849 if (offset_begin < raid_ch->process.offset) { 850 /* 851 * If the I/O spans both the processed and unprocessed ranges, 852 * split it and first handle the unprocessed part. After it 853 * completes, the rest will be handled. 854 * This situation occurs when the process thread is not active 855 * or is waiting for the process window range to be locked 856 * (quiesced). When a window is being processed, such I/Os will be 857 * deferred by the bdev layer until the window is unlocked. 858 */ 859 SPDK_DEBUGLOG(bdev_raid, "split: process_offset: %lu offset_begin: %lu offset_end: %lu\n", 860 raid_ch->process.offset, offset_begin, offset_end); 861 raid_bdev_io_split(raid_io, raid_ch->process.offset - offset_begin); 862 } 863 } else { 864 /* Use the child channel, which corresponds to the already processed range */ 865 raid_io->raid_ch = raid_ch->process.ch_processed; 866 } 867 } 868 869 raid_io->raid_bdev->module->submit_rw_request(raid_io); 870 } 871 872 /* 873 * brief: 874 * Callback function to spdk_bdev_io_get_buf. 875 * params: 876 * ch - pointer to raid bdev io channel 877 * bdev_io - pointer to parent bdev_io on raid bdev device 878 * success - True if buffer is allocated or false otherwise. 
 * returns:
 * none
 */
static void
raid_bdev_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
		     bool success)
{
	struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx;

	/* Buffer could not be allocated - fail the I/O without submitting it */
	if (!success) {
		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	raid_bdev_submit_rw_request(raid_io);
}

/*
 * brief:
 * raid_bdev_io_init initializes a raid_bdev_io from the given I/O parameters
 * and resets its submission state before it is handed to the raid module.
 * params:
 * raid_io - the raid_bdev_io to initialize
 * raid_ch - raid bdev io channel the I/O is submitted on
 * type, offset_blocks, num_blocks, iovs, iovcnt, md_buf - I/O parameters
 * memory_domain, memory_domain_ctx - memory domain of the data buffers
 * returns:
 * none
 */
void
raid_bdev_io_init(struct raid_bdev_io *raid_io, struct raid_bdev_io_channel *raid_ch,
		  enum spdk_bdev_io_type type, uint64_t offset_blocks,
		  uint64_t num_blocks, struct iovec *iovs, int iovcnt, void *md_buf,
		  struct spdk_memory_domain *memory_domain, void *memory_domain_ctx)
{
	struct spdk_io_channel *ch = spdk_io_channel_from_ctx(raid_ch);
	struct raid_bdev *raid_bdev = spdk_io_channel_get_io_device(ch);

	/* I/O parameters */
	raid_io->type = type;
	raid_io->offset_blocks = offset_blocks;
	raid_io->num_blocks = num_blocks;
	raid_io->iovs = iovs;
	raid_io->iovcnt = iovcnt;
	raid_io->memory_domain = memory_domain;
	raid_io->memory_domain_ctx = memory_domain_ctx;
	raid_io->md_buf = md_buf;

	/* Submission state */
	raid_io->raid_bdev = raid_bdev;
	raid_io->raid_ch = raid_ch;
	raid_io->base_bdev_io_remaining = 0;
	raid_io->base_bdev_io_submitted = 0;
	raid_io->completion_cb = NULL;
	/* RAID_OFFSET_BLOCKS_INVALID marks the I/O as not split */
	raid_io->split.offset = RAID_OFFSET_BLOCKS_INVALID;

	raid_bdev_io_set_default_status(raid_io, SPDK_BDEV_IO_STATUS_SUCCESS);
}

/*
 * brief:
 * raid_bdev_submit_request function is the submit_request function pointer of
 * raid bdev function table. This is used to submit the io on raid_bdev to below
 * layers.
929 * params: 930 * ch - pointer to raid bdev io channel 931 * bdev_io - pointer to parent bdev_io on raid bdev device 932 * returns: 933 * none 934 */ 935 static void 936 raid_bdev_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 937 { 938 struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 939 940 raid_bdev_io_init(raid_io, spdk_io_channel_get_ctx(ch), bdev_io->type, 941 bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks, 942 bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, bdev_io->u.bdev.md_buf, 943 bdev_io->u.bdev.memory_domain, bdev_io->u.bdev.memory_domain_ctx); 944 945 spdk_trace_record(TRACE_BDEV_RAID_IO_START, 0, 0, (uintptr_t)raid_io, (uintptr_t)bdev_io); 946 947 switch (bdev_io->type) { 948 case SPDK_BDEV_IO_TYPE_READ: 949 spdk_bdev_io_get_buf(bdev_io, raid_bdev_get_buf_cb, 950 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); 951 break; 952 case SPDK_BDEV_IO_TYPE_WRITE: 953 raid_bdev_submit_rw_request(raid_io); 954 break; 955 956 case SPDK_BDEV_IO_TYPE_RESET: 957 raid_bdev_submit_reset_request(raid_io); 958 break; 959 960 case SPDK_BDEV_IO_TYPE_FLUSH: 961 case SPDK_BDEV_IO_TYPE_UNMAP: 962 if (raid_io->raid_bdev->process != NULL) { 963 /* TODO: rebuild support */ 964 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 965 return; 966 } 967 raid_io->raid_bdev->module->submit_null_payload_request(raid_io); 968 break; 969 970 default: 971 SPDK_ERRLOG("submit request, invalid io type %u\n", bdev_io->type); 972 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 973 break; 974 } 975 } 976 977 /* 978 * brief: 979 * _raid_bdev_io_type_supported checks whether io_type is supported in 980 * all base bdev modules of raid bdev module. If anyone among the base_bdevs 981 * doesn't support, the raid device doesn't supports. 
982 * 983 * params: 984 * raid_bdev - pointer to raid bdev context 985 * io_type - io type 986 * returns: 987 * true - io_type is supported 988 * false - io_type is not supported 989 */ 990 inline static bool 991 _raid_bdev_io_type_supported(struct raid_bdev *raid_bdev, enum spdk_bdev_io_type io_type) 992 { 993 struct raid_base_bdev_info *base_info; 994 995 if (io_type == SPDK_BDEV_IO_TYPE_FLUSH || 996 io_type == SPDK_BDEV_IO_TYPE_UNMAP) { 997 if (raid_bdev->module->submit_null_payload_request == NULL) { 998 return false; 999 } 1000 } 1001 1002 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1003 if (base_info->desc == NULL) { 1004 continue; 1005 } 1006 1007 if (spdk_bdev_io_type_supported(spdk_bdev_desc_get_bdev(base_info->desc), io_type) == false) { 1008 return false; 1009 } 1010 } 1011 1012 return true; 1013 } 1014 1015 /* 1016 * brief: 1017 * raid_bdev_io_type_supported is the io_supported function for bdev function 1018 * table which returns whether the particular io type is supported or not by 1019 * raid bdev module 1020 * params: 1021 * ctx - pointer to raid bdev context 1022 * type - io type 1023 * returns: 1024 * true - io_type is supported 1025 * false - io_type is not supported 1026 */ 1027 static bool 1028 raid_bdev_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 1029 { 1030 switch (io_type) { 1031 case SPDK_BDEV_IO_TYPE_READ: 1032 case SPDK_BDEV_IO_TYPE_WRITE: 1033 return true; 1034 1035 case SPDK_BDEV_IO_TYPE_FLUSH: 1036 case SPDK_BDEV_IO_TYPE_RESET: 1037 case SPDK_BDEV_IO_TYPE_UNMAP: 1038 return _raid_bdev_io_type_supported(ctx, io_type); 1039 1040 default: 1041 return false; 1042 } 1043 1044 return false; 1045 } 1046 1047 /* 1048 * brief: 1049 * raid_bdev_get_io_channel is the get_io_channel function table pointer for 1050 * raid bdev. 
This is used to return the io channel for this raid bdev
 * params:
 * ctxt - pointer to raid_bdev
 * returns:
 * pointer to io channel for raid bdev
 */
static struct spdk_io_channel *
raid_bdev_get_io_channel(void *ctxt)
{
	struct raid_bdev *raid_bdev = ctxt;

	/* The raid_bdev pointer itself is the io_device registered in configure_cont */
	return spdk_get_io_channel(raid_bdev);
}

/*
 * brief:
 * raid_bdev_write_info_json writes the raid bdev state, level, base bdev
 * counts, optional background process progress and the base bdev list to
 * the given JSON write context. Must be called on the app thread.
 */
void
raid_bdev_write_info_json(struct raid_bdev *raid_bdev, struct spdk_json_write_ctx *w)
{
	struct raid_base_bdev_info *base_info;

	assert(raid_bdev != NULL);
	assert(spdk_get_thread() == spdk_thread_get_app_thread());

	spdk_json_write_named_uuid(w, "uuid", &raid_bdev->bdev.uuid);
	spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb);
	spdk_json_write_named_string(w, "state", raid_bdev_state_to_str(raid_bdev->state));
	spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level));
	spdk_json_write_named_bool(w, "superblock", raid_bdev->superblock_enabled);
	spdk_json_write_named_uint32(w, "num_base_bdevs", raid_bdev->num_base_bdevs);
	spdk_json_write_named_uint32(w, "num_base_bdevs_discovered", raid_bdev->num_base_bdevs_discovered);
	spdk_json_write_named_uint32(w, "num_base_bdevs_operational",
				     raid_bdev->num_base_bdevs_operational);
	if (raid_bdev->process) {
		struct raid_bdev_process *process = raid_bdev->process;
		uint64_t offset = process->window_offset;

		/* Report progress of the running background process (e.g. rebuild) */
		spdk_json_write_named_object_begin(w, "process");
		spdk_json_write_name(w, "type");
		spdk_json_write_string(w, raid_bdev_process_to_str(process->type));
		spdk_json_write_named_string(w, "target", process->target->name);
		spdk_json_write_named_object_begin(w, "progress");
		spdk_json_write_named_uint64(w, "blocks", offset);
		/* Implicit double -> uint32 conversion truncates to a whole percent */
		spdk_json_write_named_uint32(w, "percent", offset * 100.0 / raid_bdev->bdev.blockcnt);
		spdk_json_write_object_end(w);
		spdk_json_write_object_end(w);
	}
	spdk_json_write_name(w, "base_bdevs_list");
	spdk_json_write_array_begin(w);
	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		spdk_json_write_object_begin(w);
		spdk_json_write_name(w, "name");
		if (base_info->name) {
			spdk_json_write_string(w, base_info->name);
		} else {
			spdk_json_write_null(w);
		}
		spdk_json_write_named_uuid(w, "uuid", &base_info->uuid);
		spdk_json_write_named_bool(w, "is_configured", base_info->is_configured);
		spdk_json_write_named_uint64(w, "data_offset", base_info->data_offset);
		spdk_json_write_named_uint64(w, "data_size", base_info->data_size);
		spdk_json_write_object_end(w);
	}
	spdk_json_write_array_end(w);
}

/*
 * brief:
 * raid_bdev_dump_info_json is the function table pointer for raid bdev
 * params:
 * ctx - pointer to raid_bdev
 * w - pointer to json context
 * returns:
 * 0 - success
 * non zero - failure
 */
static int
raid_bdev_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
{
	struct raid_bdev *raid_bdev = ctx;

	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_dump_config_json\n");

	/* Dump the raid bdev configuration related information */
	spdk_json_write_named_object_begin(w, "raid");
	raid_bdev_write_info_json(raid_bdev, w);
	spdk_json_write_object_end(w);

	return 0;
}

/*
 * brief:
 * raid_bdev_write_config_json is the function table pointer for raid bdev
 * params:
 * bdev - pointer to spdk_bdev
 * w - pointer to json context
 * returns:
 * none
 */
static void
raid_bdev_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	struct raid_bdev *raid_bdev = bdev->ctxt;
	struct raid_base_bdev_info *base_info;

	assert(spdk_get_thread() == spdk_thread_get_app_thread());

	if (raid_bdev->superblock_enabled) {
		/* raid bdev configuration is stored in the superblock */
		return;
	}

	/* Emit a "bdev_raid_create" RPC call that would recreate this raid bdev */
	spdk_json_write_object_begin(w);

	spdk_json_write_named_string(w, "method", "bdev_raid_create");

	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_string(w, "name", bdev->name);
	spdk_json_write_named_uuid(w, "uuid", &raid_bdev->bdev.uuid);
	if (raid_bdev->strip_size_kb != 0) {
		spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb);
	}
	spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level));

	spdk_json_write_named_array_begin(w, "base_bdevs");
	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		if (base_info->name) {
			spdk_json_write_string(w, base_info->name);
		} else {
			/* Placeholder name for a base bdev that is currently removed */
			char str[32];

			snprintf(str, sizeof(str), "removed_base_bdev_%u", raid_bdev_base_bdev_slot(base_info));
			spdk_json_write_string(w, str);
		}
	}
	spdk_json_write_array_end(w);
	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);
}

/*
 * brief:
 * raid_bdev_get_memory_domains collects the memory domains of all configured
 * base bdevs into the caller-provided array, if the module supports them.
 * returns: total number of domains, or a negative error code.
 */
static int
raid_bdev_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int array_size)
{
	struct raid_bdev *raid_bdev = ctx;
	struct raid_base_bdev_info *base_info;
	int domains_count = 0, rc = 0;

	if (raid_bdev->module->memory_domains_supported == false) {
		return 0;
	}

	/* First loop to get the number of memory domains */
	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		if (base_info->is_configured == false) {
			continue;
		}
		rc = spdk_bdev_get_memory_domains(spdk_bdev_desc_get_bdev(base_info->desc), NULL, 0);
		if (rc < 0) {
			return rc;
		}
		domains_count += rc;
	}

	/* If the caller's array is too small, only report the required count */
	if (!domains || array_size < domains_count) {
		return domains_count;
	}

	/* Second loop to actually fill the caller's array */
	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		if (base_info->is_configured == false) {
			continue;
		}
		rc = spdk_bdev_get_memory_domains(spdk_bdev_desc_get_bdev(base_info->desc), domains, array_size);
		if (rc < 0) {
			return rc;
		}
		domains += rc;
		array_size -= rc;
	}

	return domains_count;
}

/* g_raid_bdev_fn_table is the function table for raid bdev */
static const struct spdk_bdev_fn_table g_raid_bdev_fn_table = {
	.destruct = raid_bdev_destruct,
	.submit_request = raid_bdev_submit_request,
	.io_type_supported = raid_bdev_io_type_supported,
	.get_io_channel = raid_bdev_get_io_channel,
	.dump_info_json = raid_bdev_dump_info_json,
	.write_config_json = raid_bdev_write_config_json,
	.get_memory_domains = raid_bdev_get_memory_domains,
};

/* Look up a raid bdev by name in the global list; NULL if not found */
struct raid_bdev *
raid_bdev_find_by_name(const char *name)
{
	struct raid_bdev *raid_bdev;

	TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) {
		if (strcmp(raid_bdev->bdev.name, name) == 0) {
			return raid_bdev;
		}
	}

	return NULL;
}

/* Look up a raid bdev by uuid in the global list; NULL if not found */
static struct raid_bdev *
raid_bdev_find_by_uuid(const struct spdk_uuid *uuid)
{
	struct raid_bdev *raid_bdev;

	TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) {
		if (spdk_uuid_compare(&raid_bdev->bdev.uuid, uuid) == 0) {
			return raid_bdev;
		}
	}

	return NULL;
}

/* Mapping between accepted raid level name strings and enum values;
 * terminated by an empty entry */
static struct {
	const char *name;
	enum raid_level value;
} g_raid_level_names[] = {
	{ "raid0", RAID0 },
	{ "0", RAID0 },
	{ "raid1", RAID1 },
	{ "1", RAID1 },
	{ "raid5f", RAID5F },
	{ "5f", RAID5F },
	{ "concat", CONCAT },
	{ }
};

const char *g_raid_state_names[] = {
	[RAID_BDEV_STATE_ONLINE] = "online",
	[RAID_BDEV_STATE_CONFIGURING] = "configuring",
	[RAID_BDEV_STATE_OFFLINE] = "offline",
	[RAID_BDEV_STATE_MAX] = NULL
};

static const char *g_raid_process_type_names[] = {
	[RAID_PROCESS_NONE] = "none",
	[RAID_PROCESS_REBUILD] = "rebuild",
	[RAID_PROCESS_MAX] = NULL
};

/* We have to use the typedef in the function declaration to appease astyle. */
typedef enum raid_level raid_level_t;
typedef enum raid_bdev_state raid_bdev_state_t;

/* Parse a raid level name (case-insensitive, e.g. "raid0" or "0");
 * returns INVALID_RAID_LEVEL when the string matches no known level */
raid_level_t
raid_bdev_str_to_level(const char *str)
{
	unsigned int i;

	assert(str != NULL);

	for (i = 0; g_raid_level_names[i].name != NULL; i++) {
		if (strcasecmp(g_raid_level_names[i].name, str) == 0) {
			return g_raid_level_names[i].value;
		}
	}

	return INVALID_RAID_LEVEL;
}

/* Map a raid level to its canonical name; returns "" for an unknown level */
const char *
raid_bdev_level_to_str(enum raid_level level)
{
	unsigned int i;

	for (i = 0; g_raid_level_names[i].name != NULL; i++) {
		if (g_raid_level_names[i].value == level) {
			return g_raid_level_names[i].name;
		}
	}

	return "";
}

/* Parse a raid bdev state name (case-insensitive);
 * returns RAID_BDEV_STATE_MAX when the string matches no state */
raid_bdev_state_t
raid_bdev_str_to_state(const char *str)
{
	unsigned int i;

	assert(str != NULL);

	for (i = 0; i < RAID_BDEV_STATE_MAX; i++) {
		if (strcasecmp(g_raid_state_names[i], str) == 0) {
			break;
		}
	}

	return i;
}

/* Map a raid bdev state to its name; "" for out-of-range values */
const char *
raid_bdev_state_to_str(enum raid_bdev_state state)
{
	if (state >= RAID_BDEV_STATE_MAX) {
		return "";
	}

	return g_raid_state_names[state];
}

/* Map a background process type to its name; "" for out-of-range values */
const char *
raid_bdev_process_to_str(enum raid_process_type value)
{
	if (value >= RAID_PROCESS_MAX) {
		return "";
	}

	return g_raid_process_type_names[value];
}

/*
 * brief:
 * raid_bdev_fini_start is called when bdev layer is starting the
 * shutdown process
 * params:
 * none
 * returns:
 * none
 */
static void
raid_bdev_fini_start(void)
{
	struct raid_bdev *raid_bdev;
	struct raid_base_bdev_info *base_info;

	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_fini_start\n");

	TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) {
		/* Only raid bdevs that are not online free their base bdev resources
		 * here; online ones get unregistered by the bdev layer during shutdown */
		if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) {
			RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
				raid_bdev_free_base_bdev_resource(base_info);
			}
		}
	}

	g_shutdown_started = true;
}

/*
 * brief:
 * raid_bdev_exit is called on raid bdev module exit time by bdev layer
 * params:
 * none
 * returns:
 * none
 */
static void
raid_bdev_exit(void)
{
	struct raid_bdev *raid_bdev, *tmp;

	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_exit\n");

	TAILQ_FOREACH_SAFE(raid_bdev, &g_raid_bdev_list, global_link, tmp) {
		raid_bdev_cleanup_and_free(raid_bdev);
	}
}

/* Emit a "bdev_raid_set_options" RPC call reproducing the current module opts */
static void
raid_bdev_opts_config_json(struct spdk_json_write_ctx *w)
{
	spdk_json_write_object_begin(w);

	spdk_json_write_named_string(w, "method", "bdev_raid_set_options");

	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_uint32(w, "process_window_size_kb", g_opts.process_window_size_kb);
	spdk_json_write_named_uint32(w, "process_max_bandwidth_mb_sec",
				     g_opts.process_max_bandwidth_mb_sec);
	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);
}

/* config_json callback of the raid module */
static int
raid_bdev_config_json(struct spdk_json_write_ctx *w)
{
	raid_bdev_opts_config_json(w);

	return 0;
}

/*
 * brief:
 * raid_bdev_get_ctx_size is used to return the context size of bdev_io for raid
 * module
 * params:
 * none
 * returns:
 * size of spdk_bdev_io context for raid
 */
static int
raid_bdev_get_ctx_size(void)
{
	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_get_ctx_size\n");
	return sizeof(struct raid_bdev_io);
}

static struct spdk_bdev_module g_raid_if = {
	.name = "raid",
	.module_init = raid_bdev_init,
	.fini_start = raid_bdev_fini_start,
.module_fini = raid_bdev_exit, 1461 .config_json = raid_bdev_config_json, 1462 .get_ctx_size = raid_bdev_get_ctx_size, 1463 .examine_disk = raid_bdev_examine, 1464 .async_init = false, 1465 .async_fini = false, 1466 }; 1467 SPDK_BDEV_MODULE_REGISTER(raid, &g_raid_if) 1468 1469 /* 1470 * brief: 1471 * raid_bdev_init is the initialization function for raid bdev module 1472 * params: 1473 * none 1474 * returns: 1475 * 0 - success 1476 * non zero - failure 1477 */ 1478 static int 1479 raid_bdev_init(void) 1480 { 1481 return 0; 1482 } 1483 1484 static int 1485 _raid_bdev_create(const char *name, uint32_t strip_size, uint8_t num_base_bdevs, 1486 enum raid_level level, bool superblock_enabled, const struct spdk_uuid *uuid, 1487 struct raid_bdev **raid_bdev_out) 1488 { 1489 struct raid_bdev *raid_bdev; 1490 struct spdk_bdev *raid_bdev_gen; 1491 struct raid_bdev_module *module; 1492 struct raid_base_bdev_info *base_info; 1493 uint8_t min_operational; 1494 1495 if (strnlen(name, RAID_BDEV_SB_NAME_SIZE) == RAID_BDEV_SB_NAME_SIZE) { 1496 SPDK_ERRLOG("Raid bdev name '%s' exceeds %d characters\n", name, RAID_BDEV_SB_NAME_SIZE - 1); 1497 return -EINVAL; 1498 } 1499 1500 if (raid_bdev_find_by_name(name) != NULL) { 1501 SPDK_ERRLOG("Duplicate raid bdev name found: %s\n", name); 1502 return -EEXIST; 1503 } 1504 1505 if (level == RAID1) { 1506 if (strip_size != 0) { 1507 SPDK_ERRLOG("Strip size is not supported by raid1\n"); 1508 return -EINVAL; 1509 } 1510 } else if (spdk_u32_is_pow2(strip_size) == false) { 1511 SPDK_ERRLOG("Invalid strip size %" PRIu32 "\n", strip_size); 1512 return -EINVAL; 1513 } 1514 1515 module = raid_bdev_module_find(level); 1516 if (module == NULL) { 1517 SPDK_ERRLOG("Unsupported raid level '%d'\n", level); 1518 return -EINVAL; 1519 } 1520 1521 assert(module->base_bdevs_min != 0); 1522 if (num_base_bdevs < module->base_bdevs_min) { 1523 SPDK_ERRLOG("At least %u base devices required for %s\n", 1524 module->base_bdevs_min, 1525 raid_bdev_level_to_str(level)); 
1526 return -EINVAL; 1527 } 1528 1529 switch (module->base_bdevs_constraint.type) { 1530 case CONSTRAINT_MAX_BASE_BDEVS_REMOVED: 1531 min_operational = num_base_bdevs - module->base_bdevs_constraint.value; 1532 break; 1533 case CONSTRAINT_MIN_BASE_BDEVS_OPERATIONAL: 1534 min_operational = module->base_bdevs_constraint.value; 1535 break; 1536 case CONSTRAINT_UNSET: 1537 if (module->base_bdevs_constraint.value != 0) { 1538 SPDK_ERRLOG("Unexpected constraint value '%u' provided for raid bdev '%s'.\n", 1539 (uint8_t)module->base_bdevs_constraint.value, name); 1540 return -EINVAL; 1541 } 1542 min_operational = num_base_bdevs; 1543 break; 1544 default: 1545 SPDK_ERRLOG("Unrecognised constraint type '%u' in module for raid level '%s'.\n", 1546 (uint8_t)module->base_bdevs_constraint.type, 1547 raid_bdev_level_to_str(module->level)); 1548 return -EINVAL; 1549 }; 1550 1551 if (min_operational == 0 || min_operational > num_base_bdevs) { 1552 SPDK_ERRLOG("Wrong constraint value for raid level '%s'.\n", 1553 raid_bdev_level_to_str(module->level)); 1554 return -EINVAL; 1555 } 1556 1557 raid_bdev = calloc(1, sizeof(*raid_bdev)); 1558 if (!raid_bdev) { 1559 SPDK_ERRLOG("Unable to allocate memory for raid bdev\n"); 1560 return -ENOMEM; 1561 } 1562 1563 raid_bdev->module = module; 1564 raid_bdev->num_base_bdevs = num_base_bdevs; 1565 raid_bdev->base_bdev_info = calloc(raid_bdev->num_base_bdevs, 1566 sizeof(struct raid_base_bdev_info)); 1567 if (!raid_bdev->base_bdev_info) { 1568 SPDK_ERRLOG("Unable able to allocate base bdev info\n"); 1569 raid_bdev_free(raid_bdev); 1570 return -ENOMEM; 1571 } 1572 1573 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1574 base_info->raid_bdev = raid_bdev; 1575 } 1576 1577 /* strip_size_kb is from the rpc param. strip_size is in blocks and used 1578 * internally and set later. 
1579 */ 1580 raid_bdev->strip_size = 0; 1581 raid_bdev->strip_size_kb = strip_size; 1582 raid_bdev->state = RAID_BDEV_STATE_CONFIGURING; 1583 raid_bdev->level = level; 1584 raid_bdev->min_base_bdevs_operational = min_operational; 1585 raid_bdev->superblock_enabled = superblock_enabled; 1586 1587 raid_bdev_gen = &raid_bdev->bdev; 1588 1589 raid_bdev_gen->name = strdup(name); 1590 if (!raid_bdev_gen->name) { 1591 SPDK_ERRLOG("Unable to allocate name for raid\n"); 1592 raid_bdev_free(raid_bdev); 1593 return -ENOMEM; 1594 } 1595 1596 raid_bdev_gen->product_name = "Raid Volume"; 1597 raid_bdev_gen->ctxt = raid_bdev; 1598 raid_bdev_gen->fn_table = &g_raid_bdev_fn_table; 1599 raid_bdev_gen->module = &g_raid_if; 1600 raid_bdev_gen->write_cache = 0; 1601 spdk_uuid_copy(&raid_bdev_gen->uuid, uuid); 1602 1603 TAILQ_INSERT_TAIL(&g_raid_bdev_list, raid_bdev, global_link); 1604 1605 *raid_bdev_out = raid_bdev; 1606 1607 return 0; 1608 } 1609 1610 /* 1611 * brief: 1612 * raid_bdev_create allocates raid bdev based on passed configuration 1613 * params: 1614 * name - name for raid bdev 1615 * strip_size - strip size in KB 1616 * num_base_bdevs - number of base bdevs 1617 * level - raid level 1618 * superblock_enabled - true if raid should have superblock 1619 * uuid - uuid to set for the bdev 1620 * raid_bdev_out - the created raid bdev 1621 * returns: 1622 * 0 - success 1623 * non zero - failure 1624 */ 1625 int 1626 raid_bdev_create(const char *name, uint32_t strip_size, uint8_t num_base_bdevs, 1627 enum raid_level level, bool superblock_enabled, const struct spdk_uuid *uuid, 1628 struct raid_bdev **raid_bdev_out) 1629 { 1630 struct raid_bdev *raid_bdev; 1631 int rc; 1632 1633 assert(uuid != NULL); 1634 1635 rc = _raid_bdev_create(name, strip_size, num_base_bdevs, level, superblock_enabled, uuid, 1636 &raid_bdev); 1637 if (rc != 0) { 1638 return rc; 1639 } 1640 1641 if (superblock_enabled && spdk_uuid_is_null(uuid)) { 1642 /* we need to have the uuid to store in the superblock 
before the bdev is registered */
		spdk_uuid_generate(&raid_bdev->bdev.uuid);
	}

	raid_bdev->num_base_bdevs_operational = num_base_bdevs;

	*raid_bdev_out = raid_bdev;

	return 0;
}

/* Close the internally held self descriptor; must run on the app thread */
static void
_raid_bdev_unregistering_cont(void *ctx)
{
	struct raid_bdev *raid_bdev = ctx;

	spdk_bdev_close(raid_bdev->self_desc);
	raid_bdev->self_desc = NULL;
}

/* Forward the self-descriptor close to the app thread */
static void
raid_bdev_unregistering_cont(void *ctx)
{
	spdk_thread_exec_msg(spdk_thread_get_app_thread(), _raid_bdev_unregistering_cont, ctx);
}

/* Queue a callback to be run when the background process finishes.
 * Must be called on the process thread, before the process is stopped. */
static int
raid_bdev_process_add_finish_action(struct raid_bdev_process *process, spdk_msg_fn cb, void *cb_ctx)
{
	struct raid_process_finish_action *finish_action;

	assert(spdk_get_thread() == process->thread);
	assert(process->state < RAID_PROCESS_STATE_STOPPED);

	finish_action = calloc(1, sizeof(*finish_action));
	if (finish_action == NULL) {
		return -ENOMEM;
	}

	finish_action->cb = cb;
	finish_action->cb_ctx = cb_ctx;

	TAILQ_INSERT_TAIL(&process->finish_actions, finish_action, link);

	return 0;
}

/* Stop the background process because the raid bdev is being unregistered.
 * Runs on the process thread; closing the self descriptor is deferred until
 * the process has fully stopped. */
static void
raid_bdev_unregistering_stop_process(void *ctx)
{
	struct raid_bdev_process *process = ctx;
	struct raid_bdev *raid_bdev = process->raid_bdev;
	int rc;

	process->state = RAID_PROCESS_STATE_STOPPING;
	if (process->status == 0) {
		process->status = -ECANCELED;
	}

	rc = raid_bdev_process_add_finish_action(process, raid_bdev_unregistering_cont, raid_bdev);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to add raid bdev '%s' process finish action: %s\n",
			    raid_bdev->bdev.name, spdk_strerror(-rc));
	}
}

/* Event callback for the raid bdev's internally opened self descriptor */
static void
raid_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
{
	struct raid_bdev *raid_bdev = event_ctx;

	if (type == SPDK_BDEV_EVENT_REMOVE) {
		if (raid_bdev->process != NULL) {
			/* Stop the background process first; the descriptor is closed
			 * from its finish action */
			spdk_thread_send_msg(raid_bdev->process->thread, raid_bdev_unregistering_stop_process,
					     raid_bdev->process);
		} else {
			raid_bdev_unregistering_cont(raid_bdev);
		}
	}
}

/* Final stage of configuration: register the io device and the bdev,
 * open the internal self descriptor and invoke the configure callback */
static void
raid_bdev_configure_cont(struct raid_bdev *raid_bdev)
{
	struct spdk_bdev *raid_bdev_gen = &raid_bdev->bdev;
	int rc;

	raid_bdev->state = RAID_BDEV_STATE_ONLINE;
	SPDK_DEBUGLOG(bdev_raid, "io device register %p\n", raid_bdev);
	SPDK_DEBUGLOG(bdev_raid, "blockcnt %" PRIu64 ", blocklen %u\n",
		      raid_bdev_gen->blockcnt, raid_bdev_gen->blocklen);
	spdk_io_device_register(raid_bdev, raid_bdev_create_cb, raid_bdev_destroy_cb,
				sizeof(struct raid_bdev_io_channel),
				raid_bdev_gen->name);
	rc = spdk_bdev_register(raid_bdev_gen);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to register raid bdev '%s': %s\n",
			    raid_bdev_gen->name, spdk_strerror(-rc));
		goto out;
	}

	/*
	 * Open the bdev internally to delay unregistering if we need to stop a background process
	 * first. The process may still need to unquiesce a range but it will fail because the
	 * bdev's internal.spinlock is destroyed by the time the destruct callback is reached.
	 * During application shutdown, bdevs automatically get unregistered by the bdev layer
	 * so this is the only way currently to do this correctly.
	 * TODO: try to handle this correctly in bdev layer instead.
	 */
	rc = spdk_bdev_open_ext(raid_bdev_gen->name, false, raid_bdev_event_cb, raid_bdev,
				&raid_bdev->self_desc);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to open raid bdev '%s': %s\n",
			    raid_bdev_gen->name, spdk_strerror(-rc));
		spdk_bdev_unregister(raid_bdev_gen, NULL, NULL);
		goto out;
	}

	SPDK_DEBUGLOG(bdev_raid, "raid bdev generic %p\n", raid_bdev_gen);
	SPDK_DEBUGLOG(bdev_raid, "raid bdev is created with name %s, raid_bdev %p\n",
		      raid_bdev_gen->name, raid_bdev);
out:
	if (rc != 0) {
		/* Roll back: stop the module and return to CONFIGURING state */
		if (raid_bdev->module->stop != NULL) {
			raid_bdev->module->stop(raid_bdev);
		}
		spdk_io_device_unregister(raid_bdev, NULL);
		raid_bdev->state = RAID_BDEV_STATE_CONFIGURING;
	}

	if (raid_bdev->configure_cb != NULL) {
		raid_bdev->configure_cb(raid_bdev->configure_cb_ctx, rc);
		raid_bdev->configure_cb = NULL;
	}
}

/* Completion of the superblock write issued by raid_bdev_configure() */
static void
raid_bdev_configure_write_sb_cb(int status, struct raid_bdev *raid_bdev, void *ctx)
{
	if (status == 0) {
		raid_bdev_configure_cont(raid_bdev);
	} else {
		SPDK_ERRLOG("Failed to write raid bdev '%s' superblock: %s\n",
			    raid_bdev->bdev.name, spdk_strerror(-status));
		if (raid_bdev->module->stop != NULL) {
			raid_bdev->module->stop(raid_bdev);
		}
		if (raid_bdev->configure_cb != NULL) {
			raid_bdev->configure_cb(raid_bdev->configure_cb_ctx, status);
			raid_bdev->configure_cb = NULL;
		}
	}
}

/*
 * brief:
 * If raid bdev config is complete, then only register the raid bdev to
 * bdev layer and remove this raid bdev from configuring list and
 * insert the raid bdev to configured list
 * params:
 * raid_bdev - pointer to raid bdev
 * returns:
 * 0 - success
 * non zero - failure
 */
static int
raid_bdev_configure(struct raid_bdev *raid_bdev, raid_bdev_configure_cb cb, void *cb_ctx)
{
	uint32_t data_block_size =
spdk_bdev_get_data_block_size(&raid_bdev->bdev);
	int rc;

	assert(raid_bdev->state == RAID_BDEV_STATE_CONFIGURING);
	assert(raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs_operational);
	assert(raid_bdev->bdev.blocklen > 0);

	/* The strip_size_kb is read in from user in KB. Convert to blocks here for
	 * internal use.
	 */
	raid_bdev->strip_size = (raid_bdev->strip_size_kb * 1024) / data_block_size;
	if (raid_bdev->strip_size == 0 && raid_bdev->level != RAID1) {
		SPDK_ERRLOG("Strip size cannot be smaller than the device block size\n");
		return -EINVAL;
	}
	raid_bdev->strip_size_shift = spdk_u32log2(raid_bdev->strip_size);

	rc = raid_bdev->module->start(raid_bdev);
	if (rc != 0) {
		SPDK_ERRLOG("raid module startup callback failed\n");
		return rc;
	}

	assert(raid_bdev->configure_cb == NULL);
	raid_bdev->configure_cb = cb;
	raid_bdev->configure_cb_ctx = cb_ctx;

	if (raid_bdev->superblock_enabled) {
		if (raid_bdev->sb == NULL) {
			/* Fresh superblock: allocate and initialize it */
			rc = raid_bdev_alloc_superblock(raid_bdev, data_block_size);
			if (rc == 0) {
				raid_bdev_init_superblock(raid_bdev);
			}
		} else {
			/* Existing (loaded) superblock: validate it against this bdev */
			assert(spdk_uuid_compare(&raid_bdev->sb->uuid, &raid_bdev->bdev.uuid) == 0);
			if (raid_bdev->sb->block_size != data_block_size) {
				SPDK_ERRLOG("blocklen does not match value in superblock\n");
				rc = -EINVAL;
			}
			if (raid_bdev->sb->raid_size != raid_bdev->bdev.blockcnt) {
				SPDK_ERRLOG("blockcnt does not match value in superblock\n");
				rc = -EINVAL;
			}
		}

		if (rc != 0) {
			raid_bdev->configure_cb = NULL;
			if (raid_bdev->module->stop != NULL) {
				raid_bdev->module->stop(raid_bdev);
			}
			return rc;
		}

		/* Configuration continues in the superblock write completion callback */
		raid_bdev_write_superblock(raid_bdev, raid_bdev_configure_write_sb_cb, NULL);
	} else {
		raid_bdev_configure_cont(raid_bdev);
	}

	return 0;
}

/*
 * brief:
 * If raid bdev is online and registered, change the bdev state to
 * configuring and unregister this raid device. Queue this raid device
 * in configuring list
 * params:
 * raid_bdev - pointer to raid bdev
 * cb_fn - callback function
 * cb_arg - argument to callback function
 * returns:
 * none
 */
static void
raid_bdev_deconfigure(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn,
		      void *cb_arg)
{
	if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) {
		/* Nothing to unregister; report success right away */
		if (cb_fn) {
			cb_fn(cb_arg, 0);
		}
		return;
	}

	raid_bdev->state = RAID_BDEV_STATE_OFFLINE;
	SPDK_DEBUGLOG(bdev_raid, "raid bdev state changing from online to offline\n");

	spdk_bdev_unregister(&raid_bdev->bdev, cb_fn, cb_arg);
}

/*
 * brief:
 * raid_bdev_find_base_info_by_bdev function finds the base bdev info by bdev.
 * params:
 * base_bdev - pointer to base bdev
 * returns:
 * base bdev info if found, otherwise NULL.
 */
static struct raid_base_bdev_info *
raid_bdev_find_base_info_by_bdev(struct spdk_bdev *base_bdev)
{
	struct raid_bdev *raid_bdev;
	struct raid_base_bdev_info *base_info;

	TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) {
		RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
			if (base_info->desc != NULL &&
			    spdk_bdev_desc_get_bdev(base_info->desc) == base_bdev) {
				return base_info;
			}
		}
	}

	return NULL;
}

/* Final step of base bdev removal: update the operational count, deconfigure
 * the raid bdev if too few base bdevs remain, and invoke the remove callback */
static void
raid_bdev_remove_base_bdev_done(struct raid_base_bdev_info *base_info, int status)
{
	struct raid_bdev *raid_bdev = base_info->raid_bdev;

	assert(base_info->remove_scheduled);
	base_info->remove_scheduled = false;

	if (status == 0) {
		raid_bdev->num_base_bdevs_operational--;
		if (raid_bdev->num_base_bdevs_operational < raid_bdev->min_base_bdevs_operational) {
			/* There is not enough base bdevs to keep the raid bdev operational. */
			raid_bdev_deconfigure(raid_bdev, base_info->remove_cb, base_info->remove_cb_ctx);
			return;
		}
	}

	if (base_info->remove_cb != NULL) {
		base_info->remove_cb(base_info->remove_cb_ctx, status);
	}
}

/* Unquiesce completion callback of the base bdev removal sequence */
static void
raid_bdev_remove_base_bdev_on_unquiesced(void *ctx, int status)
{
	struct raid_base_bdev_info *base_info = ctx;
	struct raid_bdev *raid_bdev = base_info->raid_bdev;

	if (status != 0) {
		SPDK_ERRLOG("Failed to unquiesce raid bdev %s: %s\n",
			    raid_bdev->bdev.name, spdk_strerror(-status));
	}

	raid_bdev_remove_base_bdev_done(base_info, status);
}

/* Per-channel handler: release the io channel of the removed base bdev */
static void
raid_bdev_channel_remove_base_bdev(struct spdk_io_channel_iter *i)
{
	struct raid_base_bdev_info *base_info = spdk_io_channel_iter_get_ctx(i);
	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
	struct raid_bdev_io_channel *raid_ch = spdk_io_channel_get_ctx(ch);
	uint8_t idx = raid_bdev_base_bdev_slot(base_info);

	SPDK_DEBUGLOG(bdev_raid, "slot: %u raid_ch: %p\n", idx, raid_ch);

	if (raid_ch->base_channel[idx] != NULL) {
		spdk_put_io_channel(raid_ch->base_channel[idx]);
		raid_ch->base_channel[idx] = NULL;
	}

	/* Also clear the slot in the process channel, if one exists */
	if (raid_ch->process.ch_processed != NULL) {
		raid_ch->process.ch_processed->base_channel[idx] = NULL;
	}

	spdk_for_each_channel_continue(i, 0);
}

/* Channel iteration done: free the base bdev resources and unquiesce */
static void
raid_bdev_channels_remove_base_bdev_done(struct spdk_io_channel_iter *i, int status)
{
	struct raid_base_bdev_info *base_info = spdk_io_channel_iter_get_ctx(i);
	struct raid_bdev *raid_bdev = base_info->raid_bdev;

	raid_bdev_free_base_bdev_resource(base_info);

	spdk_bdev_unquiesce(&raid_bdev->bdev, &g_raid_if, raid_bdev_remove_base_bdev_on_unquiesced,
			    base_info);
}

/* Deconfigure the base bdev and clear its channel on every io channel */
static void
raid_bdev_remove_base_bdev_cont(struct raid_base_bdev_info *base_info)
{
	raid_bdev_deconfigure_base_bdev(base_info);

	spdk_for_each_channel(base_info->raid_bdev, raid_bdev_channel_remove_base_bdev, base_info,
			      raid_bdev_channels_remove_base_bdev_done);
}

/* Superblock write completion of the base bdev removal sequence */
static void
raid_bdev_remove_base_bdev_write_sb_cb(int status, struct raid_bdev *raid_bdev, void *ctx)
{
	struct raid_base_bdev_info *base_info = ctx;

	if (status != 0) {
		SPDK_ERRLOG("Failed to write raid bdev '%s' superblock: %s\n",
			    raid_bdev->bdev.name, spdk_strerror(-status));
		raid_bdev_remove_base_bdev_done(base_info, status);
		return;
	}

	raid_bdev_remove_base_bdev_cont(base_info);
}

/* Quiesce completion: record the removal in the superblock (if any) before
 * continuing with the actual removal */
static void
raid_bdev_remove_base_bdev_on_quiesced(void *ctx, int status)
{
	struct raid_base_bdev_info *base_info = ctx;
	struct raid_bdev *raid_bdev = base_info->raid_bdev;

	if (status != 0) {
		SPDK_ERRLOG("Failed to quiesce raid bdev %s: %s\n",
			    raid_bdev->bdev.name, spdk_strerror(-status));
		raid_bdev_remove_base_bdev_done(base_info, status);
		return;
	}

	if (raid_bdev->sb) {
		struct raid_bdev_superblock *sb = raid_bdev->sb;
		uint8_t slot = raid_bdev_base_bdev_slot(base_info);
		uint8_t i;

		for (i = 0; i < sb->base_bdevs_size; i++) {
			struct raid_bdev_sb_base_bdev *sb_base_bdev = &sb->base_bdevs[i];

			/* Mark the removed base bdev FAILED or MISSING in the superblock */
			if (sb_base_bdev->state == RAID_SB_BASE_BDEV_CONFIGURED &&
			    sb_base_bdev->slot == slot) {
				if (base_info->is_failed) {
					sb_base_bdev->state = RAID_SB_BASE_BDEV_FAILED;
				} else {
					sb_base_bdev->state = RAID_SB_BASE_BDEV_MISSING;
				}

				/* Removal continues in the superblock write callback */
				raid_bdev_write_superblock(raid_bdev, raid_bdev_remove_base_bdev_write_sb_cb, base_info);
				return;
			}
		}
	}

	raid_bdev_remove_base_bdev_cont(base_info);
}

/* Start the removal sequence by quiescing the raid bdev; app thread only */
static int
raid_bdev_remove_base_bdev_quiesce(struct raid_base_bdev_info *base_info)
{
	assert(spdk_get_thread() == spdk_thread_get_app_thread());

	return spdk_bdev_quiesce(&base_info->raid_bdev->bdev, &g_raid_if,
				 raid_bdev_remove_base_bdev_on_quiesced, base_info);
}

/* Message context for removing a base bdev while a background process runs */
struct raid_bdev_process_base_bdev_remove_ctx {
	struct raid_bdev_process *process;
	struct raid_base_bdev_info *base_info;
	/* Snapshot of the operational count taken on the app thread */
	uint8_t num_base_bdevs_operational;
};

/* Runs on the app thread: kick off the quiesce-based removal sequence */
static void
_raid_bdev_process_base_bdev_remove_cont(void *ctx)
{
	struct raid_base_bdev_info *base_info = ctx;
	int ret;

	ret = raid_bdev_remove_base_bdev_quiesce(base_info);
	if (ret != 0) {
		raid_bdev_remove_base_bdev_done(base_info, ret);
	}
}

/* Free the message context and hand the removal back to the app thread */
static void
raid_bdev_process_base_bdev_remove_cont(void *_ctx)
{
	struct raid_bdev_process_base_bdev_remove_ctx *ctx = _ctx;
	struct raid_base_bdev_info *base_info = ctx->base_info;

	free(ctx);

	spdk_thread_send_msg(spdk_thread_get_app_thread(), _raid_bdev_process_base_bdev_remove_cont,
			     base_info);
}

/* Runs on the process thread: decide whether the background process must be
 * stopped before the base bdev can be removed */
static void
_raid_bdev_process_base_bdev_remove(void *_ctx)
{
	struct raid_bdev_process_base_bdev_remove_ctx *ctx = _ctx;
	struct raid_bdev_process *process = ctx->process;
	int ret;

	if (ctx->base_info != process->target &&
	    ctx->num_base_bdevs_operational > process->raid_bdev->min_base_bdevs_operational) {
		/* process doesn't need to be stopped */
		raid_bdev_process_base_bdev_remove_cont(ctx);
		return;
	}

	assert(process->state > RAID_PROCESS_STATE_INIT &&
	       process->state < RAID_PROCESS_STATE_STOPPED);

	/* Defer the removal until the process has stopped */
	ret = raid_bdev_process_add_finish_action(process, raid_bdev_process_base_bdev_remove_cont, ctx);
	if (ret != 0) {
		raid_bdev_remove_base_bdev_done(ctx->base_info, ret);
		free(ctx);
		return;
	}

	process->state = RAID_PROCESS_STATE_STOPPING;

	if (process->status == 0) {
		process->status = -ENODEV;
	}
}

/* App-thread entry point for removing a base bdev while a background process
 * is running; forwards the request to the process thread */
static int
raid_bdev_process_base_bdev_remove(struct raid_bdev_process *process,
				   struct raid_base_bdev_info *base_info)
{
	struct raid_bdev_process_base_bdev_remove_ctx *ctx;

	assert(spdk_get_thread() == spdk_thread_get_app_thread());

	ctx = calloc(1, sizeof(*ctx));
	if (ctx == NULL) {
		return -ENOMEM;
	}

	/*
	 * We have to send the process and num_base_bdevs_operational in the message ctx
	 * because the process thread should not access raid_bdev's properties. Particularly,
	 * raid_bdev->process may be cleared by the time the message is handled, but ctx->process
	 * will still be valid until the process is fully stopped.
2146 */ 2147 ctx->base_info = base_info; 2148 ctx->process = process; 2149 /* 2150 * raid_bdev->num_base_bdevs_operational can't be used here because it is decremented 2151 * after the removal and more than one base bdev may be removed at the same time 2152 */ 2153 RAID_FOR_EACH_BASE_BDEV(process->raid_bdev, base_info) { 2154 if (base_info->is_configured && !base_info->remove_scheduled) { 2155 ctx->num_base_bdevs_operational++; 2156 } 2157 } 2158 2159 spdk_thread_send_msg(process->thread, _raid_bdev_process_base_bdev_remove, ctx); 2160 2161 return 0; 2162 } 2163 2164 static int 2165 _raid_bdev_remove_base_bdev(struct raid_base_bdev_info *base_info, 2166 raid_base_bdev_cb cb_fn, void *cb_ctx) 2167 { 2168 struct raid_bdev *raid_bdev = base_info->raid_bdev; 2169 int ret = 0; 2170 2171 SPDK_DEBUGLOG(bdev_raid, "%s\n", base_info->name); 2172 2173 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 2174 2175 if (base_info->remove_scheduled || !base_info->is_configured) { 2176 return -ENODEV; 2177 } 2178 2179 assert(base_info->desc); 2180 base_info->remove_scheduled = true; 2181 2182 if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) { 2183 /* 2184 * As raid bdev is not registered yet or already unregistered, 2185 * so cleanup should be done here itself. 2186 * 2187 * Removing a base bdev at this stage does not change the number of operational 2188 * base bdevs, only the number of discovered base bdevs. 2189 */ 2190 raid_bdev_free_base_bdev_resource(base_info); 2191 base_info->remove_scheduled = false; 2192 if (raid_bdev->num_base_bdevs_discovered == 0 && 2193 raid_bdev->state == RAID_BDEV_STATE_OFFLINE) { 2194 /* There is no base bdev for this raid, so free the raid device. */ 2195 raid_bdev_cleanup_and_free(raid_bdev); 2196 } 2197 if (cb_fn != NULL) { 2198 cb_fn(cb_ctx, 0); 2199 } 2200 } else if (raid_bdev->min_base_bdevs_operational == raid_bdev->num_base_bdevs) { 2201 /* This raid bdev does not tolerate removing a base bdev. 
*/ 2202 raid_bdev->num_base_bdevs_operational--; 2203 raid_bdev_deconfigure(raid_bdev, cb_fn, cb_ctx); 2204 } else { 2205 base_info->remove_cb = cb_fn; 2206 base_info->remove_cb_ctx = cb_ctx; 2207 2208 if (raid_bdev->process != NULL) { 2209 ret = raid_bdev_process_base_bdev_remove(raid_bdev->process, base_info); 2210 } else { 2211 ret = raid_bdev_remove_base_bdev_quiesce(base_info); 2212 } 2213 2214 if (ret != 0) { 2215 base_info->remove_scheduled = false; 2216 } 2217 } 2218 2219 return ret; 2220 } 2221 2222 /* 2223 * brief: 2224 * raid_bdev_remove_base_bdev function is called by below layers when base_bdev 2225 * is removed. This function checks if this base bdev is part of any raid bdev 2226 * or not. If yes, it takes necessary action on that particular raid bdev. 2227 * params: 2228 * base_bdev - pointer to base bdev which got removed 2229 * cb_fn - callback function 2230 * cb_arg - argument to callback function 2231 * returns: 2232 * 0 - success 2233 * non zero - failure 2234 */ 2235 int 2236 raid_bdev_remove_base_bdev(struct spdk_bdev *base_bdev, raid_base_bdev_cb cb_fn, void *cb_ctx) 2237 { 2238 struct raid_base_bdev_info *base_info; 2239 2240 /* Find the raid_bdev which has claimed this base_bdev */ 2241 base_info = raid_bdev_find_base_info_by_bdev(base_bdev); 2242 if (!base_info) { 2243 SPDK_ERRLOG("bdev to remove '%s' not found\n", base_bdev->name); 2244 return -ENODEV; 2245 } 2246 2247 return _raid_bdev_remove_base_bdev(base_info, cb_fn, cb_ctx); 2248 } 2249 2250 static void 2251 raid_bdev_fail_base_remove_cb(void *ctx, int status) 2252 { 2253 struct raid_base_bdev_info *base_info = ctx; 2254 2255 if (status != 0) { 2256 SPDK_WARNLOG("Failed to remove base bdev %s\n", base_info->name); 2257 base_info->is_failed = false; 2258 } 2259 } 2260 2261 static void 2262 _raid_bdev_fail_base_bdev(void *ctx) 2263 { 2264 struct raid_base_bdev_info *base_info = ctx; 2265 int rc; 2266 2267 if (base_info->is_failed) { 2268 return; 2269 } 2270 base_info->is_failed = true; 
2271 2272 SPDK_NOTICELOG("Failing base bdev in slot %d ('%s') of raid bdev '%s'\n", 2273 raid_bdev_base_bdev_slot(base_info), base_info->name, base_info->raid_bdev->bdev.name); 2274 2275 rc = _raid_bdev_remove_base_bdev(base_info, raid_bdev_fail_base_remove_cb, base_info); 2276 if (rc != 0) { 2277 raid_bdev_fail_base_remove_cb(base_info, rc); 2278 } 2279 } 2280 2281 void 2282 raid_bdev_fail_base_bdev(struct raid_base_bdev_info *base_info) 2283 { 2284 spdk_thread_exec_msg(spdk_thread_get_app_thread(), _raid_bdev_fail_base_bdev, base_info); 2285 } 2286 2287 static void 2288 raid_bdev_resize_write_sb_cb(int status, struct raid_bdev *raid_bdev, void *ctx) 2289 { 2290 if (status != 0) { 2291 SPDK_ERRLOG("Failed to write raid bdev '%s' superblock after resizing the bdev: %s\n", 2292 raid_bdev->bdev.name, spdk_strerror(-status)); 2293 } 2294 } 2295 2296 /* 2297 * brief: 2298 * raid_bdev_resize_base_bdev function is called by below layers when base_bdev 2299 * is resized. This function checks if the smallest size of the base_bdevs is changed. 2300 * If yes, call module handler to resize the raid_bdev if implemented. 2301 * params: 2302 * base_bdev - pointer to base bdev which got resized. 
 * returns:
 * none
 */
static void
raid_bdev_resize_base_bdev(struct spdk_bdev *base_bdev)
{
	struct raid_bdev *raid_bdev;
	struct raid_base_bdev_info *base_info;
	uint64_t blockcnt_old;

	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_resize_base_bdev\n");

	base_info = raid_bdev_find_base_info_by_bdev(base_bdev);

	/* Find the raid_bdev which has claimed this base_bdev */
	if (!base_info) {
		SPDK_ERRLOG("raid_bdev whose base_bdev '%s' not found\n", base_bdev->name);
		return;
	}
	raid_bdev = base_info->raid_bdev;

	assert(spdk_get_thread() == spdk_thread_get_app_thread());

	SPDK_NOTICELOG("base_bdev '%s' was resized: old size %" PRIu64 ", new size %" PRIu64 "\n",
		       base_bdev->name, base_info->blockcnt, base_bdev->blockcnt);

	base_info->blockcnt = base_bdev->blockcnt;

	if (!raid_bdev->module->resize) {
		return;
	}

	blockcnt_old = raid_bdev->bdev.blockcnt;
	if (raid_bdev->module->resize(raid_bdev) == false) {
		/* the module did not change the raid bdev size */
		return;
	}

	SPDK_NOTICELOG("raid bdev '%s': block count was changed from %" PRIu64 " to %" PRIu64 "\n",
		       raid_bdev->bdev.name, blockcnt_old, raid_bdev->bdev.blockcnt);

	if (raid_bdev->superblock_enabled) {
		struct raid_bdev_superblock *sb = raid_bdev->sb;
		uint8_t i;

		/* Refresh per-base-bdev data sizes and the raid size, then persist */
		for (i = 0; i < sb->base_bdevs_size; i++) {
			struct raid_bdev_sb_base_bdev *sb_base_bdev = &sb->base_bdevs[i];

			if (sb_base_bdev->slot < raid_bdev->num_base_bdevs) {
				base_info = &raid_bdev->base_bdev_info[sb_base_bdev->slot];
				sb_base_bdev->data_size = base_info->data_size;
			}
		}
		sb->raid_size = raid_bdev->bdev.blockcnt;
		raid_bdev_write_superblock(raid_bdev, raid_bdev_resize_write_sb_cb, NULL);
	}
}

/*
 * brief:
 * raid_bdev_event_base_bdev function is called by below layers when base_bdev
 * triggers asynchronous event.
 * params:
 * type - event details.
 * bdev - bdev that triggered event.
 * event_ctx - context for event.
 * returns:
 * none
 */
static void
raid_bdev_event_base_bdev(enum spdk_bdev_event_type type, struct spdk_bdev *bdev,
			  void *event_ctx)
{
	int rc;

	switch (type) {
	case SPDK_BDEV_EVENT_REMOVE:
		rc = raid_bdev_remove_base_bdev(bdev, NULL, NULL);
		if (rc != 0) {
			SPDK_ERRLOG("Failed to remove base bdev %s: %s\n",
				    spdk_bdev_get_name(bdev), spdk_strerror(-rc));
		}
		break;
	case SPDK_BDEV_EVENT_RESIZE:
		raid_bdev_resize_base_bdev(bdev);
		break;
	default:
		SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type);
		break;
	}
}

/*
 * brief:
 * Deletes the specified raid bdev
 * params:
 * raid_bdev - pointer to raid bdev
 * cb_fn - callback function
 * cb_arg - argument to callback function
 */
void
raid_bdev_delete(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn, void *cb_arg)
{
	struct raid_base_bdev_info *base_info;

	SPDK_DEBUGLOG(bdev_raid, "delete raid bdev: %s\n", raid_bdev->bdev.name);

	if (raid_bdev->destroy_started) {
		SPDK_DEBUGLOG(bdev_raid, "destroying raid bdev %s is already started\n",
			      raid_bdev->bdev.name);
		if (cb_fn) {
			cb_fn(cb_arg, -EALREADY);
		}
		return;
	}

	raid_bdev->destroy_started = true;

	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		base_info->remove_scheduled = true;

		if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) {
			/*
			 * As raid bdev is not registered yet or already unregistered,
			 * so cleanup should be done here itself.
			 */
			raid_bdev_free_base_bdev_resource(base_info);
		}
	}

	if (raid_bdev->num_base_bdevs_discovered == 0) {
		/* There is no base bdev for this raid, so free the raid device. */
		raid_bdev_cleanup_and_free(raid_bdev);
		if (cb_fn) {
			cb_fn(cb_arg, 0);
		}
	} else {
		raid_bdev_deconfigure(raid_bdev, cb_fn, cb_arg);
	}
}

/* Completion of the superblock write done after a background process finishes */
static void
raid_bdev_process_finish_write_sb_cb(int status, struct raid_bdev *raid_bdev, void *ctx)
{
	if (status != 0) {
		SPDK_ERRLOG("Failed to write raid bdev '%s' superblock after background process finished: %s\n",
			    raid_bdev->bdev.name, spdk_strerror(-status));
	}
}

/*
 * App thread: after a successful background process, mark the now-configured
 * base bdevs as CONFIGURED in the superblock and persist it.
 */
static void
raid_bdev_process_finish_write_sb(void *ctx)
{
	struct raid_bdev *raid_bdev = ctx;
	struct raid_bdev_superblock *sb = raid_bdev->sb;
	struct raid_bdev_sb_base_bdev *sb_base_bdev;
	struct raid_base_bdev_info *base_info;
	uint8_t i;

	for (i = 0; i < sb->base_bdevs_size; i++) {
		sb_base_bdev = &sb->base_bdevs[i];

		if (sb_base_bdev->state != RAID_SB_BASE_BDEV_CONFIGURED &&
		    sb_base_bdev->slot < raid_bdev->num_base_bdevs) {
			base_info = &raid_bdev->base_bdev_info[sb_base_bdev->slot];
			if (base_info->is_configured) {
				sb_base_bdev->state = RAID_SB_BASE_BDEV_CONFIGURED;
				sb_base_bdev->data_offset = base_info->data_offset;
				spdk_uuid_copy(&sb_base_bdev->uuid, &base_info->uuid);
			}
		}
	}

	raid_bdev_write_superblock(raid_bdev, raid_bdev_process_finish_write_sb_cb, NULL);
}

static void raid_bdev_process_free(struct raid_bdev_process *process);

/*
 * Process thread: last step of stopping a background process - run the queued
 * finish actions, free the process and exit the dedicated process thread.
 */
static void
_raid_bdev_process_finish_done(void *ctx)
{
	struct raid_bdev_process *process = ctx;
	struct raid_process_finish_action *finish_action;

	while ((finish_action = TAILQ_FIRST(&process->finish_actions)) != NULL) {
		TAILQ_REMOVE(&process->finish_actions, finish_action, link);
		finish_action->cb(finish_action->cb_ctx);
		free(finish_action);
	}

	spdk_poller_unregister(&process->qos.process_continue_poller);

	raid_bdev_process_free(process);

	spdk_thread_exit(spdk_get_thread());
}

/* Target base bdev removal (after a failed process) completed */
static void
raid_bdev_process_finish_target_removed(void *ctx, int status)
{
	struct raid_bdev_process *process = ctx;

	if (status != 0) {
		SPDK_ERRLOG("Failed to remove target bdev: %s\n", spdk_strerror(-status));
	}

	spdk_thread_send_msg(process->thread, _raid_bdev_process_finish_done, process);
}

/*
 * App thread: raid bdev unquiesced after the process finished. If the process
 * failed, also remove its target base bdev before completing.
 */
static void
raid_bdev_process_finish_unquiesced(void *ctx, int status)
{
	struct raid_bdev_process *process = ctx;

	if (status != 0) {
		SPDK_ERRLOG("Failed to unquiesce bdev: %s\n", spdk_strerror(-status));
	}

	if (process->status != 0) {
		status = _raid_bdev_remove_base_bdev(process->target, raid_bdev_process_finish_target_removed,
						     process);
		if (status != 0) {
			raid_bdev_process_finish_target_removed(process, status);
		}
		return;
	}

	spdk_thread_send_msg(process->thread, _raid_bdev_process_finish_done, process);
}

/* App thread: unquiesce the raid bdev as part of finishing the process */
static void
raid_bdev_process_finish_unquiesce(void *ctx)
{
	struct raid_bdev_process *process = ctx;
	int rc;

	rc = spdk_bdev_unquiesce(&process->raid_bdev->bdev, &g_raid_if,
				 raid_bdev_process_finish_unquiesced, process);
	if (rc != 0) {
		raid_bdev_process_finish_unquiesced(process, rc);
	}
}

/* Process thread: log the result, release the channel and hand off to the app thread */
static void
raid_bdev_process_finish_done(void *ctx)
{
	struct raid_bdev_process *process = ctx;
	struct raid_bdev *raid_bdev = process->raid_bdev;

	if (process->raid_ch != NULL) {
		spdk_put_io_channel(spdk_io_channel_from_ctx(process->raid_ch));
	}

	process->state = RAID_PROCESS_STATE_STOPPED;

	if (process->status == 0) {
		SPDK_NOTICELOG("Finished %s on raid bdev %s\n",
			       raid_bdev_process_to_str(process->type),
			       raid_bdev->bdev.name);
		if (raid_bdev->superblock_enabled) {
			spdk_thread_send_msg(spdk_thread_get_app_thread(),
					     raid_bdev_process_finish_write_sb,
					     raid_bdev);
		}
	} else {
		SPDK_WARNLOG("Finished %s on raid bdev %s: %s\n",
			     raid_bdev_process_to_str(process->type),
			     raid_bdev->bdev.name,
			     spdk_strerror(-process->status));
	}

	spdk_thread_send_msg(spdk_thread_get_app_thread(), raid_bdev_process_finish_unquiesce,
			     process);
}

/* Channel iteration completed: continue finishing on the process thread */
static void
__raid_bdev_process_finish(struct spdk_io_channel_iter *i, int status)
{
	struct raid_bdev_process *process = spdk_io_channel_iter_get_ctx(i);

	spdk_thread_send_msg(process->thread, raid_bdev_process_finish_done, process);
}

/*
 * Per-channel: on success, promote the process target's channel to a regular
 * base channel, then release the per-channel process resources.
 */
static void
raid_bdev_channel_process_finish(struct spdk_io_channel_iter *i)
{
	struct raid_bdev_process *process = spdk_io_channel_iter_get_ctx(i);
	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
	struct raid_bdev_io_channel *raid_ch = spdk_io_channel_get_ctx(ch);

	if (process->status == 0) {
		uint8_t slot = raid_bdev_base_bdev_slot(process->target);

		raid_ch->base_channel[slot] = raid_ch->process.target_ch;
		raid_ch->process.target_ch = NULL;
	}

	raid_bdev_ch_process_cleanup(raid_ch);

	spdk_for_each_channel_continue(i, 0);
}

/* App thread: bdev quiesced - detach the process and update every channel */
static void
raid_bdev_process_finish_quiesced(void *ctx, int status)
{
	struct raid_bdev_process *process = ctx;
	struct raid_bdev *raid_bdev = process->raid_bdev;

	if (status != 0) {
		SPDK_ERRLOG("Failed to quiesce bdev: %s\n", spdk_strerror(-status));
		return;
	}

	raid_bdev->process = NULL;
	process->target->is_process_target = false;

	spdk_for_each_channel(process->raid_bdev, raid_bdev_channel_process_finish, process,
			      __raid_bdev_process_finish);
}

/* App thread: quiesce the raid bdev to begin finishing the process */
static void
_raid_bdev_process_finish(void *ctx)
{
	struct raid_bdev_process *process = ctx;
	int rc;

	rc =
	spdk_bdev_quiesce(&process->raid_bdev->bdev, &g_raid_if,
			  raid_bdev_process_finish_quiesced, process);
	if (rc != 0) {
		raid_bdev_process_finish_quiesced(ctx, rc);
	}
}

/* Process thread: start the finish sequence on the app thread */
static void
raid_bdev_process_do_finish(struct raid_bdev_process *process)
{
	spdk_thread_send_msg(spdk_thread_get_app_thread(), _raid_bdev_process_finish, process);
}

static void raid_bdev_process_unlock_window_range(struct raid_bdev_process *process);
static void raid_bdev_process_thread_run(struct raid_bdev_process *process);

/*
 * Process thread: begin stopping the process with the given status. The first
 * non-zero status is sticky. If a window range is still locked it is unlocked
 * first; the state machine then proceeds via raid_bdev_process_thread_run().
 */
static void
raid_bdev_process_finish(struct raid_bdev_process *process, int status)
{
	assert(spdk_get_thread() == process->thread);

	if (process->status == 0) {
		process->status = status;
	}

	if (process->state >= RAID_PROCESS_STATE_STOPPING) {
		return;
	}

	assert(process->state == RAID_PROCESS_STATE_RUNNING);
	process->state = RAID_PROCESS_STATE_STOPPING;

	if (process->window_range_locked) {
		raid_bdev_process_unlock_window_range(process);
	} else {
		raid_bdev_process_thread_run(process);
	}
}

/* Window range unlocked: advance the window offset and continue the state machine */
static void
raid_bdev_process_window_range_unlocked(void *ctx, int status)
{
	struct raid_bdev_process *process = ctx;

	if (status != 0) {
		SPDK_ERRLOG("Failed to unlock LBA range: %s\n", spdk_strerror(-status));
		raid_bdev_process_finish(process, status);
		return;
	}

	process->window_range_locked = false;
	process->window_offset += process->window_size;

	raid_bdev_process_thread_run(process);
}

/* Unquiesce the LBA range of the current window */
static void
raid_bdev_process_unlock_window_range(struct raid_bdev_process *process)
{
	int rc;

	assert(process->window_range_locked == true);

	rc = spdk_bdev_unquiesce_range(&process->raid_bdev->bdev, &g_raid_if,
				       process->window_offset, process->max_window_size,
				       raid_bdev_process_window_range_unlocked, process);
	if (rc != 0) {
		raid_bdev_process_window_range_unlocked(process, rc);
	}
}

/* All channels updated for the completed window: unlock its LBA range */
static void
raid_bdev_process_channels_update_done(struct spdk_io_channel_iter *i, int status)
{
	struct raid_bdev_process *process = spdk_io_channel_iter_get_ctx(i);

	raid_bdev_process_unlock_window_range(process);
}

/* Per-channel: publish the end of the processed window to the channel */
static void
raid_bdev_process_channel_update(struct spdk_io_channel_iter *i)
{
	struct raid_bdev_process *process = spdk_io_channel_iter_get_ctx(i);
	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
	struct raid_bdev_io_channel *raid_ch = spdk_io_channel_get_ctx(ch);

	raid_ch->process.offset = process->window_offset + process->window_size;

	spdk_for_each_channel_continue(i, 0);
}

/*
 * Called by raid modules when a process request completes. When the whole
 * window has completed, either finish the process (on error) or update all
 * channels and unlock the window's LBA range.
 */
void
raid_bdev_process_request_complete(struct raid_bdev_process_request *process_req, int status)
{
	struct raid_bdev_process *process = process_req->process;

	/* Return the request to the free list */
	TAILQ_INSERT_TAIL(&process->requests, process_req, link);

	assert(spdk_get_thread() == process->thread);
	assert(process->window_remaining >= process_req->num_blocks);

	if (status != 0) {
		process->window_status = status;
	}

	process->window_remaining -= process_req->num_blocks;
	if (process->window_remaining == 0) {
		if (process->window_status != 0) {
			raid_bdev_process_finish(process, process->window_status);
			return;
		}

		spdk_for_each_channel(process->raid_bdev, raid_bdev_process_channel_update, process,
				      raid_bdev_process_channels_update_done);
	}
}

/*
 * Submit one process request covering up to num_blocks starting at
 * offset_blocks. Returns the number of blocks actually submitted, 0 when no
 * free request is available, or a negative errno on submission failure.
 */
static int
raid_bdev_submit_process_request(struct raid_bdev_process *process, uint64_t offset_blocks,
				 uint32_t num_blocks)
{
	struct raid_bdev *raid_bdev = process->raid_bdev;
	struct raid_bdev_process_request *process_req;
	int ret;

	process_req = TAILQ_FIRST(&process->requests);
	if (process_req == NULL) {
		assert(process->window_remaining > 0);
		return 0;
	}

	process_req->target = process->target;
	process_req->target_ch = process->raid_ch->process.target_ch;
	process_req->offset_blocks = offset_blocks;
	process_req->num_blocks = num_blocks;
	process_req->iov.iov_len = num_blocks * raid_bdev->bdev.blocklen;

	ret = raid_bdev->module->submit_process_request(process_req, process->raid_ch);
	if (ret <= 0) {
		if (ret < 0) {
			SPDK_ERRLOG("Failed to submit process request on %s: %s\n",
				    raid_bdev->bdev.name, spdk_strerror(-ret));
			process->window_status = ret;
		}
		return ret;
	}

	/* The module may accept fewer blocks than requested */
	process_req->num_blocks = ret;
	TAILQ_REMOVE(&process->requests, process_req, link);

	return ret;
}

/* Submit as many requests as possible for the current (locked) window */
static void
_raid_bdev_process_thread_run(struct raid_bdev_process *process)
{
	struct raid_bdev *raid_bdev = process->raid_bdev;
	uint64_t offset = process->window_offset;
	const uint64_t offset_end = spdk_min(offset + process->max_window_size, raid_bdev->bdev.blockcnt);
	int ret;

	while (offset < offset_end) {
		ret = raid_bdev_submit_process_request(process, offset, offset_end - offset);
		if (ret <= 0) {
			break;
		}

		process->window_remaining += ret;
		offset += ret;
	}

	if (process->window_remaining > 0) {
		process->window_size = process->window_remaining;
	} else {
		raid_bdev_process_finish(process, process->window_status);
	}
}

/* Window LBA range locked: run the window unless the process is being stopped */
static void
raid_bdev_process_window_range_locked(void *ctx, int status)
{
	struct raid_bdev_process *process = ctx;

	if (status != 0) {
		SPDK_ERRLOG("Failed to lock LBA range: %s\n", spdk_strerror(-status));
		raid_bdev_process_finish(process, status);
		return;
	}

	process->window_range_locked = true;

	if (process->state == RAID_PROCESS_STATE_STOPPING) {
		raid_bdev_process_unlock_window_range(process);
		return;
	}

	_raid_bdev_process_thread_run(process);
}

/*
 * QoS token bucket: refill based on elapsed ticks, then consume one window's
 * worth of bytes. Returns true if the window may proceed now.
 */
static bool
raid_bdev_process_consume_token(struct raid_bdev_process *process)
{
	struct raid_bdev *raid_bdev = process->raid_bdev;
	uint64_t now = spdk_get_ticks();

	process->qos.bytes_available = spdk_min(process->qos.bytes_max,
						process->qos.bytes_available +
						(now - process->qos.last_tsc) * process->qos.bytes_per_tsc);
	process->qos.last_tsc = now;
	if (process->qos.bytes_available > 0.0) {
		process->qos.bytes_available -= process->window_size * raid_bdev->bdev.blocklen;
		return true;
	}
	return false;
}

/*
 * Lock (quiesce) the current window's LBA range, subject to QoS. Returns false
 * when QoS throttling postponed the lock (the continue poller will retry),
 * true when the lock was initiated.
 */
static bool
raid_bdev_process_lock_window_range(struct raid_bdev_process *process)
{
	struct raid_bdev *raid_bdev = process->raid_bdev;
	int rc;

	assert(process->window_range_locked == false);

	if (process->qos.enable_qos) {
		if (raid_bdev_process_consume_token(process)) {
			spdk_poller_pause(process->qos.process_continue_poller);
		} else {
			spdk_poller_resume(process->qos.process_continue_poller);
			return false;
		}
	}

	rc = spdk_bdev_quiesce_range(&raid_bdev->bdev, &g_raid_if,
				     process->window_offset, process->max_window_size,
				     raid_bdev_process_window_range_locked, process);
	if (rc != 0) {
		raid_bdev_process_window_range_locked(process, rc);
	}
	return true;
}

/* Poller used to retry window locking while QoS-throttled */
static int
raid_bdev_process_continue_poll(void *arg)
{
	struct raid_bdev_process *process = arg;

	if (raid_bdev_process_lock_window_range(process)) {
		return SPDK_POLLER_BUSY;
	}
	return SPDK_POLLER_IDLE;
}

/*
 * Process thread: main state machine step - finish when stopping or when the
 * end of the bdev is reached, otherwise lock and process the next window.
 */
static void
raid_bdev_process_thread_run(struct raid_bdev_process *process)
{
	struct raid_bdev *raid_bdev = process->raid_bdev;

	assert(spdk_get_thread() == process->thread);
	assert(process->window_remaining == 0);
	assert(process->window_range_locked == false);

	if (process->state == RAID_PROCESS_STATE_STOPPING) {
		raid_bdev_process_do_finish(process);
		return;
	}

	if (process->window_offset == raid_bdev->bdev.blockcnt) {
		SPDK_DEBUGLOG(bdev_raid, "process completed on %s\n", raid_bdev->bdev.name);
		raid_bdev_process_finish(process, 0);
		return;
	}

	/* Clamp the last window to the end of the bdev */
	process->max_window_size = spdk_min(raid_bdev->bdev.blockcnt - process->window_offset,
					    process->max_window_size);
	raid_bdev_process_lock_window_range(process);
}

/* First message on the dedicated process thread: get a channel and start running */
static void
raid_bdev_process_thread_init(void *ctx)
{
	struct raid_bdev_process *process = ctx;
	struct raid_bdev *raid_bdev = process->raid_bdev;
	struct spdk_io_channel *ch;

	process->thread = spdk_get_thread();

	ch = spdk_get_io_channel(raid_bdev);
	if (ch == NULL) {
		process->status = -ENOMEM;
		raid_bdev_process_do_finish(process);
		return;
	}

	process->raid_ch = spdk_io_channel_get_ctx(ch);
	process->state = RAID_PROCESS_STATE_RUNNING;

	if (process->qos.enable_qos) {
		process->qos.process_continue_poller = SPDK_POLLER_REGISTER(raid_bdev_process_continue_poll,
						       process, 0);
		/* Paused until QoS throttling actually kicks in */
		spdk_poller_pause(process->qos.process_continue_poller);
	}

	SPDK_NOTICELOG("Started %s on raid bdev %s\n",
		       raid_bdev_process_to_str(process->type), raid_bdev->bdev.name);

	raid_bdev_process_thread_run(process);
}

/* All channels cleaned up after a failed start: remove the target, free the process */
static void
raid_bdev_channels_abort_start_process_done(struct spdk_io_channel_iter *i, int status)
{
	struct raid_bdev_process *process = spdk_io_channel_iter_get_ctx(i);

	_raid_bdev_remove_base_bdev(process->target, NULL, NULL);
	raid_bdev_process_free(process);

	/* TODO: update sb */
}

/* Per-channel: undo the process-related channel state after a failed start */
static void
raid_bdev_channel_abort_start_process(struct spdk_io_channel_iter *i)
{
2951 struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); 2952 struct raid_bdev_io_channel *raid_ch = spdk_io_channel_get_ctx(ch); 2953 2954 raid_bdev_ch_process_cleanup(raid_ch); 2955 2956 spdk_for_each_channel_continue(i, 0); 2957 } 2958 2959 static void 2960 raid_bdev_channels_start_process_done(struct spdk_io_channel_iter *i, int status) 2961 { 2962 struct raid_bdev_process *process = spdk_io_channel_iter_get_ctx(i); 2963 struct raid_bdev *raid_bdev = process->raid_bdev; 2964 struct spdk_thread *thread; 2965 char thread_name[RAID_BDEV_SB_NAME_SIZE + 16]; 2966 2967 if (status == 0 && 2968 (process->target->remove_scheduled || !process->target->is_configured || 2969 raid_bdev->num_base_bdevs_operational <= raid_bdev->min_base_bdevs_operational)) { 2970 /* a base bdev was removed before we got here */ 2971 status = -ENODEV; 2972 } 2973 2974 if (status != 0) { 2975 SPDK_ERRLOG("Failed to start %s on %s: %s\n", 2976 raid_bdev_process_to_str(process->type), raid_bdev->bdev.name, 2977 spdk_strerror(-status)); 2978 goto err; 2979 } 2980 2981 snprintf(thread_name, sizeof(thread_name), "%s_%s", 2982 raid_bdev->bdev.name, raid_bdev_process_to_str(process->type)); 2983 2984 thread = spdk_thread_create(thread_name, NULL); 2985 if (thread == NULL) { 2986 SPDK_ERRLOG("Failed to create %s thread for %s\n", 2987 raid_bdev_process_to_str(process->type), raid_bdev->bdev.name); 2988 goto err; 2989 } 2990 2991 raid_bdev->process = process; 2992 2993 spdk_thread_send_msg(thread, raid_bdev_process_thread_init, process); 2994 2995 return; 2996 err: 2997 spdk_for_each_channel(process->raid_bdev, raid_bdev_channel_abort_start_process, process, 2998 raid_bdev_channels_abort_start_process_done); 2999 } 3000 3001 static void 3002 raid_bdev_channel_start_process(struct spdk_io_channel_iter *i) 3003 { 3004 struct raid_bdev_process *process = spdk_io_channel_iter_get_ctx(i); 3005 struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); 3006 struct raid_bdev_io_channel 
*raid_ch = spdk_io_channel_get_ctx(ch); 3007 int rc; 3008 3009 rc = raid_bdev_ch_process_setup(raid_ch, process); 3010 3011 spdk_for_each_channel_continue(i, rc); 3012 } 3013 3014 static void 3015 raid_bdev_process_start(struct raid_bdev_process *process) 3016 { 3017 struct raid_bdev *raid_bdev = process->raid_bdev; 3018 3019 assert(raid_bdev->module->submit_process_request != NULL); 3020 3021 spdk_for_each_channel(raid_bdev, raid_bdev_channel_start_process, process, 3022 raid_bdev_channels_start_process_done); 3023 } 3024 3025 static void 3026 raid_bdev_process_request_free(struct raid_bdev_process_request *process_req) 3027 { 3028 spdk_dma_free(process_req->iov.iov_base); 3029 spdk_dma_free(process_req->md_buf); 3030 free(process_req); 3031 } 3032 3033 static struct raid_bdev_process_request * 3034 raid_bdev_process_alloc_request(struct raid_bdev_process *process) 3035 { 3036 struct raid_bdev *raid_bdev = process->raid_bdev; 3037 struct raid_bdev_process_request *process_req; 3038 3039 process_req = calloc(1, sizeof(*process_req)); 3040 if (process_req == NULL) { 3041 return NULL; 3042 } 3043 3044 process_req->process = process; 3045 process_req->iov.iov_len = process->max_window_size * raid_bdev->bdev.blocklen; 3046 process_req->iov.iov_base = spdk_dma_malloc(process_req->iov.iov_len, 4096, 0); 3047 if (process_req->iov.iov_base == NULL) { 3048 free(process_req); 3049 return NULL; 3050 } 3051 if (spdk_bdev_is_md_separate(&raid_bdev->bdev)) { 3052 process_req->md_buf = spdk_dma_malloc(process->max_window_size * raid_bdev->bdev.md_len, 4096, 0); 3053 if (process_req->md_buf == NULL) { 3054 raid_bdev_process_request_free(process_req); 3055 return NULL; 3056 } 3057 } 3058 3059 return process_req; 3060 } 3061 3062 static void 3063 raid_bdev_process_free(struct raid_bdev_process *process) 3064 { 3065 struct raid_bdev_process_request *process_req; 3066 3067 while ((process_req = TAILQ_FIRST(&process->requests)) != NULL) { 3068 TAILQ_REMOVE(&process->requests, 
process_req, link); 3069 raid_bdev_process_request_free(process_req); 3070 } 3071 3072 free(process); 3073 } 3074 3075 static struct raid_bdev_process * 3076 raid_bdev_process_alloc(struct raid_bdev *raid_bdev, enum raid_process_type type, 3077 struct raid_base_bdev_info *target) 3078 { 3079 struct raid_bdev_process *process; 3080 struct raid_bdev_process_request *process_req; 3081 int i; 3082 3083 process = calloc(1, sizeof(*process)); 3084 if (process == NULL) { 3085 return NULL; 3086 } 3087 3088 process->raid_bdev = raid_bdev; 3089 process->type = type; 3090 process->target = target; 3091 process->max_window_size = spdk_max(spdk_divide_round_up(g_opts.process_window_size_kb * 1024UL, 3092 spdk_bdev_get_data_block_size(&raid_bdev->bdev)), 3093 raid_bdev->bdev.write_unit_size); 3094 TAILQ_INIT(&process->requests); 3095 TAILQ_INIT(&process->finish_actions); 3096 3097 if (g_opts.process_max_bandwidth_mb_sec != 0) { 3098 process->qos.enable_qos = true; 3099 process->qos.last_tsc = spdk_get_ticks(); 3100 process->qos.bytes_per_tsc = g_opts.process_max_bandwidth_mb_sec * 1024 * 1024.0 / 3101 spdk_get_ticks_hz(); 3102 process->qos.bytes_max = g_opts.process_max_bandwidth_mb_sec * 1024 * 1024.0 / SPDK_SEC_TO_MSEC; 3103 process->qos.bytes_available = 0.0; 3104 } 3105 3106 for (i = 0; i < RAID_BDEV_PROCESS_MAX_QD; i++) { 3107 process_req = raid_bdev_process_alloc_request(process); 3108 if (process_req == NULL) { 3109 raid_bdev_process_free(process); 3110 return NULL; 3111 } 3112 3113 TAILQ_INSERT_TAIL(&process->requests, process_req, link); 3114 } 3115 3116 return process; 3117 } 3118 3119 static int 3120 raid_bdev_start_rebuild(struct raid_base_bdev_info *target) 3121 { 3122 struct raid_bdev_process *process; 3123 3124 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 3125 3126 process = raid_bdev_process_alloc(target->raid_bdev, RAID_PROCESS_REBUILD, target); 3127 if (process == NULL) { 3128 return -ENOMEM; 3129 } 3130 3131 raid_bdev_process_start(process); 3132 
	return 0;
}

static void raid_bdev_configure_base_bdev_cont(struct raid_base_bdev_info *base_info);

/* spdk_for_each_channel completion callback: resume base bdev configuration
 * after all channels have observed the is_process_target flag. */
static void
_raid_bdev_configure_base_bdev_cont(struct spdk_io_channel_iter *i, int status)
{
	struct raid_base_bdev_info *base_info = spdk_io_channel_iter_get_ctx(i);

	raid_bdev_configure_base_bdev_cont(base_info);
}

/* Per-channel no-op used only to synchronize with all channels. */
static void
raid_bdev_ch_sync(struct spdk_io_channel_iter *i)
{
	spdk_for_each_channel_continue(i, 0);
}

/*
 * Continue configuring a base bdev after it has been opened and claimed.
 * May mark the base bdev as a rebuild target (re-entering itself after a
 * channel sync), configure the whole raid bdev once enough members are
 * discovered, or kick off a rebuild. Invokes and clears the stored
 * configure_cb unless ownership of it was handed off.
 */
static void
raid_bdev_configure_base_bdev_cont(struct raid_base_bdev_info *base_info)
{
	struct raid_bdev *raid_bdev = base_info->raid_bdev;
	raid_base_bdev_cb configure_cb;
	int rc;

	if (raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs_operational &&
	    base_info->is_process_target == false) {
		/* TODO: defer if rebuild in progress on another base bdev */
		assert(raid_bdev->process == NULL);
		assert(raid_bdev->state == RAID_BDEV_STATE_ONLINE);
		base_info->is_process_target = true;
		/* To assure is_process_target is set before is_configured when checked in raid_bdev_create_cb() */
		spdk_for_each_channel(raid_bdev, raid_bdev_ch_sync, base_info, _raid_bdev_configure_base_bdev_cont);
		return;
	}

	base_info->is_configured = true;

	raid_bdev->num_base_bdevs_discovered++;
	assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs);
	assert(raid_bdev->num_base_bdevs_operational <= raid_bdev->num_base_bdevs);
	assert(raid_bdev->num_base_bdevs_operational >= raid_bdev->min_base_bdevs_operational);

	/* Take ownership of the callback; it is invoked at most once below. */
	configure_cb = base_info->configure_cb;
	base_info->configure_cb = NULL;
	/*
	 * Configure the raid bdev when the number of discovered base bdevs reaches the number
	 * of base bdevs we know to be operational members of the array. Usually this is equal
	 * to the total number of base bdevs (num_base_bdevs) but can be less - when the array is
	 * degraded.
	 */
	if (raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs_operational) {
		rc = raid_bdev_configure(raid_bdev, configure_cb, base_info->configure_cb_ctx);
		if (rc != 0) {
			SPDK_ERRLOG("Failed to configure raid bdev: %s\n", spdk_strerror(-rc));
		} else {
			/* raid_bdev_configure() now owns the callback */
			configure_cb = NULL;
		}
	} else if (base_info->is_process_target) {
		raid_bdev->num_base_bdevs_operational++;
		rc = raid_bdev_start_rebuild(base_info);
		if (rc != 0) {
			SPDK_ERRLOG("Failed to start rebuild: %s\n", spdk_strerror(-rc));
			_raid_bdev_remove_base_bdev(base_info, NULL, NULL);
		}
	} else {
		rc = 0;
	}

	if (configure_cb != NULL) {
		configure_cb(base_info->configure_cb_ctx, rc);
	}
}

static void raid_bdev_examine_sb(const struct raid_bdev_superblock *sb, struct spdk_bdev *bdev,
				 raid_base_bdev_cb cb_fn, void *cb_ctx);

/*
 * Completion callback for the superblock read issued when a new (non-existing)
 * base bdev is being configured. Routes to re-examination if the bdev already
 * carries this raid bdev's superblock, rejects it if it carries a different
 * raid bdev's superblock, and continues normal configuration when no valid
 * superblock is present.
 */
static void
raid_bdev_configure_base_bdev_check_sb_cb(const struct raid_bdev_superblock *sb, int status,
		void *ctx)
{
	struct raid_base_bdev_info *base_info = ctx;
	raid_base_bdev_cb configure_cb = base_info->configure_cb;

	switch (status) {
	case 0:
		/* valid superblock found */
		base_info->configure_cb = NULL;
		if (spdk_uuid_compare(&base_info->raid_bdev->bdev.uuid, &sb->uuid) == 0) {
			struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(base_info->desc);

			/* Release and re-examine: the superblock describes this raid bdev */
			raid_bdev_free_base_bdev_resource(base_info);
			raid_bdev_examine_sb(sb, bdev, configure_cb, base_info->configure_cb_ctx);
			return;
		}
		SPDK_ERRLOG("Superblock of a different raid bdev found on bdev %s\n", base_info->name);
		status = -EEXIST;
		raid_bdev_free_base_bdev_resource(base_info);
		break;
	case -EINVAL:
		/* no valid superblock */
		raid_bdev_configure_base_bdev_cont(base_info);
		return;
	default:
		SPDK_ERRLOG("Failed to examine bdev %s: %s\n",
			    base_info->name, spdk_strerror(-status));
		break;
	}

	if (configure_cb != NULL) {
		base_info->configure_cb = NULL;
		configure_cb(base_info->configure_cb_ctx, status);
	}
}

/*
 * Open, validate and claim a base bdev for a raid bdev.
 * 'existing' distinguishes re-adding a known member (configuration continues
 * synchronously) from adding a brand-new bdev (its superblock is checked
 * asynchronously first). cb_fn/cb_ctx are stored on base_info and invoked when
 * configuration completes. Must be called on the app thread.
 * Returns 0 on success (or successfully started async path), negative errno
 * on failure; on failure any acquired resources are released.
 */
static int
raid_bdev_configure_base_bdev(struct raid_base_bdev_info *base_info, bool existing,
			      raid_base_bdev_cb cb_fn, void *cb_ctx)
{
	struct raid_bdev *raid_bdev = base_info->raid_bdev;
	struct spdk_bdev_desc *desc;
	struct spdk_bdev *bdev;
	const struct spdk_uuid *bdev_uuid;
	int rc;

	assert(spdk_get_thread() == spdk_thread_get_app_thread());
	assert(base_info->desc == NULL);

	/*
	 * Base bdev can be added by name or uuid. Here we assure both properties are set and valid
	 * before claiming the bdev.
	 */

	if (!spdk_uuid_is_null(&base_info->uuid)) {
		char uuid_str[SPDK_UUID_STRING_LEN];
		const char *bdev_name;

		spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &base_info->uuid);

		/* UUID of a bdev is registered as its alias */
		bdev = spdk_bdev_get_by_name(uuid_str);
		if (bdev == NULL) {
			return -ENODEV;
		}

		bdev_name = spdk_bdev_get_name(bdev);

		if (base_info->name == NULL) {
			assert(existing == true);
			base_info->name = strdup(bdev_name);
			if (base_info->name == NULL) {
				return -ENOMEM;
			}
		} else if (strcmp(base_info->name, bdev_name) != 0) {
			SPDK_ERRLOG("Name mismatch for base bdev '%s' - expected '%s'\n",
				    bdev_name, base_info->name);
			return -EINVAL;
		}
	}

	assert(base_info->name != NULL);

	rc = spdk_bdev_open_ext(base_info->name, true, raid_bdev_event_base_bdev, NULL, &desc);
	if (rc != 0) {
		if (rc != -ENODEV) {
			SPDK_ERRLOG("Unable to create desc on bdev '%s'\n", base_info->name);
		}
		return rc;
} 3303 3304 bdev = spdk_bdev_desc_get_bdev(desc); 3305 bdev_uuid = spdk_bdev_get_uuid(bdev); 3306 3307 if (spdk_uuid_is_null(&base_info->uuid)) { 3308 spdk_uuid_copy(&base_info->uuid, bdev_uuid); 3309 } else if (spdk_uuid_compare(&base_info->uuid, bdev_uuid) != 0) { 3310 SPDK_ERRLOG("UUID mismatch for base bdev '%s'\n", base_info->name); 3311 spdk_bdev_close(desc); 3312 return -EINVAL; 3313 } 3314 3315 rc = spdk_bdev_module_claim_bdev(bdev, NULL, &g_raid_if); 3316 if (rc != 0) { 3317 SPDK_ERRLOG("Unable to claim this bdev as it is already claimed\n"); 3318 spdk_bdev_close(desc); 3319 return rc; 3320 } 3321 3322 SPDK_DEBUGLOG(bdev_raid, "bdev %s is claimed\n", bdev->name); 3323 3324 base_info->app_thread_ch = spdk_bdev_get_io_channel(desc); 3325 if (base_info->app_thread_ch == NULL) { 3326 SPDK_ERRLOG("Failed to get io channel\n"); 3327 spdk_bdev_module_release_bdev(bdev); 3328 spdk_bdev_close(desc); 3329 return -ENOMEM; 3330 } 3331 3332 base_info->desc = desc; 3333 base_info->blockcnt = bdev->blockcnt; 3334 3335 if (raid_bdev->superblock_enabled) { 3336 uint64_t data_offset; 3337 3338 if (base_info->data_offset == 0) { 3339 assert((RAID_BDEV_MIN_DATA_OFFSET_SIZE % spdk_bdev_get_data_block_size(bdev)) == 0); 3340 data_offset = RAID_BDEV_MIN_DATA_OFFSET_SIZE / spdk_bdev_get_data_block_size(bdev); 3341 } else { 3342 data_offset = base_info->data_offset; 3343 } 3344 3345 if (bdev->optimal_io_boundary != 0) { 3346 data_offset = spdk_divide_round_up(data_offset, 3347 bdev->optimal_io_boundary) * bdev->optimal_io_boundary; 3348 if (base_info->data_offset != 0 && base_info->data_offset != data_offset) { 3349 SPDK_WARNLOG("Data offset %lu on bdev '%s' is different than optimal value %lu\n", 3350 base_info->data_offset, base_info->name, data_offset); 3351 data_offset = base_info->data_offset; 3352 } 3353 } 3354 3355 base_info->data_offset = data_offset; 3356 } 3357 3358 if (base_info->data_offset >= bdev->blockcnt) { 3359 SPDK_ERRLOG("Data offset %lu exceeds base bdev 
capacity %lu on bdev '%s'\n", 3360 base_info->data_offset, bdev->blockcnt, base_info->name); 3361 rc = -EINVAL; 3362 goto out; 3363 } 3364 3365 if (base_info->data_size == 0) { 3366 base_info->data_size = bdev->blockcnt - base_info->data_offset; 3367 } else if (base_info->data_offset + base_info->data_size > bdev->blockcnt) { 3368 SPDK_ERRLOG("Data offset and size exceeds base bdev capacity %lu on bdev '%s'\n", 3369 bdev->blockcnt, base_info->name); 3370 rc = -EINVAL; 3371 goto out; 3372 } 3373 3374 if (!raid_bdev->module->dif_supported && spdk_bdev_get_dif_type(bdev) != SPDK_DIF_DISABLE) { 3375 SPDK_ERRLOG("Base bdev '%s' has DIF or DIX enabled - unsupported RAID configuration\n", 3376 bdev->name); 3377 rc = -EINVAL; 3378 goto out; 3379 } 3380 3381 /* 3382 * Set the raid bdev properties if this is the first base bdev configured, 3383 * otherwise - verify. Assumption is that all the base bdevs for any raid bdev should 3384 * have the same blocklen and metadata format. 3385 */ 3386 if (raid_bdev->bdev.blocklen == 0) { 3387 raid_bdev->bdev.blocklen = bdev->blocklen; 3388 raid_bdev->bdev.md_len = spdk_bdev_get_md_size(bdev); 3389 raid_bdev->bdev.md_interleave = spdk_bdev_is_md_interleaved(bdev); 3390 raid_bdev->bdev.dif_type = spdk_bdev_get_dif_type(bdev); 3391 raid_bdev->bdev.dif_check_flags = bdev->dif_check_flags; 3392 raid_bdev->bdev.dif_is_head_of_md = spdk_bdev_is_dif_head_of_md(bdev); 3393 raid_bdev->bdev.dif_pi_format = bdev->dif_pi_format; 3394 } else { 3395 if (raid_bdev->bdev.blocklen != bdev->blocklen) { 3396 SPDK_ERRLOG("Raid bdev '%s' blocklen %u differs from base bdev '%s' blocklen %u\n", 3397 raid_bdev->bdev.name, raid_bdev->bdev.blocklen, bdev->name, bdev->blocklen); 3398 rc = -EINVAL; 3399 goto out; 3400 } 3401 3402 if (raid_bdev->bdev.md_len != spdk_bdev_get_md_size(bdev) || 3403 raid_bdev->bdev.md_interleave != spdk_bdev_is_md_interleaved(bdev) || 3404 raid_bdev->bdev.dif_type != spdk_bdev_get_dif_type(bdev) || 3405 raid_bdev->bdev.dif_check_flags 
	    != bdev->dif_check_flags ||
		    raid_bdev->bdev.dif_is_head_of_md != spdk_bdev_is_dif_head_of_md(bdev) ||
		    raid_bdev->bdev.dif_pi_format != bdev->dif_pi_format) {
			SPDK_ERRLOG("Raid bdev '%s' has different metadata format than base bdev '%s'\n",
				    raid_bdev->bdev.name, bdev->name);
			rc = -EINVAL;
			goto out;
		}
	}

	assert(base_info->configure_cb == NULL);
	base_info->configure_cb = cb_fn;
	base_info->configure_cb_ctx = cb_ctx;

	if (existing) {
		raid_bdev_configure_base_bdev_cont(base_info);
	} else {
		/* check for existing superblock when using a new bdev */
		rc = raid_bdev_load_base_bdev_superblock(desc, base_info->app_thread_ch,
				raid_bdev_configure_base_bdev_check_sb_cb, base_info);
		if (rc) {
			SPDK_ERRLOG("Failed to read bdev %s superblock: %s\n",
				    bdev->name, spdk_strerror(-rc));
		}
	}
out:
	if (rc != 0) {
		/* Undo: clear the stored callback and release desc/channel/claim */
		base_info->configure_cb = NULL;
		raid_bdev_free_base_bdev_resource(base_info);
	}
	return rc;
}

/*
 * Public entry point: add the bdev named 'name' as a base bdev of 'raid_bdev'.
 * Finds a suitable empty slot (matching by UUID while configuring, or any
 * empty slot otherwise) and starts its configuration. Must be called on the
 * app thread. Not allowed while a background process is running.
 * Returns 0 or negative errno; cb_fn is invoked when configuration completes.
 */
int
raid_bdev_add_base_bdev(struct raid_bdev *raid_bdev, const char *name,
			raid_base_bdev_cb cb_fn, void *cb_ctx)
{
	struct raid_base_bdev_info *base_info = NULL, *iter;
	int rc;

	assert(name != NULL);
	assert(spdk_get_thread() == spdk_thread_get_app_thread());

	if (raid_bdev->process != NULL) {
		SPDK_ERRLOG("raid bdev '%s' is in process\n",
			    raid_bdev->bdev.name);
		return -EPERM;
	}

	if (raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) {
		struct spdk_bdev *bdev = spdk_bdev_get_by_name(name);

		if (bdev != NULL) {
			/* Prefer a slot whose UUID already matches this bdev */
			RAID_FOR_EACH_BASE_BDEV(raid_bdev, iter) {
				if (iter->name == NULL &&
				    spdk_uuid_compare(&bdev->uuid, &iter->uuid) == 0) {
					base_info = iter;
					break;
				}
			}
		}
	}

	/* Otherwise take the first fully empty slot (no name, no uuid) */
	if (base_info == NULL || raid_bdev->state == RAID_BDEV_STATE_ONLINE) {
		RAID_FOR_EACH_BASE_BDEV(raid_bdev, iter) {
			if
	(iter->name == NULL && spdk_uuid_is_null(&iter->uuid)) {
				base_info = iter;
				break;
			}
		}
	}

	if (base_info == NULL) {
		SPDK_ERRLOG("no empty slot found in raid bdev '%s' for new base bdev '%s'\n",
			    raid_bdev->bdev.name, name);
		return -EINVAL;
	}

	assert(base_info->is_configured == false);

	if (raid_bdev->state == RAID_BDEV_STATE_ONLINE) {
		assert(base_info->data_size != 0);
		assert(base_info->desc == NULL);
	}

	base_info->name = strdup(name);
	if (base_info->name == NULL) {
		return -ENOMEM;
	}

	rc = raid_bdev_configure_base_bdev(base_info, false, cb_fn, cb_ctx);
	if (rc != 0 && (rc != -ENODEV || raid_bdev->state != RAID_BDEV_STATE_CONFIGURING)) {
		/* -ENODEV while still configuring is tolerated: the bdev may appear later */
		SPDK_ERRLOG("base bdev '%s' configure failed: %s\n", name, spdk_strerror(-rc));
		free(base_info->name);
		base_info->name = NULL;
	}

	return rc;
}

/*
 * Create a raid bdev from an on-disk superblock found during examine.
 * The superblock is copied into the new raid bdev and per-slot base bdev
 * info (uuid, data offset/size) is populated from it.
 * Returns 0 and sets *raid_bdev_out on success, negative errno on failure.
 */
static int
raid_bdev_create_from_sb(const struct raid_bdev_superblock *sb, struct raid_bdev **raid_bdev_out)
{
	struct raid_bdev *raid_bdev;
	uint8_t i;
	int rc;

	rc = _raid_bdev_create(sb->name, (sb->strip_size * sb->block_size) / 1024, sb->num_base_bdevs,
			       sb->level, true, &sb->uuid, &raid_bdev);
	if (rc != 0) {
		return rc;
	}

	rc = raid_bdev_alloc_superblock(raid_bdev, sb->block_size);
	if (rc != 0) {
		raid_bdev_free(raid_bdev);
		return rc;
	}

	assert(sb->length <= RAID_BDEV_SB_MAX_LENGTH);
	memcpy(raid_bdev->sb, sb, sb->length);

	for (i = 0; i < sb->base_bdevs_size; i++) {
		const struct raid_bdev_sb_base_bdev *sb_base_bdev = &sb->base_bdevs[i];
		struct raid_base_bdev_info *base_info = &raid_bdev->base_bdev_info[sb_base_bdev->slot];

		if (sb_base_bdev->state == RAID_SB_BASE_BDEV_CONFIGURED) {
			spdk_uuid_copy(&base_info->uuid, &sb_base_bdev->uuid);
			raid_bdev->num_base_bdevs_operational++;
		}

		base_info->data_offset = sb_base_bdev->data_offset;
		base_info->data_size = sb_base_bdev->data_size;
	}

	*raid_bdev_out = raid_bdev;
	return 0;
}

/*
 * Examine path for a bdev without a raid superblock: try to match it by name
 * or UUID against an unconfigured slot of any raid bdev that is still
 * configuring and itself has no superblock.
 */
static void
raid_bdev_examine_no_sb(struct spdk_bdev *bdev)
{
	struct raid_bdev *raid_bdev;
	struct raid_base_bdev_info *base_info;

	TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) {
		if (raid_bdev->state != RAID_BDEV_STATE_CONFIGURING || raid_bdev->sb != NULL) {
			continue;
		}
		RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
			if (base_info->desc == NULL &&
			    ((base_info->name != NULL && strcmp(bdev->name, base_info->name) == 0) ||
			     spdk_uuid_compare(&base_info->uuid, &bdev->uuid) == 0)) {
				raid_bdev_configure_base_bdev(base_info, true, NULL, NULL);
				break;
			}
		}
	}
}

/* Context for iterating over the remaining unconfigured members of a raid
 * bdev discovered via superblock examine. */
struct raid_bdev_examine_others_ctx {
	struct spdk_uuid raid_bdev_uuid;
	uint8_t current_base_bdev_idx;
	raid_base_bdev_cb cb_fn;
	void *cb_ctx;
};

/* Invoke the user callback (if any) and free the examine-others context. */
static void
raid_bdev_examine_others_done(void *_ctx, int status)
{
	struct raid_bdev_examine_others_ctx *ctx = _ctx;

	if (ctx->cb_fn != NULL) {
		ctx->cb_fn(ctx->cb_ctx, status);
	}
	free(ctx);
}

typedef void (*raid_bdev_examine_load_sb_cb)(struct spdk_bdev *bdev,
		const struct raid_bdev_superblock *sb, int status, void *ctx);
static int raid_bdev_examine_load_sb(const char *bdev_name, raid_bdev_examine_load_sb_cb cb,
				     void *cb_ctx);
static void raid_bdev_examine_sb(const struct raid_bdev_superblock *sb, struct spdk_bdev *bdev,
				 raid_base_bdev_cb cb_fn, void *cb_ctx);
static void raid_bdev_examine_others(void *_ctx, int status);

/* Superblock-load completion for an "examine others" candidate: feed the
 * superblock into raid_bdev_examine_sb(), chaining back to
 * raid_bdev_examine_others for the next member. */
static void
raid_bdev_examine_others_load_cb(struct spdk_bdev *bdev, const struct raid_bdev_superblock *sb,
				 int status, void *_ctx)
{
	struct raid_bdev_examine_others_ctx *ctx = _ctx;

	if (status != 0) {
		raid_bdev_examine_others_done(ctx, status);
		return;
	}

	raid_bdev_examine_sb(sb, bdev, raid_bdev_examine_others, ctx);
}

/*
 * After one base bdev of a superblock-discovered raid bdev is configured,
 * look for further members that are already registered (by UUID alias) but
 * not yet configured, and examine them one at a time. Terminates via
 * raid_bdev_examine_others_done() when no more candidates remain or on error.
 */
static void
raid_bdev_examine_others(void *_ctx, int status)
{
	struct raid_bdev_examine_others_ctx *ctx = _ctx;
	struct raid_bdev *raid_bdev;
	struct raid_base_bdev_info *base_info;
	char uuid_str[SPDK_UUID_STRING_LEN];

	/* -EEXIST (member already configured) is not fatal for the scan */
	if (status != 0 && status != -EEXIST) {
		goto out;
	}

	raid_bdev = raid_bdev_find_by_uuid(&ctx->raid_bdev_uuid);
	if (raid_bdev == NULL) {
		status = -ENODEV;
		goto out;
	}

	for (base_info = &raid_bdev->base_bdev_info[ctx->current_base_bdev_idx];
	     base_info < &raid_bdev->base_bdev_info[raid_bdev->num_base_bdevs];
	     base_info++) {
		if (base_info->is_configured || spdk_uuid_is_null(&base_info->uuid)) {
			continue;
		}

		spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &base_info->uuid);

		/* Member bdev not registered (yet) - skip it */
		if (spdk_bdev_get_by_name(uuid_str) == NULL) {
			continue;
		}

		/* Remember where to resume the scan on the next callback */
		ctx->current_base_bdev_idx = raid_bdev_base_bdev_slot(base_info);

		status = raid_bdev_examine_load_sb(uuid_str, raid_bdev_examine_others_load_cb, ctx);
		if (status != 0) {
			continue;
		}
		return;
	}
out:
	raid_bdev_examine_others_done(ctx, status);
}

/*
 * Handle a valid raid superblock found on 'bdev': create the raid bdev if it
 * does not exist yet (resolving seq_number conflicts against an existing one),
 * locate this bdev's slot in the superblock, and configure it as a base bdev.
 * cb_fn (if not NULL) is called with the final status on failure or when
 * configuration completes.
 */
static void
raid_bdev_examine_sb(const struct raid_bdev_superblock *sb, struct spdk_bdev *bdev,
		     raid_base_bdev_cb cb_fn, void *cb_ctx)
{
	const struct raid_bdev_sb_base_bdev *sb_base_bdev = NULL;
	struct raid_bdev *raid_bdev;
	struct raid_base_bdev_info *iter, *base_info;
	uint8_t i;
	int rc;

	if (sb->block_size != spdk_bdev_get_data_block_size(bdev)) {
		SPDK_WARNLOG("Bdev %s block size (%u) does not match the value in superblock (%u)\n",
			     bdev->name, sb->block_size, spdk_bdev_get_data_block_size(bdev));
		rc = -EINVAL;
		goto out;
	}

	if
(spdk_uuid_is_null(&sb->uuid)) { 3666 SPDK_WARNLOG("NULL raid bdev UUID in superblock on bdev %s\n", bdev->name); 3667 rc = -EINVAL; 3668 goto out; 3669 } 3670 3671 raid_bdev = raid_bdev_find_by_uuid(&sb->uuid); 3672 3673 if (raid_bdev) { 3674 if (sb->seq_number > raid_bdev->sb->seq_number) { 3675 SPDK_DEBUGLOG(bdev_raid, 3676 "raid superblock seq_number on bdev %s (%lu) greater than existing raid bdev %s (%lu)\n", 3677 bdev->name, sb->seq_number, raid_bdev->bdev.name, raid_bdev->sb->seq_number); 3678 3679 if (raid_bdev->state != RAID_BDEV_STATE_CONFIGURING) { 3680 SPDK_WARNLOG("Newer version of raid bdev %s superblock found on bdev %s but raid bdev is not in configuring state.\n", 3681 raid_bdev->bdev.name, bdev->name); 3682 rc = -EBUSY; 3683 goto out; 3684 } 3685 3686 /* remove and then recreate the raid bdev using the newer superblock */ 3687 raid_bdev_delete(raid_bdev, NULL, NULL); 3688 raid_bdev = NULL; 3689 } else if (sb->seq_number < raid_bdev->sb->seq_number) { 3690 SPDK_DEBUGLOG(bdev_raid, 3691 "raid superblock seq_number on bdev %s (%lu) smaller than existing raid bdev %s (%lu)\n", 3692 bdev->name, sb->seq_number, raid_bdev->bdev.name, raid_bdev->sb->seq_number); 3693 /* use the current raid bdev superblock */ 3694 sb = raid_bdev->sb; 3695 } 3696 } 3697 3698 for (i = 0; i < sb->base_bdevs_size; i++) { 3699 sb_base_bdev = &sb->base_bdevs[i]; 3700 3701 assert(spdk_uuid_is_null(&sb_base_bdev->uuid) == false); 3702 3703 if (spdk_uuid_compare(&sb_base_bdev->uuid, spdk_bdev_get_uuid(bdev)) == 0) { 3704 break; 3705 } 3706 } 3707 3708 if (i == sb->base_bdevs_size) { 3709 SPDK_DEBUGLOG(bdev_raid, "raid superblock does not contain this bdev's uuid\n"); 3710 rc = -EINVAL; 3711 goto out; 3712 } 3713 3714 if (!raid_bdev) { 3715 struct raid_bdev_examine_others_ctx *ctx; 3716 3717 ctx = calloc(1, sizeof(*ctx)); 3718 if (ctx == NULL) { 3719 rc = -ENOMEM; 3720 goto out; 3721 } 3722 3723 rc = raid_bdev_create_from_sb(sb, &raid_bdev); 3724 if (rc != 0) { 3725 
			SPDK_ERRLOG("Failed to create raid bdev %s: %s\n",
				    sb->name, spdk_strerror(-rc));
			free(ctx);
			goto out;
		}

		/* after this base bdev is configured, examine other base bdevs that may be present */
		spdk_uuid_copy(&ctx->raid_bdev_uuid, &sb->uuid);
		ctx->cb_fn = cb_fn;
		ctx->cb_ctx = cb_ctx;

		cb_fn = raid_bdev_examine_others;
		cb_ctx = ctx;
	}

	if (raid_bdev->state == RAID_BDEV_STATE_ONLINE) {
		/* Re-adding a previously missing/failed member to an online array */
		assert(sb_base_bdev->slot < raid_bdev->num_base_bdevs);
		base_info = &raid_bdev->base_bdev_info[sb_base_bdev->slot];
		assert(base_info->is_configured == false);
		assert(sb_base_bdev->state == RAID_SB_BASE_BDEV_MISSING ||
		       sb_base_bdev->state == RAID_SB_BASE_BDEV_FAILED);
		assert(spdk_uuid_is_null(&base_info->uuid));
		spdk_uuid_copy(&base_info->uuid, &sb_base_bdev->uuid);
		SPDK_NOTICELOG("Re-adding bdev %s to raid bdev %s.\n", bdev->name, raid_bdev->bdev.name);
		rc = raid_bdev_configure_base_bdev(base_info, true, cb_fn, cb_ctx);
		if (rc != 0) {
			SPDK_ERRLOG("Failed to configure bdev %s as base bdev of raid %s: %s\n",
				    bdev->name, raid_bdev->bdev.name, spdk_strerror(-rc));
		}
		goto out;
	}

	if (sb_base_bdev->state != RAID_SB_BASE_BDEV_CONFIGURED) {
		SPDK_NOTICELOG("Bdev %s is not an active member of raid bdev %s. Ignoring.\n",
			       bdev->name, raid_bdev->bdev.name);
		rc = -EINVAL;
		goto out;
	}

	/* Match the slot in the in-memory raid bdev by UUID */
	base_info = NULL;
	RAID_FOR_EACH_BASE_BDEV(raid_bdev, iter) {
		if (spdk_uuid_compare(&iter->uuid, spdk_bdev_get_uuid(bdev)) == 0) {
			base_info = iter;
			break;
		}
	}

	if (base_info == NULL) {
		SPDK_ERRLOG("Bdev %s is not a member of raid bdev %s\n",
			    bdev->name, raid_bdev->bdev.name);
		rc = -EINVAL;
		goto out;
	}

	if (base_info->is_configured) {
		rc = -EEXIST;
		goto out;
	}

	rc = raid_bdev_configure_base_bdev(base_info, true, cb_fn, cb_ctx);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to configure bdev %s as base bdev of raid %s: %s\n",
			    bdev->name, raid_bdev->bdev.name, spdk_strerror(-rc));
	}
out:
	if (rc != 0 && cb_fn != 0) {
		cb_fn(cb_ctx, rc);
	}
}

/* Context for an asynchronous superblock read during examine. */
struct raid_bdev_examine_ctx {
	struct spdk_bdev_desc *desc;
	struct spdk_io_channel *ch;
	raid_bdev_examine_load_sb_cb cb;
	void *cb_ctx;
};

/* Release the channel and descriptor (if acquired) and free the context. */
static void
raid_bdev_examine_ctx_free(struct raid_bdev_examine_ctx *ctx)
{
	if (!ctx) {
		return;
	}

	if (ctx->ch) {
		spdk_put_io_channel(ctx->ch);
	}

	if (ctx->desc) {
		spdk_bdev_close(ctx->desc);
	}

	free(ctx);
}

/* Superblock read completed: forward to the stored callback, then clean up. */
static void
raid_bdev_examine_load_sb_done(const struct raid_bdev_superblock *sb, int status, void *_ctx)
{
	struct raid_bdev_examine_ctx *ctx = _ctx;
	struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(ctx->desc);

	ctx->cb(bdev, sb, status, ctx->cb_ctx);

	raid_bdev_examine_ctx_free(ctx);
}

/* No-op event callback for the short-lived examine descriptor. */
static void
raid_bdev_examine_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
{
}

/*
 * Open 'bdev_name' read-only and asynchronously load its raid superblock.
 * 'cb' is invoked with the result; all resources are released afterwards.
 * Returns 0 if the read was started, negative errno otherwise.
 */
static int
raid_bdev_examine_load_sb(const char *bdev_name, raid_bdev_examine_load_sb_cb cb, void *cb_ctx)
{
	struct raid_bdev_examine_ctx *ctx;
	int rc;

	assert(cb != NULL);

	ctx = calloc(1, sizeof(*ctx));
	if (!ctx) {
		return -ENOMEM;
	}

	rc = spdk_bdev_open_ext(bdev_name, false, raid_bdev_examine_event_cb, NULL, &ctx->desc);
	if (rc) {
		SPDK_ERRLOG("Failed to open bdev %s: %s\n", bdev_name, spdk_strerror(-rc));
		goto err;
	}

	ctx->ch = spdk_bdev_get_io_channel(ctx->desc);
	if (!ctx->ch) {
		SPDK_ERRLOG("Failed to get io channel for bdev %s\n", bdev_name);
		rc = -ENOMEM;
		goto err;
	}

	ctx->cb = cb;
	ctx->cb_ctx = cb_ctx;

	rc = raid_bdev_load_base_bdev_superblock(ctx->desc, ctx->ch, raid_bdev_examine_load_sb_done, ctx);
	if (rc) {
		SPDK_ERRLOG("Failed to read bdev %s superblock: %s\n",
			    bdev_name, spdk_strerror(-rc));
		goto err;
	}

	return 0;
err:
	raid_bdev_examine_ctx_free(ctx);
	return rc;
}

/* Report examine completion to the bdev layer, logging any failure. */
static void
raid_bdev_examine_done(void *ctx, int status)
{
	struct spdk_bdev *bdev = ctx;

	if (status != 0) {
		SPDK_ERRLOG("Failed to examine bdev %s: %s\n",
			    bdev->name, spdk_strerror(-status));
	}
	spdk_bdev_module_examine_done(&g_raid_if);
}

/* Dispatch on the result of the superblock read during examine. */
static void
raid_bdev_examine_cont(struct spdk_bdev *bdev, const struct raid_bdev_superblock *sb, int status,
		       void *ctx)
{
	switch (status) {
	case 0:
		/* valid superblock found */
		SPDK_DEBUGLOG(bdev_raid, "raid superblock found on bdev %s\n", bdev->name);
		raid_bdev_examine_sb(sb, bdev, raid_bdev_examine_done, bdev);
		return;
	case -EINVAL:
		/* no valid superblock, check if it can be claimed anyway */
		raid_bdev_examine_no_sb(bdev);
		status = 0;
		break;
	}

	raid_bdev_examine_done(bdev, status);
}

/*
 * brief:
 * raid_bdev_examine function is the examine function call by the below layers
 * like bdev_nvme layer. This function will check if this base bdev can be
 * claimed by this raid bdev or not.
 * params:
 * bdev - pointer to base bdev
 * returns:
 * none
 */
static void
raid_bdev_examine(struct spdk_bdev *bdev)
{
	int rc = 0;

	/* Already a member of some raid bdev - nothing to do */
	if (raid_bdev_find_base_info_by_bdev(bdev) != NULL) {
		goto done;
	}

	/* Superblock reads are plain data reads; skip them on DIF/DIX bdevs and
	 * fall back to name/uuid matching only. */
	if (spdk_bdev_get_dif_type(bdev) != SPDK_DIF_DISABLE) {
		raid_bdev_examine_no_sb(bdev);
		goto done;
	}

	rc = raid_bdev_examine_load_sb(bdev->name, raid_bdev_examine_cont, NULL);
	if (rc != 0) {
		goto done;
	}

	/* Completion is reported asynchronously via raid_bdev_examine_cont() */
	return;
done:
	raid_bdev_examine_done(bdev, rc);
}

/* Log component for bdev raid bdev module */
SPDK_LOG_REGISTER_COMPONENT(bdev_raid)

/* Register raid IO tracepoints and relate them to the generic bdev IO ones. */
static void
bdev_raid_trace(void)
{
	struct spdk_trace_tpoint_opts opts[] = {
		{
			"BDEV_RAID_IO_START", TRACE_BDEV_RAID_IO_START,
			OWNER_TYPE_NONE, OBJECT_BDEV_RAID_IO, 1,
			{{ "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }}
		},
		{
			"BDEV_RAID_IO_DONE", TRACE_BDEV_RAID_IO_DONE,
			OWNER_TYPE_NONE, OBJECT_BDEV_RAID_IO, 0,
			{{ "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }}
		}
	};

	spdk_trace_register_object(OBJECT_BDEV_RAID_IO, 'R');
	spdk_trace_register_description_ext(opts, SPDK_COUNTOF(opts));
	spdk_trace_tpoint_register_relation(TRACE_BDEV_IO_START, OBJECT_BDEV_RAID_IO, 1);
	spdk_trace_tpoint_register_relation(TRACE_BDEV_IO_DONE, OBJECT_BDEV_RAID_IO, 0);
}
SPDK_TRACE_REGISTER_FN(bdev_raid_trace, "bdev_raid", TRACE_GROUP_BDEV_RAID)