1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2018 Intel Corporation. 3 * All rights reserved. 4 * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 5 */ 6 7 #include "bdev_raid.h" 8 #include "spdk/env.h" 9 #include "spdk/thread.h" 10 #include "spdk/log.h" 11 #include "spdk/string.h" 12 #include "spdk/util.h" 13 #include "spdk/json.h" 14 15 static bool g_shutdown_started = false; 16 17 /* List of all raid bdevs */ 18 struct raid_all_tailq g_raid_bdev_list = TAILQ_HEAD_INITIALIZER(g_raid_bdev_list); 19 20 static TAILQ_HEAD(, raid_bdev_module) g_raid_modules = TAILQ_HEAD_INITIALIZER(g_raid_modules); 21 22 static struct raid_bdev_module * 23 raid_bdev_module_find(enum raid_level level) 24 { 25 struct raid_bdev_module *raid_module; 26 27 TAILQ_FOREACH(raid_module, &g_raid_modules, link) { 28 if (raid_module->level == level) { 29 return raid_module; 30 } 31 } 32 33 return NULL; 34 } 35 36 void 37 raid_bdev_module_list_add(struct raid_bdev_module *raid_module) 38 { 39 if (raid_bdev_module_find(raid_module->level) != NULL) { 40 SPDK_ERRLOG("module for raid level '%s' already registered.\n", 41 raid_bdev_level_to_str(raid_module->level)); 42 assert(false); 43 } else { 44 TAILQ_INSERT_TAIL(&g_raid_modules, raid_module, link); 45 } 46 } 47 48 /* Function declarations */ 49 static void raid_bdev_examine(struct spdk_bdev *bdev); 50 static int raid_bdev_init(void); 51 static void raid_bdev_deconfigure(struct raid_bdev *raid_bdev, 52 raid_bdev_destruct_cb cb_fn, void *cb_arg); 53 54 /* 55 * brief: 56 * raid_bdev_create_cb function is a cb function for raid bdev which creates the 57 * hierarchy from raid bdev to base bdev io channels. It will be called per core 58 * params: 59 * io_device - pointer to raid bdev io device represented by raid_bdev 60 * ctx_buf - pointer to context buffer for raid bdev io channel 61 * returns: 62 * 0 - success 63 * non zero - failure 64 */ 65 static int 66 raid_bdev_create_cb(void *io_device, void *ctx_buf) 67 { 68 struct raid_bdev *raid_bdev = io_device; 69 struct raid_bdev_io_channel *raid_ch = ctx_buf; 70 uint8_t i; 71 int ret = 0; 72 73 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_create_cb, %p\n", raid_ch); 74 75 assert(raid_bdev != NULL); 76 assert(raid_bdev->state == RAID_BDEV_STATE_ONLINE); 77 78 raid_ch->num_channels = raid_bdev->num_base_bdevs; 79 80 raid_ch->base_channel = calloc(raid_ch->num_channels, 81 sizeof(struct spdk_io_channel *)); 82 if (!raid_ch->base_channel) { 83 SPDK_ERRLOG("Unable to allocate base bdevs io channel\n"); 84 return -ENOMEM; 85 } 86 87 spdk_spin_lock(&raid_bdev->base_bdev_lock); 88 for (i = 0; i < raid_ch->num_channels; i++) { 89 /* 90 * Get the spdk_io_channel for all the base bdevs. This is used during 91 * split logic to send the respective child bdev ios to respective base 92 * bdev io channel. 93 */ 94 if (raid_bdev->base_bdev_info[i].desc == NULL) { 95 continue; 96 } 97 raid_ch->base_channel[i] = spdk_bdev_get_io_channel( 98 raid_bdev->base_bdev_info[i].desc); 99 if (!raid_ch->base_channel[i]) { 100 SPDK_ERRLOG("Unable to create io channel for base bdev\n"); 101 ret = -ENOMEM; 102 break; 103 } 104 } 105 spdk_spin_unlock(&raid_bdev->base_bdev_lock); 106 107 if (!ret && raid_bdev->module->get_io_channel) { 108 raid_ch->module_channel = raid_bdev->module->get_io_channel(raid_bdev); 109 if (!raid_ch->module_channel) { 110 SPDK_ERRLOG("Unable to create io channel for raid module\n"); 111 ret = -ENOMEM; 112 } 113 } 114 115 if (ret) { 116 for (i = 0; i < raid_ch->num_channels; i++) { 117 if (raid_ch->base_channel[i] != NULL) { 118 spdk_put_io_channel(raid_ch->base_channel[i]); 119 } 120 } 121 free(raid_ch->base_channel); 122 raid_ch->base_channel = NULL; 123 } 124 return ret; 125 } 126 127 /* 128 * brief: 129 * raid_bdev_destroy_cb function is a cb function for raid bdev which deletes the 130 * hierarchy from raid bdev to base bdev io channels. It will be called per core 131 * params: 132 * io_device - pointer to raid bdev io device represented by raid_bdev 133 * ctx_buf - pointer to context buffer for raid bdev io channel 134 * returns: 135 * none 136 */ 137 static void 138 raid_bdev_destroy_cb(void *io_device, void *ctx_buf) 139 { 140 struct raid_bdev_io_channel *raid_ch = ctx_buf; 141 uint8_t i; 142 143 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_destroy_cb\n"); 144 145 assert(raid_ch != NULL); 146 assert(raid_ch->base_channel); 147 148 if (raid_ch->module_channel) { 149 spdk_put_io_channel(raid_ch->module_channel); 150 } 151 152 for (i = 0; i < raid_ch->num_channels; i++) { 153 /* Free base bdev channels */ 154 if (raid_ch->base_channel[i] != NULL) { 155 spdk_put_io_channel(raid_ch->base_channel[i]); 156 } 157 } 158 free(raid_ch->base_channel); 159 raid_ch->base_channel = NULL; 160 } 161 162 /* 163 * brief: 164 * raid_bdev_cleanup is used to cleanup raid_bdev related data 165 * structures. 166 * params: 167 * raid_bdev - pointer to raid_bdev 168 * returns: 169 * none 170 */ 171 static void 172 raid_bdev_cleanup(struct raid_bdev *raid_bdev) 173 { 174 struct raid_base_bdev_info *base_info; 175 176 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_cleanup, %p name %s, state %s\n", 177 raid_bdev, raid_bdev->bdev.name, raid_bdev_state_to_str(raid_bdev->state)); 178 assert(raid_bdev->state != RAID_BDEV_STATE_ONLINE); 179 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 180 181 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 182 assert(base_info->desc == NULL); 183 free(base_info->name); 184 } 185 186 TAILQ_REMOVE(&g_raid_bdev_list, raid_bdev, global_link); 187 } 188 189 static void 190 raid_bdev_free(struct raid_bdev *raid_bdev) 191 { 192 spdk_spin_destroy(&raid_bdev->base_bdev_lock); 193 free(raid_bdev->base_bdev_info); 194 free(raid_bdev->bdev.name); 195 free(raid_bdev); 196 } 197 198 static void 199 raid_bdev_cleanup_and_free(struct raid_bdev *raid_bdev) 200 { 201 raid_bdev_cleanup(raid_bdev); 202 raid_bdev_free(raid_bdev); 203 } 204 205 /* 206 * brief: 207 * free resource of base bdev for raid bdev 208 * params: 209 * base_info - raid base bdev info 210 * returns: 211 * 0 - success 212 * non zero - failure 213 */ 214 static void 215 raid_bdev_free_base_bdev_resource(struct raid_base_bdev_info *base_info) 216 { 217 struct raid_bdev *raid_bdev = base_info->raid_bdev; 218 219 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 220 221 free(base_info->name); 222 base_info->name = NULL; 223 224 if (base_info->desc == NULL) { 225 return; 226 } 227 228 spdk_bdev_module_release_bdev(spdk_bdev_desc_get_bdev(base_info->desc)); 229 spdk_bdev_close(base_info->desc); 230 base_info->desc = NULL; 231 232 assert(raid_bdev->num_base_bdevs_discovered); 233 raid_bdev->num_base_bdevs_discovered--; 234 } 235 236 static void 237 raid_bdev_io_device_unregister_cb(void *io_device) 238 { 239 struct raid_bdev *raid_bdev = io_device; 240 241 if (raid_bdev->num_base_bdevs_discovered == 0) { 242 /* Free raid_bdev when there are no base bdevs left */ 243 SPDK_DEBUGLOG(bdev_raid, "raid bdev base bdevs is 0, going to free all in destruct\n"); 244 raid_bdev_cleanup(raid_bdev); 245 spdk_bdev_destruct_done(&raid_bdev->bdev, 0); 246 raid_bdev_free(raid_bdev); 247 } else { 248 spdk_bdev_destruct_done(&raid_bdev->bdev, 0); 249 } 250 } 251 252 void 253 raid_bdev_module_stop_done(struct raid_bdev *raid_bdev) 254 { 255 if (raid_bdev->state != RAID_BDEV_STATE_CONFIGURING) { 256 spdk_io_device_unregister(raid_bdev, raid_bdev_io_device_unregister_cb); 257 } 258 } 259 260 static void 261 _raid_bdev_destruct(void *ctxt) 262 { 263 struct raid_bdev *raid_bdev = ctxt; 264 struct raid_base_bdev_info *base_info; 265 266 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_destruct\n"); 267 268 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 269 /* 270 * Close all base bdev descriptors for which call has come from below 271 * layers. Also close the descriptors if we have started shutdown. 272 */ 273 if (g_shutdown_started || base_info->remove_scheduled == true) { 274 raid_bdev_free_base_bdev_resource(base_info); 275 } 276 } 277 278 if (g_shutdown_started) { 279 raid_bdev->state = RAID_BDEV_STATE_OFFLINE; 280 } 281 282 if (raid_bdev->module->stop != NULL) { 283 if (raid_bdev->module->stop(raid_bdev) == false) { 284 return; 285 } 286 } 287 288 raid_bdev_module_stop_done(raid_bdev); 289 } 290 291 static int 292 raid_bdev_destruct(void *ctx) 293 { 294 spdk_thread_exec_msg(spdk_thread_get_app_thread(), _raid_bdev_destruct, ctx); 295 296 return 1; 297 } 298 299 void 300 raid_bdev_io_complete(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status) 301 { 302 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); 303 304 spdk_bdev_io_complete(bdev_io, status); 305 } 306 307 /* 308 * brief: 309 * raid_bdev_io_complete_part - signal the completion of a part of the expected 310 * base bdev IOs and complete the raid_io if this is the final expected IO. 311 * The caller should first set raid_io->base_bdev_io_remaining. This function 312 * will decrement this counter by the value of the 'completed' parameter and 313 * complete the raid_io if the counter reaches 0. The caller is free to 314 * interpret the 'base_bdev_io_remaining' and 'completed' values as needed, 315 * it can represent e.g. blocks or IOs. 316 * params: 317 * raid_io - pointer to raid_bdev_io 318 * completed - the part of the raid_io that has been completed 319 * status - status of the base IO 320 * returns: 321 * true - if the raid_io is completed 322 * false - otherwise 323 */ 324 bool 325 raid_bdev_io_complete_part(struct raid_bdev_io *raid_io, uint64_t completed, 326 enum spdk_bdev_io_status status) 327 { 328 assert(raid_io->base_bdev_io_remaining >= completed); 329 raid_io->base_bdev_io_remaining -= completed; 330 331 if (status != SPDK_BDEV_IO_STATUS_SUCCESS) { 332 raid_io->base_bdev_io_status = status; 333 } 334 335 if (raid_io->base_bdev_io_remaining == 0) { 336 raid_bdev_io_complete(raid_io, raid_io->base_bdev_io_status); 337 return true; 338 } else { 339 return false; 340 } 341 } 342 343 /* 344 * brief: 345 * raid_bdev_queue_io_wait function processes the IO which failed to submit. 346 * It will try to queue the IOs after storing the context to bdev wait queue logic. 347 * params: 348 * raid_io - pointer to raid_bdev_io 349 * bdev - the block device that the IO is submitted to 350 * ch - io channel 351 * cb_fn - callback when the spdk_bdev_io for bdev becomes available 352 * returns: 353 * none 354 */ 355 void 356 raid_bdev_queue_io_wait(struct raid_bdev_io *raid_io, struct spdk_bdev *bdev, 357 struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn) 358 { 359 raid_io->waitq_entry.bdev = bdev; 360 raid_io->waitq_entry.cb_fn = cb_fn; 361 raid_io->waitq_entry.cb_arg = raid_io; 362 spdk_bdev_queue_io_wait(bdev, ch, &raid_io->waitq_entry); 363 } 364 365 static void 366 raid_base_bdev_reset_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 367 { 368 struct raid_bdev_io *raid_io = cb_arg; 369 370 spdk_bdev_free_io(bdev_io); 371 372 raid_bdev_io_complete_part(raid_io, 1, success ? 373 SPDK_BDEV_IO_STATUS_SUCCESS : 374 SPDK_BDEV_IO_STATUS_FAILED); 375 } 376 377 static void raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io); 378 379 static void 380 _raid_bdev_submit_reset_request(void *_raid_io) 381 { 382 struct raid_bdev_io *raid_io = _raid_io; 383 384 raid_bdev_submit_reset_request(raid_io); 385 } 386 387 /* 388 * brief: 389 * raid_bdev_submit_reset_request function submits reset requests 390 * to member disks; it will submit as many as possible unless a reset fails with -ENOMEM, in 391 * which case it will queue it for later submission 392 * params: 393 * raid_io 394 * returns: 395 * none 396 */ 397 static void 398 raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io) 399 { 400 struct raid_bdev *raid_bdev; 401 int ret; 402 uint8_t i; 403 struct raid_base_bdev_info *base_info; 404 struct spdk_io_channel *base_ch; 405 406 raid_bdev = raid_io->raid_bdev; 407 408 if (raid_io->base_bdev_io_remaining == 0) { 409 raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs; 410 } 411 412 for (i = raid_io->base_bdev_io_submitted; i < raid_bdev->num_base_bdevs; i++) { 413 base_info = &raid_bdev->base_bdev_info[i]; 414 base_ch = raid_io->raid_ch->base_channel[i]; 415 if (base_ch == NULL) { 416 raid_io->base_bdev_io_submitted++; 417 raid_bdev_io_complete_part(raid_io, 1, SPDK_BDEV_IO_STATUS_SUCCESS); 418 continue; 419 } 420 ret = spdk_bdev_reset(base_info->desc, base_ch, 421 raid_base_bdev_reset_complete, raid_io); 422 if (ret == 0) { 423 raid_io->base_bdev_io_submitted++; 424 } else if (ret == -ENOMEM) { 425 raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), 426 base_ch, _raid_bdev_submit_reset_request); 427 return; 428 } else { 429 SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); 430 assert(false); 431 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 432 return; 433 } 434 } 435 } 436 437 /* 438 * brief: 439 * Callback function to spdk_bdev_io_get_buf. 440 * params: 441 * ch - pointer to raid bdev io channel 442 * bdev_io - pointer to parent bdev_io on raid bdev device 443 * success - True if buffer is allocated or false otherwise. 444 * returns: 445 * none 446 */ 447 static void 448 raid_bdev_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, 449 bool success) 450 { 451 struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 452 453 if (!success) { 454 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 455 return; 456 } 457 458 raid_io->raid_bdev->module->submit_rw_request(raid_io); 459 } 460 461 /* 462 * brief: 463 * raid_bdev_submit_request function is the submit_request function pointer of 464 * raid bdev function table. This is used to submit the io on raid_bdev to below 465 * layers. 466 * params: 467 * ch - pointer to raid bdev io channel 468 * bdev_io - pointer to parent bdev_io on raid bdev device 469 * returns: 470 * none 471 */ 472 static void 473 raid_bdev_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 474 { 475 struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 476 477 raid_io->raid_bdev = bdev_io->bdev->ctxt; 478 raid_io->raid_ch = spdk_io_channel_get_ctx(ch); 479 raid_io->base_bdev_io_remaining = 0; 480 raid_io->base_bdev_io_submitted = 0; 481 raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS; 482 483 switch (bdev_io->type) { 484 case SPDK_BDEV_IO_TYPE_READ: 485 spdk_bdev_io_get_buf(bdev_io, raid_bdev_get_buf_cb, 486 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); 487 break; 488 case SPDK_BDEV_IO_TYPE_WRITE: 489 raid_io->raid_bdev->module->submit_rw_request(raid_io); 490 break; 491 492 case SPDK_BDEV_IO_TYPE_RESET: 493 raid_bdev_submit_reset_request(raid_io); 494 break; 495 496 case SPDK_BDEV_IO_TYPE_FLUSH: 497 case SPDK_BDEV_IO_TYPE_UNMAP: 498 raid_io->raid_bdev->module->submit_null_payload_request(raid_io); 499 break; 500 501 default: 502 SPDK_ERRLOG("submit request, invalid io type %u\n", bdev_io->type); 503 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 504 break; 505 } 506 } 507 508 /* 509 * brief: 510 * _raid_bdev_io_type_supported checks whether io_type is supported in 511 * all base bdev modules of raid bdev module. If anyone among the base_bdevs 512 * doesn't support, the raid device doesn't supports. 513 * 514 * params: 515 * raid_bdev - pointer to raid bdev context 516 * io_type - io type 517 * returns: 518 * true - io_type is supported 519 * false - io_type is not supported 520 */ 521 inline static bool 522 _raid_bdev_io_type_supported(struct raid_bdev *raid_bdev, enum spdk_bdev_io_type io_type) 523 { 524 struct raid_base_bdev_info *base_info; 525 526 if (io_type == SPDK_BDEV_IO_TYPE_FLUSH || 527 io_type == SPDK_BDEV_IO_TYPE_UNMAP) { 528 if (raid_bdev->module->submit_null_payload_request == NULL) { 529 return false; 530 } 531 } 532 533 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 534 if (base_info->desc == NULL) { 535 continue; 536 } 537 538 if (spdk_bdev_io_type_supported(spdk_bdev_desc_get_bdev(base_info->desc), io_type) == false) { 539 return false; 540 } 541 } 542 543 return true; 544 } 545 546 /* 547 * brief: 548 * raid_bdev_io_type_supported is the io_supported function for bdev function 549 * table which returns whether the particular io type is supported or not by 550 * raid bdev module 551 * params: 552 * ctx - pointer to raid bdev context 553 * type - io type 554 * returns: 555 * true - io_type is supported 556 * false - io_type is not supported 557 */ 558 static bool 559 raid_bdev_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 560 { 561 switch (io_type) { 562 case SPDK_BDEV_IO_TYPE_READ: 563 case SPDK_BDEV_IO_TYPE_WRITE: 564 return true; 565 566 case SPDK_BDEV_IO_TYPE_FLUSH: 567 case SPDK_BDEV_IO_TYPE_RESET: 568 case SPDK_BDEV_IO_TYPE_UNMAP: 569 return _raid_bdev_io_type_supported(ctx, io_type); 570 571 default: 572 return false; 573 } 574 575 return false; 576 } 577 578 /* 579 * brief: 580 * raid_bdev_get_io_channel is the get_io_channel function table pointer for 581 * raid bdev. This is used to return the io channel for this raid bdev 582 * params: 583 * ctxt - pointer to raid_bdev 584 * returns: 585 * pointer to io channel for raid bdev 586 */ 587 static struct spdk_io_channel * 588 raid_bdev_get_io_channel(void *ctxt) 589 { 590 struct raid_bdev *raid_bdev = ctxt; 591 592 return spdk_get_io_channel(raid_bdev); 593 } 594 595 void 596 raid_bdev_write_info_json(struct raid_bdev *raid_bdev, struct spdk_json_write_ctx *w) 597 { 598 struct raid_base_bdev_info *base_info; 599 600 assert(raid_bdev != NULL); 601 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 602 603 spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb); 604 spdk_json_write_named_string(w, "state", raid_bdev_state_to_str(raid_bdev->state)); 605 spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level)); 606 spdk_json_write_named_uint32(w, "num_base_bdevs", raid_bdev->num_base_bdevs); 607 spdk_json_write_named_uint32(w, "num_base_bdevs_discovered", raid_bdev->num_base_bdevs_discovered); 608 spdk_json_write_name(w, "base_bdevs_list"); 609 spdk_json_write_array_begin(w); 610 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 611 if (base_info->desc) { 612 spdk_json_write_string(w, spdk_bdev_desc_get_bdev(base_info->desc)->name); 613 } else { 614 spdk_json_write_null(w); 615 } 616 } 617 spdk_json_write_array_end(w); 618 } 619 620 /* 621 * brief: 622 * raid_bdev_dump_info_json is the function table pointer for raid bdev 623 * params: 624 * ctx - pointer to raid_bdev 625 * w - pointer to json context 626 * returns: 627 * 0 - success 628 * non zero - failure 629 */ 630 static int 631 raid_bdev_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) 632 { 633 struct raid_bdev *raid_bdev = ctx; 634 635 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_dump_config_json\n"); 636 637 /* Dump the raid bdev configuration related information */ 638 spdk_json_write_named_object_begin(w, "raid"); 639 raid_bdev_write_info_json(raid_bdev, w); 640 spdk_json_write_object_end(w); 641 642 return 0; 643 } 644 645 /* 646 * brief: 647 * raid_bdev_write_config_json is the function table pointer for raid bdev 648 * params: 649 * bdev - pointer to spdk_bdev 650 * w - pointer to json context 651 * returns: 652 * none 653 */ 654 static void 655 raid_bdev_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 656 { 657 struct raid_bdev *raid_bdev = bdev->ctxt; 658 struct raid_base_bdev_info *base_info; 659 660 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 661 662 spdk_json_write_object_begin(w); 663 664 spdk_json_write_named_string(w, "method", "bdev_raid_create"); 665 666 spdk_json_write_named_object_begin(w, "params"); 667 spdk_json_write_named_string(w, "name", bdev->name); 668 spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb); 669 spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level)); 670 671 spdk_json_write_named_array_begin(w, "base_bdevs"); 672 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 673 if (base_info->desc) { 674 spdk_json_write_string(w, spdk_bdev_desc_get_bdev(base_info->desc)->name); 675 } 676 } 677 spdk_json_write_array_end(w); 678 spdk_json_write_object_end(w); 679 680 spdk_json_write_object_end(w); 681 } 682 683 static int 684 raid_bdev_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int array_size) 685 { 686 struct raid_bdev *raid_bdev = ctx; 687 struct raid_base_bdev_info *base_info; 688 int domains_count = 0, rc = 0; 689 690 if (raid_bdev->module->memory_domains_supported == false) { 691 return 0; 692 } 693 694 spdk_spin_lock(&raid_bdev->base_bdev_lock); 695 696 /* First loop to get the number of memory domains */ 697 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 698 if (base_info->desc == NULL) { 699 continue; 700 } 701 rc = spdk_bdev_get_memory_domains(spdk_bdev_desc_get_bdev(base_info->desc), NULL, 0); 702 if (rc < 0) { 703 goto out; 704 } 705 domains_count += rc; 706 } 707 708 if (!domains || array_size < domains_count) { 709 goto out; 710 } 711 712 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 713 if (base_info->desc == NULL) { 714 continue; 715 } 716 rc = spdk_bdev_get_memory_domains(spdk_bdev_desc_get_bdev(base_info->desc), domains, array_size); 717 if (rc < 0) { 718 goto out; 719 } 720 domains += rc; 721 array_size -= rc; 722 } 723 out: 724 spdk_spin_unlock(&raid_bdev->base_bdev_lock); 725 726 if (rc < 0) { 727 return rc; 728 } 729 730 return domains_count; 731 } 732 733 /* g_raid_bdev_fn_table is the function table for raid bdev */ 734 static const struct spdk_bdev_fn_table g_raid_bdev_fn_table = { 735 .destruct = raid_bdev_destruct, 736 .submit_request = raid_bdev_submit_request, 737 .io_type_supported = raid_bdev_io_type_supported, 738 .get_io_channel = raid_bdev_get_io_channel, 739 .dump_info_json = raid_bdev_dump_info_json, 740 .write_config_json = raid_bdev_write_config_json, 741 .get_memory_domains = raid_bdev_get_memory_domains, 742 }; 743 744 struct raid_bdev * 745 raid_bdev_find_by_name(const char *name) 746 { 747 struct raid_bdev *raid_bdev; 748 749 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { 750 if (strcmp(raid_bdev->bdev.name, name) == 0) { 751 return raid_bdev; 752 } 753 } 754 755 return NULL; 756 } 757 758 static struct { 759 const char *name; 760 enum raid_level value; 761 } g_raid_level_names[] = { 762 { "raid0", RAID0 }, 763 { "0", RAID0 }, 764 { "raid1", RAID1 }, 765 { "1", RAID1 }, 766 { "raid5f", RAID5F }, 767 { "5f", RAID5F }, 768 { "concat", CONCAT }, 769 { } 770 }; 771 772 static struct { 773 const char *name; 774 enum raid_bdev_state value; 775 } g_raid_state_names[] = { 776 { "online", RAID_BDEV_STATE_ONLINE }, 777 { "configuring", RAID_BDEV_STATE_CONFIGURING }, 778 { "offline", RAID_BDEV_STATE_OFFLINE }, 779 { } 780 }; 781 782 /* We have to use the typedef in the function declaration to appease astyle. */ 783 typedef enum raid_level raid_level_t; 784 typedef enum raid_bdev_state raid_bdev_state_t; 785 786 raid_level_t 787 raid_bdev_str_to_level(const char *str) 788 { 789 unsigned int i; 790 791 assert(str != NULL); 792 793 for (i = 0; g_raid_level_names[i].name != NULL; i++) { 794 if (strcasecmp(g_raid_level_names[i].name, str) == 0) { 795 return g_raid_level_names[i].value; 796 } 797 } 798 799 return INVALID_RAID_LEVEL; 800 } 801 802 const char * 803 raid_bdev_level_to_str(enum raid_level level) 804 { 805 unsigned int i; 806 807 for (i = 0; g_raid_level_names[i].name != NULL; i++) { 808 if (g_raid_level_names[i].value == level) { 809 return g_raid_level_names[i].name; 810 } 811 } 812 813 return ""; 814 } 815 816 raid_bdev_state_t 817 raid_bdev_str_to_state(const char *str) 818 { 819 unsigned int i; 820 821 assert(str != NULL); 822 823 for (i = 0; g_raid_state_names[i].name != NULL; i++) { 824 if (strcasecmp(g_raid_state_names[i].name, str) == 0) { 825 return g_raid_state_names[i].value; 826 } 827 } 828 829 return RAID_BDEV_STATE_MAX; 830 } 831 832 const char * 833 raid_bdev_state_to_str(enum raid_bdev_state state) 834 { 835 unsigned int i; 836 837 for (i = 0; g_raid_state_names[i].name != NULL; i++) { 838 if (g_raid_state_names[i].value == state) { 839 return g_raid_state_names[i].name; 840 } 841 } 842 843 assert(false); 844 return ""; 845 } 846 847 /* 848 * brief: 849 * raid_bdev_fini_start is called when bdev layer is starting the 850 * shutdown process 851 * params: 852 * none 853 * returns: 854 * none 855 */ 856 static void 857 raid_bdev_fini_start(void) 858 { 859 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_fini_start\n"); 860 g_shutdown_started = true; 861 } 862 863 /* 864 * brief: 865 * raid_bdev_exit is called on raid bdev module exit time by bdev layer 866 * params: 867 * none 868 * returns: 869 * none 870 */ 871 static void 872 raid_bdev_exit(void) 873 { 874 struct raid_bdev *raid_bdev, *tmp; 875 876 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_exit\n"); 877 878 TAILQ_FOREACH_SAFE(raid_bdev, &g_raid_bdev_list, global_link, tmp) { 879 raid_bdev_cleanup_and_free(raid_bdev); 880 } 881 } 882 883 /* 884 * brief: 885 * raid_bdev_get_ctx_size is used to return the context size of bdev_io for raid 886 * module 887 * params: 888 * none 889 * returns: 890 * size of spdk_bdev_io context for raid 891 */ 892 static int 893 raid_bdev_get_ctx_size(void) 894 { 895 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_get_ctx_size\n"); 896 return sizeof(struct raid_bdev_io); 897 } 898 899 static struct spdk_bdev_module g_raid_if = { 900 .name = "raid", 901 .module_init = raid_bdev_init, 902 .fini_start = raid_bdev_fini_start, 903 .module_fini = raid_bdev_exit, 904 .get_ctx_size = raid_bdev_get_ctx_size, 905 .examine_config = raid_bdev_examine, 906 .async_init = false, 907 .async_fini = false, 908 }; 909 SPDK_BDEV_MODULE_REGISTER(raid, &g_raid_if) 910 911 /* 912 * brief: 913 * raid_bdev_init is the initialization function for raid bdev module 914 * params: 915 * none 916 * returns: 917 * 0 - success 918 * non zero - failure 919 */ 920 static int 921 raid_bdev_init(void) 922 { 923 return 0; 924 } 925 926 /* 927 * brief: 928 * raid_bdev_create allocates raid bdev based on passed configuration 929 * params: 930 * name - name for raid bdev 931 * strip_size - strip size in KB 932 * num_base_bdevs - number of base bdevs 933 * level - raid level 934 * raid_bdev_out - the created raid bdev 935 * returns: 936 * 0 - success 937 * non zero - failure 938 */ 939 int 940 raid_bdev_create(const char *name, uint32_t strip_size, uint8_t num_base_bdevs, 941 enum raid_level level, struct raid_bdev **raid_bdev_out, const struct spdk_uuid *uuid) 942 { 943 struct raid_bdev *raid_bdev; 944 struct spdk_bdev *raid_bdev_gen; 945 struct raid_bdev_module *module; 946 struct raid_base_bdev_info *base_info; 947 uint8_t min_operational; 948 949 if (raid_bdev_find_by_name(name) != NULL) { 950 SPDK_ERRLOG("Duplicate raid bdev name found: %s\n", name); 951 return -EEXIST; 952 } 953 954 if (level == RAID1) { 955 if (strip_size != 0) { 956 SPDK_ERRLOG("Strip size is not supported by raid1\n"); 957 return -EINVAL; 958 } 959 } else if (spdk_u32_is_pow2(strip_size) == false) { 960 SPDK_ERRLOG("Invalid strip size %" PRIu32 "\n", strip_size); 961 return -EINVAL; 962 } 963 964 module = raid_bdev_module_find(level); 965 if (module == NULL) { 966 SPDK_ERRLOG("Unsupported raid level '%d'\n", level); 967 return -EINVAL; 968 } 969 970 assert(module->base_bdevs_min != 0); 971 if (num_base_bdevs < module->base_bdevs_min) { 972 SPDK_ERRLOG("At least %u base devices required for %s\n", 973 module->base_bdevs_min, 974 raid_bdev_level_to_str(level)); 975 return -EINVAL; 976 } 977 978 switch (module->base_bdevs_constraint.type) { 979 case CONSTRAINT_MAX_BASE_BDEVS_REMOVED: 980 min_operational = num_base_bdevs - module->base_bdevs_constraint.value; 981 break; 982 case CONSTRAINT_MIN_BASE_BDEVS_OPERATIONAL: 983 min_operational = module->base_bdevs_constraint.value; 984 break; 985 case CONSTRAINT_UNSET: 986 if (module->base_bdevs_constraint.value != 0) { 987 SPDK_ERRLOG("Unexpected constraint value '%u' provided for raid bdev '%s'.\n", 988 (uint8_t)module->base_bdevs_constraint.value, name); 989 return -EINVAL; 990 } 991 min_operational = num_base_bdevs; 992 break; 993 default: 994 SPDK_ERRLOG("Unrecognised constraint type '%u' in module for raid level '%s'.\n", 995 (uint8_t)module->base_bdevs_constraint.type, 996 raid_bdev_level_to_str(module->level)); 997 return -EINVAL; 998 }; 999 1000 if (min_operational == 0 || min_operational > num_base_bdevs) { 1001 SPDK_ERRLOG("Wrong constraint value for raid level '%s'.\n", 1002 raid_bdev_level_to_str(module->level)); 1003 return -EINVAL; 1004 } 1005 1006 raid_bdev = calloc(1, sizeof(*raid_bdev)); 1007 if (!raid_bdev) { 1008 SPDK_ERRLOG("Unable to allocate memory for raid bdev\n"); 1009 return -ENOMEM; 1010 } 1011 1012 spdk_spin_init(&raid_bdev->base_bdev_lock); 1013 raid_bdev->module = module; 1014 raid_bdev->num_base_bdevs = num_base_bdevs; 1015 raid_bdev->base_bdev_info = calloc(raid_bdev->num_base_bdevs, 1016 sizeof(struct raid_base_bdev_info)); 1017 if (!raid_bdev->base_bdev_info) { 1018 SPDK_ERRLOG("Unable able to allocate base bdev info\n"); 1019 raid_bdev_free(raid_bdev); 1020 return -ENOMEM; 1021 } 1022 1023 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1024 base_info->raid_bdev = raid_bdev; 1025 } 1026 1027 /* strip_size_kb is from the rpc param. strip_size is in blocks and used 1028 * internally and set later. 1029 */ 1030 raid_bdev->strip_size = 0; 1031 raid_bdev->strip_size_kb = strip_size; 1032 raid_bdev->state = RAID_BDEV_STATE_CONFIGURING; 1033 raid_bdev->level = level; 1034 raid_bdev->min_base_bdevs_operational = min_operational; 1035 1036 raid_bdev_gen = &raid_bdev->bdev; 1037 1038 raid_bdev_gen->name = strdup(name); 1039 if (!raid_bdev_gen->name) { 1040 SPDK_ERRLOG("Unable to allocate name for raid\n"); 1041 raid_bdev_free(raid_bdev); 1042 return -ENOMEM; 1043 } 1044 1045 raid_bdev_gen->product_name = "Raid Volume"; 1046 raid_bdev_gen->ctxt = raid_bdev; 1047 raid_bdev_gen->fn_table = &g_raid_bdev_fn_table; 1048 raid_bdev_gen->module = &g_raid_if; 1049 raid_bdev_gen->write_cache = 0; 1050 1051 if (uuid) { 1052 spdk_uuid_copy(&raid_bdev_gen->uuid, uuid); 1053 } 1054 1055 TAILQ_INSERT_TAIL(&g_raid_bdev_list, raid_bdev, global_link); 1056 1057 *raid_bdev_out = raid_bdev; 1058 1059 return 0; 1060 } 1061 1062 /* 1063 * brief: 1064 * Check underlying block devices against support for metadata. Do not configure 1065 * md support when parameters from block devices are inconsistent. 1066 * params: 1067 * raid_bdev - pointer to raid bdev 1068 * returns: 1069 * 0 - The raid bdev md parameters were successfully configured. 1070 * non zero - Failed to configure md. 1071 */ 1072 static int 1073 raid_bdev_configure_md(struct raid_bdev *raid_bdev) 1074 { 1075 struct spdk_bdev *base_bdev; 1076 uint8_t i; 1077 1078 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 1079 base_bdev = spdk_bdev_desc_get_bdev(raid_bdev->base_bdev_info[i].desc); 1080 1081 if (i == 0) { 1082 raid_bdev->bdev.md_len = spdk_bdev_get_md_size(base_bdev); 1083 raid_bdev->bdev.md_interleave = spdk_bdev_is_md_interleaved(base_bdev); 1084 raid_bdev->bdev.dif_type = spdk_bdev_get_dif_type(base_bdev); 1085 raid_bdev->bdev.dif_is_head_of_md = spdk_bdev_is_dif_head_of_md(base_bdev); 1086 raid_bdev->bdev.dif_check_flags = base_bdev->dif_check_flags; 1087 continue; 1088 } 1089 1090 if (raid_bdev->bdev.md_len != spdk_bdev_get_md_size(base_bdev) || 1091 raid_bdev->bdev.md_interleave != spdk_bdev_is_md_interleaved(base_bdev) || 1092 raid_bdev->bdev.dif_type != spdk_bdev_get_dif_type(base_bdev) || 1093 raid_bdev->bdev.dif_is_head_of_md != spdk_bdev_is_dif_head_of_md(base_bdev) || 1094 raid_bdev->bdev.dif_check_flags != base_bdev->dif_check_flags) { 1095 SPDK_ERRLOG("base bdevs are configured with different metadata formats\n"); 1096 return -EPERM; 1097 } 1098 } 1099 1100 return 0; 1101 } 1102 1103 /* 1104 * brief: 1105 * If raid bdev config is complete, then only register the raid bdev to 1106 * bdev layer and remove this raid bdev from configuring list and 1107 * insert the raid bdev to configured list 1108 * params: 1109 * raid_bdev - pointer to raid bdev 1110 * returns: 1111 * 0 - success 1112 * non zero - failure 1113 */ 1114 static int 1115 raid_bdev_configure(struct raid_bdev *raid_bdev) 1116 { 1117 uint32_t blocklen = 0; 1118 struct spdk_bdev *raid_bdev_gen; 1119 struct raid_base_bdev_info *base_info; 1120 struct spdk_bdev *base_bdev; 1121 int rc = 0; 1122 1123 assert(raid_bdev->state == RAID_BDEV_STATE_CONFIGURING); 1124 assert(raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs); 1125 1126 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1127 assert(base_info->desc != NULL); 1128 base_bdev = spdk_bdev_desc_get_bdev(base_info->desc); 1129 /* Check blocklen for all base bdevs that it should be same */ 1130 if (blocklen == 0) { 1131 blocklen = base_bdev->blocklen; 1132 } else if (blocklen != base_bdev->blocklen) { 1133 /* 1134 * Assumption is that all the base bdevs for any raid bdev should 1135 * have same blocklen 1136 */ 1137 SPDK_ERRLOG("Blocklen of various bdevs not matching\n"); 1138 return -EINVAL; 1139 } 1140 } 1141 assert(blocklen > 0); 1142 1143 /* The strip_size_kb is read in from user in KB. Convert to blocks here for 1144 * internal use. 1145 */ 1146 raid_bdev->strip_size = (raid_bdev->strip_size_kb * 1024) / blocklen; 1147 if (raid_bdev->strip_size == 0 && raid_bdev->level != RAID1) { 1148 SPDK_ERRLOG("Strip size cannot be smaller than the device block size\n"); 1149 return -EINVAL; 1150 } 1151 raid_bdev->strip_size_shift = spdk_u32log2(raid_bdev->strip_size); 1152 raid_bdev->blocklen_shift = spdk_u32log2(blocklen); 1153 1154 raid_bdev_gen = &raid_bdev->bdev; 1155 raid_bdev_gen->blocklen = blocklen; 1156 1157 rc = raid_bdev_configure_md(raid_bdev); 1158 if (rc != 0) { 1159 SPDK_ERRLOG("raid metadata configuration failed\n"); 1160 return rc; 1161 } 1162 1163 rc = raid_bdev->module->start(raid_bdev); 1164 if (rc != 0) { 1165 SPDK_ERRLOG("raid module startup callback failed\n"); 1166 return rc; 1167 } 1168 raid_bdev->state = RAID_BDEV_STATE_ONLINE; 1169 SPDK_DEBUGLOG(bdev_raid, "io device register %p\n", raid_bdev); 1170 SPDK_DEBUGLOG(bdev_raid, "blockcnt %" PRIu64 ", blocklen %u\n", 1171 raid_bdev_gen->blockcnt, raid_bdev_gen->blocklen); 1172 spdk_io_device_register(raid_bdev, raid_bdev_create_cb, raid_bdev_destroy_cb, 1173 sizeof(struct raid_bdev_io_channel), 1174 raid_bdev->bdev.name); 1175 rc = spdk_bdev_register(raid_bdev_gen); 1176 if (rc != 0) { 1177 SPDK_ERRLOG("Unable to register raid bdev and stay at configuring state\n"); 1178 if (raid_bdev->module->stop != NULL) { 1179 raid_bdev->module->stop(raid_bdev); 1180 } 1181 spdk_io_device_unregister(raid_bdev, NULL); 1182 raid_bdev->state = RAID_BDEV_STATE_CONFIGURING; 1183 return rc; 1184 } 1185 SPDK_DEBUGLOG(bdev_raid, "raid bdev generic %p\n", raid_bdev_gen); 1186 SPDK_DEBUGLOG(bdev_raid, "raid bdev is created with name %s, raid_bdev %p\n", 1187 raid_bdev_gen->name, raid_bdev); 1188 1189 return 0; 1190 } 1191 1192 /* 1193 * brief: 1194 * If raid bdev is online and registered, change the bdev state to 1195 * configuring and unregister this raid device. Queue this raid device 1196 * in configuring list 1197 * params: 1198 * raid_bdev - pointer to raid bdev 1199 * cb_fn - callback function 1200 * cb_arg - argument to callback function 1201 * returns: 1202 * none 1203 */ 1204 static void 1205 raid_bdev_deconfigure(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn, 1206 void *cb_arg) 1207 { 1208 if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) { 1209 if (cb_fn) { 1210 cb_fn(cb_arg, 0); 1211 } 1212 return; 1213 } 1214 1215 raid_bdev->state = RAID_BDEV_STATE_OFFLINE; 1216 assert(raid_bdev->num_base_bdevs_discovered); 1217 SPDK_DEBUGLOG(bdev_raid, "raid bdev state changing from online to offline\n"); 1218 1219 spdk_bdev_unregister(&raid_bdev->bdev, cb_fn, cb_arg); 1220 } 1221 1222 /* 1223 * brief: 1224 * raid_bdev_find_base_info_by_bdev function finds the base bdev info by bdev. 1225 * params: 1226 * base_bdev - pointer to base bdev 1227 * returns: 1228 * base bdev info if found, otherwise NULL. 1229 */ 1230 static struct raid_base_bdev_info * 1231 raid_bdev_find_base_info_by_bdev(struct spdk_bdev *base_bdev) 1232 { 1233 struct raid_bdev *raid_bdev; 1234 struct raid_base_bdev_info *base_info; 1235 1236 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { 1237 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1238 if (base_info->desc != NULL && 1239 spdk_bdev_desc_get_bdev(base_info->desc) == base_bdev) { 1240 return base_info; 1241 } 1242 } 1243 } 1244 1245 return NULL; 1246 } 1247 1248 static void 1249 raid_bdev_remove_base_bdev_on_unquiesced(void *ctx, int status) 1250 { 1251 struct raid_base_bdev_info *base_info = ctx; 1252 struct raid_bdev *raid_bdev = base_info->raid_bdev; 1253 1254 base_info->remove_scheduled = false; 1255 1256 if (status != 0) { 1257 SPDK_ERRLOG("Failed to unquiesce raid bdev %s: %s\n", 1258 raid_bdev->bdev.name, spdk_strerror(-status)); 1259 goto out; 1260 } 1261 1262 spdk_spin_lock(&raid_bdev->base_bdev_lock); 1263 raid_bdev_free_base_bdev_resource(base_info); 1264 spdk_spin_unlock(&raid_bdev->base_bdev_lock); 1265 out: 1266 if (base_info->remove_cb != NULL) { 1267 base_info->remove_cb(base_info->remove_cb_ctx, status); 1268 } 1269 } 1270 1271 static void 1272 raid_bdev_channel_remove_base_bdev(struct spdk_io_channel_iter *i) 1273 { 1274 struct raid_base_bdev_info *base_info = spdk_io_channel_iter_get_ctx(i); 1275 struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); 1276 struct raid_bdev_io_channel *raid_ch = spdk_io_channel_get_ctx(ch); 1277 uint8_t idx = base_info - base_info->raid_bdev->base_bdev_info; 1278 1279 SPDK_DEBUGLOG(bdev_raid, "slot: %u raid_ch: %p\n", idx, raid_ch); 1280 1281 if (raid_ch->base_channel[idx] != NULL) { 1282 spdk_put_io_channel(raid_ch->base_channel[idx]); 1283 raid_ch->base_channel[idx] = NULL; 1284 } 1285 1286 spdk_for_each_channel_continue(i, 0); 1287 } 1288 1289 static void 1290 raid_bdev_channels_remove_base_bdev_done(struct spdk_io_channel_iter *i, int status) 1291 { 1292 struct raid_base_bdev_info *base_info = spdk_io_channel_iter_get_ctx(i); 1293 struct raid_bdev *raid_bdev = base_info->raid_bdev; 1294 1295 spdk_bdev_unquiesce(&raid_bdev->bdev, &g_raid_if, raid_bdev_remove_base_bdev_on_unquiesced, 1296 base_info); 1297 } 1298 1299 static void 1300 raid_bdev_remove_base_bdev_on_quiesced(void *ctx, int status) 1301 { 1302 struct raid_base_bdev_info *base_info = ctx; 1303 struct raid_bdev *raid_bdev = base_info->raid_bdev; 1304 1305 if (status != 0) { 1306 SPDK_ERRLOG("Failed to quiesce raid bdev %s: %s\n", 1307 raid_bdev->bdev.name, spdk_strerror(-status)); 1308 base_info->remove_scheduled = false; 1309 if (base_info->remove_cb != NULL) { 1310 base_info->remove_cb(base_info->remove_cb_ctx, status); 1311 } 1312 return; 1313 } 1314 1315 spdk_for_each_channel(raid_bdev, raid_bdev_channel_remove_base_bdev, base_info, 1316 raid_bdev_channels_remove_base_bdev_done); 1317 } 1318 1319 /* 1320 * brief: 1321 * raid_bdev_remove_base_bdev function is called by below layers when base_bdev 1322 * is removed. This function checks if this base bdev is part of any raid bdev 1323 * or not. If yes, it takes necessary action on that particular raid bdev. 1324 * params: 1325 * base_bdev - pointer to base bdev which got removed 1326 * cb_fn - callback function 1327 * cb_arg - argument to callback function 1328 * returns: 1329 * 0 - success 1330 * non zero - failure 1331 */ 1332 int 1333 raid_bdev_remove_base_bdev(struct spdk_bdev *base_bdev, raid_bdev_remove_base_bdev_cb cb_fn, 1334 void *cb_ctx) 1335 { 1336 struct raid_bdev *raid_bdev; 1337 struct raid_base_bdev_info *base_info; 1338 1339 SPDK_DEBUGLOG(bdev_raid, "%s\n", base_bdev->name); 1340 1341 /* Find the raid_bdev which has claimed this base_bdev */ 1342 base_info = raid_bdev_find_base_info_by_bdev(base_bdev); 1343 if (!base_info) { 1344 SPDK_ERRLOG("bdev to remove '%s' not found\n", base_bdev->name); 1345 return -ENODEV; 1346 } 1347 raid_bdev = base_info->raid_bdev; 1348 1349 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 1350 1351 if (base_info->remove_scheduled) { 1352 return 0; 1353 } 1354 1355 assert(base_info->desc); 1356 base_info->remove_scheduled = true; 1357 base_info->remove_cb = cb_fn; 1358 base_info->remove_cb_ctx = cb_ctx; 1359 1360 if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) { 1361 /* 1362 * As raid bdev is not registered yet or already unregistered, 1363 * so cleanup should be done here itself. 1364 */ 1365 raid_bdev_free_base_bdev_resource(base_info); 1366 if (raid_bdev->num_base_bdevs_discovered == 0) { 1367 /* There is no base bdev for this raid, so free the raid device. */ 1368 raid_bdev_cleanup_and_free(raid_bdev); 1369 } 1370 } else if (raid_bdev->num_base_bdevs_discovered == raid_bdev->min_base_bdevs_operational) { 1371 /* 1372 * After this base bdev is removed there will not be enough base bdevs 1373 * to keep the raid bdev operational. 1374 */ 1375 raid_bdev_deconfigure(raid_bdev, cb_fn, cb_ctx); 1376 } else { 1377 int ret; 1378 1379 ret = spdk_bdev_quiesce(&raid_bdev->bdev, &g_raid_if, 1380 raid_bdev_remove_base_bdev_on_quiesced, base_info); 1381 if (ret != 0) { 1382 base_info->remove_scheduled = false; 1383 } 1384 } 1385 1386 return 0; 1387 } 1388 1389 /* 1390 * brief: 1391 * raid_bdev_resize_base_bdev function is called by below layers when base_bdev 1392 * is resized. This function checks if the smallest size of the base_bdevs is changed. 1393 * If yes, call module handler to resize the raid_bdev if implemented. 1394 * params: 1395 * base_bdev - pointer to base bdev which got resized. 1396 * returns: 1397 * none 1398 */ 1399 static void 1400 raid_bdev_resize_base_bdev(struct spdk_bdev *base_bdev) 1401 { 1402 struct raid_bdev *raid_bdev; 1403 struct raid_base_bdev_info *base_info; 1404 1405 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_resize_base_bdev\n"); 1406 1407 base_info = raid_bdev_find_base_info_by_bdev(base_bdev); 1408 1409 /* Find the raid_bdev which has claimed this base_bdev */ 1410 if (!base_info) { 1411 SPDK_ERRLOG("raid_bdev whose base_bdev '%s' not found\n", base_bdev->name); 1412 return; 1413 } 1414 raid_bdev = base_info->raid_bdev; 1415 1416 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 1417 1418 SPDK_NOTICELOG("base_bdev '%s' was resized: old size %" PRIu64 ", new size %" PRIu64 "\n", 1419 base_bdev->name, base_info->blockcnt, base_bdev->blockcnt); 1420 1421 if (raid_bdev->module->resize) { 1422 raid_bdev->module->resize(raid_bdev); 1423 } 1424 } 1425 1426 /* 1427 * brief: 1428 * raid_bdev_event_base_bdev function is called by below layers when base_bdev 1429 * triggers asynchronous event. 1430 * params: 1431 * type - event details. 1432 * bdev - bdev that triggered event. 1433 * event_ctx - context for event. 1434 * returns: 1435 * none 1436 */ 1437 static void 1438 raid_bdev_event_base_bdev(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, 1439 void *event_ctx) 1440 { 1441 int rc; 1442 1443 switch (type) { 1444 case SPDK_BDEV_EVENT_REMOVE: 1445 rc = raid_bdev_remove_base_bdev(bdev, NULL, NULL); 1446 if (rc != 0) { 1447 SPDK_ERRLOG("Failed to remove base bdev %s: %s\n", 1448 spdk_bdev_get_name(bdev), spdk_strerror(-rc)); 1449 } 1450 break; 1451 case SPDK_BDEV_EVENT_RESIZE: 1452 raid_bdev_resize_base_bdev(bdev); 1453 break; 1454 default: 1455 SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type); 1456 break; 1457 } 1458 } 1459 1460 /* 1461 * brief: 1462 * Deletes the specified raid bdev 1463 * params: 1464 * raid_bdev - pointer to raid bdev 1465 * cb_fn - callback function 1466 * cb_arg - argument to callback function 1467 */ 1468 void 1469 raid_bdev_delete(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn, void *cb_arg) 1470 { 1471 struct raid_base_bdev_info *base_info; 1472 1473 SPDK_DEBUGLOG(bdev_raid, "delete raid bdev: %s\n", raid_bdev->bdev.name); 1474 1475 if (raid_bdev->destroy_started) { 1476 SPDK_DEBUGLOG(bdev_raid, "destroying raid bdev %s is already started\n", 1477 raid_bdev->bdev.name); 1478 if (cb_fn) { 1479 cb_fn(cb_arg, -EALREADY); 1480 } 1481 return; 1482 } 1483 1484 raid_bdev->destroy_started = true; 1485 1486 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1487 base_info->remove_scheduled = true; 1488 1489 if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) { 1490 /* 1491 * As raid bdev is not registered yet or already unregistered, 1492 * so cleanup should be done here itself. 1493 */ 1494 raid_bdev_free_base_bdev_resource(base_info); 1495 } 1496 } 1497 1498 if (raid_bdev->num_base_bdevs_discovered == 0) { 1499 /* There is no base bdev for this raid, so free the raid device. */ 1500 raid_bdev_cleanup_and_free(raid_bdev); 1501 if (cb_fn) { 1502 cb_fn(cb_arg, 0); 1503 } 1504 } else { 1505 raid_bdev_deconfigure(raid_bdev, cb_fn, cb_arg); 1506 } 1507 } 1508 1509 static int 1510 raid_bdev_configure_base_bdev(struct raid_base_bdev_info *base_info) 1511 { 1512 struct raid_bdev *raid_bdev = base_info->raid_bdev; 1513 struct spdk_bdev_desc *desc; 1514 struct spdk_bdev *bdev; 1515 int rc; 1516 1517 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 1518 assert(base_info->name != NULL); 1519 assert(base_info->desc == NULL); 1520 1521 rc = spdk_bdev_open_ext(base_info->name, true, raid_bdev_event_base_bdev, NULL, &desc); 1522 if (rc != 0) { 1523 if (rc != -ENODEV) { 1524 SPDK_ERRLOG("Unable to create desc on bdev '%s'\n", base_info->name); 1525 } 1526 return rc; 1527 } 1528 1529 bdev = spdk_bdev_desc_get_bdev(desc); 1530 1531 rc = spdk_bdev_module_claim_bdev(bdev, NULL, &g_raid_if); 1532 if (rc != 0) { 1533 SPDK_ERRLOG("Unable to claim this bdev as it is already claimed\n"); 1534 spdk_bdev_close(desc); 1535 return rc; 1536 } 1537 1538 SPDK_DEBUGLOG(bdev_raid, "bdev %s is claimed\n", bdev->name); 1539 1540 assert(raid_bdev->state != RAID_BDEV_STATE_ONLINE); 1541 1542 base_info->desc = desc; 1543 base_info->blockcnt = bdev->blockcnt; 1544 raid_bdev->num_base_bdevs_discovered++; 1545 assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs); 1546 1547 if (raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs) { 1548 rc = raid_bdev_configure(raid_bdev); 1549 if (rc != 0) { 1550 SPDK_ERRLOG("Failed to configure raid bdev\n"); 1551 return rc; 1552 } 1553 } 1554 1555 return 0; 1556 } 1557 1558 /* 1559 * brief: 1560 * raid_bdev_add_base_device function is the actual function which either adds 1561 * the nvme base device to existing raid bdev or create a new raid bdev. It also claims 1562 * the base device and keep the open descriptor. 1563 * params: 1564 * raid_bdev - pointer to raid bdev 1565 * name - name of the base bdev 1566 * slot - position to add base bdev 1567 * returns: 1568 * 0 - success 1569 * non zero - failure 1570 */ 1571 int 1572 raid_bdev_add_base_device(struct raid_bdev *raid_bdev, const char *name, uint8_t slot) 1573 { 1574 struct raid_base_bdev_info *base_info; 1575 int rc; 1576 1577 if (slot >= raid_bdev->num_base_bdevs) { 1578 return -EINVAL; 1579 } 1580 1581 base_info = &raid_bdev->base_bdev_info[slot]; 1582 1583 if (base_info->name != NULL) { 1584 SPDK_ERRLOG("Slot %u on raid bdev '%s' already assigned to bdev '%s'\n", 1585 slot, raid_bdev->bdev.name, base_info->name); 1586 return -EBUSY; 1587 } 1588 1589 base_info->name = strdup(name); 1590 if (base_info->name == NULL) { 1591 return -ENOMEM; 1592 } 1593 1594 rc = raid_bdev_configure_base_bdev(base_info); 1595 if (rc != 0) { 1596 if (rc != -ENODEV) { 1597 SPDK_ERRLOG("Failed to allocate resource for bdev '%s'\n", name); 1598 } 1599 return rc; 1600 } 1601 1602 return 0; 1603 } 1604 1605 /* 1606 * brief: 1607 * raid_bdev_examine function is the examine function call by the below layers 1608 * like bdev_nvme layer. This function will check if this base bdev can be 1609 * claimed by this raid bdev or not. 1610 * params: 1611 * bdev - pointer to base bdev 1612 * returns: 1613 * none 1614 */ 1615 static void 1616 raid_bdev_examine(struct spdk_bdev *bdev) 1617 { 1618 struct raid_bdev *raid_bdev; 1619 struct raid_base_bdev_info *base_info; 1620 1621 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { 1622 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1623 if (base_info->desc == NULL && base_info->name != NULL && 1624 strcmp(bdev->name, base_info->name) == 0) { 1625 raid_bdev_configure_base_bdev(base_info); 1626 break; 1627 } 1628 } 1629 } 1630 1631 spdk_bdev_module_examine_done(&g_raid_if); 1632 } 1633 1634 /* Log component for bdev raid bdev module */ 1635 SPDK_LOG_REGISTER_COMPONENT(bdev_raid) 1636