1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2018 Intel Corporation. 3 * All rights reserved. 4 * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 5 */ 6 7 #include "bdev_raid.h" 8 #include "spdk/env.h" 9 #include "spdk/thread.h" 10 #include "spdk/log.h" 11 #include "spdk/string.h" 12 #include "spdk/util.h" 13 #include "spdk/json.h" 14 15 static bool g_shutdown_started = false; 16 17 /* List of all raid bdevs */ 18 struct raid_all_tailq g_raid_bdev_list = TAILQ_HEAD_INITIALIZER(g_raid_bdev_list); 19 20 static TAILQ_HEAD(, raid_bdev_module) g_raid_modules = TAILQ_HEAD_INITIALIZER(g_raid_modules); 21 22 static struct raid_bdev_module * 23 raid_bdev_module_find(enum raid_level level) 24 { 25 struct raid_bdev_module *raid_module; 26 27 TAILQ_FOREACH(raid_module, &g_raid_modules, link) { 28 if (raid_module->level == level) { 29 return raid_module; 30 } 31 } 32 33 return NULL; 34 } 35 36 void 37 raid_bdev_module_list_add(struct raid_bdev_module *raid_module) 38 { 39 if (raid_bdev_module_find(raid_module->level) != NULL) { 40 SPDK_ERRLOG("module for raid level '%s' already registered.\n", 41 raid_bdev_level_to_str(raid_module->level)); 42 assert(false); 43 } else { 44 TAILQ_INSERT_TAIL(&g_raid_modules, raid_module, link); 45 } 46 } 47 48 /* Function declarations */ 49 static void raid_bdev_examine(struct spdk_bdev *bdev); 50 static int raid_bdev_init(void); 51 static void raid_bdev_deconfigure(struct raid_bdev *raid_bdev, 52 raid_bdev_destruct_cb cb_fn, void *cb_arg); 53 54 /* 55 * brief: 56 * raid_bdev_create_cb function is a cb function for raid bdev which creates the 57 * hierarchy from raid bdev to base bdev io channels. It will be called per core 58 * params: 59 * io_device - pointer to raid bdev io device represented by raid_bdev 60 * ctx_buf - pointer to context buffer for raid bdev io channel 61 * returns: 62 * 0 - success 63 * non zero - failure 64 */ 65 static int 66 raid_bdev_create_cb(void *io_device, void *ctx_buf) 67 { 68 struct raid_bdev *raid_bdev = io_device; 69 struct raid_bdev_io_channel *raid_ch = ctx_buf; 70 uint8_t i; 71 int ret = 0; 72 73 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_create_cb, %p\n", raid_ch); 74 75 assert(raid_bdev != NULL); 76 assert(raid_bdev->state == RAID_BDEV_STATE_ONLINE); 77 78 raid_ch->num_channels = raid_bdev->num_base_bdevs; 79 80 raid_ch->base_channel = calloc(raid_ch->num_channels, 81 sizeof(struct spdk_io_channel *)); 82 if (!raid_ch->base_channel) { 83 SPDK_ERRLOG("Unable to allocate base bdevs io channel\n"); 84 return -ENOMEM; 85 } 86 87 spdk_spin_lock(&raid_bdev->base_bdev_lock); 88 for (i = 0; i < raid_ch->num_channels; i++) { 89 /* 90 * Get the spdk_io_channel for all the base bdevs. This is used during 91 * split logic to send the respective child bdev ios to respective base 92 * bdev io channel. 93 */ 94 if (raid_bdev->base_bdev_info[i].desc == NULL) { 95 continue; 96 } 97 raid_ch->base_channel[i] = spdk_bdev_get_io_channel( 98 raid_bdev->base_bdev_info[i].desc); 99 if (!raid_ch->base_channel[i]) { 100 SPDK_ERRLOG("Unable to create io channel for base bdev\n"); 101 ret = -ENOMEM; 102 break; 103 } 104 } 105 spdk_spin_unlock(&raid_bdev->base_bdev_lock); 106 107 if (!ret && raid_bdev->module->get_io_channel) { 108 raid_ch->module_channel = raid_bdev->module->get_io_channel(raid_bdev); 109 if (!raid_ch->module_channel) { 110 SPDK_ERRLOG("Unable to create io channel for raid module\n"); 111 ret = -ENOMEM; 112 } 113 } 114 115 if (ret) { 116 for (i = 0; i < raid_ch->num_channels; i++) { 117 if (raid_ch->base_channel[i] != NULL) { 118 spdk_put_io_channel(raid_ch->base_channel[i]); 119 } 120 } 121 free(raid_ch->base_channel); 122 raid_ch->base_channel = NULL; 123 } 124 return ret; 125 } 126 127 /* 128 * brief: 129 * raid_bdev_destroy_cb function is a cb function for raid bdev which deletes the 130 * hierarchy from raid bdev to base bdev io channels. It will be called per core 131 * params: 132 * io_device - pointer to raid bdev io device represented by raid_bdev 133 * ctx_buf - pointer to context buffer for raid bdev io channel 134 * returns: 135 * none 136 */ 137 static void 138 raid_bdev_destroy_cb(void *io_device, void *ctx_buf) 139 { 140 struct raid_bdev_io_channel *raid_ch = ctx_buf; 141 uint8_t i; 142 143 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_destroy_cb\n"); 144 145 assert(raid_ch != NULL); 146 assert(raid_ch->base_channel); 147 148 if (raid_ch->module_channel) { 149 spdk_put_io_channel(raid_ch->module_channel); 150 } 151 152 for (i = 0; i < raid_ch->num_channels; i++) { 153 /* Free base bdev channels */ 154 if (raid_ch->base_channel[i] != NULL) { 155 spdk_put_io_channel(raid_ch->base_channel[i]); 156 } 157 } 158 free(raid_ch->base_channel); 159 raid_ch->base_channel = NULL; 160 } 161 162 /* 163 * brief: 164 * raid_bdev_cleanup is used to cleanup raid_bdev related data 165 * structures. 166 * params: 167 * raid_bdev - pointer to raid_bdev 168 * returns: 169 * none 170 */ 171 static void 172 raid_bdev_cleanup(struct raid_bdev *raid_bdev) 173 { 174 struct raid_base_bdev_info *base_info; 175 176 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_cleanup, %p name %s, state %s\n", 177 raid_bdev, raid_bdev->bdev.name, raid_bdev_state_to_str(raid_bdev->state)); 178 assert(raid_bdev->state != RAID_BDEV_STATE_ONLINE); 179 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 180 181 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 182 assert(base_info->desc == NULL); 183 free(base_info->name); 184 } 185 186 TAILQ_REMOVE(&g_raid_bdev_list, raid_bdev, global_link); 187 } 188 189 static void 190 raid_bdev_free(struct raid_bdev *raid_bdev) 191 { 192 spdk_spin_destroy(&raid_bdev->base_bdev_lock); 193 free(raid_bdev->base_bdev_info); 194 free(raid_bdev->bdev.name); 195 free(raid_bdev); 196 } 197 198 static void 199 raid_bdev_cleanup_and_free(struct raid_bdev *raid_bdev) 200 { 201 raid_bdev_cleanup(raid_bdev); 202 raid_bdev_free(raid_bdev); 203 } 204 205 /* 206 * brief: 207 * free resource of base bdev for raid bdev 208 * params: 209 * base_info - raid base bdev info 210 * returns: 211 * 0 - success 212 * non zero - failure 213 */ 214 static void 215 raid_bdev_free_base_bdev_resource(struct raid_base_bdev_info *base_info) 216 { 217 struct raid_bdev *raid_bdev = base_info->raid_bdev; 218 219 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 220 221 free(base_info->name); 222 base_info->name = NULL; 223 224 if (base_info->desc == NULL) { 225 return; 226 } 227 228 spdk_bdev_module_release_bdev(spdk_bdev_desc_get_bdev(base_info->desc)); 229 spdk_bdev_close(base_info->desc); 230 base_info->desc = NULL; 231 232 assert(raid_bdev->num_base_bdevs_discovered); 233 raid_bdev->num_base_bdevs_discovered--; 234 } 235 236 static void 237 raid_bdev_io_device_unregister_cb(void *io_device) 238 { 239 struct raid_bdev *raid_bdev = io_device; 240 241 if (raid_bdev->num_base_bdevs_discovered == 0) { 242 /* Free raid_bdev when there are no base bdevs left */ 243 SPDK_DEBUGLOG(bdev_raid, "raid bdev base bdevs is 0, going to free all in destruct\n"); 244 raid_bdev_cleanup(raid_bdev); 245 spdk_bdev_destruct_done(&raid_bdev->bdev, 0); 246 raid_bdev_free(raid_bdev); 247 } else { 248 spdk_bdev_destruct_done(&raid_bdev->bdev, 0); 249 } 250 } 251 252 void 253 raid_bdev_module_stop_done(struct raid_bdev *raid_bdev) 254 { 255 if (raid_bdev->state != RAID_BDEV_STATE_CONFIGURING) { 256 spdk_io_device_unregister(raid_bdev, raid_bdev_io_device_unregister_cb); 257 } 258 } 259 260 static void 261 _raid_bdev_destruct(void *ctxt) 262 { 263 struct raid_bdev *raid_bdev = ctxt; 264 struct raid_base_bdev_info *base_info; 265 266 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_destruct\n"); 267 268 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 269 /* 270 * Close all base bdev descriptors for which call has come from below 271 * layers. Also close the descriptors if we have started shutdown. 272 */ 273 if (g_shutdown_started || base_info->remove_scheduled == true) { 274 raid_bdev_free_base_bdev_resource(base_info); 275 } 276 } 277 278 if (g_shutdown_started) { 279 raid_bdev->state = RAID_BDEV_STATE_OFFLINE; 280 } 281 282 if (raid_bdev->module->stop != NULL) { 283 if (raid_bdev->module->stop(raid_bdev) == false) { 284 return; 285 } 286 } 287 288 raid_bdev_module_stop_done(raid_bdev); 289 } 290 291 static int 292 raid_bdev_destruct(void *ctx) 293 { 294 spdk_thread_exec_msg(spdk_thread_get_app_thread(), _raid_bdev_destruct, ctx); 295 296 return 1; 297 } 298 299 void 300 raid_bdev_io_complete(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status) 301 { 302 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); 303 304 spdk_bdev_io_complete(bdev_io, status); 305 } 306 307 /* 308 * brief: 309 * raid_bdev_io_complete_part - signal the completion of a part of the expected 310 * base bdev IOs and complete the raid_io if this is the final expected IO. 311 * The caller should first set raid_io->base_bdev_io_remaining. This function 312 * will decrement this counter by the value of the 'completed' parameter and 313 * complete the raid_io if the counter reaches 0. The caller is free to 314 * interpret the 'base_bdev_io_remaining' and 'completed' values as needed, 315 * it can represent e.g. blocks or IOs. 316 * params: 317 * raid_io - pointer to raid_bdev_io 318 * completed - the part of the raid_io that has been completed 319 * status - status of the base IO 320 * returns: 321 * true - if the raid_io is completed 322 * false - otherwise 323 */ 324 bool 325 raid_bdev_io_complete_part(struct raid_bdev_io *raid_io, uint64_t completed, 326 enum spdk_bdev_io_status status) 327 { 328 assert(raid_io->base_bdev_io_remaining >= completed); 329 raid_io->base_bdev_io_remaining -= completed; 330 331 if (status != SPDK_BDEV_IO_STATUS_SUCCESS) { 332 raid_io->base_bdev_io_status = status; 333 } 334 335 if (raid_io->base_bdev_io_remaining == 0) { 336 raid_bdev_io_complete(raid_io, raid_io->base_bdev_io_status); 337 return true; 338 } else { 339 return false; 340 } 341 } 342 343 /* 344 * brief: 345 * raid_bdev_queue_io_wait function processes the IO which failed to submit. 346 * It will try to queue the IOs after storing the context to bdev wait queue logic. 347 * params: 348 * raid_io - pointer to raid_bdev_io 349 * bdev - the block device that the IO is submitted to 350 * ch - io channel 351 * cb_fn - callback when the spdk_bdev_io for bdev becomes available 352 * returns: 353 * none 354 */ 355 void 356 raid_bdev_queue_io_wait(struct raid_bdev_io *raid_io, struct spdk_bdev *bdev, 357 struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn) 358 { 359 raid_io->waitq_entry.bdev = bdev; 360 raid_io->waitq_entry.cb_fn = cb_fn; 361 raid_io->waitq_entry.cb_arg = raid_io; 362 spdk_bdev_queue_io_wait(bdev, ch, &raid_io->waitq_entry); 363 } 364 365 static void 366 raid_base_bdev_reset_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 367 { 368 struct raid_bdev_io *raid_io = cb_arg; 369 370 spdk_bdev_free_io(bdev_io); 371 372 raid_bdev_io_complete_part(raid_io, 1, success ? 373 SPDK_BDEV_IO_STATUS_SUCCESS : 374 SPDK_BDEV_IO_STATUS_FAILED); 375 } 376 377 static void raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io); 378 379 static void 380 _raid_bdev_submit_reset_request(void *_raid_io) 381 { 382 struct raid_bdev_io *raid_io = _raid_io; 383 384 raid_bdev_submit_reset_request(raid_io); 385 } 386 387 /* 388 * brief: 389 * raid_bdev_submit_reset_request function submits reset requests 390 * to member disks; it will submit as many as possible unless a reset fails with -ENOMEM, in 391 * which case it will queue it for later submission 392 * params: 393 * raid_io 394 * returns: 395 * none 396 */ 397 static void 398 raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io) 399 { 400 struct raid_bdev *raid_bdev; 401 int ret; 402 uint8_t i; 403 struct raid_base_bdev_info *base_info; 404 struct spdk_io_channel *base_ch; 405 406 raid_bdev = raid_io->raid_bdev; 407 408 if (raid_io->base_bdev_io_remaining == 0) { 409 raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs; 410 } 411 412 for (i = raid_io->base_bdev_io_submitted; i < raid_bdev->num_base_bdevs; i++) { 413 base_info = &raid_bdev->base_bdev_info[i]; 414 base_ch = raid_io->raid_ch->base_channel[i]; 415 if (base_ch == NULL) { 416 raid_io->base_bdev_io_submitted++; 417 raid_bdev_io_complete_part(raid_io, 1, SPDK_BDEV_IO_STATUS_SUCCESS); 418 continue; 419 } 420 ret = spdk_bdev_reset(base_info->desc, base_ch, 421 raid_base_bdev_reset_complete, raid_io); 422 if (ret == 0) { 423 raid_io->base_bdev_io_submitted++; 424 } else if (ret == -ENOMEM) { 425 raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), 426 base_ch, _raid_bdev_submit_reset_request); 427 return; 428 } else { 429 SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); 430 assert(false); 431 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 432 return; 433 } 434 } 435 } 436 437 /* 438 * brief: 439 * Callback function to spdk_bdev_io_get_buf. 440 * params: 441 * ch - pointer to raid bdev io channel 442 * bdev_io - pointer to parent bdev_io on raid bdev device 443 * success - True if buffer is allocated or false otherwise. 444 * returns: 445 * none 446 */ 447 static void 448 raid_bdev_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, 449 bool success) 450 { 451 struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 452 453 if (!success) { 454 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 455 return; 456 } 457 458 raid_io->raid_bdev->module->submit_rw_request(raid_io); 459 } 460 461 /* 462 * brief: 463 * raid_bdev_submit_request function is the submit_request function pointer of 464 * raid bdev function table. This is used to submit the io on raid_bdev to below 465 * layers. 466 * params: 467 * ch - pointer to raid bdev io channel 468 * bdev_io - pointer to parent bdev_io on raid bdev device 469 * returns: 470 * none 471 */ 472 static void 473 raid_bdev_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 474 { 475 struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 476 477 raid_io->raid_bdev = bdev_io->bdev->ctxt; 478 raid_io->raid_ch = spdk_io_channel_get_ctx(ch); 479 raid_io->base_bdev_io_remaining = 0; 480 raid_io->base_bdev_io_submitted = 0; 481 raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS; 482 483 switch (bdev_io->type) { 484 case SPDK_BDEV_IO_TYPE_READ: 485 spdk_bdev_io_get_buf(bdev_io, raid_bdev_get_buf_cb, 486 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); 487 break; 488 case SPDK_BDEV_IO_TYPE_WRITE: 489 raid_io->raid_bdev->module->submit_rw_request(raid_io); 490 break; 491 492 case SPDK_BDEV_IO_TYPE_RESET: 493 raid_bdev_submit_reset_request(raid_io); 494 break; 495 496 case SPDK_BDEV_IO_TYPE_FLUSH: 497 case SPDK_BDEV_IO_TYPE_UNMAP: 498 raid_io->raid_bdev->module->submit_null_payload_request(raid_io); 499 break; 500 501 default: 502 SPDK_ERRLOG("submit request, invalid io type %u\n", bdev_io->type); 503 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 504 break; 505 } 506 } 507 508 /* 509 * brief: 510 * _raid_bdev_io_type_supported checks whether io_type is supported in 511 * all base bdev modules of raid bdev module. If anyone among the base_bdevs 512 * doesn't support, the raid device doesn't supports. 513 * 514 * params: 515 * raid_bdev - pointer to raid bdev context 516 * io_type - io type 517 * returns: 518 * true - io_type is supported 519 * false - io_type is not supported 520 */ 521 inline static bool 522 _raid_bdev_io_type_supported(struct raid_bdev *raid_bdev, enum spdk_bdev_io_type io_type) 523 { 524 struct raid_base_bdev_info *base_info; 525 526 if (io_type == SPDK_BDEV_IO_TYPE_FLUSH || 527 io_type == SPDK_BDEV_IO_TYPE_UNMAP) { 528 if (raid_bdev->module->submit_null_payload_request == NULL) { 529 return false; 530 } 531 } 532 533 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 534 if (base_info->desc == NULL) { 535 continue; 536 } 537 538 if (spdk_bdev_io_type_supported(spdk_bdev_desc_get_bdev(base_info->desc), io_type) == false) { 539 return false; 540 } 541 } 542 543 return true; 544 } 545 546 /* 547 * brief: 548 * raid_bdev_io_type_supported is the io_supported function for bdev function 549 * table which returns whether the particular io type is supported or not by 550 * raid bdev module 551 * params: 552 * ctx - pointer to raid bdev context 553 * type - io type 554 * returns: 555 * true - io_type is supported 556 * false - io_type is not supported 557 */ 558 static bool 559 raid_bdev_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 560 { 561 switch (io_type) { 562 case SPDK_BDEV_IO_TYPE_READ: 563 case SPDK_BDEV_IO_TYPE_WRITE: 564 return true; 565 566 case SPDK_BDEV_IO_TYPE_FLUSH: 567 case SPDK_BDEV_IO_TYPE_RESET: 568 case SPDK_BDEV_IO_TYPE_UNMAP: 569 return _raid_bdev_io_type_supported(ctx, io_type); 570 571 default: 572 return false; 573 } 574 575 return false; 576 } 577 578 /* 579 * brief: 580 * raid_bdev_get_io_channel is the get_io_channel function table pointer for 581 * raid bdev. This is used to return the io channel for this raid bdev 582 * params: 583 * ctxt - pointer to raid_bdev 584 * returns: 585 * pointer to io channel for raid bdev 586 */ 587 static struct spdk_io_channel * 588 raid_bdev_get_io_channel(void *ctxt) 589 { 590 struct raid_bdev *raid_bdev = ctxt; 591 592 return spdk_get_io_channel(raid_bdev); 593 } 594 595 void 596 raid_bdev_write_info_json(struct raid_bdev *raid_bdev, struct spdk_json_write_ctx *w) 597 { 598 struct raid_base_bdev_info *base_info; 599 600 assert(raid_bdev != NULL); 601 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 602 603 spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb); 604 spdk_json_write_named_string(w, "state", raid_bdev_state_to_str(raid_bdev->state)); 605 spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level)); 606 spdk_json_write_named_uint32(w, "num_base_bdevs", raid_bdev->num_base_bdevs); 607 spdk_json_write_named_uint32(w, "num_base_bdevs_discovered", raid_bdev->num_base_bdevs_discovered); 608 spdk_json_write_name(w, "base_bdevs_list"); 609 spdk_json_write_array_begin(w); 610 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 611 if (base_info->desc) { 612 spdk_json_write_string(w, spdk_bdev_desc_get_bdev(base_info->desc)->name); 613 } else { 614 spdk_json_write_null(w); 615 } 616 } 617 spdk_json_write_array_end(w); 618 } 619 620 /* 621 * brief: 622 * raid_bdev_dump_info_json is the function table pointer for raid bdev 623 * params: 624 * ctx - pointer to raid_bdev 625 * w - pointer to json context 626 * returns: 627 * 0 - success 628 * non zero - failure 629 */ 630 static int 631 raid_bdev_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) 632 { 633 struct raid_bdev *raid_bdev = ctx; 634 635 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_dump_config_json\n"); 636 637 /* Dump the raid bdev configuration related information */ 638 spdk_json_write_named_object_begin(w, "raid"); 639 raid_bdev_write_info_json(raid_bdev, w); 640 spdk_json_write_object_end(w); 641 642 return 0; 643 } 644 645 /* 646 * brief: 647 * raid_bdev_write_config_json is the function table pointer for raid bdev 648 * params: 649 * bdev - pointer to spdk_bdev 650 * w - pointer to json context 651 * returns: 652 * none 653 */ 654 static void 655 raid_bdev_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 656 { 657 struct raid_bdev *raid_bdev = bdev->ctxt; 658 struct raid_base_bdev_info *base_info; 659 660 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 661 662 spdk_json_write_object_begin(w); 663 664 spdk_json_write_named_string(w, "method", "bdev_raid_create"); 665 666 spdk_json_write_named_object_begin(w, "params"); 667 spdk_json_write_named_string(w, "name", bdev->name); 668 spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb); 669 spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level)); 670 671 spdk_json_write_named_array_begin(w, "base_bdevs"); 672 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 673 if (base_info->desc) { 674 spdk_json_write_string(w, spdk_bdev_desc_get_bdev(base_info->desc)->name); 675 } 676 } 677 spdk_json_write_array_end(w); 678 spdk_json_write_object_end(w); 679 680 spdk_json_write_object_end(w); 681 } 682 683 static int 684 raid_bdev_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int array_size) 685 { 686 struct raid_bdev *raid_bdev = ctx; 687 struct raid_base_bdev_info *base_info; 688 int domains_count = 0, rc = 0; 689 690 if (raid_bdev->module->memory_domains_supported == false) { 691 return 0; 692 } 693 694 spdk_spin_lock(&raid_bdev->base_bdev_lock); 695 696 /* First loop to get the number of memory domains */ 697 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 698 if (base_info->desc == NULL) { 699 continue; 700 } 701 rc = spdk_bdev_get_memory_domains(spdk_bdev_desc_get_bdev(base_info->desc), NULL, 0); 702 if (rc < 0) { 703 goto out; 704 } 705 domains_count += rc; 706 } 707 708 if (!domains || array_size < domains_count) { 709 goto out; 710 } 711 712 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 713 if (base_info->desc == NULL) { 714 continue; 715 } 716 rc = spdk_bdev_get_memory_domains(spdk_bdev_desc_get_bdev(base_info->desc), domains, array_size); 717 if (rc < 0) { 718 goto out; 719 } 720 domains += rc; 721 array_size -= rc; 722 } 723 out: 724 spdk_spin_unlock(&raid_bdev->base_bdev_lock); 725 726 if (rc < 0) { 727 return rc; 728 } 729 730 return domains_count; 731 } 732 733 /* g_raid_bdev_fn_table is the function table for raid bdev */ 734 static const struct spdk_bdev_fn_table g_raid_bdev_fn_table = { 735 .destruct = raid_bdev_destruct, 736 .submit_request = raid_bdev_submit_request, 737 .io_type_supported = raid_bdev_io_type_supported, 738 .get_io_channel = raid_bdev_get_io_channel, 739 .dump_info_json = raid_bdev_dump_info_json, 740 .write_config_json = raid_bdev_write_config_json, 741 .get_memory_domains = raid_bdev_get_memory_domains, 742 }; 743 744 struct raid_bdev * 745 raid_bdev_find_by_name(const char *name) 746 { 747 struct raid_bdev *raid_bdev; 748 749 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { 750 if (strcmp(raid_bdev->bdev.name, name) == 0) { 751 return raid_bdev; 752 } 753 } 754 755 return NULL; 756 } 757 758 static struct { 759 const char *name; 760 enum raid_level value; 761 } g_raid_level_names[] = { 762 { "raid0", RAID0 }, 763 { "0", RAID0 }, 764 { "raid1", RAID1 }, 765 { "1", RAID1 }, 766 { "raid5f", RAID5F }, 767 { "5f", RAID5F }, 768 { "concat", CONCAT }, 769 { } 770 }; 771 772 static struct { 773 const char *name; 774 enum raid_bdev_state value; 775 } g_raid_state_names[] = { 776 { "online", RAID_BDEV_STATE_ONLINE }, 777 { "configuring", RAID_BDEV_STATE_CONFIGURING }, 778 { "offline", RAID_BDEV_STATE_OFFLINE }, 779 { } 780 }; 781 782 /* We have to use the typedef in the function declaration to appease astyle. */ 783 typedef enum raid_level raid_level_t; 784 typedef enum raid_bdev_state raid_bdev_state_t; 785 786 raid_level_t 787 raid_bdev_str_to_level(const char *str) 788 { 789 unsigned int i; 790 791 assert(str != NULL); 792 793 for (i = 0; g_raid_level_names[i].name != NULL; i++) { 794 if (strcasecmp(g_raid_level_names[i].name, str) == 0) { 795 return g_raid_level_names[i].value; 796 } 797 } 798 799 return INVALID_RAID_LEVEL; 800 } 801 802 const char * 803 raid_bdev_level_to_str(enum raid_level level) 804 { 805 unsigned int i; 806 807 for (i = 0; g_raid_level_names[i].name != NULL; i++) { 808 if (g_raid_level_names[i].value == level) { 809 return g_raid_level_names[i].name; 810 } 811 } 812 813 return ""; 814 } 815 816 raid_bdev_state_t 817 raid_bdev_str_to_state(const char *str) 818 { 819 unsigned int i; 820 821 assert(str != NULL); 822 823 for (i = 0; g_raid_state_names[i].name != NULL; i++) { 824 if (strcasecmp(g_raid_state_names[i].name, str) == 0) { 825 return g_raid_state_names[i].value; 826 } 827 } 828 829 return RAID_BDEV_STATE_MAX; 830 } 831 832 const char * 833 raid_bdev_state_to_str(enum raid_bdev_state state) 834 { 835 unsigned int i; 836 837 for (i = 0; g_raid_state_names[i].name != NULL; i++) { 838 if (g_raid_state_names[i].value == state) { 839 return g_raid_state_names[i].name; 840 } 841 } 842 843 assert(false); 844 return ""; 845 } 846 847 /* 848 * brief: 849 * raid_bdev_fini_start is called when bdev layer is starting the 850 * shutdown process 851 * params: 852 * none 853 * returns: 854 * none 855 */ 856 static void 857 raid_bdev_fini_start(void) 858 { 859 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_fini_start\n"); 860 g_shutdown_started = true; 861 } 862 863 /* 864 * brief: 865 * raid_bdev_exit is called on raid bdev module exit time by bdev layer 866 * params: 867 * none 868 * returns: 869 * none 870 */ 871 static void 872 raid_bdev_exit(void) 873 { 874 struct raid_bdev *raid_bdev, *tmp; 875 876 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_exit\n"); 877 878 TAILQ_FOREACH_SAFE(raid_bdev, &g_raid_bdev_list, global_link, tmp) { 879 raid_bdev_cleanup_and_free(raid_bdev); 880 } 881 } 882 883 /* 884 * brief: 885 * raid_bdev_get_ctx_size is used to return the context size of bdev_io for raid 886 * module 887 * params: 888 * none 889 * returns: 890 * size of spdk_bdev_io context for raid 891 */ 892 static int 893 raid_bdev_get_ctx_size(void) 894 { 895 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_get_ctx_size\n"); 896 return sizeof(struct raid_bdev_io); 897 } 898 899 static struct spdk_bdev_module g_raid_if = { 900 .name = "raid", 901 .module_init = raid_bdev_init, 902 .fini_start = raid_bdev_fini_start, 903 .module_fini = raid_bdev_exit, 904 .get_ctx_size = raid_bdev_get_ctx_size, 905 .examine_config = raid_bdev_examine, 906 .async_init = false, 907 .async_fini = false, 908 }; 909 SPDK_BDEV_MODULE_REGISTER(raid, &g_raid_if) 910 911 /* 912 * brief: 913 * raid_bdev_init is the initialization function for raid bdev module 914 * params: 915 * none 916 * returns: 917 * 0 - success 918 * non zero - failure 919 */ 920 static int 921 raid_bdev_init(void) 922 { 923 return 0; 924 } 925 926 /* 927 * brief: 928 * raid_bdev_create allocates raid bdev based on passed configuration 929 * params: 930 * name - name for raid bdev 931 * strip_size - strip size in KB 932 * num_base_bdevs - number of base bdevs 933 * level - raid level 934 * raid_bdev_out - the created raid bdev 935 * returns: 936 * 0 - success 937 * non zero - failure 938 */ 939 int 940 raid_bdev_create(const char *name, uint32_t strip_size, uint8_t num_base_bdevs, 941 enum raid_level level, struct raid_bdev **raid_bdev_out, const struct spdk_uuid *uuid) 942 { 943 struct raid_bdev *raid_bdev; 944 struct spdk_bdev *raid_bdev_gen; 945 struct raid_bdev_module *module; 946 struct raid_base_bdev_info *base_info; 947 uint8_t min_operational; 948 949 if (raid_bdev_find_by_name(name) != NULL) { 950 SPDK_ERRLOG("Duplicate raid bdev name found: %s\n", name); 951 return -EEXIST; 952 } 953 954 if (level == RAID1) { 955 if (strip_size != 0) { 956 SPDK_ERRLOG("Strip size is not supported by raid1\n"); 957 return -EINVAL; 958 } 959 } else if (spdk_u32_is_pow2(strip_size) == false) { 960 SPDK_ERRLOG("Invalid strip size %" PRIu32 "\n", strip_size); 961 return -EINVAL; 962 } 963 964 module = raid_bdev_module_find(level); 965 if (module == NULL) { 966 SPDK_ERRLOG("Unsupported raid level '%d'\n", level); 967 return -EINVAL; 968 } 969 970 assert(module->base_bdevs_min != 0); 971 if (num_base_bdevs < module->base_bdevs_min) { 972 SPDK_ERRLOG("At least %u base devices required for %s\n", 973 module->base_bdevs_min, 974 raid_bdev_level_to_str(level)); 975 return -EINVAL; 976 } 977 978 switch (module->base_bdevs_constraint.type) { 979 case CONSTRAINT_MAX_BASE_BDEVS_REMOVED: 980 min_operational = num_base_bdevs - module->base_bdevs_constraint.value; 981 break; 982 case CONSTRAINT_MIN_BASE_BDEVS_OPERATIONAL: 983 min_operational = module->base_bdevs_constraint.value; 984 break; 985 case CONSTRAINT_UNSET: 986 if (module->base_bdevs_constraint.value != 0) { 987 SPDK_ERRLOG("Unexpected constraint value '%u' provided for raid bdev '%s'.\n", 988 (uint8_t)module->base_bdevs_constraint.value, name); 989 return -EINVAL; 990 } 991 min_operational = num_base_bdevs; 992 break; 993 default: 994 SPDK_ERRLOG("Unrecognised constraint type '%u' in module for raid level '%s'.\n", 995 (uint8_t)module->base_bdevs_constraint.type, 996 raid_bdev_level_to_str(module->level)); 997 return -EINVAL; 998 }; 999 1000 if (min_operational == 0 || min_operational > num_base_bdevs) { 1001 SPDK_ERRLOG("Wrong constraint value for raid level '%s'.\n", 1002 raid_bdev_level_to_str(module->level)); 1003 return -EINVAL; 1004 } 1005 1006 raid_bdev = calloc(1, sizeof(*raid_bdev)); 1007 if (!raid_bdev) { 1008 SPDK_ERRLOG("Unable to allocate memory for raid bdev\n"); 1009 return -ENOMEM; 1010 } 1011 1012 spdk_spin_init(&raid_bdev->base_bdev_lock); 1013 raid_bdev->module = module; 1014 raid_bdev->num_base_bdevs = num_base_bdevs; 1015 raid_bdev->base_bdev_info = calloc(raid_bdev->num_base_bdevs, 1016 sizeof(struct raid_base_bdev_info)); 1017 if (!raid_bdev->base_bdev_info) { 1018 SPDK_ERRLOG("Unable able to allocate base bdev info\n"); 1019 raid_bdev_free(raid_bdev); 1020 return -ENOMEM; 1021 } 1022 1023 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1024 base_info->raid_bdev = raid_bdev; 1025 } 1026 1027 /* strip_size_kb is from the rpc param. strip_size is in blocks and used 1028 * internally and set later. 1029 */ 1030 raid_bdev->strip_size = 0; 1031 raid_bdev->strip_size_kb = strip_size; 1032 raid_bdev->state = RAID_BDEV_STATE_CONFIGURING; 1033 raid_bdev->level = level; 1034 raid_bdev->min_base_bdevs_operational = min_operational; 1035 1036 raid_bdev_gen = &raid_bdev->bdev; 1037 1038 raid_bdev_gen->name = strdup(name); 1039 if (!raid_bdev_gen->name) { 1040 SPDK_ERRLOG("Unable to allocate name for raid\n"); 1041 raid_bdev_free(raid_bdev); 1042 return -ENOMEM; 1043 } 1044 1045 raid_bdev_gen->product_name = "Raid Volume"; 1046 raid_bdev_gen->ctxt = raid_bdev; 1047 raid_bdev_gen->fn_table = &g_raid_bdev_fn_table; 1048 raid_bdev_gen->module = &g_raid_if; 1049 raid_bdev_gen->write_cache = 0; 1050 spdk_uuid_copy(&raid_bdev_gen->uuid, uuid); 1051 1052 TAILQ_INSERT_TAIL(&g_raid_bdev_list, raid_bdev, global_link); 1053 1054 *raid_bdev_out = raid_bdev; 1055 1056 return 0; 1057 } 1058 1059 /* 1060 * brief: 1061 * Check underlying block devices against support for metadata. Do not configure 1062 * md support when parameters from block devices are inconsistent. 1063 * params: 1064 * raid_bdev - pointer to raid bdev 1065 * returns: 1066 * 0 - The raid bdev md parameters were successfully configured. 1067 * non zero - Failed to configure md. 1068 */ 1069 static int 1070 raid_bdev_configure_md(struct raid_bdev *raid_bdev) 1071 { 1072 struct spdk_bdev *base_bdev; 1073 uint8_t i; 1074 1075 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 1076 base_bdev = spdk_bdev_desc_get_bdev(raid_bdev->base_bdev_info[i].desc); 1077 1078 /* Currently, RAID bdevs do not support DIF or DIX, so a RAID bdev cannot 1079 * be created on top of any bdev which supports it */ 1080 if (spdk_bdev_get_dif_type(base_bdev) != SPDK_DIF_DISABLE) { 1081 SPDK_ERRLOG("at least one base bdev has DIF or DIX enabled " 1082 "- unsupported RAID configuration\n"); 1083 return -EPERM; 1084 } 1085 1086 if (i == 0) { 1087 raid_bdev->bdev.md_len = spdk_bdev_get_md_size(base_bdev); 1088 raid_bdev->bdev.md_interleave = spdk_bdev_is_md_interleaved(base_bdev); 1089 continue; 1090 } 1091 1092 if (raid_bdev->bdev.md_len != spdk_bdev_get_md_size(base_bdev) || 1093 raid_bdev->bdev.md_interleave != spdk_bdev_is_md_interleaved(base_bdev)) { 1094 SPDK_ERRLOG("base bdevs are configured with different metadata formats\n"); 1095 return -EPERM; 1096 } 1097 } 1098 1099 return 0; 1100 } 1101 1102 /* 1103 * brief: 1104 * If raid bdev config is complete, then only register the raid bdev to 1105 * bdev layer and remove this raid bdev from configuring list and 1106 * insert the raid bdev to configured list 1107 * params: 1108 * raid_bdev - pointer to raid bdev 1109 * returns: 1110 * 0 - success 1111 * non zero - failure 1112 */ 1113 static int 1114 raid_bdev_configure(struct raid_bdev *raid_bdev) 1115 { 1116 uint32_t blocklen = 0; 1117 struct spdk_bdev *raid_bdev_gen; 1118 struct raid_base_bdev_info *base_info; 1119 struct spdk_bdev *base_bdev; 1120 int rc = 0; 1121 1122 assert(raid_bdev->state == RAID_BDEV_STATE_CONFIGURING); 1123 assert(raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs); 1124 1125 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1126 assert(base_info->desc != NULL); 1127 base_bdev = spdk_bdev_desc_get_bdev(base_info->desc); 1128 /* Check blocklen for all base bdevs that it should be same */ 1129 if (blocklen == 0) { 1130 blocklen = base_bdev->blocklen; 1131 } else if (blocklen != base_bdev->blocklen) { 1132 /* 1133 * Assumption is that all the base bdevs for any raid bdev should 1134 * have same blocklen 1135 */ 1136 SPDK_ERRLOG("Blocklen of various bdevs not matching\n"); 1137 return -EINVAL; 1138 } 1139 } 1140 assert(blocklen > 0); 1141 1142 /* The strip_size_kb is read in from user in KB. Convert to blocks here for 1143 * internal use. 1144 */ 1145 raid_bdev->strip_size = (raid_bdev->strip_size_kb * 1024) / blocklen; 1146 if (raid_bdev->strip_size == 0 && raid_bdev->level != RAID1) { 1147 SPDK_ERRLOG("Strip size cannot be smaller than the device block size\n"); 1148 return -EINVAL; 1149 } 1150 raid_bdev->strip_size_shift = spdk_u32log2(raid_bdev->strip_size); 1151 raid_bdev->blocklen_shift = spdk_u32log2(blocklen); 1152 1153 raid_bdev_gen = &raid_bdev->bdev; 1154 raid_bdev_gen->blocklen = blocklen; 1155 1156 rc = raid_bdev_configure_md(raid_bdev); 1157 if (rc != 0) { 1158 SPDK_ERRLOG("raid metadata configuration failed\n"); 1159 return rc; 1160 } 1161 1162 rc = raid_bdev->module->start(raid_bdev); 1163 if (rc != 0) { 1164 SPDK_ERRLOG("raid module startup callback failed\n"); 1165 return rc; 1166 } 1167 raid_bdev->state = RAID_BDEV_STATE_ONLINE; 1168 SPDK_DEBUGLOG(bdev_raid, "io device register %p\n", raid_bdev); 1169 SPDK_DEBUGLOG(bdev_raid, "blockcnt %" PRIu64 ", blocklen %u\n", 1170 raid_bdev_gen->blockcnt, raid_bdev_gen->blocklen); 1171 spdk_io_device_register(raid_bdev, raid_bdev_create_cb, raid_bdev_destroy_cb, 1172 sizeof(struct raid_bdev_io_channel), 1173 raid_bdev->bdev.name); 1174 rc = spdk_bdev_register(raid_bdev_gen); 1175 if (rc != 0) { 1176 SPDK_ERRLOG("Unable to register raid bdev and stay at configuring state\n"); 1177 if (raid_bdev->module->stop != NULL) { 1178 raid_bdev->module->stop(raid_bdev); 1179 } 1180 spdk_io_device_unregister(raid_bdev, NULL); 1181 raid_bdev->state = RAID_BDEV_STATE_CONFIGURING; 1182 return rc; 1183 } 1184 SPDK_DEBUGLOG(bdev_raid, "raid bdev generic %p\n", raid_bdev_gen); 1185 SPDK_DEBUGLOG(bdev_raid, "raid bdev is created with name %s, raid_bdev %p\n", 1186 raid_bdev_gen->name, raid_bdev); 1187 1188 return 0; 1189 } 1190 1191 /* 1192 * brief: 1193 * If raid bdev is online and registered, change the bdev state to 1194 * configuring and unregister this raid device. Queue this raid device 1195 * in configuring list 1196 * params: 1197 * raid_bdev - pointer to raid bdev 1198 * cb_fn - callback function 1199 * cb_arg - argument to callback function 1200 * returns: 1201 * none 1202 */ 1203 static void 1204 raid_bdev_deconfigure(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn, 1205 void *cb_arg) 1206 { 1207 if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) { 1208 if (cb_fn) { 1209 cb_fn(cb_arg, 0); 1210 } 1211 return; 1212 } 1213 1214 raid_bdev->state = RAID_BDEV_STATE_OFFLINE; 1215 assert(raid_bdev->num_base_bdevs_discovered); 1216 SPDK_DEBUGLOG(bdev_raid, "raid bdev state changing from online to offline\n"); 1217 1218 spdk_bdev_unregister(&raid_bdev->bdev, cb_fn, cb_arg); 1219 } 1220 1221 /* 1222 * brief: 1223 * raid_bdev_find_base_info_by_bdev function finds the base bdev info by bdev. 1224 * params: 1225 * base_bdev - pointer to base bdev 1226 * returns: 1227 * base bdev info if found, otherwise NULL. 1228 */ 1229 static struct raid_base_bdev_info * 1230 raid_bdev_find_base_info_by_bdev(struct spdk_bdev *base_bdev) 1231 { 1232 struct raid_bdev *raid_bdev; 1233 struct raid_base_bdev_info *base_info; 1234 1235 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { 1236 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1237 if (base_info->desc != NULL && 1238 spdk_bdev_desc_get_bdev(base_info->desc) == base_bdev) { 1239 return base_info; 1240 } 1241 } 1242 } 1243 1244 return NULL; 1245 } 1246 1247 static void 1248 raid_bdev_remove_base_bdev_on_unquiesced(void *ctx, int status) 1249 { 1250 struct raid_base_bdev_info *base_info = ctx; 1251 struct raid_bdev *raid_bdev = base_info->raid_bdev; 1252 1253 base_info->remove_scheduled = false; 1254 1255 if (status != 0) { 1256 SPDK_ERRLOG("Failed to unquiesce raid bdev %s: %s\n", 1257 raid_bdev->bdev.name, spdk_strerror(-status)); 1258 goto out; 1259 } 1260 1261 spdk_spin_lock(&raid_bdev->base_bdev_lock); 1262 raid_bdev_free_base_bdev_resource(base_info); 1263 spdk_spin_unlock(&raid_bdev->base_bdev_lock); 1264 out: 1265 if (base_info->remove_cb != NULL) { 1266 base_info->remove_cb(base_info->remove_cb_ctx, status); 1267 } 1268 } 1269 1270 static void 1271 raid_bdev_channel_remove_base_bdev(struct spdk_io_channel_iter *i) 1272 { 1273 struct raid_base_bdev_info *base_info = spdk_io_channel_iter_get_ctx(i); 1274 struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); 1275 struct raid_bdev_io_channel *raid_ch = spdk_io_channel_get_ctx(ch); 1276 uint8_t idx = base_info - base_info->raid_bdev->base_bdev_info; 1277 1278 SPDK_DEBUGLOG(bdev_raid, "slot: %u raid_ch: %p\n", idx, raid_ch); 1279 1280 if (raid_ch->base_channel[idx] != NULL) { 1281 spdk_put_io_channel(raid_ch->base_channel[idx]); 1282 raid_ch->base_channel[idx] = NULL; 1283 } 1284 1285 spdk_for_each_channel_continue(i, 0); 1286 } 1287 1288 static void 1289 raid_bdev_channels_remove_base_bdev_done(struct spdk_io_channel_iter *i, int status) 1290 { 1291 struct raid_base_bdev_info *base_info = spdk_io_channel_iter_get_ctx(i); 1292 struct raid_bdev *raid_bdev = base_info->raid_bdev; 1293 1294 spdk_bdev_unquiesce(&raid_bdev->bdev, &g_raid_if, raid_bdev_remove_base_bdev_on_unquiesced, 1295 base_info); 1296 } 1297 1298 static void 1299 raid_bdev_remove_base_bdev_on_quiesced(void *ctx, int status) 1300 { 1301 struct raid_base_bdev_info *base_info = ctx; 1302 struct raid_bdev *raid_bdev = base_info->raid_bdev; 1303 1304 if (status != 0) { 1305 SPDK_ERRLOG("Failed to quiesce raid bdev %s: %s\n", 1306 raid_bdev->bdev.name, spdk_strerror(-status)); 1307 base_info->remove_scheduled = false; 1308 if (base_info->remove_cb != NULL) { 1309 base_info->remove_cb(base_info->remove_cb_ctx, status); 1310 } 1311 return; 1312 } 1313 1314 spdk_for_each_channel(raid_bdev, raid_bdev_channel_remove_base_bdev, base_info, 1315 raid_bdev_channels_remove_base_bdev_done); 1316 } 1317 1318 /* 1319 * brief: 1320 * raid_bdev_remove_base_bdev function is called by below layers when base_bdev 1321 * is removed. This function checks if this base bdev is part of any raid bdev 1322 * or not. If yes, it takes necessary action on that particular raid bdev. 1323 * params: 1324 * base_bdev - pointer to base bdev which got removed 1325 * cb_fn - callback function 1326 * cb_arg - argument to callback function 1327 * returns: 1328 * 0 - success 1329 * non zero - failure 1330 */ 1331 int 1332 raid_bdev_remove_base_bdev(struct spdk_bdev *base_bdev, raid_bdev_remove_base_bdev_cb cb_fn, 1333 void *cb_ctx) 1334 { 1335 struct raid_bdev *raid_bdev; 1336 struct raid_base_bdev_info *base_info; 1337 1338 SPDK_DEBUGLOG(bdev_raid, "%s\n", base_bdev->name); 1339 1340 /* Find the raid_bdev which has claimed this base_bdev */ 1341 base_info = raid_bdev_find_base_info_by_bdev(base_bdev); 1342 if (!base_info) { 1343 SPDK_ERRLOG("bdev to remove '%s' not found\n", base_bdev->name); 1344 return -ENODEV; 1345 } 1346 raid_bdev = base_info->raid_bdev; 1347 1348 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 1349 1350 if (base_info->remove_scheduled) { 1351 return 0; 1352 } 1353 1354 assert(base_info->desc); 1355 base_info->remove_scheduled = true; 1356 base_info->remove_cb = cb_fn; 1357 base_info->remove_cb_ctx = cb_ctx; 1358 1359 if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) { 1360 /* 1361 * As raid bdev is not registered yet or already unregistered, 1362 * so cleanup should be done here itself. 1363 */ 1364 raid_bdev_free_base_bdev_resource(base_info); 1365 if (raid_bdev->num_base_bdevs_discovered == 0) { 1366 /* There is no base bdev for this raid, so free the raid device. */ 1367 raid_bdev_cleanup_and_free(raid_bdev); 1368 } 1369 } else if (raid_bdev->num_base_bdevs_discovered == raid_bdev->min_base_bdevs_operational) { 1370 /* 1371 * After this base bdev is removed there will not be enough base bdevs 1372 * to keep the raid bdev operational. 1373 */ 1374 raid_bdev_deconfigure(raid_bdev, cb_fn, cb_ctx); 1375 } else { 1376 int ret; 1377 1378 ret = spdk_bdev_quiesce(&raid_bdev->bdev, &g_raid_if, 1379 raid_bdev_remove_base_bdev_on_quiesced, base_info); 1380 if (ret != 0) { 1381 base_info->remove_scheduled = false; 1382 } 1383 } 1384 1385 return 0; 1386 } 1387 1388 /* 1389 * brief: 1390 * raid_bdev_resize_base_bdev function is called by below layers when base_bdev 1391 * is resized. This function checks if the smallest size of the base_bdevs is changed. 1392 * If yes, call module handler to resize the raid_bdev if implemented. 1393 * params: 1394 * base_bdev - pointer to base bdev which got resized. 1395 * returns: 1396 * none 1397 */ 1398 static void 1399 raid_bdev_resize_base_bdev(struct spdk_bdev *base_bdev) 1400 { 1401 struct raid_bdev *raid_bdev; 1402 struct raid_base_bdev_info *base_info; 1403 1404 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_resize_base_bdev\n"); 1405 1406 base_info = raid_bdev_find_base_info_by_bdev(base_bdev); 1407 1408 /* Find the raid_bdev which has claimed this base_bdev */ 1409 if (!base_info) { 1410 SPDK_ERRLOG("raid_bdev whose base_bdev '%s' not found\n", base_bdev->name); 1411 return; 1412 } 1413 raid_bdev = base_info->raid_bdev; 1414 1415 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 1416 1417 SPDK_NOTICELOG("base_bdev '%s' was resized: old size %" PRIu64 ", new size %" PRIu64 "\n", 1418 base_bdev->name, base_info->blockcnt, base_bdev->blockcnt); 1419 1420 if (raid_bdev->module->resize) { 1421 raid_bdev->module->resize(raid_bdev); 1422 } 1423 } 1424 1425 /* 1426 * brief: 1427 * raid_bdev_event_base_bdev function is called by below layers when base_bdev 1428 * triggers asynchronous event. 1429 * params: 1430 * type - event details. 1431 * bdev - bdev that triggered event. 1432 * event_ctx - context for event. 1433 * returns: 1434 * none 1435 */ 1436 static void 1437 raid_bdev_event_base_bdev(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, 1438 void *event_ctx) 1439 { 1440 int rc; 1441 1442 switch (type) { 1443 case SPDK_BDEV_EVENT_REMOVE: 1444 rc = raid_bdev_remove_base_bdev(bdev, NULL, NULL); 1445 if (rc != 0) { 1446 SPDK_ERRLOG("Failed to remove base bdev %s: %s\n", 1447 spdk_bdev_get_name(bdev), spdk_strerror(-rc)); 1448 } 1449 break; 1450 case SPDK_BDEV_EVENT_RESIZE: 1451 raid_bdev_resize_base_bdev(bdev); 1452 break; 1453 default: 1454 SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type); 1455 break; 1456 } 1457 } 1458 1459 /* 1460 * brief: 1461 * Deletes the specified raid bdev 1462 * params: 1463 * raid_bdev - pointer to raid bdev 1464 * cb_fn - callback function 1465 * cb_arg - argument to callback function 1466 */ 1467 void 1468 raid_bdev_delete(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn, void *cb_arg) 1469 { 1470 struct raid_base_bdev_info *base_info; 1471 1472 SPDK_DEBUGLOG(bdev_raid, "delete raid bdev: %s\n", raid_bdev->bdev.name); 1473 1474 if (raid_bdev->destroy_started) { 1475 SPDK_DEBUGLOG(bdev_raid, "destroying raid bdev %s is already started\n", 1476 raid_bdev->bdev.name); 1477 if (cb_fn) { 1478 cb_fn(cb_arg, -EALREADY); 1479 } 1480 return; 1481 } 1482 1483 raid_bdev->destroy_started = true; 1484 1485 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1486 base_info->remove_scheduled = true; 1487 1488 if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) { 1489 /* 1490 * As raid bdev is not registered yet or already unregistered, 1491 * so cleanup should be done here itself. 1492 */ 1493 raid_bdev_free_base_bdev_resource(base_info); 1494 } 1495 } 1496 1497 if (raid_bdev->num_base_bdevs_discovered == 0) { 1498 /* There is no base bdev for this raid, so free the raid device. */ 1499 raid_bdev_cleanup_and_free(raid_bdev); 1500 if (cb_fn) { 1501 cb_fn(cb_arg, 0); 1502 } 1503 } else { 1504 raid_bdev_deconfigure(raid_bdev, cb_fn, cb_arg); 1505 } 1506 } 1507 1508 static int 1509 raid_bdev_configure_base_bdev(struct raid_base_bdev_info *base_info) 1510 { 1511 struct raid_bdev *raid_bdev = base_info->raid_bdev; 1512 struct spdk_bdev_desc *desc; 1513 struct spdk_bdev *bdev; 1514 int rc; 1515 1516 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 1517 assert(base_info->name != NULL); 1518 assert(base_info->desc == NULL); 1519 1520 rc = spdk_bdev_open_ext(base_info->name, true, raid_bdev_event_base_bdev, NULL, &desc); 1521 if (rc != 0) { 1522 if (rc != -ENODEV) { 1523 SPDK_ERRLOG("Unable to create desc on bdev '%s'\n", base_info->name); 1524 } 1525 return rc; 1526 } 1527 1528 bdev = spdk_bdev_desc_get_bdev(desc); 1529 1530 rc = spdk_bdev_module_claim_bdev(bdev, NULL, &g_raid_if); 1531 if (rc != 0) { 1532 SPDK_ERRLOG("Unable to claim this bdev as it is already claimed\n"); 1533 spdk_bdev_close(desc); 1534 return rc; 1535 } 1536 1537 SPDK_DEBUGLOG(bdev_raid, "bdev %s is claimed\n", bdev->name); 1538 1539 assert(raid_bdev->state != RAID_BDEV_STATE_ONLINE); 1540 1541 base_info->desc = desc; 1542 base_info->blockcnt = bdev->blockcnt; 1543 raid_bdev->num_base_bdevs_discovered++; 1544 assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs); 1545 1546 if (raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs) { 1547 rc = raid_bdev_configure(raid_bdev); 1548 if (rc != 0) { 1549 SPDK_ERRLOG("Failed to configure raid bdev\n"); 1550 return rc; 1551 } 1552 } 1553 1554 return 0; 1555 } 1556 1557 /* 1558 * brief: 1559 * raid_bdev_add_base_device function is the actual function which either adds 1560 * the nvme base device to existing raid bdev or create a new raid bdev. It also claims 1561 * the base device and keep the open descriptor. 1562 * params: 1563 * raid_bdev - pointer to raid bdev 1564 * name - name of the base bdev 1565 * slot - position to add base bdev 1566 * returns: 1567 * 0 - success 1568 * non zero - failure 1569 */ 1570 int 1571 raid_bdev_add_base_device(struct raid_bdev *raid_bdev, const char *name, uint8_t slot) 1572 { 1573 struct raid_base_bdev_info *base_info; 1574 int rc; 1575 1576 if (slot >= raid_bdev->num_base_bdevs) { 1577 return -EINVAL; 1578 } 1579 1580 base_info = &raid_bdev->base_bdev_info[slot]; 1581 1582 if (base_info->name != NULL) { 1583 SPDK_ERRLOG("Slot %u on raid bdev '%s' already assigned to bdev '%s'\n", 1584 slot, raid_bdev->bdev.name, base_info->name); 1585 return -EBUSY; 1586 } 1587 1588 base_info->name = strdup(name); 1589 if (base_info->name == NULL) { 1590 return -ENOMEM; 1591 } 1592 1593 rc = raid_bdev_configure_base_bdev(base_info); 1594 if (rc != 0) { 1595 if (rc != -ENODEV) { 1596 SPDK_ERRLOG("Failed to allocate resource for bdev '%s'\n", name); 1597 } 1598 return rc; 1599 } 1600 1601 return 0; 1602 } 1603 1604 /* 1605 * brief: 1606 * raid_bdev_examine function is the examine function call by the below layers 1607 * like bdev_nvme layer. This function will check if this base bdev can be 1608 * claimed by this raid bdev or not. 1609 * params: 1610 * bdev - pointer to base bdev 1611 * returns: 1612 * none 1613 */ 1614 static void 1615 raid_bdev_examine(struct spdk_bdev *bdev) 1616 { 1617 struct raid_bdev *raid_bdev; 1618 struct raid_base_bdev_info *base_info; 1619 1620 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { 1621 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1622 if (base_info->desc == NULL && base_info->name != NULL && 1623 strcmp(bdev->name, base_info->name) == 0) { 1624 raid_bdev_configure_base_bdev(base_info); 1625 break; 1626 } 1627 } 1628 } 1629 1630 spdk_bdev_module_examine_done(&g_raid_if); 1631 } 1632 1633 /* Log component for bdev raid bdev module */ 1634 SPDK_LOG_REGISTER_COMPONENT(bdev_raid) 1635