1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2018 Intel Corporation. 3 * All rights reserved. 4 * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 5 */ 6 7 #include "bdev_raid.h" 8 #include "spdk/env.h" 9 #include "spdk/thread.h" 10 #include "spdk/log.h" 11 #include "spdk/string.h" 12 #include "spdk/util.h" 13 #include "spdk/json.h" 14 15 static bool g_shutdown_started = false; 16 17 /* List of all raid bdevs */ 18 struct raid_all_tailq g_raid_bdev_list = TAILQ_HEAD_INITIALIZER(g_raid_bdev_list); 19 20 static TAILQ_HEAD(, raid_bdev_module) g_raid_modules = TAILQ_HEAD_INITIALIZER(g_raid_modules); 21 22 static struct raid_bdev_module * 23 raid_bdev_module_find(enum raid_level level) 24 { 25 struct raid_bdev_module *raid_module; 26 27 TAILQ_FOREACH(raid_module, &g_raid_modules, link) { 28 if (raid_module->level == level) { 29 return raid_module; 30 } 31 } 32 33 return NULL; 34 } 35 36 void 37 raid_bdev_module_list_add(struct raid_bdev_module *raid_module) 38 { 39 if (raid_bdev_module_find(raid_module->level) != NULL) { 40 SPDK_ERRLOG("module for raid level '%s' already registered.\n", 41 raid_bdev_level_to_str(raid_module->level)); 42 assert(false); 43 } else { 44 TAILQ_INSERT_TAIL(&g_raid_modules, raid_module, link); 45 } 46 } 47 48 /* Function declarations */ 49 static void raid_bdev_examine(struct spdk_bdev *bdev); 50 static int raid_bdev_init(void); 51 static void raid_bdev_deconfigure(struct raid_bdev *raid_bdev, 52 raid_bdev_destruct_cb cb_fn, void *cb_arg); 53 54 /* 55 * brief: 56 * raid_bdev_create_cb function is a cb function for raid bdev which creates the 57 * hierarchy from raid bdev to base bdev io channels. It will be called per core 58 * params: 59 * io_device - pointer to raid bdev io device represented by raid_bdev 60 * ctx_buf - pointer to context buffer for raid bdev io channel 61 * returns: 62 * 0 - success 63 * non zero - failure 64 */ 65 static int 66 raid_bdev_create_cb(void *io_device, void *ctx_buf) 67 { 68 struct raid_bdev *raid_bdev = io_device; 69 struct raid_bdev_io_channel *raid_ch = ctx_buf; 70 uint8_t i; 71 int ret = 0; 72 73 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_create_cb, %p\n", raid_ch); 74 75 assert(raid_bdev != NULL); 76 assert(raid_bdev->state == RAID_BDEV_STATE_ONLINE); 77 78 raid_ch->num_channels = raid_bdev->num_base_bdevs; 79 80 raid_ch->base_channel = calloc(raid_ch->num_channels, 81 sizeof(struct spdk_io_channel *)); 82 if (!raid_ch->base_channel) { 83 SPDK_ERRLOG("Unable to allocate base bdevs io channel\n"); 84 return -ENOMEM; 85 } 86 for (i = 0; i < raid_ch->num_channels; i++) { 87 /* 88 * Get the spdk_io_channel for all the base bdevs. This is used during 89 * split logic to send the respective child bdev ios to respective base 90 * bdev io channel. 91 */ 92 raid_ch->base_channel[i] = spdk_bdev_get_io_channel( 93 raid_bdev->base_bdev_info[i].desc); 94 if (!raid_ch->base_channel[i]) { 95 SPDK_ERRLOG("Unable to create io channel for base bdev\n"); 96 ret = -ENOMEM; 97 break; 98 } 99 } 100 101 if (!ret && raid_bdev->module->get_io_channel) { 102 raid_ch->module_channel = raid_bdev->module->get_io_channel(raid_bdev); 103 if (!raid_ch->module_channel) { 104 SPDK_ERRLOG("Unable to create io channel for raid module\n"); 105 ret = -ENOMEM; 106 } 107 } 108 109 if (ret) { 110 uint8_t j; 111 112 for (j = 0; j < i; j++) { 113 spdk_put_io_channel(raid_ch->base_channel[j]); 114 } 115 free(raid_ch->base_channel); 116 raid_ch->base_channel = NULL; 117 } 118 return ret; 119 } 120 121 /* 122 * brief: 123 * raid_bdev_destroy_cb function is a cb function for raid bdev which deletes the 124 * hierarchy from raid bdev to base bdev io channels. It will be called per core 125 * params: 126 * io_device - pointer to raid bdev io device represented by raid_bdev 127 * ctx_buf - pointer to context buffer for raid bdev io channel 128 * returns: 129 * none 130 */ 131 static void 132 raid_bdev_destroy_cb(void *io_device, void *ctx_buf) 133 { 134 struct raid_bdev_io_channel *raid_ch = ctx_buf; 135 uint8_t i; 136 137 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_destroy_cb\n"); 138 139 assert(raid_ch != NULL); 140 assert(raid_ch->base_channel); 141 142 if (raid_ch->module_channel) { 143 spdk_put_io_channel(raid_ch->module_channel); 144 } 145 146 for (i = 0; i < raid_ch->num_channels; i++) { 147 /* Free base bdev channels */ 148 assert(raid_ch->base_channel[i] != NULL); 149 spdk_put_io_channel(raid_ch->base_channel[i]); 150 } 151 free(raid_ch->base_channel); 152 raid_ch->base_channel = NULL; 153 } 154 155 /* 156 * brief: 157 * raid_bdev_cleanup is used to cleanup raid_bdev related data 158 * structures. 159 * params: 160 * raid_bdev - pointer to raid_bdev 161 * returns: 162 * none 163 */ 164 static void 165 raid_bdev_cleanup(struct raid_bdev *raid_bdev) 166 { 167 struct raid_base_bdev_info *base_info; 168 169 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_cleanup, %p name %s, state %s\n", 170 raid_bdev, raid_bdev->bdev.name, raid_bdev_state_to_str(raid_bdev->state)); 171 assert(raid_bdev->state != RAID_BDEV_STATE_ONLINE); 172 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 173 174 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 175 assert(base_info->bdev == NULL); 176 assert(base_info->desc == NULL); 177 free(base_info->name); 178 } 179 180 TAILQ_REMOVE(&g_raid_bdev_list, raid_bdev, global_link); 181 free(raid_bdev->base_bdev_info); 182 } 183 184 static void 185 raid_bdev_free(struct raid_bdev *raid_bdev) 186 { 187 free(raid_bdev->bdev.name); 188 free(raid_bdev); 189 } 190 191 static void 192 raid_bdev_cleanup_and_free(struct raid_bdev *raid_bdev) 193 { 194 raid_bdev_cleanup(raid_bdev); 195 raid_bdev_free(raid_bdev); 196 } 197 198 /* 199 * brief: 200 * free resource of base bdev for raid bdev 201 * params: 202 * raid_bdev - pointer to raid bdev 203 * base_info - raid base bdev info 204 * returns: 205 * 0 - success 206 * non zero - failure 207 */ 208 static void 209 raid_bdev_free_base_bdev_resource(struct raid_bdev *raid_bdev, 210 struct raid_base_bdev_info *base_info) 211 { 212 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 213 214 free(base_info->name); 215 base_info->name = NULL; 216 217 if (base_info->bdev == NULL) { 218 return; 219 } 220 221 assert(base_info->desc); 222 spdk_bdev_module_release_bdev(base_info->bdev); 223 spdk_bdev_close(base_info->desc); 224 base_info->desc = NULL; 225 base_info->bdev = NULL; 226 227 assert(raid_bdev->num_base_bdevs_discovered); 228 raid_bdev->num_base_bdevs_discovered--; 229 } 230 231 static void 232 raid_bdev_io_device_unregister_cb(void *io_device) 233 { 234 struct raid_bdev *raid_bdev = io_device; 235 236 if (raid_bdev->num_base_bdevs_discovered == 0) { 237 /* Free raid_bdev when there are no base bdevs left */ 238 SPDK_DEBUGLOG(bdev_raid, "raid bdev base bdevs is 0, going to free all in destruct\n"); 239 raid_bdev_cleanup(raid_bdev); 240 spdk_bdev_destruct_done(&raid_bdev->bdev, 0); 241 raid_bdev_free(raid_bdev); 242 } else { 243 spdk_bdev_destruct_done(&raid_bdev->bdev, 0); 244 } 245 } 246 247 void 248 raid_bdev_module_stop_done(struct raid_bdev *raid_bdev) 249 { 250 if (raid_bdev->state != RAID_BDEV_STATE_CONFIGURING) { 251 spdk_io_device_unregister(raid_bdev, raid_bdev_io_device_unregister_cb); 252 } 253 } 254 255 static void 256 _raid_bdev_destruct(void *ctxt) 257 { 258 struct raid_bdev *raid_bdev = ctxt; 259 struct raid_base_bdev_info *base_info; 260 261 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_destruct\n"); 262 263 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 264 /* 265 * Close all base bdev descriptors for which call has come from below 266 * layers. Also close the descriptors if we have started shutdown. 267 */ 268 if (g_shutdown_started || base_info->remove_scheduled == true) { 269 raid_bdev_free_base_bdev_resource(raid_bdev, base_info); 270 } 271 } 272 273 if (g_shutdown_started) { 274 raid_bdev->state = RAID_BDEV_STATE_OFFLINE; 275 } 276 277 if (raid_bdev->module->stop != NULL) { 278 if (raid_bdev->module->stop(raid_bdev) == false) { 279 return; 280 } 281 } 282 283 raid_bdev_module_stop_done(raid_bdev); 284 } 285 286 static int 287 raid_bdev_destruct(void *ctx) 288 { 289 spdk_thread_exec_msg(spdk_thread_get_app_thread(), _raid_bdev_destruct, ctx); 290 291 return 1; 292 } 293 294 void 295 raid_bdev_io_complete(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status) 296 { 297 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); 298 299 spdk_bdev_io_complete(bdev_io, status); 300 } 301 302 /* 303 * brief: 304 * raid_bdev_io_complete_part - signal the completion of a part of the expected 305 * base bdev IOs and complete the raid_io if this is the final expected IO. 306 * The caller should first set raid_io->base_bdev_io_remaining. This function 307 * will decrement this counter by the value of the 'completed' parameter and 308 * complete the raid_io if the counter reaches 0. The caller is free to 309 * interpret the 'base_bdev_io_remaining' and 'completed' values as needed, 310 * it can represent e.g. blocks or IOs. 311 * params: 312 * raid_io - pointer to raid_bdev_io 313 * completed - the part of the raid_io that has been completed 314 * status - status of the base IO 315 * returns: 316 * true - if the raid_io is completed 317 * false - otherwise 318 */ 319 bool 320 raid_bdev_io_complete_part(struct raid_bdev_io *raid_io, uint64_t completed, 321 enum spdk_bdev_io_status status) 322 { 323 assert(raid_io->base_bdev_io_remaining >= completed); 324 raid_io->base_bdev_io_remaining -= completed; 325 326 if (status != SPDK_BDEV_IO_STATUS_SUCCESS) { 327 raid_io->base_bdev_io_status = status; 328 } 329 330 if (raid_io->base_bdev_io_remaining == 0) { 331 raid_bdev_io_complete(raid_io, raid_io->base_bdev_io_status); 332 return true; 333 } else { 334 return false; 335 } 336 } 337 338 /* 339 * brief: 340 * raid_bdev_queue_io_wait function processes the IO which failed to submit. 341 * It will try to queue the IOs after storing the context to bdev wait queue logic. 342 * params: 343 * raid_io - pointer to raid_bdev_io 344 * bdev - the block device that the IO is submitted to 345 * ch - io channel 346 * cb_fn - callback when the spdk_bdev_io for bdev becomes available 347 * returns: 348 * none 349 */ 350 void 351 raid_bdev_queue_io_wait(struct raid_bdev_io *raid_io, struct spdk_bdev *bdev, 352 struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn) 353 { 354 raid_io->waitq_entry.bdev = bdev; 355 raid_io->waitq_entry.cb_fn = cb_fn; 356 raid_io->waitq_entry.cb_arg = raid_io; 357 spdk_bdev_queue_io_wait(bdev, ch, &raid_io->waitq_entry); 358 } 359 360 static void 361 raid_base_bdev_reset_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 362 { 363 struct raid_bdev_io *raid_io = cb_arg; 364 365 spdk_bdev_free_io(bdev_io); 366 367 raid_bdev_io_complete_part(raid_io, 1, success ? 368 SPDK_BDEV_IO_STATUS_SUCCESS : 369 SPDK_BDEV_IO_STATUS_FAILED); 370 } 371 372 static void raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io); 373 374 static void 375 _raid_bdev_submit_reset_request(void *_raid_io) 376 { 377 struct raid_bdev_io *raid_io = _raid_io; 378 379 raid_bdev_submit_reset_request(raid_io); 380 } 381 382 /* 383 * brief: 384 * raid_bdev_submit_reset_request function submits reset requests 385 * to member disks; it will submit as many as possible unless a reset fails with -ENOMEM, in 386 * which case it will queue it for later submission 387 * params: 388 * raid_io 389 * returns: 390 * none 391 */ 392 static void 393 raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io) 394 { 395 struct raid_bdev *raid_bdev; 396 int ret; 397 uint8_t i; 398 struct raid_base_bdev_info *base_info; 399 struct spdk_io_channel *base_ch; 400 401 raid_bdev = raid_io->raid_bdev; 402 403 if (raid_io->base_bdev_io_remaining == 0) { 404 raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs; 405 } 406 407 while (raid_io->base_bdev_io_submitted < raid_bdev->num_base_bdevs) { 408 i = raid_io->base_bdev_io_submitted; 409 base_info = &raid_bdev->base_bdev_info[i]; 410 base_ch = raid_io->raid_ch->base_channel[i]; 411 ret = spdk_bdev_reset(base_info->desc, base_ch, 412 raid_base_bdev_reset_complete, raid_io); 413 if (ret == 0) { 414 raid_io->base_bdev_io_submitted++; 415 } else if (ret == -ENOMEM) { 416 raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch, 417 _raid_bdev_submit_reset_request); 418 return; 419 } else { 420 SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); 421 assert(false); 422 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 423 return; 424 } 425 } 426 } 427 428 /* 429 * brief: 430 * Callback function to spdk_bdev_io_get_buf. 431 * params: 432 * ch - pointer to raid bdev io channel 433 * bdev_io - pointer to parent bdev_io on raid bdev device 434 * success - True if buffer is allocated or false otherwise. 435 * returns: 436 * none 437 */ 438 static void 439 raid_bdev_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, 440 bool success) 441 { 442 struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 443 444 if (!success) { 445 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 446 return; 447 } 448 449 raid_io->raid_bdev->module->submit_rw_request(raid_io); 450 } 451 452 /* 453 * brief: 454 * raid_bdev_submit_request function is the submit_request function pointer of 455 * raid bdev function table. This is used to submit the io on raid_bdev to below 456 * layers. 457 * params: 458 * ch - pointer to raid bdev io channel 459 * bdev_io - pointer to parent bdev_io on raid bdev device 460 * returns: 461 * none 462 */ 463 static void 464 raid_bdev_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 465 { 466 struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 467 468 raid_io->raid_bdev = bdev_io->bdev->ctxt; 469 raid_io->raid_ch = spdk_io_channel_get_ctx(ch); 470 raid_io->base_bdev_io_remaining = 0; 471 raid_io->base_bdev_io_submitted = 0; 472 raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS; 473 474 switch (bdev_io->type) { 475 case SPDK_BDEV_IO_TYPE_READ: 476 spdk_bdev_io_get_buf(bdev_io, raid_bdev_get_buf_cb, 477 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); 478 break; 479 case SPDK_BDEV_IO_TYPE_WRITE: 480 raid_io->raid_bdev->module->submit_rw_request(raid_io); 481 break; 482 483 case SPDK_BDEV_IO_TYPE_RESET: 484 raid_bdev_submit_reset_request(raid_io); 485 break; 486 487 case SPDK_BDEV_IO_TYPE_FLUSH: 488 case SPDK_BDEV_IO_TYPE_UNMAP: 489 raid_io->raid_bdev->module->submit_null_payload_request(raid_io); 490 break; 491 492 default: 493 SPDK_ERRLOG("submit request, invalid io type %u\n", bdev_io->type); 494 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 495 break; 496 } 497 } 498 499 /* 500 * brief: 501 * _raid_bdev_io_type_supported checks whether io_type is supported in 502 * all base bdev modules of raid bdev module. If anyone among the base_bdevs 503 * doesn't support, the raid device doesn't supports. 504 * 505 * params: 506 * raid_bdev - pointer to raid bdev context 507 * io_type - io type 508 * returns: 509 * true - io_type is supported 510 * false - io_type is not supported 511 */ 512 inline static bool 513 _raid_bdev_io_type_supported(struct raid_bdev *raid_bdev, enum spdk_bdev_io_type io_type) 514 { 515 struct raid_base_bdev_info *base_info; 516 517 if (io_type == SPDK_BDEV_IO_TYPE_FLUSH || 518 io_type == SPDK_BDEV_IO_TYPE_UNMAP) { 519 if (raid_bdev->module->submit_null_payload_request == NULL) { 520 return false; 521 } 522 } 523 524 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 525 if (base_info->bdev == NULL) { 526 assert(false); 527 continue; 528 } 529 530 if (spdk_bdev_io_type_supported(base_info->bdev, io_type) == false) { 531 return false; 532 } 533 } 534 535 return true; 536 } 537 538 /* 539 * brief: 540 * raid_bdev_io_type_supported is the io_supported function for bdev function 541 * table which returns whether the particular io type is supported or not by 542 * raid bdev module 543 * params: 544 * ctx - pointer to raid bdev context 545 * type - io type 546 * returns: 547 * true - io_type is supported 548 * false - io_type is not supported 549 */ 550 static bool 551 raid_bdev_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 552 { 553 switch (io_type) { 554 case SPDK_BDEV_IO_TYPE_READ: 555 case SPDK_BDEV_IO_TYPE_WRITE: 556 return true; 557 558 case SPDK_BDEV_IO_TYPE_FLUSH: 559 case SPDK_BDEV_IO_TYPE_RESET: 560 case SPDK_BDEV_IO_TYPE_UNMAP: 561 return _raid_bdev_io_type_supported(ctx, io_type); 562 563 default: 564 return false; 565 } 566 567 return false; 568 } 569 570 /* 571 * brief: 572 * raid_bdev_get_io_channel is the get_io_channel function table pointer for 573 * raid bdev. This is used to return the io channel for this raid bdev 574 * params: 575 * ctxt - pointer to raid_bdev 576 * returns: 577 * pointer to io channel for raid bdev 578 */ 579 static struct spdk_io_channel * 580 raid_bdev_get_io_channel(void *ctxt) 581 { 582 struct raid_bdev *raid_bdev = ctxt; 583 584 return spdk_get_io_channel(raid_bdev); 585 } 586 587 void 588 raid_bdev_write_info_json(struct raid_bdev *raid_bdev, struct spdk_json_write_ctx *w) 589 { 590 struct raid_base_bdev_info *base_info; 591 592 assert(raid_bdev != NULL); 593 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 594 595 spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb); 596 spdk_json_write_named_string(w, "state", raid_bdev_state_to_str(raid_bdev->state)); 597 spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level)); 598 spdk_json_write_named_uint32(w, "num_base_bdevs", raid_bdev->num_base_bdevs); 599 spdk_json_write_named_uint32(w, "num_base_bdevs_discovered", raid_bdev->num_base_bdevs_discovered); 600 spdk_json_write_name(w, "base_bdevs_list"); 601 spdk_json_write_array_begin(w); 602 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 603 if (base_info->bdev) { 604 spdk_json_write_string(w, base_info->bdev->name); 605 } else { 606 spdk_json_write_null(w); 607 } 608 } 609 spdk_json_write_array_end(w); 610 } 611 612 /* 613 * brief: 614 * raid_bdev_dump_info_json is the function table pointer for raid bdev 615 * params: 616 * ctx - pointer to raid_bdev 617 * w - pointer to json context 618 * returns: 619 * 0 - success 620 * non zero - failure 621 */ 622 static int 623 raid_bdev_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) 624 { 625 struct raid_bdev *raid_bdev = ctx; 626 627 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_dump_config_json\n"); 628 629 /* Dump the raid bdev configuration related information */ 630 spdk_json_write_named_object_begin(w, "raid"); 631 raid_bdev_write_info_json(raid_bdev, w); 632 spdk_json_write_object_end(w); 633 634 return 0; 635 } 636 637 /* 638 * brief: 639 * raid_bdev_write_config_json is the function table pointer for raid bdev 640 * params: 641 * bdev - pointer to spdk_bdev 642 * w - pointer to json context 643 * returns: 644 * none 645 */ 646 static void 647 raid_bdev_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 648 { 649 struct raid_bdev *raid_bdev = bdev->ctxt; 650 struct raid_base_bdev_info *base_info; 651 652 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 653 654 spdk_json_write_object_begin(w); 655 656 spdk_json_write_named_string(w, "method", "bdev_raid_create"); 657 658 spdk_json_write_named_object_begin(w, "params"); 659 spdk_json_write_named_string(w, "name", bdev->name); 660 spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb); 661 spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level)); 662 663 spdk_json_write_named_array_begin(w, "base_bdevs"); 664 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 665 if (base_info->bdev) { 666 spdk_json_write_string(w, base_info->bdev->name); 667 } 668 } 669 spdk_json_write_array_end(w); 670 spdk_json_write_object_end(w); 671 672 spdk_json_write_object_end(w); 673 } 674 675 static int 676 raid_bdev_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int array_size) 677 { 678 struct raid_bdev *raid_bdev = ctx; 679 struct spdk_bdev *base_bdev; 680 uint32_t i; 681 int domains_count = 0, rc; 682 683 /* First loop to get the number of memory domains */ 684 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 685 base_bdev = raid_bdev->base_bdev_info[i].bdev; 686 rc = spdk_bdev_get_memory_domains(base_bdev, NULL, 0); 687 if (rc < 0) { 688 return rc; 689 } 690 domains_count += rc; 691 } 692 693 if (!domains || array_size < domains_count) { 694 return domains_count; 695 } 696 697 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 698 base_bdev = raid_bdev->base_bdev_info[i].bdev; 699 rc = spdk_bdev_get_memory_domains(base_bdev, domains, array_size); 700 if (rc < 0) { 701 return rc; 702 } 703 domains += rc; 704 array_size -= rc; 705 } 706 707 return domains_count; 708 } 709 710 /* g_raid_bdev_fn_table is the function table for raid bdev */ 711 static const struct spdk_bdev_fn_table g_raid_bdev_fn_table = { 712 .destruct = raid_bdev_destruct, 713 .submit_request = raid_bdev_submit_request, 714 .io_type_supported = raid_bdev_io_type_supported, 715 .get_io_channel = raid_bdev_get_io_channel, 716 .dump_info_json = raid_bdev_dump_info_json, 717 .write_config_json = raid_bdev_write_config_json, 718 .get_memory_domains = raid_bdev_get_memory_domains, 719 }; 720 721 struct raid_bdev * 722 raid_bdev_find_by_name(const char *name) 723 { 724 struct raid_bdev *raid_bdev; 725 726 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { 727 if (strcmp(raid_bdev->bdev.name, name) == 0) { 728 return raid_bdev; 729 } 730 } 731 732 return NULL; 733 } 734 735 static struct { 736 const char *name; 737 enum raid_level value; 738 } g_raid_level_names[] = { 739 { "raid0", RAID0 }, 740 { "0", RAID0 }, 741 { "raid1", RAID1 }, 742 { "1", RAID1 }, 743 { "raid5f", RAID5F }, 744 { "5f", RAID5F }, 745 { "concat", CONCAT }, 746 { } 747 }; 748 749 static struct { 750 const char *name; 751 enum raid_bdev_state value; 752 } g_raid_state_names[] = { 753 { "online", RAID_BDEV_STATE_ONLINE }, 754 { "configuring", RAID_BDEV_STATE_CONFIGURING }, 755 { "offline", RAID_BDEV_STATE_OFFLINE }, 756 { } 757 }; 758 759 /* We have to use the typedef in the function declaration to appease astyle. */ 760 typedef enum raid_level raid_level_t; 761 typedef enum raid_bdev_state raid_bdev_state_t; 762 763 raid_level_t 764 raid_bdev_str_to_level(const char *str) 765 { 766 unsigned int i; 767 768 assert(str != NULL); 769 770 for (i = 0; g_raid_level_names[i].name != NULL; i++) { 771 if (strcasecmp(g_raid_level_names[i].name, str) == 0) { 772 return g_raid_level_names[i].value; 773 } 774 } 775 776 return INVALID_RAID_LEVEL; 777 } 778 779 const char * 780 raid_bdev_level_to_str(enum raid_level level) 781 { 782 unsigned int i; 783 784 for (i = 0; g_raid_level_names[i].name != NULL; i++) { 785 if (g_raid_level_names[i].value == level) { 786 return g_raid_level_names[i].name; 787 } 788 } 789 790 return ""; 791 } 792 793 raid_bdev_state_t 794 raid_bdev_str_to_state(const char *str) 795 { 796 unsigned int i; 797 798 assert(str != NULL); 799 800 for (i = 0; g_raid_state_names[i].name != NULL; i++) { 801 if (strcasecmp(g_raid_state_names[i].name, str) == 0) { 802 return g_raid_state_names[i].value; 803 } 804 } 805 806 return RAID_BDEV_STATE_MAX; 807 } 808 809 const char * 810 raid_bdev_state_to_str(enum raid_bdev_state state) 811 { 812 unsigned int i; 813 814 for (i = 0; g_raid_state_names[i].name != NULL; i++) { 815 if (g_raid_state_names[i].value == state) { 816 return g_raid_state_names[i].name; 817 } 818 } 819 820 assert(false); 821 return ""; 822 } 823 824 /* 825 * brief: 826 * raid_bdev_fini_start is called when bdev layer is starting the 827 * shutdown process 828 * params: 829 * none 830 * returns: 831 * none 832 */ 833 static void 834 raid_bdev_fini_start(void) 835 { 836 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_fini_start\n"); 837 g_shutdown_started = true; 838 } 839 840 /* 841 * brief: 842 * raid_bdev_exit is called on raid bdev module exit time by bdev layer 843 * params: 844 * none 845 * returns: 846 * none 847 */ 848 static void 849 raid_bdev_exit(void) 850 { 851 struct raid_bdev *raid_bdev, *tmp; 852 853 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_exit\n"); 854 855 TAILQ_FOREACH_SAFE(raid_bdev, &g_raid_bdev_list, global_link, tmp) { 856 raid_bdev_cleanup_and_free(raid_bdev); 857 } 858 } 859 860 /* 861 * brief: 862 * raid_bdev_get_ctx_size is used to return the context size of bdev_io for raid 863 * module 864 * params: 865 * none 866 * returns: 867 * size of spdk_bdev_io context for raid 868 */ 869 static int 870 raid_bdev_get_ctx_size(void) 871 { 872 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_get_ctx_size\n"); 873 return sizeof(struct raid_bdev_io); 874 } 875 876 static struct spdk_bdev_module g_raid_if = { 877 .name = "raid", 878 .module_init = raid_bdev_init, 879 .fini_start = raid_bdev_fini_start, 880 .module_fini = raid_bdev_exit, 881 .get_ctx_size = raid_bdev_get_ctx_size, 882 .examine_config = raid_bdev_examine, 883 .async_init = false, 884 .async_fini = false, 885 }; 886 SPDK_BDEV_MODULE_REGISTER(raid, &g_raid_if) 887 888 /* 889 * brief: 890 * raid_bdev_init is the initialization function for raid bdev module 891 * params: 892 * none 893 * returns: 894 * 0 - success 895 * non zero - failure 896 */ 897 static int 898 raid_bdev_init(void) 899 { 900 return 0; 901 } 902 903 /* 904 * brief: 905 * raid_bdev_create allocates raid bdev based on passed configuration 906 * params: 907 * name - name for raid bdev 908 * strip_size - strip size in KB 909 * num_base_bdevs - number of base bdevs 910 * level - raid level 911 * raid_bdev_out - the created raid bdev 912 * returns: 913 * 0 - success 914 * non zero - failure 915 */ 916 int 917 raid_bdev_create(const char *name, uint32_t strip_size, uint8_t num_base_bdevs, 918 enum raid_level level, struct raid_bdev **raid_bdev_out) 919 { 920 struct raid_bdev *raid_bdev; 921 struct spdk_bdev *raid_bdev_gen; 922 struct raid_bdev_module *module; 923 uint8_t min_operational; 924 925 if (raid_bdev_find_by_name(name) != NULL) { 926 SPDK_ERRLOG("Duplicate raid bdev name found: %s\n", name); 927 return -EEXIST; 928 } 929 930 if (level == RAID1) { 931 if (strip_size != 0) { 932 SPDK_ERRLOG("Strip size is not supported by raid1\n"); 933 return -EINVAL; 934 } 935 } else if (spdk_u32_is_pow2(strip_size) == false) { 936 SPDK_ERRLOG("Invalid strip size %" PRIu32 "\n", strip_size); 937 return -EINVAL; 938 } 939 940 module = raid_bdev_module_find(level); 941 if (module == NULL) { 942 SPDK_ERRLOG("Unsupported raid level '%d'\n", level); 943 return -EINVAL; 944 } 945 946 assert(module->base_bdevs_min != 0); 947 if (num_base_bdevs < module->base_bdevs_min) { 948 SPDK_ERRLOG("At least %u base devices required for %s\n", 949 module->base_bdevs_min, 950 raid_bdev_level_to_str(level)); 951 return -EINVAL; 952 } 953 954 switch (module->base_bdevs_constraint.type) { 955 case CONSTRAINT_MAX_BASE_BDEVS_REMOVED: 956 min_operational = num_base_bdevs - module->base_bdevs_constraint.value; 957 break; 958 case CONSTRAINT_MIN_BASE_BDEVS_OPERATIONAL: 959 min_operational = module->base_bdevs_constraint.value; 960 break; 961 case CONSTRAINT_UNSET: 962 if (module->base_bdevs_constraint.value != 0) { 963 SPDK_ERRLOG("Unexpected constraint value '%u' provided for raid bdev '%s'.\n", 964 (uint8_t)module->base_bdevs_constraint.value, name); 965 return -EINVAL; 966 } 967 min_operational = num_base_bdevs; 968 break; 969 default: 970 SPDK_ERRLOG("Unrecognised constraint type '%u' in module for raid level '%s'.\n", 971 (uint8_t)module->base_bdevs_constraint.type, 972 raid_bdev_level_to_str(module->level)); 973 return -EINVAL; 974 }; 975 976 if (min_operational == 0 || min_operational > num_base_bdevs) { 977 SPDK_ERRLOG("Wrong constraint value for raid level '%s'.\n", 978 raid_bdev_level_to_str(module->level)); 979 return -EINVAL; 980 } 981 982 raid_bdev = calloc(1, sizeof(*raid_bdev)); 983 if (!raid_bdev) { 984 SPDK_ERRLOG("Unable to allocate memory for raid bdev\n"); 985 return -ENOMEM; 986 } 987 988 raid_bdev->module = module; 989 raid_bdev->num_base_bdevs = num_base_bdevs; 990 raid_bdev->base_bdev_info = calloc(raid_bdev->num_base_bdevs, 991 sizeof(struct raid_base_bdev_info)); 992 if (!raid_bdev->base_bdev_info) { 993 SPDK_ERRLOG("Unable able to allocate base bdev info\n"); 994 free(raid_bdev); 995 return -ENOMEM; 996 } 997 998 /* strip_size_kb is from the rpc param. strip_size is in blocks and used 999 * internally and set later. 1000 */ 1001 raid_bdev->strip_size = 0; 1002 raid_bdev->strip_size_kb = strip_size; 1003 raid_bdev->state = RAID_BDEV_STATE_CONFIGURING; 1004 raid_bdev->level = level; 1005 raid_bdev->min_base_bdevs_operational = min_operational; 1006 1007 raid_bdev_gen = &raid_bdev->bdev; 1008 1009 raid_bdev_gen->name = strdup(name); 1010 if (!raid_bdev_gen->name) { 1011 SPDK_ERRLOG("Unable to allocate name for raid\n"); 1012 free(raid_bdev->base_bdev_info); 1013 free(raid_bdev); 1014 return -ENOMEM; 1015 } 1016 1017 raid_bdev_gen->product_name = "Raid Volume"; 1018 raid_bdev_gen->ctxt = raid_bdev; 1019 raid_bdev_gen->fn_table = &g_raid_bdev_fn_table; 1020 raid_bdev_gen->module = &g_raid_if; 1021 raid_bdev_gen->write_cache = 0; 1022 1023 TAILQ_INSERT_TAIL(&g_raid_bdev_list, raid_bdev, global_link); 1024 1025 *raid_bdev_out = raid_bdev; 1026 1027 return 0; 1028 } 1029 1030 /* 1031 * brief: 1032 * Check underlying block devices against support for metadata. Do not configure 1033 * md support when parameters from block devices are inconsistent. 1034 * params: 1035 * raid_bdev - pointer to raid bdev 1036 * returns: 1037 * 0 - The raid bdev md parameters were successfully configured. 1038 * non zero - Failed to configure md. 1039 */ 1040 static int 1041 raid_bdev_configure_md(struct raid_bdev *raid_bdev) 1042 { 1043 struct spdk_bdev *base_bdev; 1044 uint8_t i; 1045 1046 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 1047 base_bdev = raid_bdev->base_bdev_info[i].bdev; 1048 1049 if (i == 0) { 1050 raid_bdev->bdev.md_len = spdk_bdev_get_md_size(base_bdev); 1051 raid_bdev->bdev.md_interleave = spdk_bdev_is_md_interleaved(base_bdev); 1052 raid_bdev->bdev.dif_type = spdk_bdev_get_dif_type(base_bdev); 1053 raid_bdev->bdev.dif_is_head_of_md = spdk_bdev_is_dif_head_of_md(base_bdev); 1054 raid_bdev->bdev.dif_check_flags = base_bdev->dif_check_flags; 1055 continue; 1056 } 1057 1058 if (raid_bdev->bdev.md_len != spdk_bdev_get_md_size(base_bdev) || 1059 raid_bdev->bdev.md_interleave != spdk_bdev_is_md_interleaved(base_bdev) || 1060 raid_bdev->bdev.dif_type != spdk_bdev_get_dif_type(base_bdev) || 1061 raid_bdev->bdev.dif_is_head_of_md != spdk_bdev_is_dif_head_of_md(base_bdev) || 1062 raid_bdev->bdev.dif_check_flags != base_bdev->dif_check_flags) { 1063 SPDK_ERRLOG("base bdevs are configured with different metadata formats\n"); 1064 return -EPERM; 1065 } 1066 } 1067 1068 return 0; 1069 } 1070 1071 /* 1072 * brief: 1073 * If raid bdev config is complete, then only register the raid bdev to 1074 * bdev layer and remove this raid bdev from configuring list and 1075 * insert the raid bdev to configured list 1076 * params: 1077 * raid_bdev - pointer to raid bdev 1078 * returns: 1079 * 0 - success 1080 * non zero - failure 1081 */ 1082 static int 1083 raid_bdev_configure(struct raid_bdev *raid_bdev) 1084 { 1085 uint32_t blocklen = 0; 1086 struct spdk_bdev *raid_bdev_gen; 1087 struct raid_base_bdev_info *base_info; 1088 int rc = 0; 1089 1090 assert(raid_bdev->state == RAID_BDEV_STATE_CONFIGURING); 1091 assert(raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs); 1092 1093 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1094 assert(base_info->bdev != NULL); 1095 /* Check blocklen for all base bdevs that it should be same */ 1096 if (blocklen == 0) { 1097 blocklen = base_info->bdev->blocklen; 1098 } else if (blocklen != base_info->bdev->blocklen) { 1099 /* 1100 * Assumption is that all the base bdevs for any raid bdev should 1101 * have same blocklen 1102 */ 1103 SPDK_ERRLOG("Blocklen of various bdevs not matching\n"); 1104 return -EINVAL; 1105 } 1106 } 1107 assert(blocklen > 0); 1108 1109 /* The strip_size_kb is read in from user in KB. Convert to blocks here for 1110 * internal use. 1111 */ 1112 raid_bdev->strip_size = (raid_bdev->strip_size_kb * 1024) / blocklen; 1113 raid_bdev->strip_size_shift = spdk_u32log2(raid_bdev->strip_size); 1114 raid_bdev->blocklen_shift = spdk_u32log2(blocklen); 1115 1116 raid_bdev_gen = &raid_bdev->bdev; 1117 raid_bdev_gen->blocklen = blocklen; 1118 1119 rc = raid_bdev_configure_md(raid_bdev); 1120 if (rc != 0) { 1121 SPDK_ERRLOG("raid metadata configuration failed\n"); 1122 return rc; 1123 } 1124 1125 rc = raid_bdev->module->start(raid_bdev); 1126 if (rc != 0) { 1127 SPDK_ERRLOG("raid module startup callback failed\n"); 1128 return rc; 1129 } 1130 raid_bdev->state = RAID_BDEV_STATE_ONLINE; 1131 SPDK_DEBUGLOG(bdev_raid, "io device register %p\n", raid_bdev); 1132 SPDK_DEBUGLOG(bdev_raid, "blockcnt %" PRIu64 ", blocklen %u\n", 1133 raid_bdev_gen->blockcnt, raid_bdev_gen->blocklen); 1134 spdk_io_device_register(raid_bdev, raid_bdev_create_cb, raid_bdev_destroy_cb, 1135 sizeof(struct raid_bdev_io_channel), 1136 raid_bdev->bdev.name); 1137 rc = spdk_bdev_register(raid_bdev_gen); 1138 if (rc != 0) { 1139 SPDK_ERRLOG("Unable to register raid bdev and stay at configuring state\n"); 1140 if (raid_bdev->module->stop != NULL) { 1141 raid_bdev->module->stop(raid_bdev); 1142 } 1143 spdk_io_device_unregister(raid_bdev, NULL); 1144 raid_bdev->state = RAID_BDEV_STATE_CONFIGURING; 1145 return rc; 1146 } 1147 SPDK_DEBUGLOG(bdev_raid, "raid bdev generic %p\n", raid_bdev_gen); 1148 SPDK_DEBUGLOG(bdev_raid, "raid bdev is created with name %s, raid_bdev %p\n", 1149 raid_bdev_gen->name, raid_bdev); 1150 1151 return 0; 1152 } 1153 1154 /* 1155 * brief: 1156 * If raid bdev is online and registered, change the bdev state to 1157 * configuring and unregister this raid device. Queue this raid device 1158 * in configuring list 1159 * params: 1160 * raid_bdev - pointer to raid bdev 1161 * cb_fn - callback function 1162 * cb_arg - argument to callback function 1163 * returns: 1164 * none 1165 */ 1166 static void 1167 raid_bdev_deconfigure(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn, 1168 void *cb_arg) 1169 { 1170 if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) { 1171 if (cb_fn) { 1172 cb_fn(cb_arg, 0); 1173 } 1174 return; 1175 } 1176 1177 assert(raid_bdev->num_base_bdevs == raid_bdev->num_base_bdevs_discovered); 1178 raid_bdev->state = RAID_BDEV_STATE_OFFLINE; 1179 assert(raid_bdev->num_base_bdevs_discovered); 1180 SPDK_DEBUGLOG(bdev_raid, "raid bdev state changing from online to offline\n"); 1181 1182 spdk_bdev_unregister(&raid_bdev->bdev, cb_fn, cb_arg); 1183 } 1184 1185 /* 1186 * brief: 1187 * raid_bdev_find_by_base_bdev function finds the raid bdev which has 1188 * claimed the base bdev. 1189 * params: 1190 * base_bdev - pointer to base bdev pointer 1191 * _raid_bdev - Reference to pointer to raid bdev 1192 * _base_info - Reference to the raid base bdev info. 1193 * returns: 1194 * true - if the raid bdev is found. 1195 * false - if the raid bdev is not found. 1196 */ 1197 static bool 1198 raid_bdev_find_by_base_bdev(struct spdk_bdev *base_bdev, struct raid_bdev **_raid_bdev, 1199 struct raid_base_bdev_info **_base_info) 1200 { 1201 struct raid_bdev *raid_bdev; 1202 struct raid_base_bdev_info *base_info; 1203 1204 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { 1205 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1206 if (base_info->bdev == base_bdev) { 1207 *_raid_bdev = raid_bdev; 1208 *_base_info = base_info; 1209 return true; 1210 } 1211 } 1212 } 1213 1214 return false; 1215 } 1216 1217 /* 1218 * brief: 1219 * raid_bdev_remove_base_bdev function is called by below layers when base_bdev 1220 * is removed. This function checks if this base bdev is part of any raid bdev 1221 * or not. If yes, it takes necessary action on that particular raid bdev. 1222 * params: 1223 * base_bdev - pointer to base bdev which got removed 1224 * returns: 1225 * none 1226 */ 1227 static void 1228 raid_bdev_remove_base_bdev(struct spdk_bdev *base_bdev) 1229 { 1230 struct raid_bdev *raid_bdev = NULL; 1231 struct raid_base_bdev_info *base_info; 1232 1233 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_remove_base_bdev\n"); 1234 1235 /* Find the raid_bdev which has claimed this base_bdev */ 1236 if (!raid_bdev_find_by_base_bdev(base_bdev, &raid_bdev, &base_info)) { 1237 SPDK_ERRLOG("bdev to remove '%s' not found\n", base_bdev->name); 1238 return; 1239 } 1240 1241 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 1242 1243 assert(base_info->desc); 1244 base_info->remove_scheduled = true; 1245 1246 if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) { 1247 /* 1248 * As raid bdev is not registered yet or already unregistered, 1249 * so cleanup should be done here itself. 1250 */ 1251 raid_bdev_free_base_bdev_resource(raid_bdev, base_info); 1252 if (raid_bdev->num_base_bdevs_discovered == 0) { 1253 /* There is no base bdev for this raid, so free the raid device. */ 1254 raid_bdev_cleanup_and_free(raid_bdev); 1255 return; 1256 } 1257 } 1258 1259 raid_bdev_deconfigure(raid_bdev, NULL, NULL); 1260 } 1261 1262 /* 1263 * brief: 1264 * raid_bdev_resize_base_bdev function is called by below layers when base_bdev 1265 * is resized. This function checks if the smallest size of the base_bdevs is changed. 1266 * If yes, call module handler to resize the raid_bdev if implemented. 1267 * params: 1268 * base_bdev - pointer to base bdev which got resized. 1269 * returns: 1270 * none 1271 */ 1272 static void 1273 raid_bdev_resize_base_bdev(struct spdk_bdev *base_bdev) 1274 { 1275 struct raid_bdev *raid_bdev = NULL; 1276 struct raid_base_bdev_info *base_info; 1277 1278 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_resize_base_bdev\n"); 1279 1280 /* Find the raid_bdev which has claimed this base_bdev */ 1281 if (!raid_bdev_find_by_base_bdev(base_bdev, &raid_bdev, &base_info)) { 1282 SPDK_ERRLOG("raid_bdev whose base_bdev '%s' not found\n", base_bdev->name); 1283 return; 1284 } 1285 1286 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 1287 1288 SPDK_NOTICELOG("base_bdev '%s' was resized: old size %" PRIu64 ", new size %" PRIu64 "\n", 1289 base_bdev->name, base_info->blockcnt, base_bdev->blockcnt); 1290 1291 if (raid_bdev->module->resize) { 1292 raid_bdev->module->resize(raid_bdev); 1293 } 1294 } 1295 1296 /* 1297 * brief: 1298 * raid_bdev_event_base_bdev function is called by below layers when base_bdev 1299 * triggers asynchronous event. 1300 * params: 1301 * type - event details. 1302 * bdev - bdev that triggered event. 1303 * event_ctx - context for event. 1304 * returns: 1305 * none 1306 */ 1307 static void 1308 raid_bdev_event_base_bdev(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, 1309 void *event_ctx) 1310 { 1311 switch (type) { 1312 case SPDK_BDEV_EVENT_REMOVE: 1313 raid_bdev_remove_base_bdev(bdev); 1314 break; 1315 case SPDK_BDEV_EVENT_RESIZE: 1316 raid_bdev_resize_base_bdev(bdev); 1317 break; 1318 default: 1319 SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type); 1320 break; 1321 } 1322 } 1323 1324 /* 1325 * brief: 1326 * Deletes the specified raid bdev 1327 * params: 1328 * raid_bdev - pointer to raid bdev 1329 * cb_fn - callback function 1330 * cb_arg - argument to callback function 1331 */ 1332 void 1333 raid_bdev_delete(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn, void *cb_arg) 1334 { 1335 struct raid_base_bdev_info *base_info; 1336 1337 SPDK_DEBUGLOG(bdev_raid, "delete raid bdev: %s\n", raid_bdev->bdev.name); 1338 1339 if (raid_bdev->destroy_started) { 1340 SPDK_DEBUGLOG(bdev_raid, "destroying raid bdev %s is already started\n", 1341 raid_bdev->bdev.name); 1342 if (cb_fn) { 1343 cb_fn(cb_arg, -EALREADY); 1344 } 1345 return; 1346 } 1347 1348 raid_bdev->destroy_started = true; 1349 1350 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1351 base_info->remove_scheduled = true; 1352 1353 if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) { 1354 /* 1355 * As raid bdev is not registered yet or already unregistered, 1356 * so cleanup should be done here itself. 1357 */ 1358 raid_bdev_free_base_bdev_resource(raid_bdev, base_info); 1359 } 1360 } 1361 1362 if (raid_bdev->num_base_bdevs_discovered == 0) { 1363 /* There is no base bdev for this raid, so free the raid device. */ 1364 raid_bdev_cleanup_and_free(raid_bdev); 1365 if (cb_fn) { 1366 cb_fn(cb_arg, 0); 1367 } 1368 } else { 1369 raid_bdev_deconfigure(raid_bdev, cb_fn, cb_arg); 1370 } 1371 } 1372 1373 static int 1374 raid_bdev_configure_base_bdev(struct raid_bdev *raid_bdev, struct raid_base_bdev_info *base_info) 1375 { 1376 struct spdk_bdev_desc *desc; 1377 struct spdk_bdev *bdev; 1378 int rc; 1379 1380 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 1381 assert(base_info->name != NULL); 1382 assert(base_info->bdev == NULL); 1383 1384 rc = spdk_bdev_open_ext(base_info->name, true, raid_bdev_event_base_bdev, NULL, &desc); 1385 if (rc != 0) { 1386 if (rc != -ENODEV) { 1387 SPDK_ERRLOG("Unable to create desc on bdev '%s'\n", base_info->name); 1388 } 1389 return rc; 1390 } 1391 1392 bdev = spdk_bdev_desc_get_bdev(desc); 1393 1394 rc = spdk_bdev_module_claim_bdev(bdev, NULL, &g_raid_if); 1395 if (rc != 0) { 1396 SPDK_ERRLOG("Unable to claim this bdev as it is already claimed\n"); 1397 spdk_bdev_close(desc); 1398 return rc; 1399 } 1400 1401 SPDK_DEBUGLOG(bdev_raid, "bdev %s is claimed\n", bdev->name); 1402 1403 assert(raid_bdev->state != RAID_BDEV_STATE_ONLINE); 1404 1405 base_info->bdev = bdev; 1406 base_info->desc = desc; 1407 base_info->blockcnt = bdev->blockcnt; 1408 raid_bdev->num_base_bdevs_discovered++; 1409 assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs); 1410 1411 if (raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs) { 1412 rc = raid_bdev_configure(raid_bdev); 1413 if (rc != 0) { 1414 SPDK_ERRLOG("Failed to configure raid bdev\n"); 1415 return rc; 1416 } 1417 } 1418 1419 return 0; 1420 } 1421 1422 /* 1423 * brief: 1424 * raid_bdev_add_base_device function is the actual function which either adds 1425 * the nvme base device to existing raid bdev or create a new raid bdev. It also claims 1426 * the base device and keep the open descriptor. 1427 * params: 1428 * raid_bdev - pointer to raid bdev 1429 * name - name of the base bdev 1430 * slot - position to add base bdev 1431 * returns: 1432 * 0 - success 1433 * non zero - failure 1434 */ 1435 int 1436 raid_bdev_add_base_device(struct raid_bdev *raid_bdev, const char *name, uint8_t slot) 1437 { 1438 struct raid_base_bdev_info *base_info; 1439 int rc; 1440 1441 if (slot >= raid_bdev->num_base_bdevs) { 1442 return -EINVAL; 1443 } 1444 1445 base_info = &raid_bdev->base_bdev_info[slot]; 1446 1447 if (base_info->name != NULL) { 1448 SPDK_ERRLOG("Slot %u on raid bdev '%s' already assigned to bdev '%s'\n", 1449 slot, raid_bdev->bdev.name, base_info->name); 1450 return -EBUSY; 1451 } 1452 1453 base_info->name = strdup(name); 1454 if (base_info->name == NULL) { 1455 return -ENOMEM; 1456 } 1457 1458 rc = raid_bdev_configure_base_bdev(raid_bdev, base_info); 1459 if (rc != 0) { 1460 if (rc != -ENODEV) { 1461 SPDK_ERRLOG("Failed to allocate resource for bdev '%s'\n", name); 1462 } 1463 return rc; 1464 } 1465 1466 return 0; 1467 } 1468 1469 /* 1470 * brief: 1471 * raid_bdev_examine function is the examine function call by the below layers 1472 * like bdev_nvme layer. This function will check if this base bdev can be 1473 * claimed by this raid bdev or not. 1474 * params: 1475 * bdev - pointer to base bdev 1476 * returns: 1477 * none 1478 */ 1479 static void 1480 raid_bdev_examine(struct spdk_bdev *bdev) 1481 { 1482 struct raid_bdev *raid_bdev; 1483 struct raid_base_bdev_info *base_info; 1484 1485 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { 1486 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1487 if (base_info->bdev == NULL && strcmp(bdev->name, base_info->name) == 0) { 1488 raid_bdev_configure_base_bdev(raid_bdev, base_info); 1489 break; 1490 } 1491 } 1492 } 1493 1494 spdk_bdev_module_examine_done(&g_raid_if); 1495 } 1496 1497 /* Log component for bdev raid bdev module */ 1498 SPDK_LOG_REGISTER_COMPONENT(bdev_raid) 1499