1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2018 Intel Corporation. 3 * All rights reserved. 4 * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 5 */ 6 7 #include "bdev_raid.h" 8 #include "spdk/env.h" 9 #include "spdk/thread.h" 10 #include "spdk/log.h" 11 #include "spdk/string.h" 12 #include "spdk/util.h" 13 #include "spdk/json.h" 14 15 static bool g_shutdown_started = false; 16 17 /* List of all raid bdevs */ 18 struct raid_all_tailq g_raid_bdev_list = TAILQ_HEAD_INITIALIZER(g_raid_bdev_list); 19 20 static TAILQ_HEAD(, raid_bdev_module) g_raid_modules = TAILQ_HEAD_INITIALIZER(g_raid_modules); 21 22 static struct raid_bdev_module * 23 raid_bdev_module_find(enum raid_level level) 24 { 25 struct raid_bdev_module *raid_module; 26 27 TAILQ_FOREACH(raid_module, &g_raid_modules, link) { 28 if (raid_module->level == level) { 29 return raid_module; 30 } 31 } 32 33 return NULL; 34 } 35 36 void 37 raid_bdev_module_list_add(struct raid_bdev_module *raid_module) 38 { 39 if (raid_bdev_module_find(raid_module->level) != NULL) { 40 SPDK_ERRLOG("module for raid level '%s' already registered.\n", 41 raid_bdev_level_to_str(raid_module->level)); 42 assert(false); 43 } else { 44 TAILQ_INSERT_TAIL(&g_raid_modules, raid_module, link); 45 } 46 } 47 48 /* Function declarations */ 49 static void raid_bdev_examine(struct spdk_bdev *bdev); 50 static int raid_bdev_init(void); 51 static void raid_bdev_deconfigure(struct raid_bdev *raid_bdev, 52 raid_bdev_destruct_cb cb_fn, void *cb_arg); 53 54 /* 55 * brief: 56 * raid_bdev_create_cb function is a cb function for raid bdev which creates the 57 * hierarchy from raid bdev to base bdev io channels. It will be called per core 58 * params: 59 * io_device - pointer to raid bdev io device represented by raid_bdev 60 * ctx_buf - pointer to context buffer for raid bdev io channel 61 * returns: 62 * 0 - success 63 * non zero - failure 64 */ 65 static int 66 raid_bdev_create_cb(void *io_device, void *ctx_buf) 67 { 68 struct raid_bdev *raid_bdev = io_device; 69 struct raid_bdev_io_channel *raid_ch = ctx_buf; 70 uint8_t i; 71 int ret = 0; 72 73 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_create_cb, %p\n", raid_ch); 74 75 assert(raid_bdev != NULL); 76 assert(raid_bdev->state == RAID_BDEV_STATE_ONLINE); 77 78 raid_ch->num_channels = raid_bdev->num_base_bdevs; 79 80 raid_ch->base_channel = calloc(raid_ch->num_channels, 81 sizeof(struct spdk_io_channel *)); 82 if (!raid_ch->base_channel) { 83 SPDK_ERRLOG("Unable to allocate base bdevs io channel\n"); 84 return -ENOMEM; 85 } 86 for (i = 0; i < raid_ch->num_channels; i++) { 87 /* 88 * Get the spdk_io_channel for all the base bdevs. This is used during 89 * split logic to send the respective child bdev ios to respective base 90 * bdev io channel. 91 */ 92 raid_ch->base_channel[i] = spdk_bdev_get_io_channel( 93 raid_bdev->base_bdev_info[i].desc); 94 if (!raid_ch->base_channel[i]) { 95 SPDK_ERRLOG("Unable to create io channel for base bdev\n"); 96 ret = -ENOMEM; 97 break; 98 } 99 } 100 101 if (!ret && raid_bdev->module->get_io_channel) { 102 raid_ch->module_channel = raid_bdev->module->get_io_channel(raid_bdev); 103 if (!raid_ch->module_channel) { 104 SPDK_ERRLOG("Unable to create io channel for raid module\n"); 105 ret = -ENOMEM; 106 } 107 } 108 109 if (ret) { 110 uint8_t j; 111 112 for (j = 0; j < i; j++) { 113 spdk_put_io_channel(raid_ch->base_channel[j]); 114 } 115 free(raid_ch->base_channel); 116 raid_ch->base_channel = NULL; 117 } 118 return ret; 119 } 120 121 /* 122 * brief: 123 * raid_bdev_destroy_cb function is a cb function for raid bdev which deletes the 124 * hierarchy from raid bdev to base bdev io channels. It will be called per core 125 * params: 126 * io_device - pointer to raid bdev io device represented by raid_bdev 127 * ctx_buf - pointer to context buffer for raid bdev io channel 128 * returns: 129 * none 130 */ 131 static void 132 raid_bdev_destroy_cb(void *io_device, void *ctx_buf) 133 { 134 struct raid_bdev_io_channel *raid_ch = ctx_buf; 135 uint8_t i; 136 137 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_destroy_cb\n"); 138 139 assert(raid_ch != NULL); 140 assert(raid_ch->base_channel); 141 142 if (raid_ch->module_channel) { 143 spdk_put_io_channel(raid_ch->module_channel); 144 } 145 146 for (i = 0; i < raid_ch->num_channels; i++) { 147 /* Free base bdev channels */ 148 assert(raid_ch->base_channel[i] != NULL); 149 spdk_put_io_channel(raid_ch->base_channel[i]); 150 } 151 free(raid_ch->base_channel); 152 raid_ch->base_channel = NULL; 153 } 154 155 /* 156 * brief: 157 * raid_bdev_cleanup is used to cleanup raid_bdev related data 158 * structures. 159 * params: 160 * raid_bdev - pointer to raid_bdev 161 * returns: 162 * none 163 */ 164 static void 165 raid_bdev_cleanup(struct raid_bdev *raid_bdev) 166 { 167 struct raid_base_bdev_info *base_info; 168 169 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_cleanup, %p name %s, state %s\n", 170 raid_bdev, raid_bdev->bdev.name, raid_bdev_state_to_str(raid_bdev->state)); 171 assert(raid_bdev->state != RAID_BDEV_STATE_ONLINE); 172 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 173 174 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 175 assert(base_info->bdev == NULL); 176 assert(base_info->desc == NULL); 177 free(base_info->name); 178 } 179 180 TAILQ_REMOVE(&g_raid_bdev_list, raid_bdev, global_link); 181 free(raid_bdev->base_bdev_info); 182 } 183 184 static void 185 raid_bdev_free(struct raid_bdev *raid_bdev) 186 { 187 free(raid_bdev->bdev.name); 188 free(raid_bdev); 189 } 190 191 static void 192 raid_bdev_cleanup_and_free(struct raid_bdev *raid_bdev) 193 { 194 raid_bdev_cleanup(raid_bdev); 195 raid_bdev_free(raid_bdev); 196 } 197 198 /* 199 * brief: 200 * free resource of base bdev for raid bdev 201 * params: 202 * raid_bdev - pointer to raid bdev 203 * base_info - raid base bdev info 204 * returns: 205 * 0 - success 206 * non zero - failure 207 */ 208 static void 209 raid_bdev_free_base_bdev_resource(struct raid_bdev *raid_bdev, 210 struct raid_base_bdev_info *base_info) 211 { 212 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 213 214 free(base_info->name); 215 base_info->name = NULL; 216 217 if (base_info->bdev == NULL) { 218 return; 219 } 220 221 assert(base_info->desc); 222 spdk_bdev_module_release_bdev(base_info->bdev); 223 spdk_bdev_close(base_info->desc); 224 base_info->desc = NULL; 225 base_info->bdev = NULL; 226 227 assert(raid_bdev->num_base_bdevs_discovered); 228 raid_bdev->num_base_bdevs_discovered--; 229 } 230 231 static void 232 raid_bdev_io_device_unregister_cb(void *io_device) 233 { 234 struct raid_bdev *raid_bdev = io_device; 235 236 if (raid_bdev->num_base_bdevs_discovered == 0) { 237 /* Free raid_bdev when there are no base bdevs left */ 238 SPDK_DEBUGLOG(bdev_raid, "raid bdev base bdevs is 0, going to free all in destruct\n"); 239 raid_bdev_cleanup(raid_bdev); 240 spdk_bdev_destruct_done(&raid_bdev->bdev, 0); 241 raid_bdev_free(raid_bdev); 242 } else { 243 spdk_bdev_destruct_done(&raid_bdev->bdev, 0); 244 } 245 } 246 247 void 248 raid_bdev_module_stop_done(struct raid_bdev *raid_bdev) 249 { 250 if (raid_bdev->state != RAID_BDEV_STATE_CONFIGURING) { 251 spdk_io_device_unregister(raid_bdev, raid_bdev_io_device_unregister_cb); 252 } 253 } 254 255 static void 256 _raid_bdev_destruct(void *ctxt) 257 { 258 struct raid_bdev *raid_bdev = ctxt; 259 struct raid_base_bdev_info *base_info; 260 261 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_destruct\n"); 262 263 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 264 /* 265 * Close all base bdev descriptors for which call has come from below 266 * layers. Also close the descriptors if we have started shutdown. 267 */ 268 if (g_shutdown_started || base_info->remove_scheduled == true) { 269 raid_bdev_free_base_bdev_resource(raid_bdev, base_info); 270 } 271 } 272 273 if (g_shutdown_started) { 274 raid_bdev->state = RAID_BDEV_STATE_OFFLINE; 275 } 276 277 if (raid_bdev->module->stop != NULL) { 278 if (raid_bdev->module->stop(raid_bdev) == false) { 279 return; 280 } 281 } 282 283 raid_bdev_module_stop_done(raid_bdev); 284 } 285 286 static int 287 raid_bdev_destruct(void *ctx) 288 { 289 spdk_thread_exec_msg(spdk_thread_get_app_thread(), _raid_bdev_destruct, ctx); 290 291 return 1; 292 } 293 294 void 295 raid_bdev_io_complete(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status) 296 { 297 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); 298 299 spdk_bdev_io_complete(bdev_io, status); 300 } 301 302 /* 303 * brief: 304 * raid_bdev_io_complete_part - signal the completion of a part of the expected 305 * base bdev IOs and complete the raid_io if this is the final expected IO. 306 * The caller should first set raid_io->base_bdev_io_remaining. This function 307 * will decrement this counter by the value of the 'completed' parameter and 308 * complete the raid_io if the counter reaches 0. The caller is free to 309 * interpret the 'base_bdev_io_remaining' and 'completed' values as needed, 310 * it can represent e.g. blocks or IOs. 311 * params: 312 * raid_io - pointer to raid_bdev_io 313 * completed - the part of the raid_io that has been completed 314 * status - status of the base IO 315 * returns: 316 * true - if the raid_io is completed 317 * false - otherwise 318 */ 319 bool 320 raid_bdev_io_complete_part(struct raid_bdev_io *raid_io, uint64_t completed, 321 enum spdk_bdev_io_status status) 322 { 323 assert(raid_io->base_bdev_io_remaining >= completed); 324 raid_io->base_bdev_io_remaining -= completed; 325 326 if (status != SPDK_BDEV_IO_STATUS_SUCCESS) { 327 raid_io->base_bdev_io_status = status; 328 } 329 330 if (raid_io->base_bdev_io_remaining == 0) { 331 raid_bdev_io_complete(raid_io, raid_io->base_bdev_io_status); 332 return true; 333 } else { 334 return false; 335 } 336 } 337 338 /* 339 * brief: 340 * raid_bdev_queue_io_wait function processes the IO which failed to submit. 341 * It will try to queue the IOs after storing the context to bdev wait queue logic. 342 * params: 343 * raid_io - pointer to raid_bdev_io 344 * bdev - the block device that the IO is submitted to 345 * ch - io channel 346 * cb_fn - callback when the spdk_bdev_io for bdev becomes available 347 * returns: 348 * none 349 */ 350 void 351 raid_bdev_queue_io_wait(struct raid_bdev_io *raid_io, struct spdk_bdev *bdev, 352 struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn) 353 { 354 raid_io->waitq_entry.bdev = bdev; 355 raid_io->waitq_entry.cb_fn = cb_fn; 356 raid_io->waitq_entry.cb_arg = raid_io; 357 spdk_bdev_queue_io_wait(bdev, ch, &raid_io->waitq_entry); 358 } 359 360 static void 361 raid_base_bdev_reset_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 362 { 363 struct raid_bdev_io *raid_io = cb_arg; 364 365 spdk_bdev_free_io(bdev_io); 366 367 raid_bdev_io_complete_part(raid_io, 1, success ? 368 SPDK_BDEV_IO_STATUS_SUCCESS : 369 SPDK_BDEV_IO_STATUS_FAILED); 370 } 371 372 static void raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io); 373 374 static void 375 _raid_bdev_submit_reset_request(void *_raid_io) 376 { 377 struct raid_bdev_io *raid_io = _raid_io; 378 379 raid_bdev_submit_reset_request(raid_io); 380 } 381 382 /* 383 * brief: 384 * raid_bdev_submit_reset_request function submits reset requests 385 * to member disks; it will submit as many as possible unless a reset fails with -ENOMEM, in 386 * which case it will queue it for later submission 387 * params: 388 * raid_io 389 * returns: 390 * none 391 */ 392 static void 393 raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io) 394 { 395 struct raid_bdev *raid_bdev; 396 int ret; 397 uint8_t i; 398 struct raid_base_bdev_info *base_info; 399 struct spdk_io_channel *base_ch; 400 401 raid_bdev = raid_io->raid_bdev; 402 403 if (raid_io->base_bdev_io_remaining == 0) { 404 raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs; 405 } 406 407 while (raid_io->base_bdev_io_submitted < raid_bdev->num_base_bdevs) { 408 i = raid_io->base_bdev_io_submitted; 409 base_info = &raid_bdev->base_bdev_info[i]; 410 base_ch = raid_io->raid_ch->base_channel[i]; 411 ret = spdk_bdev_reset(base_info->desc, base_ch, 412 raid_base_bdev_reset_complete, raid_io); 413 if (ret == 0) { 414 raid_io->base_bdev_io_submitted++; 415 } else if (ret == -ENOMEM) { 416 raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch, 417 _raid_bdev_submit_reset_request); 418 return; 419 } else { 420 SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); 421 assert(false); 422 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 423 return; 424 } 425 } 426 } 427 428 /* 429 * brief: 430 * Callback function to spdk_bdev_io_get_buf. 431 * params: 432 * ch - pointer to raid bdev io channel 433 * bdev_io - pointer to parent bdev_io on raid bdev device 434 * success - True if buffer is allocated or false otherwise. 435 * returns: 436 * none 437 */ 438 static void 439 raid_bdev_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, 440 bool success) 441 { 442 struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 443 444 if (!success) { 445 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 446 return; 447 } 448 449 raid_io->raid_bdev->module->submit_rw_request(raid_io); 450 } 451 452 /* 453 * brief: 454 * raid_bdev_submit_request function is the submit_request function pointer of 455 * raid bdev function table. This is used to submit the io on raid_bdev to below 456 * layers. 457 * params: 458 * ch - pointer to raid bdev io channel 459 * bdev_io - pointer to parent bdev_io on raid bdev device 460 * returns: 461 * none 462 */ 463 static void 464 raid_bdev_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 465 { 466 struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 467 468 raid_io->raid_bdev = bdev_io->bdev->ctxt; 469 raid_io->raid_ch = spdk_io_channel_get_ctx(ch); 470 raid_io->base_bdev_io_remaining = 0; 471 raid_io->base_bdev_io_submitted = 0; 472 raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS; 473 474 switch (bdev_io->type) { 475 case SPDK_BDEV_IO_TYPE_READ: 476 spdk_bdev_io_get_buf(bdev_io, raid_bdev_get_buf_cb, 477 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); 478 break; 479 case SPDK_BDEV_IO_TYPE_WRITE: 480 raid_io->raid_bdev->module->submit_rw_request(raid_io); 481 break; 482 483 case SPDK_BDEV_IO_TYPE_RESET: 484 raid_bdev_submit_reset_request(raid_io); 485 break; 486 487 case SPDK_BDEV_IO_TYPE_FLUSH: 488 case SPDK_BDEV_IO_TYPE_UNMAP: 489 raid_io->raid_bdev->module->submit_null_payload_request(raid_io); 490 break; 491 492 default: 493 SPDK_ERRLOG("submit request, invalid io type %u\n", bdev_io->type); 494 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 495 break; 496 } 497 } 498 499 /* 500 * brief: 501 * _raid_bdev_io_type_supported checks whether io_type is supported in 502 * all base bdev modules of raid bdev module. If anyone among the base_bdevs 503 * doesn't support, the raid device doesn't supports. 504 * 505 * params: 506 * raid_bdev - pointer to raid bdev context 507 * io_type - io type 508 * returns: 509 * true - io_type is supported 510 * false - io_type is not supported 511 */ 512 inline static bool 513 _raid_bdev_io_type_supported(struct raid_bdev *raid_bdev, enum spdk_bdev_io_type io_type) 514 { 515 struct raid_base_bdev_info *base_info; 516 517 if (io_type == SPDK_BDEV_IO_TYPE_FLUSH || 518 io_type == SPDK_BDEV_IO_TYPE_UNMAP) { 519 if (raid_bdev->module->submit_null_payload_request == NULL) { 520 return false; 521 } 522 } 523 524 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 525 if (base_info->bdev == NULL) { 526 assert(false); 527 continue; 528 } 529 530 if (spdk_bdev_io_type_supported(base_info->bdev, io_type) == false) { 531 return false; 532 } 533 } 534 535 return true; 536 } 537 538 /* 539 * brief: 540 * raid_bdev_io_type_supported is the io_supported function for bdev function 541 * table which returns whether the particular io type is supported or not by 542 * raid bdev module 543 * params: 544 * ctx - pointer to raid bdev context 545 * type - io type 546 * returns: 547 * true - io_type is supported 548 * false - io_type is not supported 549 */ 550 static bool 551 raid_bdev_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 552 { 553 switch (io_type) { 554 case SPDK_BDEV_IO_TYPE_READ: 555 case SPDK_BDEV_IO_TYPE_WRITE: 556 return true; 557 558 case SPDK_BDEV_IO_TYPE_FLUSH: 559 case SPDK_BDEV_IO_TYPE_RESET: 560 case SPDK_BDEV_IO_TYPE_UNMAP: 561 return _raid_bdev_io_type_supported(ctx, io_type); 562 563 default: 564 return false; 565 } 566 567 return false; 568 } 569 570 /* 571 * brief: 572 * raid_bdev_get_io_channel is the get_io_channel function table pointer for 573 * raid bdev. This is used to return the io channel for this raid bdev 574 * params: 575 * ctxt - pointer to raid_bdev 576 * returns: 577 * pointer to io channel for raid bdev 578 */ 579 static struct spdk_io_channel * 580 raid_bdev_get_io_channel(void *ctxt) 581 { 582 struct raid_bdev *raid_bdev = ctxt; 583 584 return spdk_get_io_channel(raid_bdev); 585 } 586 587 void 588 raid_bdev_write_info_json(struct raid_bdev *raid_bdev, struct spdk_json_write_ctx *w) 589 { 590 struct raid_base_bdev_info *base_info; 591 592 assert(raid_bdev != NULL); 593 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 594 595 spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb); 596 spdk_json_write_named_string(w, "state", raid_bdev_state_to_str(raid_bdev->state)); 597 spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level)); 598 spdk_json_write_named_uint32(w, "num_base_bdevs", raid_bdev->num_base_bdevs); 599 spdk_json_write_named_uint32(w, "num_base_bdevs_discovered", raid_bdev->num_base_bdevs_discovered); 600 spdk_json_write_name(w, "base_bdevs_list"); 601 spdk_json_write_array_begin(w); 602 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 603 if (base_info->bdev) { 604 spdk_json_write_string(w, base_info->bdev->name); 605 } else { 606 spdk_json_write_null(w); 607 } 608 } 609 spdk_json_write_array_end(w); 610 } 611 612 /* 613 * brief: 614 * raid_bdev_dump_info_json is the function table pointer for raid bdev 615 * params: 616 * ctx - pointer to raid_bdev 617 * w - pointer to json context 618 * returns: 619 * 0 - success 620 * non zero - failure 621 */ 622 static int 623 raid_bdev_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) 624 { 625 struct raid_bdev *raid_bdev = ctx; 626 627 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_dump_config_json\n"); 628 629 /* Dump the raid bdev configuration related information */ 630 spdk_json_write_named_object_begin(w, "raid"); 631 raid_bdev_write_info_json(raid_bdev, w); 632 spdk_json_write_object_end(w); 633 634 return 0; 635 } 636 637 /* 638 * brief: 639 * raid_bdev_write_config_json is the function table pointer for raid bdev 640 * params: 641 * bdev - pointer to spdk_bdev 642 * w - pointer to json context 643 * returns: 644 * none 645 */ 646 static void 647 raid_bdev_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 648 { 649 struct raid_bdev *raid_bdev = bdev->ctxt; 650 struct raid_base_bdev_info *base_info; 651 652 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 653 654 spdk_json_write_object_begin(w); 655 656 spdk_json_write_named_string(w, "method", "bdev_raid_create"); 657 658 spdk_json_write_named_object_begin(w, "params"); 659 spdk_json_write_named_string(w, "name", bdev->name); 660 spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb); 661 spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level)); 662 663 spdk_json_write_named_array_begin(w, "base_bdevs"); 664 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 665 if (base_info->bdev) { 666 spdk_json_write_string(w, base_info->bdev->name); 667 } 668 } 669 spdk_json_write_array_end(w); 670 spdk_json_write_object_end(w); 671 672 spdk_json_write_object_end(w); 673 } 674 675 static int 676 raid_bdev_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int array_size) 677 { 678 struct raid_bdev *raid_bdev = ctx; 679 struct spdk_bdev *base_bdev; 680 uint32_t i; 681 int domains_count = 0, rc; 682 683 /* First loop to get the number of memory domains */ 684 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 685 base_bdev = raid_bdev->base_bdev_info[i].bdev; 686 rc = spdk_bdev_get_memory_domains(base_bdev, NULL, 0); 687 if (rc < 0) { 688 return rc; 689 } 690 domains_count += rc; 691 } 692 693 if (!domains || array_size < domains_count) { 694 return domains_count; 695 } 696 697 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 698 base_bdev = raid_bdev->base_bdev_info[i].bdev; 699 rc = spdk_bdev_get_memory_domains(base_bdev, domains, array_size); 700 if (rc < 0) { 701 return rc; 702 } 703 domains += rc; 704 array_size -= rc; 705 } 706 707 return domains_count; 708 } 709 710 /* g_raid_bdev_fn_table is the function table for raid bdev */ 711 static const struct spdk_bdev_fn_table g_raid_bdev_fn_table = { 712 .destruct = raid_bdev_destruct, 713 .submit_request = raid_bdev_submit_request, 714 .io_type_supported = raid_bdev_io_type_supported, 715 .get_io_channel = raid_bdev_get_io_channel, 716 .dump_info_json = raid_bdev_dump_info_json, 717 .write_config_json = raid_bdev_write_config_json, 718 .get_memory_domains = raid_bdev_get_memory_domains, 719 }; 720 721 struct raid_bdev * 722 raid_bdev_find_by_name(const char *name) 723 { 724 struct raid_bdev *raid_bdev; 725 726 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { 727 if (strcmp(raid_bdev->bdev.name, name) == 0) { 728 return raid_bdev; 729 } 730 } 731 732 return NULL; 733 } 734 735 static struct { 736 const char *name; 737 enum raid_level value; 738 } g_raid_level_names[] = { 739 { "raid0", RAID0 }, 740 { "0", RAID0 }, 741 { "raid1", RAID1 }, 742 { "1", RAID1 }, 743 { "raid5f", RAID5F }, 744 { "5f", RAID5F }, 745 { "concat", CONCAT }, 746 { } 747 }; 748 749 static struct { 750 const char *name; 751 enum raid_bdev_state value; 752 } g_raid_state_names[] = { 753 { "online", RAID_BDEV_STATE_ONLINE }, 754 { "configuring", RAID_BDEV_STATE_CONFIGURING }, 755 { "offline", RAID_BDEV_STATE_OFFLINE }, 756 { } 757 }; 758 759 /* We have to use the typedef in the function declaration to appease astyle. */ 760 typedef enum raid_level raid_level_t; 761 typedef enum raid_bdev_state raid_bdev_state_t; 762 763 raid_level_t 764 raid_bdev_str_to_level(const char *str) 765 { 766 unsigned int i; 767 768 assert(str != NULL); 769 770 for (i = 0; g_raid_level_names[i].name != NULL; i++) { 771 if (strcasecmp(g_raid_level_names[i].name, str) == 0) { 772 return g_raid_level_names[i].value; 773 } 774 } 775 776 return INVALID_RAID_LEVEL; 777 } 778 779 const char * 780 raid_bdev_level_to_str(enum raid_level level) 781 { 782 unsigned int i; 783 784 for (i = 0; g_raid_level_names[i].name != NULL; i++) { 785 if (g_raid_level_names[i].value == level) { 786 return g_raid_level_names[i].name; 787 } 788 } 789 790 return ""; 791 } 792 793 raid_bdev_state_t 794 raid_bdev_str_to_state(const char *str) 795 { 796 unsigned int i; 797 798 assert(str != NULL); 799 800 for (i = 0; g_raid_state_names[i].name != NULL; i++) { 801 if (strcasecmp(g_raid_state_names[i].name, str) == 0) { 802 return g_raid_state_names[i].value; 803 } 804 } 805 806 return RAID_BDEV_STATE_MAX; 807 } 808 809 const char * 810 raid_bdev_state_to_str(enum raid_bdev_state state) 811 { 812 unsigned int i; 813 814 for (i = 0; g_raid_state_names[i].name != NULL; i++) { 815 if (g_raid_state_names[i].value == state) { 816 return g_raid_state_names[i].name; 817 } 818 } 819 820 assert(false); 821 return ""; 822 } 823 824 /* 825 * brief: 826 * raid_bdev_fini_start is called when bdev layer is starting the 827 * shutdown process 828 * params: 829 * none 830 * returns: 831 * none 832 */ 833 static void 834 raid_bdev_fini_start(void) 835 { 836 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_fini_start\n"); 837 g_shutdown_started = true; 838 } 839 840 /* 841 * brief: 842 * raid_bdev_exit is called on raid bdev module exit time by bdev layer 843 * params: 844 * none 845 * returns: 846 * none 847 */ 848 static void 849 raid_bdev_exit(void) 850 { 851 struct raid_bdev *raid_bdev, *tmp; 852 853 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_exit\n"); 854 855 TAILQ_FOREACH_SAFE(raid_bdev, &g_raid_bdev_list, global_link, tmp) { 856 raid_bdev_cleanup_and_free(raid_bdev); 857 } 858 } 859 860 /* 861 * brief: 862 * raid_bdev_get_ctx_size is used to return the context size of bdev_io for raid 863 * module 864 * params: 865 * none 866 * returns: 867 * size of spdk_bdev_io context for raid 868 */ 869 static int 870 raid_bdev_get_ctx_size(void) 871 { 872 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_get_ctx_size\n"); 873 return sizeof(struct raid_bdev_io); 874 } 875 876 static struct spdk_bdev_module g_raid_if = { 877 .name = "raid", 878 .module_init = raid_bdev_init, 879 .fini_start = raid_bdev_fini_start, 880 .module_fini = raid_bdev_exit, 881 .get_ctx_size = raid_bdev_get_ctx_size, 882 .examine_config = raid_bdev_examine, 883 .async_init = false, 884 .async_fini = false, 885 }; 886 SPDK_BDEV_MODULE_REGISTER(raid, &g_raid_if) 887 888 /* 889 * brief: 890 * raid_bdev_init is the initialization function for raid bdev module 891 * params: 892 * none 893 * returns: 894 * 0 - success 895 * non zero - failure 896 */ 897 static int 898 raid_bdev_init(void) 899 { 900 return 0; 901 } 902 903 /* 904 * brief: 905 * raid_bdev_create allocates raid bdev based on passed configuration 906 * params: 907 * name - name for raid bdev 908 * strip_size - strip size in KB 909 * num_base_bdevs - number of base bdevs 910 * level - raid level 911 * raid_bdev_out - the created raid bdev 912 * returns: 913 * 0 - success 914 * non zero - failure 915 */ 916 int 917 raid_bdev_create(const char *name, uint32_t strip_size, uint8_t num_base_bdevs, 918 enum raid_level level, struct raid_bdev **raid_bdev_out) 919 { 920 struct raid_bdev *raid_bdev; 921 struct spdk_bdev *raid_bdev_gen; 922 struct raid_bdev_module *module; 923 uint8_t min_operational; 924 925 if (raid_bdev_find_by_name(name) != NULL) { 926 SPDK_ERRLOG("Duplicate raid bdev name found: %s\n", name); 927 return -EEXIST; 928 } 929 930 if (level == RAID1) { 931 if (strip_size != 0) { 932 SPDK_ERRLOG("Strip size is not supported by raid1\n"); 933 return -EINVAL; 934 } 935 } else if (spdk_u32_is_pow2(strip_size) == false) { 936 SPDK_ERRLOG("Invalid strip size %" PRIu32 "\n", strip_size); 937 return -EINVAL; 938 } 939 940 module = raid_bdev_module_find(level); 941 if (module == NULL) { 942 SPDK_ERRLOG("Unsupported raid level '%d'\n", level); 943 return -EINVAL; 944 } 945 946 assert(module->base_bdevs_min != 0); 947 if (num_base_bdevs < module->base_bdevs_min) { 948 SPDK_ERRLOG("At least %u base devices required for %s\n", 949 module->base_bdevs_min, 950 raid_bdev_level_to_str(level)); 951 return -EINVAL; 952 } 953 954 switch (module->base_bdevs_constraint.type) { 955 case CONSTRAINT_MAX_BASE_BDEVS_REMOVED: 956 min_operational = num_base_bdevs - module->base_bdevs_constraint.value; 957 break; 958 case CONSTRAINT_MIN_BASE_BDEVS_OPERATIONAL: 959 min_operational = module->base_bdevs_constraint.value; 960 break; 961 case CONSTRAINT_UNSET: 962 if (module->base_bdevs_constraint.value != 0) { 963 SPDK_ERRLOG("Unexpected constraint value '%u' provided for raid bdev '%s'.\n", 964 (uint8_t)module->base_bdevs_constraint.value, name); 965 return -EINVAL; 966 } 967 min_operational = num_base_bdevs; 968 break; 969 default: 970 SPDK_ERRLOG("Unrecognised constraint type '%u' in module for raid level '%s'.\n", 971 (uint8_t)module->base_bdevs_constraint.type, 972 raid_bdev_level_to_str(module->level)); 973 return -EINVAL; 974 }; 975 976 if (min_operational == 0 || min_operational > num_base_bdevs) { 977 SPDK_ERRLOG("Wrong constraint value for raid level '%s'.\n", 978 raid_bdev_level_to_str(module->level)); 979 return -EINVAL; 980 } 981 982 raid_bdev = calloc(1, sizeof(*raid_bdev)); 983 if (!raid_bdev) { 984 SPDK_ERRLOG("Unable to allocate memory for raid bdev\n"); 985 return -ENOMEM; 986 } 987 988 raid_bdev->module = module; 989 raid_bdev->num_base_bdevs = num_base_bdevs; 990 raid_bdev->base_bdev_info = calloc(raid_bdev->num_base_bdevs, 991 sizeof(struct raid_base_bdev_info)); 992 if (!raid_bdev->base_bdev_info) { 993 SPDK_ERRLOG("Unable able to allocate base bdev info\n"); 994 free(raid_bdev); 995 return -ENOMEM; 996 } 997 998 /* strip_size_kb is from the rpc param. strip_size is in blocks and used 999 * internally and set later. 1000 */ 1001 raid_bdev->strip_size = 0; 1002 raid_bdev->strip_size_kb = strip_size; 1003 raid_bdev->state = RAID_BDEV_STATE_CONFIGURING; 1004 raid_bdev->level = level; 1005 raid_bdev->min_base_bdevs_operational = min_operational; 1006 1007 raid_bdev_gen = &raid_bdev->bdev; 1008 1009 raid_bdev_gen->name = strdup(name); 1010 if (!raid_bdev_gen->name) { 1011 SPDK_ERRLOG("Unable to allocate name for raid\n"); 1012 free(raid_bdev->base_bdev_info); 1013 free(raid_bdev); 1014 return -ENOMEM; 1015 } 1016 1017 raid_bdev_gen->product_name = "Raid Volume"; 1018 raid_bdev_gen->ctxt = raid_bdev; 1019 raid_bdev_gen->fn_table = &g_raid_bdev_fn_table; 1020 raid_bdev_gen->module = &g_raid_if; 1021 raid_bdev_gen->write_cache = 0; 1022 1023 TAILQ_INSERT_TAIL(&g_raid_bdev_list, raid_bdev, global_link); 1024 1025 *raid_bdev_out = raid_bdev; 1026 1027 return 0; 1028 } 1029 1030 /* 1031 * brief: 1032 * Check underlying block devices against support for metadata. Do not configure 1033 * md support when parameters from block devices are inconsistent. 1034 * params: 1035 * raid_bdev - pointer to raid bdev 1036 * returns: 1037 * 0 - The raid bdev md parameters were successfully configured. 1038 * non zero - Failed to configure md. 1039 */ 1040 static int 1041 raid_bdev_configure_md(struct raid_bdev *raid_bdev) 1042 { 1043 struct spdk_bdev *base_bdev; 1044 uint8_t i; 1045 1046 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 1047 base_bdev = raid_bdev->base_bdev_info[i].bdev; 1048 1049 if (i == 0) { 1050 raid_bdev->bdev.md_len = spdk_bdev_get_md_size(base_bdev); 1051 raid_bdev->bdev.md_interleave = spdk_bdev_is_md_interleaved(base_bdev); 1052 raid_bdev->bdev.dif_type = spdk_bdev_get_dif_type(base_bdev); 1053 raid_bdev->bdev.dif_is_head_of_md = spdk_bdev_is_dif_head_of_md(base_bdev); 1054 raid_bdev->bdev.dif_check_flags = base_bdev->dif_check_flags; 1055 continue; 1056 } 1057 1058 if (raid_bdev->bdev.md_len != spdk_bdev_get_md_size(base_bdev) || 1059 raid_bdev->bdev.md_interleave != spdk_bdev_is_md_interleaved(base_bdev) || 1060 raid_bdev->bdev.dif_type != spdk_bdev_get_dif_type(base_bdev) || 1061 raid_bdev->bdev.dif_is_head_of_md != spdk_bdev_is_dif_head_of_md(base_bdev) || 1062 raid_bdev->bdev.dif_check_flags != base_bdev->dif_check_flags) { 1063 SPDK_ERRLOG("base bdevs are configured with different metadata formats\n"); 1064 return -EPERM; 1065 } 1066 } 1067 1068 return 0; 1069 } 1070 1071 /* 1072 * brief: 1073 * If raid bdev config is complete, then only register the raid bdev to 1074 * bdev layer and remove this raid bdev from configuring list and 1075 * insert the raid bdev to configured list 1076 * params: 1077 * raid_bdev - pointer to raid bdev 1078 * returns: 1079 * 0 - success 1080 * non zero - failure 1081 */ 1082 static int 1083 raid_bdev_configure(struct raid_bdev *raid_bdev) 1084 { 1085 uint32_t blocklen = 0; 1086 struct spdk_bdev *raid_bdev_gen; 1087 struct raid_base_bdev_info *base_info; 1088 int rc = 0; 1089 1090 assert(raid_bdev->state == RAID_BDEV_STATE_CONFIGURING); 1091 assert(raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs); 1092 1093 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1094 assert(base_info->bdev != NULL); 1095 /* Check blocklen for all base bdevs that it should be same */ 1096 if (blocklen == 0) { 1097 blocklen = base_info->bdev->blocklen; 1098 } else if (blocklen != base_info->bdev->blocklen) { 1099 /* 1100 * Assumption is that all the base bdevs for any raid bdev should 1101 * have same blocklen 1102 */ 1103 SPDK_ERRLOG("Blocklen of various bdevs not matching\n"); 1104 return -EINVAL; 1105 } 1106 } 1107 assert(blocklen > 0); 1108 1109 /* The strip_size_kb is read in from user in KB. Convert to blocks here for 1110 * internal use. 1111 */ 1112 raid_bdev->strip_size = (raid_bdev->strip_size_kb * 1024) / blocklen; 1113 if (raid_bdev->strip_size == 0 && raid_bdev->level != RAID1) { 1114 SPDK_ERRLOG("Strip size cannot be smaller than the device block size\n"); 1115 return -EINVAL; 1116 } 1117 raid_bdev->strip_size_shift = spdk_u32log2(raid_bdev->strip_size); 1118 raid_bdev->blocklen_shift = spdk_u32log2(blocklen); 1119 1120 raid_bdev_gen = &raid_bdev->bdev; 1121 raid_bdev_gen->blocklen = blocklen; 1122 1123 rc = raid_bdev_configure_md(raid_bdev); 1124 if (rc != 0) { 1125 SPDK_ERRLOG("raid metadata configuration failed\n"); 1126 return rc; 1127 } 1128 1129 rc = raid_bdev->module->start(raid_bdev); 1130 if (rc != 0) { 1131 SPDK_ERRLOG("raid module startup callback failed\n"); 1132 return rc; 1133 } 1134 raid_bdev->state = RAID_BDEV_STATE_ONLINE; 1135 SPDK_DEBUGLOG(bdev_raid, "io device register %p\n", raid_bdev); 1136 SPDK_DEBUGLOG(bdev_raid, "blockcnt %" PRIu64 ", blocklen %u\n", 1137 raid_bdev_gen->blockcnt, raid_bdev_gen->blocklen); 1138 spdk_io_device_register(raid_bdev, raid_bdev_create_cb, raid_bdev_destroy_cb, 1139 sizeof(struct raid_bdev_io_channel), 1140 raid_bdev->bdev.name); 1141 rc = spdk_bdev_register(raid_bdev_gen); 1142 if (rc != 0) { 1143 SPDK_ERRLOG("Unable to register raid bdev and stay at configuring state\n"); 1144 if (raid_bdev->module->stop != NULL) { 1145 raid_bdev->module->stop(raid_bdev); 1146 } 1147 spdk_io_device_unregister(raid_bdev, NULL); 1148 raid_bdev->state = RAID_BDEV_STATE_CONFIGURING; 1149 return rc; 1150 } 1151 SPDK_DEBUGLOG(bdev_raid, "raid bdev generic %p\n", raid_bdev_gen); 1152 SPDK_DEBUGLOG(bdev_raid, "raid bdev is created with name %s, raid_bdev %p\n", 1153 raid_bdev_gen->name, raid_bdev); 1154 1155 return 0; 1156 } 1157 1158 /* 1159 * brief: 1160 * If raid bdev is online and registered, change the bdev state to 1161 * configuring and unregister this raid device. Queue this raid device 1162 * in configuring list 1163 * params: 1164 * raid_bdev - pointer to raid bdev 1165 * cb_fn - callback function 1166 * cb_arg - argument to callback function 1167 * returns: 1168 * none 1169 */ 1170 static void 1171 raid_bdev_deconfigure(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn, 1172 void *cb_arg) 1173 { 1174 if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) { 1175 if (cb_fn) { 1176 cb_fn(cb_arg, 0); 1177 } 1178 return; 1179 } 1180 1181 assert(raid_bdev->num_base_bdevs == raid_bdev->num_base_bdevs_discovered); 1182 raid_bdev->state = RAID_BDEV_STATE_OFFLINE; 1183 assert(raid_bdev->num_base_bdevs_discovered); 1184 SPDK_DEBUGLOG(bdev_raid, "raid bdev state changing from online to offline\n"); 1185 1186 spdk_bdev_unregister(&raid_bdev->bdev, cb_fn, cb_arg); 1187 } 1188 1189 /* 1190 * brief: 1191 * raid_bdev_find_by_base_bdev function finds the raid bdev which has 1192 * claimed the base bdev. 1193 * params: 1194 * base_bdev - pointer to base bdev pointer 1195 * _raid_bdev - Reference to pointer to raid bdev 1196 * _base_info - Reference to the raid base bdev info. 1197 * returns: 1198 * true - if the raid bdev is found. 1199 * false - if the raid bdev is not found. 1200 */ 1201 static bool 1202 raid_bdev_find_by_base_bdev(struct spdk_bdev *base_bdev, struct raid_bdev **_raid_bdev, 1203 struct raid_base_bdev_info **_base_info) 1204 { 1205 struct raid_bdev *raid_bdev; 1206 struct raid_base_bdev_info *base_info; 1207 1208 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { 1209 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1210 if (base_info->bdev == base_bdev) { 1211 *_raid_bdev = raid_bdev; 1212 *_base_info = base_info; 1213 return true; 1214 } 1215 } 1216 } 1217 1218 return false; 1219 } 1220 1221 /* 1222 * brief: 1223 * raid_bdev_remove_base_bdev function is called by below layers when base_bdev 1224 * is removed. This function checks if this base bdev is part of any raid bdev 1225 * or not. If yes, it takes necessary action on that particular raid bdev. 1226 * params: 1227 * base_bdev - pointer to base bdev which got removed 1228 * returns: 1229 * none 1230 */ 1231 static void 1232 raid_bdev_remove_base_bdev(struct spdk_bdev *base_bdev) 1233 { 1234 struct raid_bdev *raid_bdev = NULL; 1235 struct raid_base_bdev_info *base_info; 1236 1237 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_remove_base_bdev\n"); 1238 1239 /* Find the raid_bdev which has claimed this base_bdev */ 1240 if (!raid_bdev_find_by_base_bdev(base_bdev, &raid_bdev, &base_info)) { 1241 SPDK_ERRLOG("bdev to remove '%s' not found\n", base_bdev->name); 1242 return; 1243 } 1244 1245 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 1246 1247 assert(base_info->desc); 1248 base_info->remove_scheduled = true; 1249 1250 if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) { 1251 /* 1252 * As raid bdev is not registered yet or already unregistered, 1253 * so cleanup should be done here itself. 1254 */ 1255 raid_bdev_free_base_bdev_resource(raid_bdev, base_info); 1256 if (raid_bdev->num_base_bdevs_discovered == 0) { 1257 /* There is no base bdev for this raid, so free the raid device. */ 1258 raid_bdev_cleanup_and_free(raid_bdev); 1259 return; 1260 } 1261 } 1262 1263 raid_bdev_deconfigure(raid_bdev, NULL, NULL); 1264 } 1265 1266 /* 1267 * brief: 1268 * raid_bdev_resize_base_bdev function is called by below layers when base_bdev 1269 * is resized. This function checks if the smallest size of the base_bdevs is changed. 1270 * If yes, call module handler to resize the raid_bdev if implemented. 1271 * params: 1272 * base_bdev - pointer to base bdev which got resized. 1273 * returns: 1274 * none 1275 */ 1276 static void 1277 raid_bdev_resize_base_bdev(struct spdk_bdev *base_bdev) 1278 { 1279 struct raid_bdev *raid_bdev = NULL; 1280 struct raid_base_bdev_info *base_info; 1281 1282 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_resize_base_bdev\n"); 1283 1284 /* Find the raid_bdev which has claimed this base_bdev */ 1285 if (!raid_bdev_find_by_base_bdev(base_bdev, &raid_bdev, &base_info)) { 1286 SPDK_ERRLOG("raid_bdev whose base_bdev '%s' not found\n", base_bdev->name); 1287 return; 1288 } 1289 1290 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 1291 1292 SPDK_NOTICELOG("base_bdev '%s' was resized: old size %" PRIu64 ", new size %" PRIu64 "\n", 1293 base_bdev->name, base_info->blockcnt, base_bdev->blockcnt); 1294 1295 if (raid_bdev->module->resize) { 1296 raid_bdev->module->resize(raid_bdev); 1297 } 1298 } 1299 1300 /* 1301 * brief: 1302 * raid_bdev_event_base_bdev function is called by below layers when base_bdev 1303 * triggers asynchronous event. 1304 * params: 1305 * type - event details. 1306 * bdev - bdev that triggered event. 1307 * event_ctx - context for event. 1308 * returns: 1309 * none 1310 */ 1311 static void 1312 raid_bdev_event_base_bdev(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, 1313 void *event_ctx) 1314 { 1315 switch (type) { 1316 case SPDK_BDEV_EVENT_REMOVE: 1317 raid_bdev_remove_base_bdev(bdev); 1318 break; 1319 case SPDK_BDEV_EVENT_RESIZE: 1320 raid_bdev_resize_base_bdev(bdev); 1321 break; 1322 default: 1323 SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type); 1324 break; 1325 } 1326 } 1327 1328 /* 1329 * brief: 1330 * Deletes the specified raid bdev 1331 * params: 1332 * raid_bdev - pointer to raid bdev 1333 * cb_fn - callback function 1334 * cb_arg - argument to callback function 1335 */ 1336 void 1337 raid_bdev_delete(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn, void *cb_arg) 1338 { 1339 struct raid_base_bdev_info *base_info; 1340 1341 SPDK_DEBUGLOG(bdev_raid, "delete raid bdev: %s\n", raid_bdev->bdev.name); 1342 1343 if (raid_bdev->destroy_started) { 1344 SPDK_DEBUGLOG(bdev_raid, "destroying raid bdev %s is already started\n", 1345 raid_bdev->bdev.name); 1346 if (cb_fn) { 1347 cb_fn(cb_arg, -EALREADY); 1348 } 1349 return; 1350 } 1351 1352 raid_bdev->destroy_started = true; 1353 1354 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1355 base_info->remove_scheduled = true; 1356 1357 if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) { 1358 /* 1359 * As raid bdev is not registered yet or already unregistered, 1360 * so cleanup should be done here itself. 1361 */ 1362 raid_bdev_free_base_bdev_resource(raid_bdev, base_info); 1363 } 1364 } 1365 1366 if (raid_bdev->num_base_bdevs_discovered == 0) { 1367 /* There is no base bdev for this raid, so free the raid device. */ 1368 raid_bdev_cleanup_and_free(raid_bdev); 1369 if (cb_fn) { 1370 cb_fn(cb_arg, 0); 1371 } 1372 } else { 1373 raid_bdev_deconfigure(raid_bdev, cb_fn, cb_arg); 1374 } 1375 } 1376 1377 static int 1378 raid_bdev_configure_base_bdev(struct raid_bdev *raid_bdev, struct raid_base_bdev_info *base_info) 1379 { 1380 struct spdk_bdev_desc *desc; 1381 struct spdk_bdev *bdev; 1382 int rc; 1383 1384 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 1385 assert(base_info->name != NULL); 1386 assert(base_info->bdev == NULL); 1387 1388 rc = spdk_bdev_open_ext(base_info->name, true, raid_bdev_event_base_bdev, NULL, &desc); 1389 if (rc != 0) { 1390 if (rc != -ENODEV) { 1391 SPDK_ERRLOG("Unable to create desc on bdev '%s'\n", base_info->name); 1392 } 1393 return rc; 1394 } 1395 1396 bdev = spdk_bdev_desc_get_bdev(desc); 1397 1398 rc = spdk_bdev_module_claim_bdev(bdev, NULL, &g_raid_if); 1399 if (rc != 0) { 1400 SPDK_ERRLOG("Unable to claim this bdev as it is already claimed\n"); 1401 spdk_bdev_close(desc); 1402 return rc; 1403 } 1404 1405 SPDK_DEBUGLOG(bdev_raid, "bdev %s is claimed\n", bdev->name); 1406 1407 assert(raid_bdev->state != RAID_BDEV_STATE_ONLINE); 1408 1409 base_info->bdev = bdev; 1410 base_info->desc = desc; 1411 base_info->blockcnt = bdev->blockcnt; 1412 raid_bdev->num_base_bdevs_discovered++; 1413 assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs); 1414 1415 if (raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs) { 1416 rc = raid_bdev_configure(raid_bdev); 1417 if (rc != 0) { 1418 SPDK_ERRLOG("Failed to configure raid bdev\n"); 1419 return rc; 1420 } 1421 } 1422 1423 return 0; 1424 } 1425 1426 /* 1427 * brief: 1428 * raid_bdev_add_base_device function is the actual function which either adds 1429 * the nvme base device to existing raid bdev or create a new raid bdev. It also claims 1430 * the base device and keep the open descriptor. 1431 * params: 1432 * raid_bdev - pointer to raid bdev 1433 * name - name of the base bdev 1434 * slot - position to add base bdev 1435 * returns: 1436 * 0 - success 1437 * non zero - failure 1438 */ 1439 int 1440 raid_bdev_add_base_device(struct raid_bdev *raid_bdev, const char *name, uint8_t slot) 1441 { 1442 struct raid_base_bdev_info *base_info; 1443 int rc; 1444 1445 if (slot >= raid_bdev->num_base_bdevs) { 1446 return -EINVAL; 1447 } 1448 1449 base_info = &raid_bdev->base_bdev_info[slot]; 1450 1451 if (base_info->name != NULL) { 1452 SPDK_ERRLOG("Slot %u on raid bdev '%s' already assigned to bdev '%s'\n", 1453 slot, raid_bdev->bdev.name, base_info->name); 1454 return -EBUSY; 1455 } 1456 1457 base_info->name = strdup(name); 1458 if (base_info->name == NULL) { 1459 return -ENOMEM; 1460 } 1461 1462 rc = raid_bdev_configure_base_bdev(raid_bdev, base_info); 1463 if (rc != 0) { 1464 if (rc != -ENODEV) { 1465 SPDK_ERRLOG("Failed to allocate resource for bdev '%s'\n", name); 1466 } 1467 return rc; 1468 } 1469 1470 return 0; 1471 } 1472 1473 /* 1474 * brief: 1475 * raid_bdev_examine function is the examine function call by the below layers 1476 * like bdev_nvme layer. This function will check if this base bdev can be 1477 * claimed by this raid bdev or not. 1478 * params: 1479 * bdev - pointer to base bdev 1480 * returns: 1481 * none 1482 */ 1483 static void 1484 raid_bdev_examine(struct spdk_bdev *bdev) 1485 { 1486 struct raid_bdev *raid_bdev; 1487 struct raid_base_bdev_info *base_info; 1488 1489 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { 1490 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1491 if (base_info->bdev == NULL && strcmp(bdev->name, base_info->name) == 0) { 1492 raid_bdev_configure_base_bdev(raid_bdev, base_info); 1493 break; 1494 } 1495 } 1496 } 1497 1498 spdk_bdev_module_examine_done(&g_raid_if); 1499 } 1500 1501 /* Log component for bdev raid bdev module */ 1502 SPDK_LOG_REGISTER_COMPONENT(bdev_raid) 1503