1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2018 Intel Corporation. 3 * All rights reserved. 4 * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 5 */ 6 7 #include "bdev_raid.h" 8 #include "spdk/env.h" 9 #include "spdk/thread.h" 10 #include "spdk/log.h" 11 #include "spdk/string.h" 12 #include "spdk/util.h" 13 #include "spdk/json.h" 14 15 static bool g_shutdown_started = false; 16 17 /* List of all raid bdevs */ 18 struct raid_all_tailq g_raid_bdev_list = TAILQ_HEAD_INITIALIZER(g_raid_bdev_list); 19 20 static TAILQ_HEAD(, raid_bdev_module) g_raid_modules = TAILQ_HEAD_INITIALIZER(g_raid_modules); 21 22 static struct raid_bdev_module * 23 raid_bdev_module_find(enum raid_level level) 24 { 25 struct raid_bdev_module *raid_module; 26 27 TAILQ_FOREACH(raid_module, &g_raid_modules, link) { 28 if (raid_module->level == level) { 29 return raid_module; 30 } 31 } 32 33 return NULL; 34 } 35 36 void 37 raid_bdev_module_list_add(struct raid_bdev_module *raid_module) 38 { 39 if (raid_bdev_module_find(raid_module->level) != NULL) { 40 SPDK_ERRLOG("module for raid level '%s' already registered.\n", 41 raid_bdev_level_to_str(raid_module->level)); 42 assert(false); 43 } else { 44 TAILQ_INSERT_TAIL(&g_raid_modules, raid_module, link); 45 } 46 } 47 48 /* Function declarations */ 49 static void raid_bdev_examine(struct spdk_bdev *bdev); 50 static int raid_bdev_init(void); 51 static void raid_bdev_deconfigure(struct raid_bdev *raid_bdev, 52 raid_bdev_destruct_cb cb_fn, void *cb_arg); 53 54 /* 55 * brief: 56 * raid_bdev_create_cb function is a cb function for raid bdev which creates the 57 * hierarchy from raid bdev to base bdev io channels. It will be called per core 58 * params: 59 * io_device - pointer to raid bdev io device represented by raid_bdev 60 * ctx_buf - pointer to context buffer for raid bdev io channel 61 * returns: 62 * 0 - success 63 * non zero - failure 64 */ 65 static int 66 raid_bdev_create_cb(void *io_device, void *ctx_buf) 67 { 68 struct raid_bdev *raid_bdev = io_device; 69 struct raid_bdev_io_channel *raid_ch = ctx_buf; 70 uint8_t i; 71 int ret = 0; 72 73 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_create_cb, %p\n", raid_ch); 74 75 assert(raid_bdev != NULL); 76 assert(raid_bdev->state == RAID_BDEV_STATE_ONLINE); 77 78 raid_ch->num_channels = raid_bdev->num_base_bdevs; 79 80 raid_ch->base_channel = calloc(raid_ch->num_channels, 81 sizeof(struct spdk_io_channel *)); 82 if (!raid_ch->base_channel) { 83 SPDK_ERRLOG("Unable to allocate base bdevs io channel\n"); 84 return -ENOMEM; 85 } 86 for (i = 0; i < raid_ch->num_channels; i++) { 87 /* 88 * Get the spdk_io_channel for all the base bdevs. This is used during 89 * split logic to send the respective child bdev ios to respective base 90 * bdev io channel. 91 */ 92 raid_ch->base_channel[i] = spdk_bdev_get_io_channel( 93 raid_bdev->base_bdev_info[i].desc); 94 if (!raid_ch->base_channel[i]) { 95 SPDK_ERRLOG("Unable to create io channel for base bdev\n"); 96 ret = -ENOMEM; 97 break; 98 } 99 } 100 101 if (!ret && raid_bdev->module->get_io_channel) { 102 raid_ch->module_channel = raid_bdev->module->get_io_channel(raid_bdev); 103 if (!raid_ch->module_channel) { 104 SPDK_ERRLOG("Unable to create io channel for raid module\n"); 105 ret = -ENOMEM; 106 } 107 } 108 109 if (ret) { 110 uint8_t j; 111 112 for (j = 0; j < i; j++) { 113 spdk_put_io_channel(raid_ch->base_channel[j]); 114 } 115 free(raid_ch->base_channel); 116 raid_ch->base_channel = NULL; 117 } 118 return ret; 119 } 120 121 /* 122 * brief: 123 * raid_bdev_destroy_cb function is a cb function for raid bdev which deletes the 124 * hierarchy from raid bdev to base bdev io channels. It will be called per core 125 * params: 126 * io_device - pointer to raid bdev io device represented by raid_bdev 127 * ctx_buf - pointer to context buffer for raid bdev io channel 128 * returns: 129 * none 130 */ 131 static void 132 raid_bdev_destroy_cb(void *io_device, void *ctx_buf) 133 { 134 struct raid_bdev_io_channel *raid_ch = ctx_buf; 135 uint8_t i; 136 137 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_destroy_cb\n"); 138 139 assert(raid_ch != NULL); 140 assert(raid_ch->base_channel); 141 142 if (raid_ch->module_channel) { 143 spdk_put_io_channel(raid_ch->module_channel); 144 } 145 146 for (i = 0; i < raid_ch->num_channels; i++) { 147 /* Free base bdev channels */ 148 assert(raid_ch->base_channel[i] != NULL); 149 spdk_put_io_channel(raid_ch->base_channel[i]); 150 } 151 free(raid_ch->base_channel); 152 raid_ch->base_channel = NULL; 153 } 154 155 /* 156 * brief: 157 * raid_bdev_cleanup is used to cleanup raid_bdev related data 158 * structures. 159 * params: 160 * raid_bdev - pointer to raid_bdev 161 * returns: 162 * none 163 */ 164 static void 165 raid_bdev_cleanup(struct raid_bdev *raid_bdev) 166 { 167 struct raid_base_bdev_info *base_info; 168 169 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_cleanup, %p name %s, state %s\n", 170 raid_bdev, raid_bdev->bdev.name, raid_bdev_state_to_str(raid_bdev->state)); 171 assert(raid_bdev->state != RAID_BDEV_STATE_ONLINE); 172 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 173 174 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 175 assert(base_info->bdev == NULL); 176 assert(base_info->desc == NULL); 177 free(base_info->name); 178 } 179 180 TAILQ_REMOVE(&g_raid_bdev_list, raid_bdev, global_link); 181 free(raid_bdev->base_bdev_info); 182 } 183 184 static void 185 raid_bdev_free(struct raid_bdev *raid_bdev) 186 { 187 free(raid_bdev->bdev.name); 188 free(raid_bdev); 189 } 190 191 static void 192 raid_bdev_cleanup_and_free(struct raid_bdev *raid_bdev) 193 { 194 raid_bdev_cleanup(raid_bdev); 195 raid_bdev_free(raid_bdev); 196 } 197 198 /* 199 * brief: 200 * free resource of base bdev for raid bdev 201 * params: 202 * raid_bdev - pointer to raid bdev 203 * base_info - raid base bdev info 204 * returns: 205 * 0 - success 206 * non zero - failure 207 */ 208 static void 209 raid_bdev_free_base_bdev_resource(struct raid_bdev *raid_bdev, 210 struct raid_base_bdev_info *base_info) 211 { 212 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 213 214 free(base_info->name); 215 base_info->name = NULL; 216 217 if (base_info->bdev == NULL) { 218 return; 219 } 220 221 assert(base_info->desc); 222 spdk_bdev_module_release_bdev(base_info->bdev); 223 spdk_bdev_close(base_info->desc); 224 base_info->desc = NULL; 225 base_info->bdev = NULL; 226 227 assert(raid_bdev->num_base_bdevs_discovered); 228 raid_bdev->num_base_bdevs_discovered--; 229 } 230 231 static void 232 raid_bdev_io_device_unregister_cb(void *io_device) 233 { 234 struct raid_bdev *raid_bdev = io_device; 235 236 if (raid_bdev->num_base_bdevs_discovered == 0) { 237 /* Free raid_bdev when there are no base bdevs left */ 238 SPDK_DEBUGLOG(bdev_raid, "raid bdev base bdevs is 0, going to free all in destruct\n"); 239 raid_bdev_cleanup(raid_bdev); 240 spdk_bdev_destruct_done(&raid_bdev->bdev, 0); 241 raid_bdev_free(raid_bdev); 242 } else { 243 spdk_bdev_destruct_done(&raid_bdev->bdev, 0); 244 } 245 } 246 247 void 248 raid_bdev_module_stop_done(struct raid_bdev *raid_bdev) 249 { 250 if (raid_bdev->state != RAID_BDEV_STATE_CONFIGURING) { 251 spdk_io_device_unregister(raid_bdev, raid_bdev_io_device_unregister_cb); 252 } 253 } 254 255 static void 256 _raid_bdev_destruct(void *ctxt) 257 { 258 struct raid_bdev *raid_bdev = ctxt; 259 struct raid_base_bdev_info *base_info; 260 261 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_destruct\n"); 262 263 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 264 /* 265 * Close all base bdev descriptors for which call has come from below 266 * layers. Also close the descriptors if we have started shutdown. 267 */ 268 if (g_shutdown_started || base_info->remove_scheduled == true) { 269 raid_bdev_free_base_bdev_resource(raid_bdev, base_info); 270 } 271 } 272 273 if (g_shutdown_started) { 274 raid_bdev->state = RAID_BDEV_STATE_OFFLINE; 275 } 276 277 if (raid_bdev->module->stop != NULL) { 278 if (raid_bdev->module->stop(raid_bdev) == false) { 279 return; 280 } 281 } 282 283 raid_bdev_module_stop_done(raid_bdev); 284 } 285 286 static int 287 raid_bdev_destruct(void *ctx) 288 { 289 spdk_thread_exec_msg(spdk_thread_get_app_thread(), _raid_bdev_destruct, ctx); 290 291 return 1; 292 } 293 294 void 295 raid_bdev_io_complete(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status) 296 { 297 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); 298 299 spdk_bdev_io_complete(bdev_io, status); 300 } 301 302 /* 303 * brief: 304 * raid_bdev_io_complete_part - signal the completion of a part of the expected 305 * base bdev IOs and complete the raid_io if this is the final expected IO. 306 * The caller should first set raid_io->base_bdev_io_remaining. This function 307 * will decrement this counter by the value of the 'completed' parameter and 308 * complete the raid_io if the counter reaches 0. The caller is free to 309 * interpret the 'base_bdev_io_remaining' and 'completed' values as needed, 310 * it can represent e.g. blocks or IOs. 311 * params: 312 * raid_io - pointer to raid_bdev_io 313 * completed - the part of the raid_io that has been completed 314 * status - status of the base IO 315 * returns: 316 * true - if the raid_io is completed 317 * false - otherwise 318 */ 319 bool 320 raid_bdev_io_complete_part(struct raid_bdev_io *raid_io, uint64_t completed, 321 enum spdk_bdev_io_status status) 322 { 323 assert(raid_io->base_bdev_io_remaining >= completed); 324 raid_io->base_bdev_io_remaining -= completed; 325 326 if (status != SPDK_BDEV_IO_STATUS_SUCCESS) { 327 raid_io->base_bdev_io_status = status; 328 } 329 330 if (raid_io->base_bdev_io_remaining == 0) { 331 raid_bdev_io_complete(raid_io, raid_io->base_bdev_io_status); 332 return true; 333 } else { 334 return false; 335 } 336 } 337 338 /* 339 * brief: 340 * raid_bdev_queue_io_wait function processes the IO which failed to submit. 341 * It will try to queue the IOs after storing the context to bdev wait queue logic. 342 * params: 343 * raid_io - pointer to raid_bdev_io 344 * bdev - the block device that the IO is submitted to 345 * ch - io channel 346 * cb_fn - callback when the spdk_bdev_io for bdev becomes available 347 * returns: 348 * none 349 */ 350 void 351 raid_bdev_queue_io_wait(struct raid_bdev_io *raid_io, struct spdk_bdev *bdev, 352 struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn) 353 { 354 raid_io->waitq_entry.bdev = bdev; 355 raid_io->waitq_entry.cb_fn = cb_fn; 356 raid_io->waitq_entry.cb_arg = raid_io; 357 spdk_bdev_queue_io_wait(bdev, ch, &raid_io->waitq_entry); 358 } 359 360 static void 361 raid_base_bdev_reset_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 362 { 363 struct raid_bdev_io *raid_io = cb_arg; 364 365 spdk_bdev_free_io(bdev_io); 366 367 raid_bdev_io_complete_part(raid_io, 1, success ? 368 SPDK_BDEV_IO_STATUS_SUCCESS : 369 SPDK_BDEV_IO_STATUS_FAILED); 370 } 371 372 static void raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io); 373 374 static void 375 _raid_bdev_submit_reset_request(void *_raid_io) 376 { 377 struct raid_bdev_io *raid_io = _raid_io; 378 379 raid_bdev_submit_reset_request(raid_io); 380 } 381 382 /* 383 * brief: 384 * raid_bdev_submit_reset_request function submits reset requests 385 * to member disks; it will submit as many as possible unless a reset fails with -ENOMEM, in 386 * which case it will queue it for later submission 387 * params: 388 * raid_io 389 * returns: 390 * none 391 */ 392 static void 393 raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io) 394 { 395 struct raid_bdev *raid_bdev; 396 int ret; 397 uint8_t i; 398 struct raid_base_bdev_info *base_info; 399 struct spdk_io_channel *base_ch; 400 401 raid_bdev = raid_io->raid_bdev; 402 403 if (raid_io->base_bdev_io_remaining == 0) { 404 raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs; 405 } 406 407 while (raid_io->base_bdev_io_submitted < raid_bdev->num_base_bdevs) { 408 i = raid_io->base_bdev_io_submitted; 409 base_info = &raid_bdev->base_bdev_info[i]; 410 base_ch = raid_io->raid_ch->base_channel[i]; 411 ret = spdk_bdev_reset(base_info->desc, base_ch, 412 raid_base_bdev_reset_complete, raid_io); 413 if (ret == 0) { 414 raid_io->base_bdev_io_submitted++; 415 } else if (ret == -ENOMEM) { 416 raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch, 417 _raid_bdev_submit_reset_request); 418 return; 419 } else { 420 SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); 421 assert(false); 422 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 423 return; 424 } 425 } 426 } 427 428 /* 429 * brief: 430 * Callback function to spdk_bdev_io_get_buf. 431 * params: 432 * ch - pointer to raid bdev io channel 433 * bdev_io - pointer to parent bdev_io on raid bdev device 434 * success - True if buffer is allocated or false otherwise. 435 * returns: 436 * none 437 */ 438 static void 439 raid_bdev_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, 440 bool success) 441 { 442 struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 443 444 if (!success) { 445 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 446 return; 447 } 448 449 raid_io->raid_bdev->module->submit_rw_request(raid_io); 450 } 451 452 /* 453 * brief: 454 * raid_bdev_submit_request function is the submit_request function pointer of 455 * raid bdev function table. This is used to submit the io on raid_bdev to below 456 * layers. 457 * params: 458 * ch - pointer to raid bdev io channel 459 * bdev_io - pointer to parent bdev_io on raid bdev device 460 * returns: 461 * none 462 */ 463 static void 464 raid_bdev_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 465 { 466 struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 467 468 raid_io->raid_bdev = bdev_io->bdev->ctxt; 469 raid_io->raid_ch = spdk_io_channel_get_ctx(ch); 470 raid_io->base_bdev_io_remaining = 0; 471 raid_io->base_bdev_io_submitted = 0; 472 raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS; 473 474 switch (bdev_io->type) { 475 case SPDK_BDEV_IO_TYPE_READ: 476 spdk_bdev_io_get_buf(bdev_io, raid_bdev_get_buf_cb, 477 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); 478 break; 479 case SPDK_BDEV_IO_TYPE_WRITE: 480 raid_io->raid_bdev->module->submit_rw_request(raid_io); 481 break; 482 483 case SPDK_BDEV_IO_TYPE_RESET: 484 raid_bdev_submit_reset_request(raid_io); 485 break; 486 487 case SPDK_BDEV_IO_TYPE_FLUSH: 488 case SPDK_BDEV_IO_TYPE_UNMAP: 489 raid_io->raid_bdev->module->submit_null_payload_request(raid_io); 490 break; 491 492 default: 493 SPDK_ERRLOG("submit request, invalid io type %u\n", bdev_io->type); 494 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 495 break; 496 } 497 } 498 499 /* 500 * brief: 501 * _raid_bdev_io_type_supported checks whether io_type is supported in 502 * all base bdev modules of raid bdev module. If anyone among the base_bdevs 503 * doesn't support, the raid device doesn't supports. 504 * 505 * params: 506 * raid_bdev - pointer to raid bdev context 507 * io_type - io type 508 * returns: 509 * true - io_type is supported 510 * false - io_type is not supported 511 */ 512 inline static bool 513 _raid_bdev_io_type_supported(struct raid_bdev *raid_bdev, enum spdk_bdev_io_type io_type) 514 { 515 struct raid_base_bdev_info *base_info; 516 517 if (io_type == SPDK_BDEV_IO_TYPE_FLUSH || 518 io_type == SPDK_BDEV_IO_TYPE_UNMAP) { 519 if (raid_bdev->module->submit_null_payload_request == NULL) { 520 return false; 521 } 522 } 523 524 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 525 if (base_info->bdev == NULL) { 526 assert(false); 527 continue; 528 } 529 530 if (spdk_bdev_io_type_supported(base_info->bdev, io_type) == false) { 531 return false; 532 } 533 } 534 535 return true; 536 } 537 538 /* 539 * brief: 540 * raid_bdev_io_type_supported is the io_supported function for bdev function 541 * table which returns whether the particular io type is supported or not by 542 * raid bdev module 543 * params: 544 * ctx - pointer to raid bdev context 545 * type - io type 546 * returns: 547 * true - io_type is supported 548 * false - io_type is not supported 549 */ 550 static bool 551 raid_bdev_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 552 { 553 switch (io_type) { 554 case SPDK_BDEV_IO_TYPE_READ: 555 case SPDK_BDEV_IO_TYPE_WRITE: 556 return true; 557 558 case SPDK_BDEV_IO_TYPE_FLUSH: 559 case SPDK_BDEV_IO_TYPE_RESET: 560 case SPDK_BDEV_IO_TYPE_UNMAP: 561 return _raid_bdev_io_type_supported(ctx, io_type); 562 563 default: 564 return false; 565 } 566 567 return false; 568 } 569 570 /* 571 * brief: 572 * raid_bdev_get_io_channel is the get_io_channel function table pointer for 573 * raid bdev. This is used to return the io channel for this raid bdev 574 * params: 575 * ctxt - pointer to raid_bdev 576 * returns: 577 * pointer to io channel for raid bdev 578 */ 579 static struct spdk_io_channel * 580 raid_bdev_get_io_channel(void *ctxt) 581 { 582 struct raid_bdev *raid_bdev = ctxt; 583 584 return spdk_get_io_channel(raid_bdev); 585 } 586 587 void 588 raid_bdev_write_info_json(struct raid_bdev *raid_bdev, struct spdk_json_write_ctx *w) 589 { 590 struct raid_base_bdev_info *base_info; 591 592 assert(raid_bdev != NULL); 593 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 594 595 spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb); 596 spdk_json_write_named_string(w, "state", raid_bdev_state_to_str(raid_bdev->state)); 597 spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level)); 598 spdk_json_write_named_uint32(w, "num_base_bdevs", raid_bdev->num_base_bdevs); 599 spdk_json_write_named_uint32(w, "num_base_bdevs_discovered", raid_bdev->num_base_bdevs_discovered); 600 spdk_json_write_name(w, "base_bdevs_list"); 601 spdk_json_write_array_begin(w); 602 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 603 if (base_info->bdev) { 604 spdk_json_write_string(w, base_info->bdev->name); 605 } else { 606 spdk_json_write_null(w); 607 } 608 } 609 spdk_json_write_array_end(w); 610 } 611 612 /* 613 * brief: 614 * raid_bdev_dump_info_json is the function table pointer for raid bdev 615 * params: 616 * ctx - pointer to raid_bdev 617 * w - pointer to json context 618 * returns: 619 * 0 - success 620 * non zero - failure 621 */ 622 static int 623 raid_bdev_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) 624 { 625 struct raid_bdev *raid_bdev = ctx; 626 627 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_dump_config_json\n"); 628 629 /* Dump the raid bdev configuration related information */ 630 spdk_json_write_named_object_begin(w, "raid"); 631 raid_bdev_write_info_json(raid_bdev, w); 632 spdk_json_write_object_end(w); 633 634 return 0; 635 } 636 637 /* 638 * brief: 639 * raid_bdev_write_config_json is the function table pointer for raid bdev 640 * params: 641 * bdev - pointer to spdk_bdev 642 * w - pointer to json context 643 * returns: 644 * none 645 */ 646 static void 647 raid_bdev_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 648 { 649 struct raid_bdev *raid_bdev = bdev->ctxt; 650 struct raid_base_bdev_info *base_info; 651 652 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 653 654 spdk_json_write_object_begin(w); 655 656 spdk_json_write_named_string(w, "method", "bdev_raid_create"); 657 658 spdk_json_write_named_object_begin(w, "params"); 659 spdk_json_write_named_string(w, "name", bdev->name); 660 spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb); 661 spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level)); 662 663 spdk_json_write_named_array_begin(w, "base_bdevs"); 664 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 665 if (base_info->bdev) { 666 spdk_json_write_string(w, base_info->bdev->name); 667 } 668 } 669 spdk_json_write_array_end(w); 670 spdk_json_write_object_end(w); 671 672 spdk_json_write_object_end(w); 673 } 674 675 static int 676 raid_bdev_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int array_size) 677 { 678 struct raid_bdev *raid_bdev = ctx; 679 struct spdk_bdev *base_bdev; 680 uint32_t i; 681 int domains_count = 0, rc; 682 683 if (raid_bdev->module->memory_domains_supported == false) { 684 return 0; 685 } 686 687 /* First loop to get the number of memory domains */ 688 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 689 base_bdev = raid_bdev->base_bdev_info[i].bdev; 690 rc = spdk_bdev_get_memory_domains(base_bdev, NULL, 0); 691 if (rc < 0) { 692 return rc; 693 } 694 domains_count += rc; 695 } 696 697 if (!domains || array_size < domains_count) { 698 return domains_count; 699 } 700 701 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 702 base_bdev = raid_bdev->base_bdev_info[i].bdev; 703 rc = spdk_bdev_get_memory_domains(base_bdev, domains, array_size); 704 if (rc < 0) { 705 return rc; 706 } 707 domains += rc; 708 array_size -= rc; 709 } 710 711 return domains_count; 712 } 713 714 /* g_raid_bdev_fn_table is the function table for raid bdev */ 715 static const struct spdk_bdev_fn_table g_raid_bdev_fn_table = { 716 .destruct = raid_bdev_destruct, 717 .submit_request = raid_bdev_submit_request, 718 .io_type_supported = raid_bdev_io_type_supported, 719 .get_io_channel = raid_bdev_get_io_channel, 720 .dump_info_json = raid_bdev_dump_info_json, 721 .write_config_json = raid_bdev_write_config_json, 722 .get_memory_domains = raid_bdev_get_memory_domains, 723 }; 724 725 struct raid_bdev * 726 raid_bdev_find_by_name(const char *name) 727 { 728 struct raid_bdev *raid_bdev; 729 730 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { 731 if (strcmp(raid_bdev->bdev.name, name) == 0) { 732 return raid_bdev; 733 } 734 } 735 736 return NULL; 737 } 738 739 static struct { 740 const char *name; 741 enum raid_level value; 742 } g_raid_level_names[] = { 743 { "raid0", RAID0 }, 744 { "0", RAID0 }, 745 { "raid1", RAID1 }, 746 { "1", RAID1 }, 747 { "raid5f", RAID5F }, 748 { "5f", RAID5F }, 749 { "concat", CONCAT }, 750 { } 751 }; 752 753 static struct { 754 const char *name; 755 enum raid_bdev_state value; 756 } g_raid_state_names[] = { 757 { "online", RAID_BDEV_STATE_ONLINE }, 758 { "configuring", RAID_BDEV_STATE_CONFIGURING }, 759 { "offline", RAID_BDEV_STATE_OFFLINE }, 760 { } 761 }; 762 763 /* We have to use the typedef in the function declaration to appease astyle. */ 764 typedef enum raid_level raid_level_t; 765 typedef enum raid_bdev_state raid_bdev_state_t; 766 767 raid_level_t 768 raid_bdev_str_to_level(const char *str) 769 { 770 unsigned int i; 771 772 assert(str != NULL); 773 774 for (i = 0; g_raid_level_names[i].name != NULL; i++) { 775 if (strcasecmp(g_raid_level_names[i].name, str) == 0) { 776 return g_raid_level_names[i].value; 777 } 778 } 779 780 return INVALID_RAID_LEVEL; 781 } 782 783 const char * 784 raid_bdev_level_to_str(enum raid_level level) 785 { 786 unsigned int i; 787 788 for (i = 0; g_raid_level_names[i].name != NULL; i++) { 789 if (g_raid_level_names[i].value == level) { 790 return g_raid_level_names[i].name; 791 } 792 } 793 794 return ""; 795 } 796 797 raid_bdev_state_t 798 raid_bdev_str_to_state(const char *str) 799 { 800 unsigned int i; 801 802 assert(str != NULL); 803 804 for (i = 0; g_raid_state_names[i].name != NULL; i++) { 805 if (strcasecmp(g_raid_state_names[i].name, str) == 0) { 806 return g_raid_state_names[i].value; 807 } 808 } 809 810 return RAID_BDEV_STATE_MAX; 811 } 812 813 const char * 814 raid_bdev_state_to_str(enum raid_bdev_state state) 815 { 816 unsigned int i; 817 818 for (i = 0; g_raid_state_names[i].name != NULL; i++) { 819 if (g_raid_state_names[i].value == state) { 820 return g_raid_state_names[i].name; 821 } 822 } 823 824 assert(false); 825 return ""; 826 } 827 828 /* 829 * brief: 830 * raid_bdev_fini_start is called when bdev layer is starting the 831 * shutdown process 832 * params: 833 * none 834 * returns: 835 * none 836 */ 837 static void 838 raid_bdev_fini_start(void) 839 { 840 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_fini_start\n"); 841 g_shutdown_started = true; 842 } 843 844 /* 845 * brief: 846 * raid_bdev_exit is called on raid bdev module exit time by bdev layer 847 * params: 848 * none 849 * returns: 850 * none 851 */ 852 static void 853 raid_bdev_exit(void) 854 { 855 struct raid_bdev *raid_bdev, *tmp; 856 857 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_exit\n"); 858 859 TAILQ_FOREACH_SAFE(raid_bdev, &g_raid_bdev_list, global_link, tmp) { 860 raid_bdev_cleanup_and_free(raid_bdev); 861 } 862 } 863 864 /* 865 * brief: 866 * raid_bdev_get_ctx_size is used to return the context size of bdev_io for raid 867 * module 868 * params: 869 * none 870 * returns: 871 * size of spdk_bdev_io context for raid 872 */ 873 static int 874 raid_bdev_get_ctx_size(void) 875 { 876 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_get_ctx_size\n"); 877 return sizeof(struct raid_bdev_io); 878 } 879 880 static struct spdk_bdev_module g_raid_if = { 881 .name = "raid", 882 .module_init = raid_bdev_init, 883 .fini_start = raid_bdev_fini_start, 884 .module_fini = raid_bdev_exit, 885 .get_ctx_size = raid_bdev_get_ctx_size, 886 .examine_config = raid_bdev_examine, 887 .async_init = false, 888 .async_fini = false, 889 }; 890 SPDK_BDEV_MODULE_REGISTER(raid, &g_raid_if) 891 892 /* 893 * brief: 894 * raid_bdev_init is the initialization function for raid bdev module 895 * params: 896 * none 897 * returns: 898 * 0 - success 899 * non zero - failure 900 */ 901 static int 902 raid_bdev_init(void) 903 { 904 return 0; 905 } 906 907 /* 908 * brief: 909 * raid_bdev_create allocates raid bdev based on passed configuration 910 * params: 911 * name - name for raid bdev 912 * strip_size - strip size in KB 913 * num_base_bdevs - number of base bdevs 914 * level - raid level 915 * raid_bdev_out - the created raid bdev 916 * returns: 917 * 0 - success 918 * non zero - failure 919 */ 920 int 921 raid_bdev_create(const char *name, uint32_t strip_size, uint8_t num_base_bdevs, 922 enum raid_level level, struct raid_bdev **raid_bdev_out, const struct spdk_uuid *uuid) 923 { 924 struct raid_bdev *raid_bdev; 925 struct spdk_bdev *raid_bdev_gen; 926 struct raid_bdev_module *module; 927 uint8_t min_operational; 928 929 if (raid_bdev_find_by_name(name) != NULL) { 930 SPDK_ERRLOG("Duplicate raid bdev name found: %s\n", name); 931 return -EEXIST; 932 } 933 934 if (level == RAID1) { 935 if (strip_size != 0) { 936 SPDK_ERRLOG("Strip size is not supported by raid1\n"); 937 return -EINVAL; 938 } 939 } else if (spdk_u32_is_pow2(strip_size) == false) { 940 SPDK_ERRLOG("Invalid strip size %" PRIu32 "\n", strip_size); 941 return -EINVAL; 942 } 943 944 module = raid_bdev_module_find(level); 945 if (module == NULL) { 946 SPDK_ERRLOG("Unsupported raid level '%d'\n", level); 947 return -EINVAL; 948 } 949 950 assert(module->base_bdevs_min != 0); 951 if (num_base_bdevs < module->base_bdevs_min) { 952 SPDK_ERRLOG("At least %u base devices required for %s\n", 953 module->base_bdevs_min, 954 raid_bdev_level_to_str(level)); 955 return -EINVAL; 956 } 957 958 switch (module->base_bdevs_constraint.type) { 959 case CONSTRAINT_MAX_BASE_BDEVS_REMOVED: 960 min_operational = num_base_bdevs - module->base_bdevs_constraint.value; 961 break; 962 case CONSTRAINT_MIN_BASE_BDEVS_OPERATIONAL: 963 min_operational = module->base_bdevs_constraint.value; 964 break; 965 case CONSTRAINT_UNSET: 966 if (module->base_bdevs_constraint.value != 0) { 967 SPDK_ERRLOG("Unexpected constraint value '%u' provided for raid bdev '%s'.\n", 968 (uint8_t)module->base_bdevs_constraint.value, name); 969 return -EINVAL; 970 } 971 min_operational = num_base_bdevs; 972 break; 973 default: 974 SPDK_ERRLOG("Unrecognised constraint type '%u' in module for raid level '%s'.\n", 975 (uint8_t)module->base_bdevs_constraint.type, 976 raid_bdev_level_to_str(module->level)); 977 return -EINVAL; 978 }; 979 980 if (min_operational == 0 || min_operational > num_base_bdevs) { 981 SPDK_ERRLOG("Wrong constraint value for raid level '%s'.\n", 982 raid_bdev_level_to_str(module->level)); 983 return -EINVAL; 984 } 985 986 raid_bdev = calloc(1, sizeof(*raid_bdev)); 987 if (!raid_bdev) { 988 SPDK_ERRLOG("Unable to allocate memory for raid bdev\n"); 989 return -ENOMEM; 990 } 991 992 raid_bdev->module = module; 993 raid_bdev->num_base_bdevs = num_base_bdevs; 994 raid_bdev->base_bdev_info = calloc(raid_bdev->num_base_bdevs, 995 sizeof(struct raid_base_bdev_info)); 996 if (!raid_bdev->base_bdev_info) { 997 SPDK_ERRLOG("Unable able to allocate base bdev info\n"); 998 free(raid_bdev); 999 return -ENOMEM; 1000 } 1001 1002 /* strip_size_kb is from the rpc param. strip_size is in blocks and used 1003 * internally and set later. 1004 */ 1005 raid_bdev->strip_size = 0; 1006 raid_bdev->strip_size_kb = strip_size; 1007 raid_bdev->state = RAID_BDEV_STATE_CONFIGURING; 1008 raid_bdev->level = level; 1009 raid_bdev->min_base_bdevs_operational = min_operational; 1010 1011 raid_bdev_gen = &raid_bdev->bdev; 1012 1013 raid_bdev_gen->name = strdup(name); 1014 if (!raid_bdev_gen->name) { 1015 SPDK_ERRLOG("Unable to allocate name for raid\n"); 1016 free(raid_bdev->base_bdev_info); 1017 free(raid_bdev); 1018 return -ENOMEM; 1019 } 1020 1021 raid_bdev_gen->product_name = "Raid Volume"; 1022 raid_bdev_gen->ctxt = raid_bdev; 1023 raid_bdev_gen->fn_table = &g_raid_bdev_fn_table; 1024 raid_bdev_gen->module = &g_raid_if; 1025 raid_bdev_gen->write_cache = 0; 1026 1027 if (uuid) { 1028 spdk_uuid_copy(&raid_bdev_gen->uuid, uuid); 1029 } 1030 1031 TAILQ_INSERT_TAIL(&g_raid_bdev_list, raid_bdev, global_link); 1032 1033 *raid_bdev_out = raid_bdev; 1034 1035 return 0; 1036 } 1037 1038 /* 1039 * brief: 1040 * Check underlying block devices against support for metadata. Do not configure 1041 * md support when parameters from block devices are inconsistent. 1042 * params: 1043 * raid_bdev - pointer to raid bdev 1044 * returns: 1045 * 0 - The raid bdev md parameters were successfully configured. 1046 * non zero - Failed to configure md. 1047 */ 1048 static int 1049 raid_bdev_configure_md(struct raid_bdev *raid_bdev) 1050 { 1051 struct spdk_bdev *base_bdev; 1052 uint8_t i; 1053 1054 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 1055 base_bdev = raid_bdev->base_bdev_info[i].bdev; 1056 1057 if (i == 0) { 1058 raid_bdev->bdev.md_len = spdk_bdev_get_md_size(base_bdev); 1059 raid_bdev->bdev.md_interleave = spdk_bdev_is_md_interleaved(base_bdev); 1060 raid_bdev->bdev.dif_type = spdk_bdev_get_dif_type(base_bdev); 1061 raid_bdev->bdev.dif_is_head_of_md = spdk_bdev_is_dif_head_of_md(base_bdev); 1062 raid_bdev->bdev.dif_check_flags = base_bdev->dif_check_flags; 1063 continue; 1064 } 1065 1066 if (raid_bdev->bdev.md_len != spdk_bdev_get_md_size(base_bdev) || 1067 raid_bdev->bdev.md_interleave != spdk_bdev_is_md_interleaved(base_bdev) || 1068 raid_bdev->bdev.dif_type != spdk_bdev_get_dif_type(base_bdev) || 1069 raid_bdev->bdev.dif_is_head_of_md != spdk_bdev_is_dif_head_of_md(base_bdev) || 1070 raid_bdev->bdev.dif_check_flags != base_bdev->dif_check_flags) { 1071 SPDK_ERRLOG("base bdevs are configured with different metadata formats\n"); 1072 return -EPERM; 1073 } 1074 } 1075 1076 return 0; 1077 } 1078 1079 /* 1080 * brief: 1081 * If raid bdev config is complete, then only register the raid bdev to 1082 * bdev layer and remove this raid bdev from configuring list and 1083 * insert the raid bdev to configured list 1084 * params: 1085 * raid_bdev - pointer to raid bdev 1086 * returns: 1087 * 0 - success 1088 * non zero - failure 1089 */ 1090 static int 1091 raid_bdev_configure(struct raid_bdev *raid_bdev) 1092 { 1093 uint32_t blocklen = 0; 1094 struct spdk_bdev *raid_bdev_gen; 1095 struct raid_base_bdev_info *base_info; 1096 int rc = 0; 1097 1098 assert(raid_bdev->state == RAID_BDEV_STATE_CONFIGURING); 1099 assert(raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs); 1100 1101 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1102 assert(base_info->bdev != NULL); 1103 /* Check blocklen for all base bdevs that it should be same */ 1104 if (blocklen == 0) { 1105 blocklen = base_info->bdev->blocklen; 1106 } else if (blocklen != base_info->bdev->blocklen) { 1107 /* 1108 * Assumption is that all the base bdevs for any raid bdev should 1109 * have same blocklen 1110 */ 1111 SPDK_ERRLOG("Blocklen of various bdevs not matching\n"); 1112 return -EINVAL; 1113 } 1114 } 1115 assert(blocklen > 0); 1116 1117 /* The strip_size_kb is read in from user in KB. Convert to blocks here for 1118 * internal use. 1119 */ 1120 raid_bdev->strip_size = (raid_bdev->strip_size_kb * 1024) / blocklen; 1121 if (raid_bdev->strip_size == 0 && raid_bdev->level != RAID1) { 1122 SPDK_ERRLOG("Strip size cannot be smaller than the device block size\n"); 1123 return -EINVAL; 1124 } 1125 raid_bdev->strip_size_shift = spdk_u32log2(raid_bdev->strip_size); 1126 raid_bdev->blocklen_shift = spdk_u32log2(blocklen); 1127 1128 raid_bdev_gen = &raid_bdev->bdev; 1129 raid_bdev_gen->blocklen = blocklen; 1130 1131 rc = raid_bdev_configure_md(raid_bdev); 1132 if (rc != 0) { 1133 SPDK_ERRLOG("raid metadata configuration failed\n"); 1134 return rc; 1135 } 1136 1137 rc = raid_bdev->module->start(raid_bdev); 1138 if (rc != 0) { 1139 SPDK_ERRLOG("raid module startup callback failed\n"); 1140 return rc; 1141 } 1142 raid_bdev->state = RAID_BDEV_STATE_ONLINE; 1143 SPDK_DEBUGLOG(bdev_raid, "io device register %p\n", raid_bdev); 1144 SPDK_DEBUGLOG(bdev_raid, "blockcnt %" PRIu64 ", blocklen %u\n", 1145 raid_bdev_gen->blockcnt, raid_bdev_gen->blocklen); 1146 spdk_io_device_register(raid_bdev, raid_bdev_create_cb, raid_bdev_destroy_cb, 1147 sizeof(struct raid_bdev_io_channel), 1148 raid_bdev->bdev.name); 1149 rc = spdk_bdev_register(raid_bdev_gen); 1150 if (rc != 0) { 1151 SPDK_ERRLOG("Unable to register raid bdev and stay at configuring state\n"); 1152 if (raid_bdev->module->stop != NULL) { 1153 raid_bdev->module->stop(raid_bdev); 1154 } 1155 spdk_io_device_unregister(raid_bdev, NULL); 1156 raid_bdev->state = RAID_BDEV_STATE_CONFIGURING; 1157 return rc; 1158 } 1159 SPDK_DEBUGLOG(bdev_raid, "raid bdev generic %p\n", raid_bdev_gen); 1160 SPDK_DEBUGLOG(bdev_raid, "raid bdev is created with name %s, raid_bdev %p\n", 1161 raid_bdev_gen->name, raid_bdev); 1162 1163 return 0; 1164 } 1165 1166 /* 1167 * brief: 1168 * If raid bdev is online and registered, change the bdev state to 1169 * configuring and unregister this raid device. Queue this raid device 1170 * in configuring list 1171 * params: 1172 * raid_bdev - pointer to raid bdev 1173 * cb_fn - callback function 1174 * cb_arg - argument to callback function 1175 * returns: 1176 * none 1177 */ 1178 static void 1179 raid_bdev_deconfigure(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn, 1180 void *cb_arg) 1181 { 1182 if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) { 1183 if (cb_fn) { 1184 cb_fn(cb_arg, 0); 1185 } 1186 return; 1187 } 1188 1189 assert(raid_bdev->num_base_bdevs == raid_bdev->num_base_bdevs_discovered); 1190 raid_bdev->state = RAID_BDEV_STATE_OFFLINE; 1191 assert(raid_bdev->num_base_bdevs_discovered); 1192 SPDK_DEBUGLOG(bdev_raid, "raid bdev state changing from online to offline\n"); 1193 1194 spdk_bdev_unregister(&raid_bdev->bdev, cb_fn, cb_arg); 1195 } 1196 1197 /* 1198 * brief: 1199 * raid_bdev_find_by_base_bdev function finds the raid bdev which has 1200 * claimed the base bdev. 1201 * params: 1202 * base_bdev - pointer to base bdev pointer 1203 * _raid_bdev - Reference to pointer to raid bdev 1204 * _base_info - Reference to the raid base bdev info. 1205 * returns: 1206 * true - if the raid bdev is found. 1207 * false - if the raid bdev is not found. 1208 */ 1209 static bool 1210 raid_bdev_find_by_base_bdev(struct spdk_bdev *base_bdev, struct raid_bdev **_raid_bdev, 1211 struct raid_base_bdev_info **_base_info) 1212 { 1213 struct raid_bdev *raid_bdev; 1214 struct raid_base_bdev_info *base_info; 1215 1216 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { 1217 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1218 if (base_info->bdev == base_bdev) { 1219 *_raid_bdev = raid_bdev; 1220 *_base_info = base_info; 1221 return true; 1222 } 1223 } 1224 } 1225 1226 return false; 1227 } 1228 1229 /* 1230 * brief: 1231 * raid_bdev_remove_base_bdev function is called by below layers when base_bdev 1232 * is removed. This function checks if this base bdev is part of any raid bdev 1233 * or not. If yes, it takes necessary action on that particular raid bdev. 1234 * params: 1235 * base_bdev - pointer to base bdev which got removed 1236 * returns: 1237 * none 1238 */ 1239 static void 1240 raid_bdev_remove_base_bdev(struct spdk_bdev *base_bdev) 1241 { 1242 struct raid_bdev *raid_bdev = NULL; 1243 struct raid_base_bdev_info *base_info; 1244 1245 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_remove_base_bdev\n"); 1246 1247 /* Find the raid_bdev which has claimed this base_bdev */ 1248 if (!raid_bdev_find_by_base_bdev(base_bdev, &raid_bdev, &base_info)) { 1249 SPDK_ERRLOG("bdev to remove '%s' not found\n", base_bdev->name); 1250 return; 1251 } 1252 1253 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 1254 1255 assert(base_info->desc); 1256 base_info->remove_scheduled = true; 1257 1258 if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) { 1259 /* 1260 * As raid bdev is not registered yet or already unregistered, 1261 * so cleanup should be done here itself. 1262 */ 1263 raid_bdev_free_base_bdev_resource(raid_bdev, base_info); 1264 if (raid_bdev->num_base_bdevs_discovered == 0) { 1265 /* There is no base bdev for this raid, so free the raid device. */ 1266 raid_bdev_cleanup_and_free(raid_bdev); 1267 return; 1268 } 1269 } 1270 1271 raid_bdev_deconfigure(raid_bdev, NULL, NULL); 1272 } 1273 1274 /* 1275 * brief: 1276 * raid_bdev_resize_base_bdev function is called by below layers when base_bdev 1277 * is resized. This function checks if the smallest size of the base_bdevs is changed. 1278 * If yes, call module handler to resize the raid_bdev if implemented. 1279 * params: 1280 * base_bdev - pointer to base bdev which got resized. 1281 * returns: 1282 * none 1283 */ 1284 static void 1285 raid_bdev_resize_base_bdev(struct spdk_bdev *base_bdev) 1286 { 1287 struct raid_bdev *raid_bdev = NULL; 1288 struct raid_base_bdev_info *base_info; 1289 1290 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_resize_base_bdev\n"); 1291 1292 /* Find the raid_bdev which has claimed this base_bdev */ 1293 if (!raid_bdev_find_by_base_bdev(base_bdev, &raid_bdev, &base_info)) { 1294 SPDK_ERRLOG("raid_bdev whose base_bdev '%s' not found\n", base_bdev->name); 1295 return; 1296 } 1297 1298 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 1299 1300 SPDK_NOTICELOG("base_bdev '%s' was resized: old size %" PRIu64 ", new size %" PRIu64 "\n", 1301 base_bdev->name, base_info->blockcnt, base_bdev->blockcnt); 1302 1303 if (raid_bdev->module->resize) { 1304 raid_bdev->module->resize(raid_bdev); 1305 } 1306 } 1307 1308 /* 1309 * brief: 1310 * raid_bdev_event_base_bdev function is called by below layers when base_bdev 1311 * triggers asynchronous event. 1312 * params: 1313 * type - event details. 1314 * bdev - bdev that triggered event. 1315 * event_ctx - context for event. 1316 * returns: 1317 * none 1318 */ 1319 static void 1320 raid_bdev_event_base_bdev(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, 1321 void *event_ctx) 1322 { 1323 switch (type) { 1324 case SPDK_BDEV_EVENT_REMOVE: 1325 raid_bdev_remove_base_bdev(bdev); 1326 break; 1327 case SPDK_BDEV_EVENT_RESIZE: 1328 raid_bdev_resize_base_bdev(bdev); 1329 break; 1330 default: 1331 SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type); 1332 break; 1333 } 1334 } 1335 1336 /* 1337 * brief: 1338 * Deletes the specified raid bdev 1339 * params: 1340 * raid_bdev - pointer to raid bdev 1341 * cb_fn - callback function 1342 * cb_arg - argument to callback function 1343 */ 1344 void 1345 raid_bdev_delete(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn, void *cb_arg) 1346 { 1347 struct raid_base_bdev_info *base_info; 1348 1349 SPDK_DEBUGLOG(bdev_raid, "delete raid bdev: %s\n", raid_bdev->bdev.name); 1350 1351 if (raid_bdev->destroy_started) { 1352 SPDK_DEBUGLOG(bdev_raid, "destroying raid bdev %s is already started\n", 1353 raid_bdev->bdev.name); 1354 if (cb_fn) { 1355 cb_fn(cb_arg, -EALREADY); 1356 } 1357 return; 1358 } 1359 1360 raid_bdev->destroy_started = true; 1361 1362 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1363 base_info->remove_scheduled = true; 1364 1365 if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) { 1366 /* 1367 * As raid bdev is not registered yet or already unregistered, 1368 * so cleanup should be done here itself. 1369 */ 1370 raid_bdev_free_base_bdev_resource(raid_bdev, base_info); 1371 } 1372 } 1373 1374 if (raid_bdev->num_base_bdevs_discovered == 0) { 1375 /* There is no base bdev for this raid, so free the raid device. */ 1376 raid_bdev_cleanup_and_free(raid_bdev); 1377 if (cb_fn) { 1378 cb_fn(cb_arg, 0); 1379 } 1380 } else { 1381 raid_bdev_deconfigure(raid_bdev, cb_fn, cb_arg); 1382 } 1383 } 1384 1385 static int 1386 raid_bdev_configure_base_bdev(struct raid_bdev *raid_bdev, struct raid_base_bdev_info *base_info) 1387 { 1388 struct spdk_bdev_desc *desc; 1389 struct spdk_bdev *bdev; 1390 int rc; 1391 1392 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 1393 assert(base_info->name != NULL); 1394 assert(base_info->bdev == NULL); 1395 1396 rc = spdk_bdev_open_ext(base_info->name, true, raid_bdev_event_base_bdev, NULL, &desc); 1397 if (rc != 0) { 1398 if (rc != -ENODEV) { 1399 SPDK_ERRLOG("Unable to create desc on bdev '%s'\n", base_info->name); 1400 } 1401 return rc; 1402 } 1403 1404 bdev = spdk_bdev_desc_get_bdev(desc); 1405 1406 rc = spdk_bdev_module_claim_bdev(bdev, NULL, &g_raid_if); 1407 if (rc != 0) { 1408 SPDK_ERRLOG("Unable to claim this bdev as it is already claimed\n"); 1409 spdk_bdev_close(desc); 1410 return rc; 1411 } 1412 1413 SPDK_DEBUGLOG(bdev_raid, "bdev %s is claimed\n", bdev->name); 1414 1415 assert(raid_bdev->state != RAID_BDEV_STATE_ONLINE); 1416 1417 base_info->bdev = bdev; 1418 base_info->desc = desc; 1419 base_info->blockcnt = bdev->blockcnt; 1420 raid_bdev->num_base_bdevs_discovered++; 1421 assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs); 1422 1423 if (raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs) { 1424 rc = raid_bdev_configure(raid_bdev); 1425 if (rc != 0) { 1426 SPDK_ERRLOG("Failed to configure raid bdev\n"); 1427 return rc; 1428 } 1429 } 1430 1431 return 0; 1432 } 1433 1434 /* 1435 * brief: 1436 * raid_bdev_add_base_device function is the actual function which either adds 1437 * the nvme base device to existing raid bdev or create a new raid bdev. It also claims 1438 * the base device and keep the open descriptor. 1439 * params: 1440 * raid_bdev - pointer to raid bdev 1441 * name - name of the base bdev 1442 * slot - position to add base bdev 1443 * returns: 1444 * 0 - success 1445 * non zero - failure 1446 */ 1447 int 1448 raid_bdev_add_base_device(struct raid_bdev *raid_bdev, const char *name, uint8_t slot) 1449 { 1450 struct raid_base_bdev_info *base_info; 1451 int rc; 1452 1453 if (slot >= raid_bdev->num_base_bdevs) { 1454 return -EINVAL; 1455 } 1456 1457 base_info = &raid_bdev->base_bdev_info[slot]; 1458 1459 if (base_info->name != NULL) { 1460 SPDK_ERRLOG("Slot %u on raid bdev '%s' already assigned to bdev '%s'\n", 1461 slot, raid_bdev->bdev.name, base_info->name); 1462 return -EBUSY; 1463 } 1464 1465 base_info->name = strdup(name); 1466 if (base_info->name == NULL) { 1467 return -ENOMEM; 1468 } 1469 1470 rc = raid_bdev_configure_base_bdev(raid_bdev, base_info); 1471 if (rc != 0) { 1472 if (rc != -ENODEV) { 1473 SPDK_ERRLOG("Failed to allocate resource for bdev '%s'\n", name); 1474 } 1475 return rc; 1476 } 1477 1478 return 0; 1479 } 1480 1481 /* 1482 * brief: 1483 * raid_bdev_examine function is the examine function call by the below layers 1484 * like bdev_nvme layer. This function will check if this base bdev can be 1485 * claimed by this raid bdev or not. 1486 * params: 1487 * bdev - pointer to base bdev 1488 * returns: 1489 * none 1490 */ 1491 static void 1492 raid_bdev_examine(struct spdk_bdev *bdev) 1493 { 1494 struct raid_bdev *raid_bdev; 1495 struct raid_base_bdev_info *base_info; 1496 1497 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { 1498 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1499 if (base_info->bdev == NULL && strcmp(bdev->name, base_info->name) == 0) { 1500 raid_bdev_configure_base_bdev(raid_bdev, base_info); 1501 break; 1502 } 1503 } 1504 } 1505 1506 spdk_bdev_module_examine_done(&g_raid_if); 1507 } 1508 1509 /* Log component for bdev raid bdev module */ 1510 SPDK_LOG_REGISTER_COMPONENT(bdev_raid) 1511