1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * * Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * * Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * * Neither the name of Intel Corporation nor the names of its 19 * contributors may be used to endorse or promote products derived 20 * from this software without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 25 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 26 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 */ 34 35 #include "bdev_raid.h" 36 #include "spdk/env.h" 37 #include "spdk/thread.h" 38 #include "spdk/log.h" 39 #include "spdk/string.h" 40 #include "spdk/util.h" 41 #include "spdk/json.h" 42 #include "spdk/string.h" 43 44 static bool g_shutdown_started = false; 45 46 /* raid bdev config as read from config file */ 47 struct raid_config g_raid_config = { 48 .raid_bdev_config_head = TAILQ_HEAD_INITIALIZER(g_raid_config.raid_bdev_config_head), 49 }; 50 51 /* 52 * List of raid bdev in configured list, these raid bdevs are registered with 53 * bdev layer 54 */ 55 struct raid_configured_tailq g_raid_bdev_configured_list = TAILQ_HEAD_INITIALIZER( 56 g_raid_bdev_configured_list); 57 58 /* List of raid bdev in configuring list */ 59 struct raid_configuring_tailq g_raid_bdev_configuring_list = TAILQ_HEAD_INITIALIZER( 60 g_raid_bdev_configuring_list); 61 62 /* List of all raid bdevs */ 63 struct raid_all_tailq g_raid_bdev_list = TAILQ_HEAD_INITIALIZER(g_raid_bdev_list); 64 65 /* List of all raid bdevs that are offline */ 66 struct raid_offline_tailq g_raid_bdev_offline_list = TAILQ_HEAD_INITIALIZER( 67 g_raid_bdev_offline_list); 68 69 static TAILQ_HEAD(, raid_bdev_module) g_raid_modules = TAILQ_HEAD_INITIALIZER(g_raid_modules); 70 71 static struct raid_bdev_module *raid_bdev_module_find(enum raid_level level) 72 { 73 struct raid_bdev_module *raid_module; 74 75 TAILQ_FOREACH(raid_module, &g_raid_modules, link) { 76 if (raid_module->level == level) { 77 return raid_module; 78 } 79 } 80 81 return NULL; 82 } 83 84 void raid_bdev_module_list_add(struct raid_bdev_module *raid_module) 85 { 86 if (raid_bdev_module_find(raid_module->level) != NULL) { 87 SPDK_ERRLOG("module for raid level '%s' already registered.\n", 88 raid_bdev_level_to_str(raid_module->level)); 89 assert(false); 90 } else { 91 TAILQ_INSERT_TAIL(&g_raid_modules, raid_module, link); 92 } 93 } 94 95 /* Function declarations */ 96 static void raid_bdev_examine(struct spdk_bdev *bdev); 97 static int raid_bdev_init(void); 98 static void raid_bdev_deconfigure(struct raid_bdev *raid_bdev, 99 raid_bdev_destruct_cb cb_fn, void *cb_arg); 100 static void raid_bdev_event_base_bdev(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, 101 void *event_ctx); 102 103 /* 104 * brief: 105 * raid_bdev_create_cb function is a cb function for raid bdev which creates the 106 * hierarchy from raid bdev to base bdev io channels. It will be called per core 107 * params: 108 * io_device - pointer to raid bdev io device represented by raid_bdev 109 * ctx_buf - pointer to context buffer for raid bdev io channel 110 * returns: 111 * 0 - success 112 * non zero - failure 113 */ 114 static int 115 raid_bdev_create_cb(void *io_device, void *ctx_buf) 116 { 117 struct raid_bdev *raid_bdev = io_device; 118 struct raid_bdev_io_channel *raid_ch = ctx_buf; 119 uint8_t i; 120 121 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_create_cb, %p\n", raid_ch); 122 123 assert(raid_bdev != NULL); 124 assert(raid_bdev->state == RAID_BDEV_STATE_ONLINE); 125 126 raid_ch->num_channels = raid_bdev->num_base_bdevs; 127 128 raid_ch->base_channel = calloc(raid_ch->num_channels, 129 sizeof(struct spdk_io_channel *)); 130 if (!raid_ch->base_channel) { 131 SPDK_ERRLOG("Unable to allocate base bdevs io channel\n"); 132 return -ENOMEM; 133 } 134 for (i = 0; i < raid_ch->num_channels; i++) { 135 /* 136 * Get the spdk_io_channel for all the base bdevs. This is used during 137 * split logic to send the respective child bdev ios to respective base 138 * bdev io channel. 139 */ 140 raid_ch->base_channel[i] = spdk_bdev_get_io_channel( 141 raid_bdev->base_bdev_info[i].desc); 142 if (!raid_ch->base_channel[i]) { 143 uint8_t j; 144 145 for (j = 0; j < i; j++) { 146 spdk_put_io_channel(raid_ch->base_channel[j]); 147 } 148 free(raid_ch->base_channel); 149 raid_ch->base_channel = NULL; 150 SPDK_ERRLOG("Unable to create io channel for base bdev\n"); 151 return -ENOMEM; 152 } 153 } 154 155 return 0; 156 } 157 158 /* 159 * brief: 160 * raid_bdev_destroy_cb function is a cb function for raid bdev which deletes the 161 * hierarchy from raid bdev to base bdev io channels. It will be called per core 162 * params: 163 * io_device - pointer to raid bdev io device represented by raid_bdev 164 * ctx_buf - pointer to context buffer for raid bdev io channel 165 * returns: 166 * none 167 */ 168 static void 169 raid_bdev_destroy_cb(void *io_device, void *ctx_buf) 170 { 171 struct raid_bdev_io_channel *raid_ch = ctx_buf; 172 uint8_t i; 173 174 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_destroy_cb\n"); 175 176 assert(raid_ch != NULL); 177 assert(raid_ch->base_channel); 178 for (i = 0; i < raid_ch->num_channels; i++) { 179 /* Free base bdev channels */ 180 assert(raid_ch->base_channel[i] != NULL); 181 spdk_put_io_channel(raid_ch->base_channel[i]); 182 } 183 free(raid_ch->base_channel); 184 raid_ch->base_channel = NULL; 185 } 186 187 /* 188 * brief: 189 * raid_bdev_cleanup is used to cleanup and free raid_bdev related data 190 * structures. 191 * params: 192 * raid_bdev - pointer to raid_bdev 193 * returns: 194 * none 195 */ 196 static void 197 raid_bdev_cleanup(struct raid_bdev *raid_bdev) 198 { 199 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_cleanup, %p name %s, state %u, config %p\n", 200 raid_bdev, 201 raid_bdev->bdev.name, raid_bdev->state, raid_bdev->config); 202 if (raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) { 203 TAILQ_REMOVE(&g_raid_bdev_configuring_list, raid_bdev, state_link); 204 } else if (raid_bdev->state == RAID_BDEV_STATE_OFFLINE) { 205 TAILQ_REMOVE(&g_raid_bdev_offline_list, raid_bdev, state_link); 206 } else { 207 assert(0); 208 } 209 TAILQ_REMOVE(&g_raid_bdev_list, raid_bdev, global_link); 210 free(raid_bdev->bdev.name); 211 free(raid_bdev->base_bdev_info); 212 if (raid_bdev->config) { 213 raid_bdev->config->raid_bdev = NULL; 214 } 215 free(raid_bdev); 216 } 217 218 /* 219 * brief: 220 * wrapper for the bdev close operation 221 * params: 222 * base_info - raid base bdev info 223 * returns: 224 */ 225 static void 226 _raid_bdev_free_base_bdev_resource(void *ctx) 227 { 228 struct spdk_bdev_desc *desc = ctx; 229 230 spdk_bdev_close(desc); 231 } 232 233 234 /* 235 * brief: 236 * free resource of base bdev for raid bdev 237 * params: 238 * raid_bdev - pointer to raid bdev 239 * base_info - raid base bdev info 240 * returns: 241 * 0 - success 242 * non zero - failure 243 */ 244 static void 245 raid_bdev_free_base_bdev_resource(struct raid_bdev *raid_bdev, 246 struct raid_base_bdev_info *base_info) 247 { 248 spdk_bdev_module_release_bdev(base_info->bdev); 249 if (base_info->thread && base_info->thread != spdk_get_thread()) { 250 spdk_thread_send_msg(base_info->thread, _raid_bdev_free_base_bdev_resource, base_info->desc); 251 } else { 252 spdk_bdev_close(base_info->desc); 253 } 254 base_info->desc = NULL; 255 base_info->bdev = NULL; 256 257 assert(raid_bdev->num_base_bdevs_discovered); 258 raid_bdev->num_base_bdevs_discovered--; 259 } 260 261 /* 262 * brief: 263 * raid_bdev_destruct is the destruct function table pointer for raid bdev 264 * params: 265 * ctxt - pointer to raid_bdev 266 * returns: 267 * 0 - success 268 * non zero - failure 269 */ 270 static int 271 raid_bdev_destruct(void *ctxt) 272 { 273 struct raid_bdev *raid_bdev = ctxt; 274 struct raid_base_bdev_info *base_info; 275 276 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_destruct\n"); 277 278 raid_bdev->destruct_called = true; 279 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 280 /* 281 * Close all base bdev descriptors for which call has come from below 282 * layers. Also close the descriptors if we have started shutdown. 283 */ 284 if (g_shutdown_started || 285 ((base_info->remove_scheduled == true) && 286 (base_info->bdev != NULL))) { 287 raid_bdev_free_base_bdev_resource(raid_bdev, base_info); 288 } 289 } 290 291 if (g_shutdown_started) { 292 TAILQ_REMOVE(&g_raid_bdev_configured_list, raid_bdev, state_link); 293 raid_bdev->state = RAID_BDEV_STATE_OFFLINE; 294 TAILQ_INSERT_TAIL(&g_raid_bdev_offline_list, raid_bdev, state_link); 295 } 296 297 if (raid_bdev->module->stop != NULL) { 298 raid_bdev->module->stop(raid_bdev); 299 } 300 301 spdk_io_device_unregister(raid_bdev, NULL); 302 303 if (raid_bdev->num_base_bdevs_discovered == 0) { 304 /* Free raid_bdev when there are no base bdevs left */ 305 SPDK_DEBUGLOG(bdev_raid, "raid bdev base bdevs is 0, going to free all in destruct\n"); 306 raid_bdev_cleanup(raid_bdev); 307 } 308 309 return 0; 310 } 311 312 void 313 raid_bdev_io_complete(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status) 314 { 315 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); 316 317 spdk_bdev_io_complete(bdev_io, status); 318 } 319 320 /* 321 * brief: 322 * raid_bdev_io_complete_part - signal the completion of a part of the expected 323 * base bdev IOs and complete the raid_io if this is the final expected IO. 324 * The caller should first set raid_io->base_bdev_io_remaining. This function 325 * will decrement this counter by the value of the 'completed' parameter and 326 * complete the raid_io if the counter reaches 0. The caller is free to 327 * interpret the 'base_bdev_io_remaining' and 'completed' values as needed, 328 * it can represent e.g. blocks or IOs. 329 * params: 330 * raid_io - pointer to raid_bdev_io 331 * completed - the part of the raid_io that has been completed 332 * status - status of the base IO 333 * returns: 334 * true - if the raid_io is completed 335 * false - otherwise 336 */ 337 bool 338 raid_bdev_io_complete_part(struct raid_bdev_io *raid_io, uint64_t completed, 339 enum spdk_bdev_io_status status) 340 { 341 assert(raid_io->base_bdev_io_remaining >= completed); 342 raid_io->base_bdev_io_remaining -= completed; 343 344 if (status != SPDK_BDEV_IO_STATUS_SUCCESS) { 345 raid_io->base_bdev_io_status = status; 346 } 347 348 if (raid_io->base_bdev_io_remaining == 0) { 349 raid_bdev_io_complete(raid_io, raid_io->base_bdev_io_status); 350 return true; 351 } else { 352 return false; 353 } 354 } 355 356 /* 357 * brief: 358 * raid_bdev_queue_io_wait function processes the IO which failed to submit. 359 * It will try to queue the IOs after storing the context to bdev wait queue logic. 360 * params: 361 * raid_io - pointer to raid_bdev_io 362 * bdev - the block device that the IO is submitted to 363 * ch - io channel 364 * cb_fn - callback when the spdk_bdev_io for bdev becomes available 365 * returns: 366 * none 367 */ 368 void 369 raid_bdev_queue_io_wait(struct raid_bdev_io *raid_io, struct spdk_bdev *bdev, 370 struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn) 371 { 372 raid_io->waitq_entry.bdev = bdev; 373 raid_io->waitq_entry.cb_fn = cb_fn; 374 raid_io->waitq_entry.cb_arg = raid_io; 375 spdk_bdev_queue_io_wait(bdev, ch, &raid_io->waitq_entry); 376 } 377 378 static void 379 raid_base_bdev_reset_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 380 { 381 struct raid_bdev_io *raid_io = cb_arg; 382 383 spdk_bdev_free_io(bdev_io); 384 385 raid_bdev_io_complete_part(raid_io, 1, success ? 386 SPDK_BDEV_IO_STATUS_SUCCESS : 387 SPDK_BDEV_IO_STATUS_FAILED); 388 } 389 390 static void 391 raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io); 392 393 static void 394 _raid_bdev_submit_reset_request(void *_raid_io) 395 { 396 struct raid_bdev_io *raid_io = _raid_io; 397 398 raid_bdev_submit_reset_request(raid_io); 399 } 400 401 /* 402 * brief: 403 * raid_bdev_submit_reset_request function submits reset requests 404 * to member disks; it will submit as many as possible unless a reset fails with -ENOMEM, in 405 * which case it will queue it for later submission 406 * params: 407 * raid_io 408 * returns: 409 * none 410 */ 411 static void 412 raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io) 413 { 414 struct raid_bdev *raid_bdev; 415 int ret; 416 uint8_t i; 417 struct raid_base_bdev_info *base_info; 418 struct spdk_io_channel *base_ch; 419 420 raid_bdev = raid_io->raid_bdev; 421 422 if (raid_io->base_bdev_io_remaining == 0) { 423 raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs; 424 } 425 426 while (raid_io->base_bdev_io_submitted < raid_bdev->num_base_bdevs) { 427 i = raid_io->base_bdev_io_submitted; 428 base_info = &raid_bdev->base_bdev_info[i]; 429 base_ch = raid_io->raid_ch->base_channel[i]; 430 ret = spdk_bdev_reset(base_info->desc, base_ch, 431 raid_base_bdev_reset_complete, raid_io); 432 if (ret == 0) { 433 raid_io->base_bdev_io_submitted++; 434 } else if (ret == -ENOMEM) { 435 raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch, 436 _raid_bdev_submit_reset_request); 437 return; 438 } else { 439 SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); 440 assert(false); 441 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 442 return; 443 } 444 } 445 } 446 447 /* 448 * brief: 449 * Callback function to spdk_bdev_io_get_buf. 450 * params: 451 * ch - pointer to raid bdev io channel 452 * bdev_io - pointer to parent bdev_io on raid bdev device 453 * success - True if buffer is allocated or false otherwise. 454 * returns: 455 * none 456 */ 457 static void 458 raid_bdev_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, 459 bool success) 460 { 461 struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 462 463 if (!success) { 464 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 465 return; 466 } 467 468 raid_io->raid_bdev->module->submit_rw_request(raid_io); 469 } 470 471 /* 472 * brief: 473 * raid_bdev_submit_request function is the submit_request function pointer of 474 * raid bdev function table. This is used to submit the io on raid_bdev to below 475 * layers. 476 * params: 477 * ch - pointer to raid bdev io channel 478 * bdev_io - pointer to parent bdev_io on raid bdev device 479 * returns: 480 * none 481 */ 482 static void 483 raid_bdev_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 484 { 485 struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 486 487 raid_io->raid_bdev = bdev_io->bdev->ctxt; 488 raid_io->raid_ch = spdk_io_channel_get_ctx(ch); 489 raid_io->base_bdev_io_remaining = 0; 490 raid_io->base_bdev_io_submitted = 0; 491 raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS; 492 493 switch (bdev_io->type) { 494 case SPDK_BDEV_IO_TYPE_READ: 495 spdk_bdev_io_get_buf(bdev_io, raid_bdev_get_buf_cb, 496 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); 497 break; 498 case SPDK_BDEV_IO_TYPE_WRITE: 499 raid_io->raid_bdev->module->submit_rw_request(raid_io); 500 break; 501 502 case SPDK_BDEV_IO_TYPE_RESET: 503 raid_bdev_submit_reset_request(raid_io); 504 break; 505 506 case SPDK_BDEV_IO_TYPE_FLUSH: 507 case SPDK_BDEV_IO_TYPE_UNMAP: 508 raid_io->raid_bdev->module->submit_null_payload_request(raid_io); 509 break; 510 511 default: 512 SPDK_ERRLOG("submit request, invalid io type %u\n", bdev_io->type); 513 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 514 break; 515 } 516 } 517 518 /* 519 * brief: 520 * _raid_bdev_io_type_supported checks whether io_type is supported in 521 * all base bdev modules of raid bdev module. If anyone among the base_bdevs 522 * doesn't support, the raid device doesn't supports. 523 * 524 * params: 525 * raid_bdev - pointer to raid bdev context 526 * io_type - io type 527 * returns: 528 * true - io_type is supported 529 * false - io_type is not supported 530 */ 531 inline static bool 532 _raid_bdev_io_type_supported(struct raid_bdev *raid_bdev, enum spdk_bdev_io_type io_type) 533 { 534 struct raid_base_bdev_info *base_info; 535 536 if (io_type == SPDK_BDEV_IO_TYPE_FLUSH || 537 io_type == SPDK_BDEV_IO_TYPE_UNMAP) { 538 if (raid_bdev->module->submit_null_payload_request == NULL) { 539 return false; 540 } 541 } 542 543 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 544 if (base_info->bdev == NULL) { 545 assert(false); 546 continue; 547 } 548 549 if (spdk_bdev_io_type_supported(base_info->bdev, io_type) == false) { 550 return false; 551 } 552 } 553 554 return true; 555 } 556 557 /* 558 * brief: 559 * raid_bdev_io_type_supported is the io_supported function for bdev function 560 * table which returns whether the particular io type is supported or not by 561 * raid bdev module 562 * params: 563 * ctx - pointer to raid bdev context 564 * type - io type 565 * returns: 566 * true - io_type is supported 567 * false - io_type is not supported 568 */ 569 static bool 570 raid_bdev_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 571 { 572 switch (io_type) { 573 case SPDK_BDEV_IO_TYPE_READ: 574 case SPDK_BDEV_IO_TYPE_WRITE: 575 return true; 576 577 case SPDK_BDEV_IO_TYPE_FLUSH: 578 case SPDK_BDEV_IO_TYPE_RESET: 579 case SPDK_BDEV_IO_TYPE_UNMAP: 580 return _raid_bdev_io_type_supported(ctx, io_type); 581 582 default: 583 return false; 584 } 585 586 return false; 587 } 588 589 /* 590 * brief: 591 * raid_bdev_get_io_channel is the get_io_channel function table pointer for 592 * raid bdev. This is used to return the io channel for this raid bdev 593 * params: 594 * ctxt - pointer to raid_bdev 595 * returns: 596 * pointer to io channel for raid bdev 597 */ 598 static struct spdk_io_channel * 599 raid_bdev_get_io_channel(void *ctxt) 600 { 601 struct raid_bdev *raid_bdev = ctxt; 602 603 return spdk_get_io_channel(raid_bdev); 604 } 605 606 /* 607 * brief: 608 * raid_bdev_dump_info_json is the function table pointer for raid bdev 609 * params: 610 * ctx - pointer to raid_bdev 611 * w - pointer to json context 612 * returns: 613 * 0 - success 614 * non zero - failure 615 */ 616 static int 617 raid_bdev_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) 618 { 619 struct raid_bdev *raid_bdev = ctx; 620 struct raid_base_bdev_info *base_info; 621 622 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_dump_config_json\n"); 623 assert(raid_bdev != NULL); 624 625 /* Dump the raid bdev configuration related information */ 626 spdk_json_write_named_object_begin(w, "raid"); 627 spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb); 628 spdk_json_write_named_uint32(w, "state", raid_bdev->state); 629 spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level)); 630 spdk_json_write_named_uint32(w, "destruct_called", raid_bdev->destruct_called); 631 spdk_json_write_named_uint32(w, "num_base_bdevs", raid_bdev->num_base_bdevs); 632 spdk_json_write_named_uint32(w, "num_base_bdevs_discovered", raid_bdev->num_base_bdevs_discovered); 633 spdk_json_write_name(w, "base_bdevs_list"); 634 spdk_json_write_array_begin(w); 635 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 636 if (base_info->bdev) { 637 spdk_json_write_string(w, base_info->bdev->name); 638 } else { 639 spdk_json_write_null(w); 640 } 641 } 642 spdk_json_write_array_end(w); 643 spdk_json_write_object_end(w); 644 645 return 0; 646 } 647 648 /* 649 * brief: 650 * raid_bdev_write_config_json is the function table pointer for raid bdev 651 * params: 652 * bdev - pointer to spdk_bdev 653 * w - pointer to json context 654 * returns: 655 * none 656 */ 657 static void 658 raid_bdev_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 659 { 660 struct raid_bdev *raid_bdev = bdev->ctxt; 661 struct raid_base_bdev_info *base_info; 662 663 spdk_json_write_object_begin(w); 664 665 spdk_json_write_named_string(w, "method", "bdev_raid_create"); 666 667 spdk_json_write_named_object_begin(w, "params"); 668 spdk_json_write_named_string(w, "name", bdev->name); 669 spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb); 670 spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level)); 671 672 spdk_json_write_named_array_begin(w, "base_bdevs"); 673 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 674 if (base_info->bdev) { 675 spdk_json_write_string(w, base_info->bdev->name); 676 } 677 } 678 spdk_json_write_array_end(w); 679 spdk_json_write_object_end(w); 680 681 spdk_json_write_object_end(w); 682 } 683 684 static int 685 raid_bdev_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int array_size) 686 { 687 struct raid_bdev *raid_bdev = ctx; 688 struct spdk_bdev *base_bdev; 689 uint32_t i; 690 int domains_count = 0, rc; 691 692 /* First loop to get the number of memory domains */ 693 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 694 base_bdev = raid_bdev->base_bdev_info[i].bdev; 695 rc = spdk_bdev_get_memory_domains(base_bdev, NULL, 0); 696 if (rc < 0) { 697 return rc; 698 } 699 domains_count += rc; 700 } 701 702 if (!domains || array_size < domains_count) { 703 return domains_count; 704 } 705 706 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 707 base_bdev = raid_bdev->base_bdev_info[i].bdev; 708 rc = spdk_bdev_get_memory_domains(base_bdev, domains, array_size); 709 if (rc < 0) { 710 return rc; 711 } 712 domains += rc; 713 array_size -= rc; 714 } 715 716 return domains_count; 717 } 718 719 /* g_raid_bdev_fn_table is the function table for raid bdev */ 720 static const struct spdk_bdev_fn_table g_raid_bdev_fn_table = { 721 .destruct = raid_bdev_destruct, 722 .submit_request = raid_bdev_submit_request, 723 .io_type_supported = raid_bdev_io_type_supported, 724 .get_io_channel = raid_bdev_get_io_channel, 725 .dump_info_json = raid_bdev_dump_info_json, 726 .write_config_json = raid_bdev_write_config_json, 727 .get_memory_domains = raid_bdev_get_memory_domains, 728 }; 729 730 /* 731 * brief: 732 * raid_bdev_config_cleanup function is used to free memory for one raid_bdev in configuration 733 * params: 734 * raid_cfg - pointer to raid_bdev_config structure 735 * returns: 736 * none 737 */ 738 void 739 raid_bdev_config_cleanup(struct raid_bdev_config *raid_cfg) 740 { 741 uint8_t i; 742 743 TAILQ_REMOVE(&g_raid_config.raid_bdev_config_head, raid_cfg, link); 744 g_raid_config.total_raid_bdev--; 745 746 if (raid_cfg->base_bdev) { 747 for (i = 0; i < raid_cfg->num_base_bdevs; i++) { 748 free(raid_cfg->base_bdev[i].name); 749 } 750 free(raid_cfg->base_bdev); 751 } 752 free(raid_cfg->name); 753 free(raid_cfg); 754 } 755 756 /* 757 * brief: 758 * raid_bdev_free is the raid bdev function table function pointer. This is 759 * called on bdev free path 760 * params: 761 * none 762 * returns: 763 * none 764 */ 765 static void 766 raid_bdev_free(void) 767 { 768 struct raid_bdev_config *raid_cfg, *tmp; 769 770 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_free\n"); 771 TAILQ_FOREACH_SAFE(raid_cfg, &g_raid_config.raid_bdev_config_head, link, tmp) { 772 raid_bdev_config_cleanup(raid_cfg); 773 } 774 } 775 776 /* brief 777 * raid_bdev_config_find_by_name is a helper function to find raid bdev config 778 * by name as key. 779 * 780 * params: 781 * raid_name - name for raid bdev. 782 */ 783 struct raid_bdev_config * 784 raid_bdev_config_find_by_name(const char *raid_name) 785 { 786 struct raid_bdev_config *raid_cfg; 787 788 TAILQ_FOREACH(raid_cfg, &g_raid_config.raid_bdev_config_head, link) { 789 if (!strcmp(raid_cfg->name, raid_name)) { 790 return raid_cfg; 791 } 792 } 793 794 return raid_cfg; 795 } 796 797 /* 798 * brief 799 * raid_bdev_config_add function adds config for newly created raid bdev. 800 * 801 * params: 802 * raid_name - name for raid bdev. 803 * strip_size - strip size in KB 804 * num_base_bdevs - number of base bdevs. 805 * level - raid level. 806 * _raid_cfg - Pointer to newly added configuration 807 */ 808 int 809 raid_bdev_config_add(const char *raid_name, uint32_t strip_size, uint8_t num_base_bdevs, 810 enum raid_level level, struct raid_bdev_config **_raid_cfg) 811 { 812 struct raid_bdev_config *raid_cfg; 813 814 raid_cfg = raid_bdev_config_find_by_name(raid_name); 815 if (raid_cfg != NULL) { 816 SPDK_ERRLOG("Duplicate raid bdev name found in config file %s\n", 817 raid_name); 818 return -EEXIST; 819 } 820 821 if (spdk_u32_is_pow2(strip_size) == false) { 822 SPDK_ERRLOG("Invalid strip size %" PRIu32 "\n", strip_size); 823 return -EINVAL; 824 } 825 826 if (num_base_bdevs == 0) { 827 SPDK_ERRLOG("Invalid base device count %u\n", num_base_bdevs); 828 return -EINVAL; 829 } 830 831 raid_cfg = calloc(1, sizeof(*raid_cfg)); 832 if (raid_cfg == NULL) { 833 SPDK_ERRLOG("unable to allocate memory\n"); 834 return -ENOMEM; 835 } 836 837 raid_cfg->name = strdup(raid_name); 838 if (!raid_cfg->name) { 839 free(raid_cfg); 840 SPDK_ERRLOG("unable to allocate memory\n"); 841 return -ENOMEM; 842 } 843 raid_cfg->strip_size = strip_size; 844 raid_cfg->num_base_bdevs = num_base_bdevs; 845 raid_cfg->level = level; 846 847 raid_cfg->base_bdev = calloc(num_base_bdevs, sizeof(*raid_cfg->base_bdev)); 848 if (raid_cfg->base_bdev == NULL) { 849 free(raid_cfg->name); 850 free(raid_cfg); 851 SPDK_ERRLOG("unable to allocate memory\n"); 852 return -ENOMEM; 853 } 854 855 TAILQ_INSERT_TAIL(&g_raid_config.raid_bdev_config_head, raid_cfg, link); 856 g_raid_config.total_raid_bdev++; 857 858 *_raid_cfg = raid_cfg; 859 return 0; 860 } 861 862 /* 863 * brief: 864 * raid_bdev_config_add_base_bdev function add base bdev to raid bdev config. 865 * 866 * params: 867 * raid_cfg - pointer to raid bdev configuration 868 * base_bdev_name - name of base bdev 869 * slot - Position to add base bdev 870 */ 871 int 872 raid_bdev_config_add_base_bdev(struct raid_bdev_config *raid_cfg, const char *base_bdev_name, 873 uint8_t slot) 874 { 875 uint8_t i; 876 struct raid_bdev_config *tmp; 877 878 if (slot >= raid_cfg->num_base_bdevs) { 879 return -EINVAL; 880 } 881 882 TAILQ_FOREACH(tmp, &g_raid_config.raid_bdev_config_head, link) { 883 for (i = 0; i < tmp->num_base_bdevs; i++) { 884 if (tmp->base_bdev[i].name != NULL) { 885 if (!strcmp(tmp->base_bdev[i].name, base_bdev_name)) { 886 SPDK_ERRLOG("duplicate base bdev name %s mentioned\n", 887 base_bdev_name); 888 return -EEXIST; 889 } 890 } 891 } 892 } 893 894 raid_cfg->base_bdev[slot].name = strdup(base_bdev_name); 895 if (raid_cfg->base_bdev[slot].name == NULL) { 896 SPDK_ERRLOG("unable to allocate memory\n"); 897 return -ENOMEM; 898 } 899 900 return 0; 901 } 902 903 static struct { 904 const char *name; 905 enum raid_level value; 906 } g_raid_level_names[] = { 907 { "raid0", RAID0 }, 908 { "0", RAID0 }, 909 { "raid5", RAID5 }, 910 { "5", RAID5 }, 911 { "concat", CONCAT }, 912 { } 913 }; 914 915 enum raid_level raid_bdev_parse_raid_level(const char *str) 916 { 917 unsigned int i; 918 919 assert(str != NULL); 920 921 for (i = 0; g_raid_level_names[i].name != NULL; i++) { 922 if (strcasecmp(g_raid_level_names[i].name, str) == 0) { 923 return g_raid_level_names[i].value; 924 } 925 } 926 927 return INVALID_RAID_LEVEL; 928 } 929 930 const char * 931 raid_bdev_level_to_str(enum raid_level level) 932 { 933 unsigned int i; 934 935 for (i = 0; g_raid_level_names[i].name != NULL; i++) { 936 if (g_raid_level_names[i].value == level) { 937 return g_raid_level_names[i].name; 938 } 939 } 940 941 return ""; 942 } 943 944 /* 945 * brief: 946 * raid_bdev_fini_start is called when bdev layer is starting the 947 * shutdown process 948 * params: 949 * none 950 * returns: 951 * none 952 */ 953 static void 954 raid_bdev_fini_start(void) 955 { 956 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_fini_start\n"); 957 g_shutdown_started = true; 958 } 959 960 /* 961 * brief: 962 * raid_bdev_exit is called on raid bdev module exit time by bdev layer 963 * params: 964 * none 965 * returns: 966 * none 967 */ 968 static void 969 raid_bdev_exit(void) 970 { 971 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_exit\n"); 972 raid_bdev_free(); 973 } 974 975 /* 976 * brief: 977 * raid_bdev_get_ctx_size is used to return the context size of bdev_io for raid 978 * module 979 * params: 980 * none 981 * returns: 982 * size of spdk_bdev_io context for raid 983 */ 984 static int 985 raid_bdev_get_ctx_size(void) 986 { 987 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_get_ctx_size\n"); 988 return sizeof(struct raid_bdev_io); 989 } 990 991 /* 992 * brief: 993 * raid_bdev_can_claim_bdev is the function to check if this base_bdev can be 994 * claimed by raid bdev or not. 995 * params: 996 * bdev_name - represents base bdev name 997 * _raid_cfg - pointer to raid bdev config parsed from config file 998 * base_bdev_slot - if bdev can be claimed, it represents the base_bdev correct 999 * slot. This field is only valid if return value of this function is true 1000 * returns: 1001 * true - if bdev can be claimed 1002 * false - if bdev can't be claimed 1003 */ 1004 static bool 1005 raid_bdev_can_claim_bdev(const char *bdev_name, struct raid_bdev_config **_raid_cfg, 1006 uint8_t *base_bdev_slot) 1007 { 1008 struct raid_bdev_config *raid_cfg; 1009 uint8_t i; 1010 1011 TAILQ_FOREACH(raid_cfg, &g_raid_config.raid_bdev_config_head, link) { 1012 for (i = 0; i < raid_cfg->num_base_bdevs; i++) { 1013 /* 1014 * Check if the base bdev name is part of raid bdev configuration. 1015 * If match is found then return true and the slot information where 1016 * this base bdev should be inserted in raid bdev 1017 */ 1018 if (!strcmp(bdev_name, raid_cfg->base_bdev[i].name)) { 1019 *_raid_cfg = raid_cfg; 1020 *base_bdev_slot = i; 1021 return true; 1022 } 1023 } 1024 } 1025 1026 return false; 1027 } 1028 1029 1030 static struct spdk_bdev_module g_raid_if = { 1031 .name = "raid", 1032 .module_init = raid_bdev_init, 1033 .fini_start = raid_bdev_fini_start, 1034 .module_fini = raid_bdev_exit, 1035 .get_ctx_size = raid_bdev_get_ctx_size, 1036 .examine_config = raid_bdev_examine, 1037 .async_init = false, 1038 .async_fini = false, 1039 }; 1040 SPDK_BDEV_MODULE_REGISTER(raid, &g_raid_if) 1041 1042 /* 1043 * brief: 1044 * raid_bdev_init is the initialization function for raid bdev module 1045 * params: 1046 * none 1047 * returns: 1048 * 0 - success 1049 * non zero - failure 1050 */ 1051 static int 1052 raid_bdev_init(void) 1053 { 1054 return 0; 1055 } 1056 1057 /* 1058 * brief: 1059 * raid_bdev_create allocates raid bdev based on passed configuration 1060 * params: 1061 * raid_cfg - configuration of raid bdev 1062 * returns: 1063 * 0 - success 1064 * non zero - failure 1065 */ 1066 int 1067 raid_bdev_create(struct raid_bdev_config *raid_cfg) 1068 { 1069 struct raid_bdev *raid_bdev; 1070 struct spdk_bdev *raid_bdev_gen; 1071 struct raid_bdev_module *module; 1072 1073 module = raid_bdev_module_find(raid_cfg->level); 1074 if (module == NULL) { 1075 SPDK_ERRLOG("Unsupported raid level '%d'\n", raid_cfg->level); 1076 return -EINVAL; 1077 } 1078 1079 assert(module->base_bdevs_min != 0); 1080 if (raid_cfg->num_base_bdevs < module->base_bdevs_min) { 1081 SPDK_ERRLOG("At least %u base devices required for %s\n", 1082 module->base_bdevs_min, 1083 raid_bdev_level_to_str(raid_cfg->level)); 1084 return -EINVAL; 1085 } 1086 1087 raid_bdev = calloc(1, sizeof(*raid_bdev)); 1088 if (!raid_bdev) { 1089 SPDK_ERRLOG("Unable to allocate memory for raid bdev\n"); 1090 return -ENOMEM; 1091 } 1092 1093 raid_bdev->module = module; 1094 raid_bdev->num_base_bdevs = raid_cfg->num_base_bdevs; 1095 raid_bdev->base_bdev_info = calloc(raid_bdev->num_base_bdevs, 1096 sizeof(struct raid_base_bdev_info)); 1097 if (!raid_bdev->base_bdev_info) { 1098 SPDK_ERRLOG("Unable able to allocate base bdev info\n"); 1099 free(raid_bdev); 1100 return -ENOMEM; 1101 } 1102 1103 /* strip_size_kb is from the rpc param. strip_size is in blocks and used 1104 * internally and set later. 1105 */ 1106 raid_bdev->strip_size = 0; 1107 raid_bdev->strip_size_kb = raid_cfg->strip_size; 1108 raid_bdev->state = RAID_BDEV_STATE_CONFIGURING; 1109 raid_bdev->config = raid_cfg; 1110 raid_bdev->level = raid_cfg->level; 1111 1112 raid_bdev_gen = &raid_bdev->bdev; 1113 1114 raid_bdev_gen->name = strdup(raid_cfg->name); 1115 if (!raid_bdev_gen->name) { 1116 SPDK_ERRLOG("Unable to allocate name for raid\n"); 1117 free(raid_bdev->base_bdev_info); 1118 free(raid_bdev); 1119 return -ENOMEM; 1120 } 1121 1122 raid_bdev_gen->product_name = "Raid Volume"; 1123 raid_bdev_gen->ctxt = raid_bdev; 1124 raid_bdev_gen->fn_table = &g_raid_bdev_fn_table; 1125 raid_bdev_gen->module = &g_raid_if; 1126 raid_bdev_gen->write_cache = 0; 1127 1128 TAILQ_INSERT_TAIL(&g_raid_bdev_configuring_list, raid_bdev, state_link); 1129 TAILQ_INSERT_TAIL(&g_raid_bdev_list, raid_bdev, global_link); 1130 1131 raid_cfg->raid_bdev = raid_bdev; 1132 1133 return 0; 1134 } 1135 1136 /* 1137 * brief 1138 * raid_bdev_alloc_base_bdev_resource allocates resource of base bdev. 1139 * params: 1140 * raid_bdev - pointer to raid bdev 1141 * bdev_name - base bdev name 1142 * base_bdev_slot - position to add base bdev 1143 * returns: 1144 * 0 - success 1145 * non zero - failure 1146 */ 1147 static int 1148 raid_bdev_alloc_base_bdev_resource(struct raid_bdev *raid_bdev, const char *bdev_name, 1149 uint8_t base_bdev_slot) 1150 { 1151 struct spdk_bdev_desc *desc; 1152 struct spdk_bdev *bdev; 1153 int rc; 1154 1155 rc = spdk_bdev_open_ext(bdev_name, true, raid_bdev_event_base_bdev, NULL, &desc); 1156 if (rc != 0) { 1157 if (rc != -ENODEV) { 1158 SPDK_ERRLOG("Unable to create desc on bdev '%s'\n", bdev_name); 1159 } 1160 return rc; 1161 } 1162 1163 bdev = spdk_bdev_desc_get_bdev(desc); 1164 1165 rc = spdk_bdev_module_claim_bdev(bdev, NULL, &g_raid_if); 1166 if (rc != 0) { 1167 SPDK_ERRLOG("Unable to claim this bdev as it is already claimed\n"); 1168 spdk_bdev_close(desc); 1169 return rc; 1170 } 1171 1172 SPDK_DEBUGLOG(bdev_raid, "bdev %s is claimed\n", bdev_name); 1173 1174 assert(raid_bdev->state != RAID_BDEV_STATE_ONLINE); 1175 assert(base_bdev_slot < raid_bdev->num_base_bdevs); 1176 1177 raid_bdev->base_bdev_info[base_bdev_slot].thread = spdk_get_thread(); 1178 raid_bdev->base_bdev_info[base_bdev_slot].bdev = bdev; 1179 raid_bdev->base_bdev_info[base_bdev_slot].desc = desc; 1180 raid_bdev->num_base_bdevs_discovered++; 1181 assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs); 1182 1183 return 0; 1184 } 1185 1186 /* 1187 * brief: 1188 * If raid bdev config is complete, then only register the raid bdev to 1189 * bdev layer and remove this raid bdev from configuring list and 1190 * insert the raid bdev to configured list 1191 * params: 1192 * raid_bdev - pointer to raid bdev 1193 * returns: 1194 * 0 - success 1195 * non zero - failure 1196 */ 1197 static int 1198 raid_bdev_configure(struct raid_bdev *raid_bdev) 1199 { 1200 uint32_t blocklen = 0; 1201 struct spdk_bdev *raid_bdev_gen; 1202 struct raid_base_bdev_info *base_info; 1203 int rc = 0; 1204 1205 assert(raid_bdev->state == RAID_BDEV_STATE_CONFIGURING); 1206 assert(raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs); 1207 1208 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1209 /* Check blocklen for all base bdevs that it should be same */ 1210 if (blocklen == 0) { 1211 blocklen = base_info->bdev->blocklen; 1212 } else if (blocklen != base_info->bdev->blocklen) { 1213 /* 1214 * Assumption is that all the base bdevs for any raid bdev should 1215 * have same blocklen 1216 */ 1217 SPDK_ERRLOG("Blocklen of various bdevs not matching\n"); 1218 return -EINVAL; 1219 } 1220 } 1221 assert(blocklen > 0); 1222 1223 /* The strip_size_kb is read in from user in KB. Convert to blocks here for 1224 * internal use. 1225 */ 1226 raid_bdev->strip_size = (raid_bdev->strip_size_kb * 1024) / blocklen; 1227 raid_bdev->strip_size_shift = spdk_u32log2(raid_bdev->strip_size); 1228 raid_bdev->blocklen_shift = spdk_u32log2(blocklen); 1229 1230 raid_bdev_gen = &raid_bdev->bdev; 1231 raid_bdev_gen->blocklen = blocklen; 1232 1233 rc = raid_bdev->module->start(raid_bdev); 1234 if (rc != 0) { 1235 SPDK_ERRLOG("raid module startup callback failed\n"); 1236 return rc; 1237 } 1238 raid_bdev->state = RAID_BDEV_STATE_ONLINE; 1239 SPDK_DEBUGLOG(bdev_raid, "io device register %p\n", raid_bdev); 1240 SPDK_DEBUGLOG(bdev_raid, "blockcnt %" PRIu64 ", blocklen %u\n", 1241 raid_bdev_gen->blockcnt, raid_bdev_gen->blocklen); 1242 spdk_io_device_register(raid_bdev, raid_bdev_create_cb, raid_bdev_destroy_cb, 1243 sizeof(struct raid_bdev_io_channel), 1244 raid_bdev->bdev.name); 1245 rc = spdk_bdev_register(raid_bdev_gen); 1246 if (rc != 0) { 1247 SPDK_ERRLOG("Unable to register raid bdev and stay at configuring state\n"); 1248 if (raid_bdev->module->stop != NULL) { 1249 raid_bdev->module->stop(raid_bdev); 1250 } 1251 spdk_io_device_unregister(raid_bdev, NULL); 1252 raid_bdev->state = RAID_BDEV_STATE_CONFIGURING; 1253 return rc; 1254 } 1255 SPDK_DEBUGLOG(bdev_raid, "raid bdev generic %p\n", raid_bdev_gen); 1256 TAILQ_REMOVE(&g_raid_bdev_configuring_list, raid_bdev, state_link); 1257 TAILQ_INSERT_TAIL(&g_raid_bdev_configured_list, raid_bdev, state_link); 1258 SPDK_DEBUGLOG(bdev_raid, "raid bdev is created with name %s, raid_bdev %p\n", 1259 raid_bdev_gen->name, raid_bdev); 1260 1261 return 0; 1262 } 1263 1264 /* 1265 * brief: 1266 * If raid bdev is online and registered, change the bdev state to 1267 * configuring and unregister this raid device. Queue this raid device 1268 * in configuring list 1269 * params: 1270 * raid_bdev - pointer to raid bdev 1271 * cb_fn - callback function 1272 * cb_arg - argument to callback function 1273 * returns: 1274 * none 1275 */ 1276 static void 1277 raid_bdev_deconfigure(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn, 1278 void *cb_arg) 1279 { 1280 if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) { 1281 if (cb_fn) { 1282 cb_fn(cb_arg, 0); 1283 } 1284 return; 1285 } 1286 1287 assert(raid_bdev->num_base_bdevs == raid_bdev->num_base_bdevs_discovered); 1288 TAILQ_REMOVE(&g_raid_bdev_configured_list, raid_bdev, state_link); 1289 raid_bdev->state = RAID_BDEV_STATE_OFFLINE; 1290 assert(raid_bdev->num_base_bdevs_discovered); 1291 TAILQ_INSERT_TAIL(&g_raid_bdev_offline_list, raid_bdev, state_link); 1292 SPDK_DEBUGLOG(bdev_raid, "raid bdev state changing from online to offline\n"); 1293 1294 spdk_bdev_unregister(&raid_bdev->bdev, cb_fn, cb_arg); 1295 } 1296 1297 /* 1298 * brief: 1299 * raid_bdev_find_by_base_bdev function finds the raid bdev which has 1300 * claimed the base bdev. 1301 * params: 1302 * base_bdev - pointer to base bdev pointer 1303 * _raid_bdev - Reference to pointer to raid bdev 1304 * _base_info - Reference to the raid base bdev info. 1305 * returns: 1306 * true - if the raid bdev is found. 1307 * false - if the raid bdev is not found. 1308 */ 1309 static bool 1310 raid_bdev_find_by_base_bdev(struct spdk_bdev *base_bdev, struct raid_bdev **_raid_bdev, 1311 struct raid_base_bdev_info **_base_info) 1312 { 1313 struct raid_bdev *raid_bdev; 1314 struct raid_base_bdev_info *base_info; 1315 1316 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { 1317 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1318 if (base_info->bdev == base_bdev) { 1319 *_raid_bdev = raid_bdev; 1320 *_base_info = base_info; 1321 return true; 1322 } 1323 } 1324 } 1325 1326 return false; 1327 } 1328 1329 /* 1330 * brief: 1331 * raid_bdev_remove_base_bdev function is called by below layers when base_bdev 1332 * is removed. This function checks if this base bdev is part of any raid bdev 1333 * or not. If yes, it takes necessary action on that particular raid bdev. 1334 * params: 1335 * base_bdev - pointer to base bdev pointer which got removed 1336 * returns: 1337 * none 1338 */ 1339 static void 1340 raid_bdev_remove_base_bdev(struct spdk_bdev *base_bdev) 1341 { 1342 struct raid_bdev *raid_bdev = NULL; 1343 struct raid_base_bdev_info *base_info; 1344 1345 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_remove_base_bdev\n"); 1346 1347 /* Find the raid_bdev which has claimed this base_bdev */ 1348 if (!raid_bdev_find_by_base_bdev(base_bdev, &raid_bdev, &base_info)) { 1349 SPDK_ERRLOG("bdev to remove '%s' not found\n", base_bdev->name); 1350 return; 1351 } 1352 1353 assert(base_info->desc); 1354 base_info->remove_scheduled = true; 1355 1356 if (raid_bdev->destruct_called == true || 1357 raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) { 1358 /* 1359 * As raid bdev is not registered yet or already unregistered, 1360 * so cleanup should be done here itself. 1361 */ 1362 raid_bdev_free_base_bdev_resource(raid_bdev, base_info); 1363 if (raid_bdev->num_base_bdevs_discovered == 0) { 1364 /* There is no base bdev for this raid, so free the raid device. */ 1365 raid_bdev_cleanup(raid_bdev); 1366 return; 1367 } 1368 } 1369 1370 raid_bdev_deconfigure(raid_bdev, NULL, NULL); 1371 } 1372 1373 /* 1374 * brief: 1375 * raid_bdev_event_base_bdev function is called by below layers when base_bdev 1376 * triggers asynchronous event. 1377 * params: 1378 * type - event details. 1379 * bdev - bdev that triggered event. 1380 * event_ctx - context for event. 1381 * returns: 1382 * none 1383 */ 1384 static void 1385 raid_bdev_event_base_bdev(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, 1386 void *event_ctx) 1387 { 1388 switch (type) { 1389 case SPDK_BDEV_EVENT_REMOVE: 1390 raid_bdev_remove_base_bdev(bdev); 1391 break; 1392 default: 1393 SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type); 1394 break; 1395 } 1396 } 1397 1398 /* 1399 * brief: 1400 * Remove base bdevs from the raid bdev one by one. Skip any base bdev which 1401 * doesn't exist. 1402 * params: 1403 * raid_cfg - pointer to raid bdev config. 1404 * cb_fn - callback function 1405 * cb_ctx - argument to callback function 1406 */ 1407 void 1408 raid_bdev_remove_base_devices(struct raid_bdev_config *raid_cfg, 1409 raid_bdev_destruct_cb cb_fn, void *cb_arg) 1410 { 1411 struct raid_bdev *raid_bdev; 1412 struct raid_base_bdev_info *base_info; 1413 1414 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_remove_base_devices\n"); 1415 1416 raid_bdev = raid_cfg->raid_bdev; 1417 if (raid_bdev == NULL) { 1418 SPDK_DEBUGLOG(bdev_raid, "raid bdev %s doesn't exist now\n", raid_cfg->name); 1419 if (cb_fn) { 1420 cb_fn(cb_arg, 0); 1421 } 1422 return; 1423 } 1424 1425 if (raid_bdev->destroy_started) { 1426 SPDK_DEBUGLOG(bdev_raid, "destroying raid bdev %s is already started\n", 1427 raid_cfg->name); 1428 if (cb_fn) { 1429 cb_fn(cb_arg, -EALREADY); 1430 } 1431 return; 1432 } 1433 1434 raid_bdev->destroy_started = true; 1435 1436 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1437 if (base_info->bdev == NULL) { 1438 continue; 1439 } 1440 1441 assert(base_info->desc); 1442 base_info->remove_scheduled = true; 1443 1444 if (raid_bdev->destruct_called == true || 1445 raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) { 1446 /* 1447 * As raid bdev is not registered yet or already unregistered, 1448 * so cleanup should be done here itself. 1449 */ 1450 raid_bdev_free_base_bdev_resource(raid_bdev, base_info); 1451 if (raid_bdev->num_base_bdevs_discovered == 0) { 1452 /* There is no base bdev for this raid, so free the raid device. */ 1453 raid_bdev_cleanup(raid_bdev); 1454 if (cb_fn) { 1455 cb_fn(cb_arg, 0); 1456 } 1457 return; 1458 } 1459 } 1460 } 1461 1462 raid_bdev_deconfigure(raid_bdev, cb_fn, cb_arg); 1463 } 1464 1465 /* 1466 * brief: 1467 * raid_bdev_add_base_device function is the actual function which either adds 1468 * the nvme base device to existing raid bdev or create a new raid bdev. It also claims 1469 * the base device and keep the open descriptor. 1470 * params: 1471 * raid_cfg - pointer to raid bdev config 1472 * bdev - pointer to base bdev 1473 * base_bdev_slot - position to add base bdev 1474 * returns: 1475 * 0 - success 1476 * non zero - failure 1477 */ 1478 static int 1479 raid_bdev_add_base_device(struct raid_bdev_config *raid_cfg, const char *bdev_name, 1480 uint8_t base_bdev_slot) 1481 { 1482 struct raid_bdev *raid_bdev; 1483 int rc; 1484 1485 raid_bdev = raid_cfg->raid_bdev; 1486 if (!raid_bdev) { 1487 SPDK_ERRLOG("Raid bdev '%s' is not created yet\n", raid_cfg->name); 1488 return -ENODEV; 1489 } 1490 1491 rc = raid_bdev_alloc_base_bdev_resource(raid_bdev, bdev_name, base_bdev_slot); 1492 if (rc != 0) { 1493 if (rc != -ENODEV) { 1494 SPDK_ERRLOG("Failed to allocate resource for bdev '%s'\n", bdev_name); 1495 } 1496 return rc; 1497 } 1498 1499 assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs); 1500 1501 if (raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs) { 1502 rc = raid_bdev_configure(raid_bdev); 1503 if (rc != 0) { 1504 SPDK_ERRLOG("Failed to configure raid bdev\n"); 1505 return rc; 1506 } 1507 } 1508 1509 return 0; 1510 } 1511 1512 /* 1513 * brief: 1514 * Add base bdevs to the raid bdev one by one. Skip any base bdev which doesn't 1515 * exist or fails to add. If all base bdevs are successfully added, the raid bdev 1516 * moves to the configured state and becomes available. Otherwise, the raid bdev 1517 * stays at the configuring state with added base bdevs. 1518 * params: 1519 * raid_cfg - pointer to raid bdev config 1520 * returns: 1521 * 0 - The raid bdev moves to the configured state or stays at the configuring 1522 * state with added base bdevs due to any nonexistent base bdev. 1523 * non zero - Failed to add any base bdev and stays at the configuring state with 1524 * added base bdevs. 1525 */ 1526 int 1527 raid_bdev_add_base_devices(struct raid_bdev_config *raid_cfg) 1528 { 1529 uint8_t i; 1530 int rc = 0, _rc; 1531 1532 for (i = 0; i < raid_cfg->num_base_bdevs; i++) { 1533 _rc = raid_bdev_add_base_device(raid_cfg, raid_cfg->base_bdev[i].name, i); 1534 if (_rc == -ENODEV) { 1535 SPDK_DEBUGLOG(bdev_raid, "base bdev %s doesn't exist now\n", 1536 raid_cfg->base_bdev[i].name); 1537 } else if (_rc != 0) { 1538 SPDK_ERRLOG("Failed to add base bdev %s to RAID bdev %s: %s\n", 1539 raid_cfg->base_bdev[i].name, raid_cfg->name, 1540 spdk_strerror(-_rc)); 1541 if (rc == 0) { 1542 rc = _rc; 1543 } 1544 } 1545 } 1546 1547 return rc; 1548 } 1549 1550 /* 1551 * brief: 1552 * raid_bdev_examine function is the examine function call by the below layers 1553 * like bdev_nvme layer. This function will check if this base bdev can be 1554 * claimed by this raid bdev or not. 1555 * params: 1556 * bdev - pointer to base bdev 1557 * returns: 1558 * none 1559 */ 1560 static void 1561 raid_bdev_examine(struct spdk_bdev *bdev) 1562 { 1563 struct raid_bdev_config *raid_cfg; 1564 uint8_t base_bdev_slot; 1565 1566 if (raid_bdev_can_claim_bdev(bdev->name, &raid_cfg, &base_bdev_slot)) { 1567 raid_bdev_add_base_device(raid_cfg, bdev->name, base_bdev_slot); 1568 } else { 1569 SPDK_DEBUGLOG(bdev_raid, "bdev %s can't be claimed\n", 1570 bdev->name); 1571 } 1572 1573 spdk_bdev_module_examine_done(&g_raid_if); 1574 } 1575 1576 /* Log component for bdev raid bdev module */ 1577 SPDK_LOG_REGISTER_COMPONENT(bdev_raid) 1578