1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (c) Intel Corporation. 3 * All rights reserved. 4 * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 5 */ 6 7 #include "bdev_raid.h" 8 #include "spdk/env.h" 9 #include "spdk/thread.h" 10 #include "spdk/log.h" 11 #include "spdk/string.h" 12 #include "spdk/util.h" 13 #include "spdk/json.h" 14 #include "spdk/string.h" 15 16 static bool g_shutdown_started = false; 17 18 /* raid bdev config as read from config file */ 19 struct raid_config g_raid_config = { 20 .raid_bdev_config_head = TAILQ_HEAD_INITIALIZER(g_raid_config.raid_bdev_config_head), 21 }; 22 23 /* 24 * List of raid bdev in configured list, these raid bdevs are registered with 25 * bdev layer 26 */ 27 struct raid_configured_tailq g_raid_bdev_configured_list = TAILQ_HEAD_INITIALIZER( 28 g_raid_bdev_configured_list); 29 30 /* List of raid bdev in configuring list */ 31 struct raid_configuring_tailq g_raid_bdev_configuring_list = TAILQ_HEAD_INITIALIZER( 32 g_raid_bdev_configuring_list); 33 34 /* List of all raid bdevs */ 35 struct raid_all_tailq g_raid_bdev_list = TAILQ_HEAD_INITIALIZER(g_raid_bdev_list); 36 37 /* List of all raid bdevs that are offline */ 38 struct raid_offline_tailq g_raid_bdev_offline_list = TAILQ_HEAD_INITIALIZER( 39 g_raid_bdev_offline_list); 40 41 static TAILQ_HEAD(, raid_bdev_module) g_raid_modules = TAILQ_HEAD_INITIALIZER(g_raid_modules); 42 43 static struct raid_bdev_module *raid_bdev_module_find(enum raid_level level) 44 { 45 struct raid_bdev_module *raid_module; 46 47 TAILQ_FOREACH(raid_module, &g_raid_modules, link) { 48 if (raid_module->level == level) { 49 return raid_module; 50 } 51 } 52 53 return NULL; 54 } 55 56 void raid_bdev_module_list_add(struct raid_bdev_module *raid_module) 57 { 58 if (raid_bdev_module_find(raid_module->level) != NULL) { 59 SPDK_ERRLOG("module for raid level '%s' already registered.\n", 60 raid_bdev_level_to_str(raid_module->level)); 61 assert(false); 62 } else { 63 
TAILQ_INSERT_TAIL(&g_raid_modules, raid_module, link); 64 } 65 } 66 67 /* Function declarations */ 68 static void raid_bdev_examine(struct spdk_bdev *bdev); 69 static int raid_bdev_init(void); 70 static void raid_bdev_deconfigure(struct raid_bdev *raid_bdev, 71 raid_bdev_destruct_cb cb_fn, void *cb_arg); 72 static void raid_bdev_event_base_bdev(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, 73 void *event_ctx); 74 75 /* 76 * brief: 77 * raid_bdev_create_cb function is a cb function for raid bdev which creates the 78 * hierarchy from raid bdev to base bdev io channels. It will be called per core 79 * params: 80 * io_device - pointer to raid bdev io device represented by raid_bdev 81 * ctx_buf - pointer to context buffer for raid bdev io channel 82 * returns: 83 * 0 - success 84 * non zero - failure 85 */ 86 static int 87 raid_bdev_create_cb(void *io_device, void *ctx_buf) 88 { 89 struct raid_bdev *raid_bdev = io_device; 90 struct raid_bdev_io_channel *raid_ch = ctx_buf; 91 uint8_t i; 92 93 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_create_cb, %p\n", raid_ch); 94 95 assert(raid_bdev != NULL); 96 assert(raid_bdev->state == RAID_BDEV_STATE_ONLINE); 97 98 raid_ch->num_channels = raid_bdev->num_base_bdevs; 99 100 raid_ch->base_channel = calloc(raid_ch->num_channels, 101 sizeof(struct spdk_io_channel *)); 102 if (!raid_ch->base_channel) { 103 SPDK_ERRLOG("Unable to allocate base bdevs io channel\n"); 104 return -ENOMEM; 105 } 106 for (i = 0; i < raid_ch->num_channels; i++) { 107 /* 108 * Get the spdk_io_channel for all the base bdevs. This is used during 109 * split logic to send the respective child bdev ios to respective base 110 * bdev io channel. 
111 */ 112 raid_ch->base_channel[i] = spdk_bdev_get_io_channel( 113 raid_bdev->base_bdev_info[i].desc); 114 if (!raid_ch->base_channel[i]) { 115 uint8_t j; 116 117 for (j = 0; j < i; j++) { 118 spdk_put_io_channel(raid_ch->base_channel[j]); 119 } 120 free(raid_ch->base_channel); 121 raid_ch->base_channel = NULL; 122 SPDK_ERRLOG("Unable to create io channel for base bdev\n"); 123 return -ENOMEM; 124 } 125 } 126 127 return 0; 128 } 129 130 /* 131 * brief: 132 * raid_bdev_destroy_cb function is a cb function for raid bdev which deletes the 133 * hierarchy from raid bdev to base bdev io channels. It will be called per core 134 * params: 135 * io_device - pointer to raid bdev io device represented by raid_bdev 136 * ctx_buf - pointer to context buffer for raid bdev io channel 137 * returns: 138 * none 139 */ 140 static void 141 raid_bdev_destroy_cb(void *io_device, void *ctx_buf) 142 { 143 struct raid_bdev_io_channel *raid_ch = ctx_buf; 144 uint8_t i; 145 146 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_destroy_cb\n"); 147 148 assert(raid_ch != NULL); 149 assert(raid_ch->base_channel); 150 for (i = 0; i < raid_ch->num_channels; i++) { 151 /* Free base bdev channels */ 152 assert(raid_ch->base_channel[i] != NULL); 153 spdk_put_io_channel(raid_ch->base_channel[i]); 154 } 155 free(raid_ch->base_channel); 156 raid_ch->base_channel = NULL; 157 } 158 159 /* 160 * brief: 161 * raid_bdev_cleanup is used to cleanup and free raid_bdev related data 162 * structures. 
163 * params: 164 * raid_bdev - pointer to raid_bdev 165 * returns: 166 * none 167 */ 168 static void 169 raid_bdev_cleanup(struct raid_bdev *raid_bdev) 170 { 171 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_cleanup, %p name %s, state %u, config %p\n", 172 raid_bdev, 173 raid_bdev->bdev.name, raid_bdev->state, raid_bdev->config); 174 if (raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) { 175 TAILQ_REMOVE(&g_raid_bdev_configuring_list, raid_bdev, state_link); 176 } else if (raid_bdev->state == RAID_BDEV_STATE_OFFLINE) { 177 TAILQ_REMOVE(&g_raid_bdev_offline_list, raid_bdev, state_link); 178 } else { 179 assert(0); 180 } 181 TAILQ_REMOVE(&g_raid_bdev_list, raid_bdev, global_link); 182 free(raid_bdev->bdev.name); 183 free(raid_bdev->base_bdev_info); 184 if (raid_bdev->config) { 185 raid_bdev->config->raid_bdev = NULL; 186 } 187 free(raid_bdev); 188 } 189 190 /* 191 * brief: 192 * wrapper for the bdev close operation 193 * params: 194 * base_info - raid base bdev info 195 * returns: 196 */ 197 static void 198 _raid_bdev_free_base_bdev_resource(void *ctx) 199 { 200 struct spdk_bdev_desc *desc = ctx; 201 202 spdk_bdev_close(desc); 203 } 204 205 206 /* 207 * brief: 208 * free resource of base bdev for raid bdev 209 * params: 210 * raid_bdev - pointer to raid bdev 211 * base_info - raid base bdev info 212 * returns: 213 * 0 - success 214 * non zero - failure 215 */ 216 static void 217 raid_bdev_free_base_bdev_resource(struct raid_bdev *raid_bdev, 218 struct raid_base_bdev_info *base_info) 219 { 220 spdk_bdev_module_release_bdev(base_info->bdev); 221 if (base_info->thread && base_info->thread != spdk_get_thread()) { 222 spdk_thread_send_msg(base_info->thread, _raid_bdev_free_base_bdev_resource, base_info->desc); 223 } else { 224 spdk_bdev_close(base_info->desc); 225 } 226 base_info->desc = NULL; 227 base_info->bdev = NULL; 228 229 assert(raid_bdev->num_base_bdevs_discovered); 230 raid_bdev->num_base_bdevs_discovered--; 231 } 232 233 /* 234 * brief: 235 * raid_bdev_destruct is the 
* destruct function table pointer for raid bdev
 * params:
 * ctxt - pointer to raid_bdev
 * returns:
 * 0 - always; no failure path exists in this function
 */
static int
raid_bdev_destruct(void *ctxt)
{
	struct raid_bdev *raid_bdev = ctxt;
	struct raid_base_bdev_info *base_info;

	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_destruct\n");

	raid_bdev->destruct_called = true;
	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		/*
		 * Close all base bdev descriptors for which call has come from below
		 * layers.  Also close the descriptors if we have started shutdown.
		 *
		 * NOTE(review): on the shutdown path base_info->bdev is not checked
		 * for NULL before the release helper is called - presumably every
		 * slot is populated whenever destruct runs; confirm against callers.
		 */
		if (g_shutdown_started ||
		    ((base_info->remove_scheduled == true) &&
		     (base_info->bdev != NULL))) {
			raid_bdev_free_base_bdev_resource(raid_bdev, base_info);
		}
	}

	/* During shutdown the raid bdev moves from the configured to the offline list */
	if (g_shutdown_started) {
		TAILQ_REMOVE(&g_raid_bdev_configured_list, raid_bdev, state_link);
		raid_bdev->state = RAID_BDEV_STATE_OFFLINE;
		TAILQ_INSERT_TAIL(&g_raid_bdev_offline_list, raid_bdev, state_link);
	}

	/* The stop hook is optional for a raid level module */
	if (raid_bdev->module->stop != NULL) {
		raid_bdev->module->stop(raid_bdev);
	}

	spdk_io_device_unregister(raid_bdev, NULL);

	if (raid_bdev->num_base_bdevs_discovered == 0) {
		/* Free raid_bdev when there are no base bdevs left */
		SPDK_DEBUGLOG(bdev_raid, "raid bdev base bdevs is 0, going to free all in destruct\n");
		/* raid_bdev is freed by this call and must not be used afterwards */
		raid_bdev_cleanup(raid_bdev);
	}

	return 0;
}

/*
 * brief:
 * raid_bdev_io_complete completes the bdev_io that embeds raid_io as its
 * driver context with the given status
 * params:
 * raid_io - pointer to raid_bdev_io
 * status - completion status reported for the parent bdev_io
 * returns:
 * none
 */
void
raid_bdev_io_complete(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status)
{
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);

	spdk_bdev_io_complete(bdev_io, status);
}

/*
 * brief:
 * raid_bdev_io_complete_part - signal the completion of a part of the expected
 * base bdev IOs and complete the raid_io if this is the final expected IO.
 * The caller should first set raid_io->base_bdev_io_remaining.
This function 297 * will decrement this counter by the value of the 'completed' parameter and 298 * complete the raid_io if the counter reaches 0. The caller is free to 299 * interpret the 'base_bdev_io_remaining' and 'completed' values as needed, 300 * it can represent e.g. blocks or IOs. 301 * params: 302 * raid_io - pointer to raid_bdev_io 303 * completed - the part of the raid_io that has been completed 304 * status - status of the base IO 305 * returns: 306 * true - if the raid_io is completed 307 * false - otherwise 308 */ 309 bool 310 raid_bdev_io_complete_part(struct raid_bdev_io *raid_io, uint64_t completed, 311 enum spdk_bdev_io_status status) 312 { 313 assert(raid_io->base_bdev_io_remaining >= completed); 314 raid_io->base_bdev_io_remaining -= completed; 315 316 if (status != SPDK_BDEV_IO_STATUS_SUCCESS) { 317 raid_io->base_bdev_io_status = status; 318 } 319 320 if (raid_io->base_bdev_io_remaining == 0) { 321 raid_bdev_io_complete(raid_io, raid_io->base_bdev_io_status); 322 return true; 323 } else { 324 return false; 325 } 326 } 327 328 /* 329 * brief: 330 * raid_bdev_queue_io_wait function processes the IO which failed to submit. 331 * It will try to queue the IOs after storing the context to bdev wait queue logic. 
332 * params: 333 * raid_io - pointer to raid_bdev_io 334 * bdev - the block device that the IO is submitted to 335 * ch - io channel 336 * cb_fn - callback when the spdk_bdev_io for bdev becomes available 337 * returns: 338 * none 339 */ 340 void 341 raid_bdev_queue_io_wait(struct raid_bdev_io *raid_io, struct spdk_bdev *bdev, 342 struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn) 343 { 344 raid_io->waitq_entry.bdev = bdev; 345 raid_io->waitq_entry.cb_fn = cb_fn; 346 raid_io->waitq_entry.cb_arg = raid_io; 347 spdk_bdev_queue_io_wait(bdev, ch, &raid_io->waitq_entry); 348 } 349 350 static void 351 raid_base_bdev_reset_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 352 { 353 struct raid_bdev_io *raid_io = cb_arg; 354 355 spdk_bdev_free_io(bdev_io); 356 357 raid_bdev_io_complete_part(raid_io, 1, success ? 358 SPDK_BDEV_IO_STATUS_SUCCESS : 359 SPDK_BDEV_IO_STATUS_FAILED); 360 } 361 362 static void 363 raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io); 364 365 static void 366 _raid_bdev_submit_reset_request(void *_raid_io) 367 { 368 struct raid_bdev_io *raid_io = _raid_io; 369 370 raid_bdev_submit_reset_request(raid_io); 371 } 372 373 /* 374 * brief: 375 * raid_bdev_submit_reset_request function submits reset requests 376 * to member disks; it will submit as many as possible unless a reset fails with -ENOMEM, in 377 * which case it will queue it for later submission 378 * params: 379 * raid_io 380 * returns: 381 * none 382 */ 383 static void 384 raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io) 385 { 386 struct raid_bdev *raid_bdev; 387 int ret; 388 uint8_t i; 389 struct raid_base_bdev_info *base_info; 390 struct spdk_io_channel *base_ch; 391 392 raid_bdev = raid_io->raid_bdev; 393 394 if (raid_io->base_bdev_io_remaining == 0) { 395 raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs; 396 } 397 398 while (raid_io->base_bdev_io_submitted < raid_bdev->num_base_bdevs) { 399 i = raid_io->base_bdev_io_submitted; 400 
base_info = &raid_bdev->base_bdev_info[i]; 401 base_ch = raid_io->raid_ch->base_channel[i]; 402 ret = spdk_bdev_reset(base_info->desc, base_ch, 403 raid_base_bdev_reset_complete, raid_io); 404 if (ret == 0) { 405 raid_io->base_bdev_io_submitted++; 406 } else if (ret == -ENOMEM) { 407 raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch, 408 _raid_bdev_submit_reset_request); 409 return; 410 } else { 411 SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); 412 assert(false); 413 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 414 return; 415 } 416 } 417 } 418 419 /* 420 * brief: 421 * Callback function to spdk_bdev_io_get_buf. 422 * params: 423 * ch - pointer to raid bdev io channel 424 * bdev_io - pointer to parent bdev_io on raid bdev device 425 * success - True if buffer is allocated or false otherwise. 426 * returns: 427 * none 428 */ 429 static void 430 raid_bdev_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, 431 bool success) 432 { 433 struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 434 435 if (!success) { 436 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 437 return; 438 } 439 440 raid_io->raid_bdev->module->submit_rw_request(raid_io); 441 } 442 443 /* 444 * brief: 445 * raid_bdev_submit_request function is the submit_request function pointer of 446 * raid bdev function table. This is used to submit the io on raid_bdev to below 447 * layers. 
448 * params: 449 * ch - pointer to raid bdev io channel 450 * bdev_io - pointer to parent bdev_io on raid bdev device 451 * returns: 452 * none 453 */ 454 static void 455 raid_bdev_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 456 { 457 struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 458 459 raid_io->raid_bdev = bdev_io->bdev->ctxt; 460 raid_io->raid_ch = spdk_io_channel_get_ctx(ch); 461 raid_io->base_bdev_io_remaining = 0; 462 raid_io->base_bdev_io_submitted = 0; 463 raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS; 464 465 switch (bdev_io->type) { 466 case SPDK_BDEV_IO_TYPE_READ: 467 spdk_bdev_io_get_buf(bdev_io, raid_bdev_get_buf_cb, 468 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); 469 break; 470 case SPDK_BDEV_IO_TYPE_WRITE: 471 raid_io->raid_bdev->module->submit_rw_request(raid_io); 472 break; 473 474 case SPDK_BDEV_IO_TYPE_RESET: 475 raid_bdev_submit_reset_request(raid_io); 476 break; 477 478 case SPDK_BDEV_IO_TYPE_FLUSH: 479 case SPDK_BDEV_IO_TYPE_UNMAP: 480 raid_io->raid_bdev->module->submit_null_payload_request(raid_io); 481 break; 482 483 default: 484 SPDK_ERRLOG("submit request, invalid io type %u\n", bdev_io->type); 485 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 486 break; 487 } 488 } 489 490 /* 491 * brief: 492 * _raid_bdev_io_type_supported checks whether io_type is supported in 493 * all base bdev modules of raid bdev module. If anyone among the base_bdevs 494 * doesn't support, the raid device doesn't supports. 
495 * 496 * params: 497 * raid_bdev - pointer to raid bdev context 498 * io_type - io type 499 * returns: 500 * true - io_type is supported 501 * false - io_type is not supported 502 */ 503 inline static bool 504 _raid_bdev_io_type_supported(struct raid_bdev *raid_bdev, enum spdk_bdev_io_type io_type) 505 { 506 struct raid_base_bdev_info *base_info; 507 508 if (io_type == SPDK_BDEV_IO_TYPE_FLUSH || 509 io_type == SPDK_BDEV_IO_TYPE_UNMAP) { 510 if (raid_bdev->module->submit_null_payload_request == NULL) { 511 return false; 512 } 513 } 514 515 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 516 if (base_info->bdev == NULL) { 517 assert(false); 518 continue; 519 } 520 521 if (spdk_bdev_io_type_supported(base_info->bdev, io_type) == false) { 522 return false; 523 } 524 } 525 526 return true; 527 } 528 529 /* 530 * brief: 531 * raid_bdev_io_type_supported is the io_supported function for bdev function 532 * table which returns whether the particular io type is supported or not by 533 * raid bdev module 534 * params: 535 * ctx - pointer to raid bdev context 536 * type - io type 537 * returns: 538 * true - io_type is supported 539 * false - io_type is not supported 540 */ 541 static bool 542 raid_bdev_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 543 { 544 switch (io_type) { 545 case SPDK_BDEV_IO_TYPE_READ: 546 case SPDK_BDEV_IO_TYPE_WRITE: 547 return true; 548 549 case SPDK_BDEV_IO_TYPE_FLUSH: 550 case SPDK_BDEV_IO_TYPE_RESET: 551 case SPDK_BDEV_IO_TYPE_UNMAP: 552 return _raid_bdev_io_type_supported(ctx, io_type); 553 554 default: 555 return false; 556 } 557 558 return false; 559 } 560 561 /* 562 * brief: 563 * raid_bdev_get_io_channel is the get_io_channel function table pointer for 564 * raid bdev. 
This is used to return the io channel for this raid bdev 565 * params: 566 * ctxt - pointer to raid_bdev 567 * returns: 568 * pointer to io channel for raid bdev 569 */ 570 static struct spdk_io_channel * 571 raid_bdev_get_io_channel(void *ctxt) 572 { 573 struct raid_bdev *raid_bdev = ctxt; 574 575 return spdk_get_io_channel(raid_bdev); 576 } 577 578 /* 579 * brief: 580 * raid_bdev_dump_info_json is the function table pointer for raid bdev 581 * params: 582 * ctx - pointer to raid_bdev 583 * w - pointer to json context 584 * returns: 585 * 0 - success 586 * non zero - failure 587 */ 588 static int 589 raid_bdev_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) 590 { 591 struct raid_bdev *raid_bdev = ctx; 592 struct raid_base_bdev_info *base_info; 593 594 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_dump_config_json\n"); 595 assert(raid_bdev != NULL); 596 597 /* Dump the raid bdev configuration related information */ 598 spdk_json_write_named_object_begin(w, "raid"); 599 spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb); 600 spdk_json_write_named_uint32(w, "state", raid_bdev->state); 601 spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level)); 602 spdk_json_write_named_uint32(w, "destruct_called", raid_bdev->destruct_called); 603 spdk_json_write_named_uint32(w, "num_base_bdevs", raid_bdev->num_base_bdevs); 604 spdk_json_write_named_uint32(w, "num_base_bdevs_discovered", raid_bdev->num_base_bdevs_discovered); 605 spdk_json_write_name(w, "base_bdevs_list"); 606 spdk_json_write_array_begin(w); 607 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 608 if (base_info->bdev) { 609 spdk_json_write_string(w, base_info->bdev->name); 610 } else { 611 spdk_json_write_null(w); 612 } 613 } 614 spdk_json_write_array_end(w); 615 spdk_json_write_object_end(w); 616 617 return 0; 618 } 619 620 /* 621 * brief: 622 * raid_bdev_write_config_json is the function table pointer for raid bdev 623 * params: 624 * bdev - pointer to spdk_bdev 
625 * w - pointer to json context 626 * returns: 627 * none 628 */ 629 static void 630 raid_bdev_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 631 { 632 struct raid_bdev *raid_bdev = bdev->ctxt; 633 struct raid_base_bdev_info *base_info; 634 635 spdk_json_write_object_begin(w); 636 637 spdk_json_write_named_string(w, "method", "bdev_raid_create"); 638 639 spdk_json_write_named_object_begin(w, "params"); 640 spdk_json_write_named_string(w, "name", bdev->name); 641 spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb); 642 spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level)); 643 644 spdk_json_write_named_array_begin(w, "base_bdevs"); 645 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 646 if (base_info->bdev) { 647 spdk_json_write_string(w, base_info->bdev->name); 648 } 649 } 650 spdk_json_write_array_end(w); 651 spdk_json_write_object_end(w); 652 653 spdk_json_write_object_end(w); 654 } 655 656 static int 657 raid_bdev_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int array_size) 658 { 659 struct raid_bdev *raid_bdev = ctx; 660 struct spdk_bdev *base_bdev; 661 uint32_t i; 662 int domains_count = 0, rc; 663 664 /* First loop to get the number of memory domains */ 665 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 666 base_bdev = raid_bdev->base_bdev_info[i].bdev; 667 rc = spdk_bdev_get_memory_domains(base_bdev, NULL, 0); 668 if (rc < 0) { 669 return rc; 670 } 671 domains_count += rc; 672 } 673 674 if (!domains || array_size < domains_count) { 675 return domains_count; 676 } 677 678 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 679 base_bdev = raid_bdev->base_bdev_info[i].bdev; 680 rc = spdk_bdev_get_memory_domains(base_bdev, domains, array_size); 681 if (rc < 0) { 682 return rc; 683 } 684 domains += rc; 685 array_size -= rc; 686 } 687 688 return domains_count; 689 } 690 691 /* g_raid_bdev_fn_table is the function table for raid bdev */ 692 static const 
struct spdk_bdev_fn_table g_raid_bdev_fn_table = { 693 .destruct = raid_bdev_destruct, 694 .submit_request = raid_bdev_submit_request, 695 .io_type_supported = raid_bdev_io_type_supported, 696 .get_io_channel = raid_bdev_get_io_channel, 697 .dump_info_json = raid_bdev_dump_info_json, 698 .write_config_json = raid_bdev_write_config_json, 699 .get_memory_domains = raid_bdev_get_memory_domains, 700 }; 701 702 /* 703 * brief: 704 * raid_bdev_config_cleanup function is used to free memory for one raid_bdev in configuration 705 * params: 706 * raid_cfg - pointer to raid_bdev_config structure 707 * returns: 708 * none 709 */ 710 void 711 raid_bdev_config_cleanup(struct raid_bdev_config *raid_cfg) 712 { 713 uint8_t i; 714 715 TAILQ_REMOVE(&g_raid_config.raid_bdev_config_head, raid_cfg, link); 716 g_raid_config.total_raid_bdev--; 717 718 if (raid_cfg->base_bdev) { 719 for (i = 0; i < raid_cfg->num_base_bdevs; i++) { 720 free(raid_cfg->base_bdev[i].name); 721 } 722 free(raid_cfg->base_bdev); 723 } 724 free(raid_cfg->name); 725 free(raid_cfg); 726 } 727 728 /* 729 * brief: 730 * raid_bdev_free is the raid bdev function table function pointer. This is 731 * called on bdev free path 732 * params: 733 * none 734 * returns: 735 * none 736 */ 737 static void 738 raid_bdev_free(void) 739 { 740 struct raid_bdev_config *raid_cfg, *tmp; 741 742 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_free\n"); 743 TAILQ_FOREACH_SAFE(raid_cfg, &g_raid_config.raid_bdev_config_head, link, tmp) { 744 raid_bdev_config_cleanup(raid_cfg); 745 } 746 } 747 748 /* brief 749 * raid_bdev_config_find_by_name is a helper function to find raid bdev config 750 * by name as key. 751 * 752 * params: 753 * raid_name - name for raid bdev. 
754 */ 755 struct raid_bdev_config * 756 raid_bdev_config_find_by_name(const char *raid_name) 757 { 758 struct raid_bdev_config *raid_cfg; 759 760 TAILQ_FOREACH(raid_cfg, &g_raid_config.raid_bdev_config_head, link) { 761 if (!strcmp(raid_cfg->name, raid_name)) { 762 return raid_cfg; 763 } 764 } 765 766 return raid_cfg; 767 } 768 769 /* 770 * brief 771 * raid_bdev_config_add function adds config for newly created raid bdev. 772 * 773 * params: 774 * raid_name - name for raid bdev. 775 * strip_size - strip size in KB 776 * num_base_bdevs - number of base bdevs. 777 * level - raid level. 778 * _raid_cfg - Pointer to newly added configuration 779 */ 780 int 781 raid_bdev_config_add(const char *raid_name, uint32_t strip_size, uint8_t num_base_bdevs, 782 enum raid_level level, struct raid_bdev_config **_raid_cfg) 783 { 784 struct raid_bdev_config *raid_cfg; 785 786 raid_cfg = raid_bdev_config_find_by_name(raid_name); 787 if (raid_cfg != NULL) { 788 SPDK_ERRLOG("Duplicate raid bdev name found in config file %s\n", 789 raid_name); 790 return -EEXIST; 791 } 792 793 if (spdk_u32_is_pow2(strip_size) == false) { 794 SPDK_ERRLOG("Invalid strip size %" PRIu32 "\n", strip_size); 795 return -EINVAL; 796 } 797 798 if (num_base_bdevs == 0) { 799 SPDK_ERRLOG("Invalid base device count %u\n", num_base_bdevs); 800 return -EINVAL; 801 } 802 803 raid_cfg = calloc(1, sizeof(*raid_cfg)); 804 if (raid_cfg == NULL) { 805 SPDK_ERRLOG("unable to allocate memory\n"); 806 return -ENOMEM; 807 } 808 809 raid_cfg->name = strdup(raid_name); 810 if (!raid_cfg->name) { 811 free(raid_cfg); 812 SPDK_ERRLOG("unable to allocate memory\n"); 813 return -ENOMEM; 814 } 815 raid_cfg->strip_size = strip_size; 816 raid_cfg->num_base_bdevs = num_base_bdevs; 817 raid_cfg->level = level; 818 819 raid_cfg->base_bdev = calloc(num_base_bdevs, sizeof(*raid_cfg->base_bdev)); 820 if (raid_cfg->base_bdev == NULL) { 821 free(raid_cfg->name); 822 free(raid_cfg); 823 SPDK_ERRLOG("unable to allocate memory\n"); 824 return 
-ENOMEM; 825 } 826 827 TAILQ_INSERT_TAIL(&g_raid_config.raid_bdev_config_head, raid_cfg, link); 828 g_raid_config.total_raid_bdev++; 829 830 *_raid_cfg = raid_cfg; 831 return 0; 832 } 833 834 /* 835 * brief: 836 * raid_bdev_config_add_base_bdev function add base bdev to raid bdev config. 837 * 838 * params: 839 * raid_cfg - pointer to raid bdev configuration 840 * base_bdev_name - name of base bdev 841 * slot - Position to add base bdev 842 */ 843 int 844 raid_bdev_config_add_base_bdev(struct raid_bdev_config *raid_cfg, const char *base_bdev_name, 845 uint8_t slot) 846 { 847 uint8_t i; 848 struct raid_bdev_config *tmp; 849 850 if (slot >= raid_cfg->num_base_bdevs) { 851 return -EINVAL; 852 } 853 854 TAILQ_FOREACH(tmp, &g_raid_config.raid_bdev_config_head, link) { 855 for (i = 0; i < tmp->num_base_bdevs; i++) { 856 if (tmp->base_bdev[i].name != NULL) { 857 if (!strcmp(tmp->base_bdev[i].name, base_bdev_name)) { 858 SPDK_ERRLOG("duplicate base bdev name %s mentioned\n", 859 base_bdev_name); 860 return -EEXIST; 861 } 862 } 863 } 864 } 865 866 raid_cfg->base_bdev[slot].name = strdup(base_bdev_name); 867 if (raid_cfg->base_bdev[slot].name == NULL) { 868 SPDK_ERRLOG("unable to allocate memory\n"); 869 return -ENOMEM; 870 } 871 872 return 0; 873 } 874 875 static struct { 876 const char *name; 877 enum raid_level value; 878 } g_raid_level_names[] = { 879 { "raid0", RAID0 }, 880 { "0", RAID0 }, 881 { "raid5", RAID5 }, 882 { "5", RAID5 }, 883 { "concat", CONCAT }, 884 { } 885 }; 886 887 enum raid_level raid_bdev_parse_raid_level(const char *str) 888 { 889 unsigned int i; 890 891 assert(str != NULL); 892 893 for (i = 0; g_raid_level_names[i].name != NULL; i++) { 894 if (strcasecmp(g_raid_level_names[i].name, str) == 0) { 895 return g_raid_level_names[i].value; 896 } 897 } 898 899 return INVALID_RAID_LEVEL; 900 } 901 902 const char * 903 raid_bdev_level_to_str(enum raid_level level) 904 { 905 unsigned int i; 906 907 for (i = 0; g_raid_level_names[i].name != NULL; i++) { 908 if 
(g_raid_level_names[i].value == level) { 909 return g_raid_level_names[i].name; 910 } 911 } 912 913 return ""; 914 } 915 916 /* 917 * brief: 918 * raid_bdev_fini_start is called when bdev layer is starting the 919 * shutdown process 920 * params: 921 * none 922 * returns: 923 * none 924 */ 925 static void 926 raid_bdev_fini_start(void) 927 { 928 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_fini_start\n"); 929 g_shutdown_started = true; 930 } 931 932 /* 933 * brief: 934 * raid_bdev_exit is called on raid bdev module exit time by bdev layer 935 * params: 936 * none 937 * returns: 938 * none 939 */ 940 static void 941 raid_bdev_exit(void) 942 { 943 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_exit\n"); 944 raid_bdev_free(); 945 } 946 947 /* 948 * brief: 949 * raid_bdev_get_ctx_size is used to return the context size of bdev_io for raid 950 * module 951 * params: 952 * none 953 * returns: 954 * size of spdk_bdev_io context for raid 955 */ 956 static int 957 raid_bdev_get_ctx_size(void) 958 { 959 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_get_ctx_size\n"); 960 return sizeof(struct raid_bdev_io); 961 } 962 963 /* 964 * brief: 965 * raid_bdev_can_claim_bdev is the function to check if this base_bdev can be 966 * claimed by raid bdev or not. 967 * params: 968 * bdev_name - represents base bdev name 969 * _raid_cfg - pointer to raid bdev config parsed from config file 970 * base_bdev_slot - if bdev can be claimed, it represents the base_bdev correct 971 * slot. 
This field is only valid if return value of this function is true 972 * returns: 973 * true - if bdev can be claimed 974 * false - if bdev can't be claimed 975 */ 976 static bool 977 raid_bdev_can_claim_bdev(const char *bdev_name, struct raid_bdev_config **_raid_cfg, 978 uint8_t *base_bdev_slot) 979 { 980 struct raid_bdev_config *raid_cfg; 981 uint8_t i; 982 983 TAILQ_FOREACH(raid_cfg, &g_raid_config.raid_bdev_config_head, link) { 984 for (i = 0; i < raid_cfg->num_base_bdevs; i++) { 985 /* 986 * Check if the base bdev name is part of raid bdev configuration. 987 * If match is found then return true and the slot information where 988 * this base bdev should be inserted in raid bdev 989 */ 990 if (!strcmp(bdev_name, raid_cfg->base_bdev[i].name)) { 991 *_raid_cfg = raid_cfg; 992 *base_bdev_slot = i; 993 return true; 994 } 995 } 996 } 997 998 return false; 999 } 1000 1001 1002 static struct spdk_bdev_module g_raid_if = { 1003 .name = "raid", 1004 .module_init = raid_bdev_init, 1005 .fini_start = raid_bdev_fini_start, 1006 .module_fini = raid_bdev_exit, 1007 .get_ctx_size = raid_bdev_get_ctx_size, 1008 .examine_config = raid_bdev_examine, 1009 .async_init = false, 1010 .async_fini = false, 1011 }; 1012 SPDK_BDEV_MODULE_REGISTER(raid, &g_raid_if) 1013 1014 /* 1015 * brief: 1016 * raid_bdev_init is the initialization function for raid bdev module 1017 * params: 1018 * none 1019 * returns: 1020 * 0 - success 1021 * non zero - failure 1022 */ 1023 static int 1024 raid_bdev_init(void) 1025 { 1026 return 0; 1027 } 1028 1029 /* 1030 * brief: 1031 * raid_bdev_create allocates raid bdev based on passed configuration 1032 * params: 1033 * raid_cfg - configuration of raid bdev 1034 * returns: 1035 * 0 - success 1036 * non zero - failure 1037 */ 1038 int 1039 raid_bdev_create(struct raid_bdev_config *raid_cfg) 1040 { 1041 struct raid_bdev *raid_bdev; 1042 struct spdk_bdev *raid_bdev_gen; 1043 struct raid_bdev_module *module; 1044 1045 module = 
raid_bdev_module_find(raid_cfg->level); 1046 if (module == NULL) { 1047 SPDK_ERRLOG("Unsupported raid level '%d'\n", raid_cfg->level); 1048 return -EINVAL; 1049 } 1050 1051 assert(module->base_bdevs_min != 0); 1052 if (raid_cfg->num_base_bdevs < module->base_bdevs_min) { 1053 SPDK_ERRLOG("At least %u base devices required for %s\n", 1054 module->base_bdevs_min, 1055 raid_bdev_level_to_str(raid_cfg->level)); 1056 return -EINVAL; 1057 } 1058 1059 raid_bdev = calloc(1, sizeof(*raid_bdev)); 1060 if (!raid_bdev) { 1061 SPDK_ERRLOG("Unable to allocate memory for raid bdev\n"); 1062 return -ENOMEM; 1063 } 1064 1065 raid_bdev->module = module; 1066 raid_bdev->num_base_bdevs = raid_cfg->num_base_bdevs; 1067 raid_bdev->base_bdev_info = calloc(raid_bdev->num_base_bdevs, 1068 sizeof(struct raid_base_bdev_info)); 1069 if (!raid_bdev->base_bdev_info) { 1070 SPDK_ERRLOG("Unable able to allocate base bdev info\n"); 1071 free(raid_bdev); 1072 return -ENOMEM; 1073 } 1074 1075 /* strip_size_kb is from the rpc param. strip_size is in blocks and used 1076 * internally and set later. 
1077 */ 1078 raid_bdev->strip_size = 0; 1079 raid_bdev->strip_size_kb = raid_cfg->strip_size; 1080 raid_bdev->state = RAID_BDEV_STATE_CONFIGURING; 1081 raid_bdev->config = raid_cfg; 1082 raid_bdev->level = raid_cfg->level; 1083 1084 raid_bdev_gen = &raid_bdev->bdev; 1085 1086 raid_bdev_gen->name = strdup(raid_cfg->name); 1087 if (!raid_bdev_gen->name) { 1088 SPDK_ERRLOG("Unable to allocate name for raid\n"); 1089 free(raid_bdev->base_bdev_info); 1090 free(raid_bdev); 1091 return -ENOMEM; 1092 } 1093 1094 raid_bdev_gen->product_name = "Raid Volume"; 1095 raid_bdev_gen->ctxt = raid_bdev; 1096 raid_bdev_gen->fn_table = &g_raid_bdev_fn_table; 1097 raid_bdev_gen->module = &g_raid_if; 1098 raid_bdev_gen->write_cache = 0; 1099 1100 TAILQ_INSERT_TAIL(&g_raid_bdev_configuring_list, raid_bdev, state_link); 1101 TAILQ_INSERT_TAIL(&g_raid_bdev_list, raid_bdev, global_link); 1102 1103 raid_cfg->raid_bdev = raid_bdev; 1104 1105 return 0; 1106 } 1107 1108 /* 1109 * brief 1110 * raid_bdev_alloc_base_bdev_resource allocates resource of base bdev. 
1111 * params: 1112 * raid_bdev - pointer to raid bdev 1113 * bdev_name - base bdev name 1114 * base_bdev_slot - position to add base bdev 1115 * returns: 1116 * 0 - success 1117 * non zero - failure 1118 */ 1119 static int 1120 raid_bdev_alloc_base_bdev_resource(struct raid_bdev *raid_bdev, const char *bdev_name, 1121 uint8_t base_bdev_slot) 1122 { 1123 struct spdk_bdev_desc *desc; 1124 struct spdk_bdev *bdev; 1125 int rc; 1126 1127 rc = spdk_bdev_open_ext(bdev_name, true, raid_bdev_event_base_bdev, NULL, &desc); 1128 if (rc != 0) { 1129 if (rc != -ENODEV) { 1130 SPDK_ERRLOG("Unable to create desc on bdev '%s'\n", bdev_name); 1131 } 1132 return rc; 1133 } 1134 1135 bdev = spdk_bdev_desc_get_bdev(desc); 1136 1137 rc = spdk_bdev_module_claim_bdev(bdev, NULL, &g_raid_if); 1138 if (rc != 0) { 1139 SPDK_ERRLOG("Unable to claim this bdev as it is already claimed\n"); 1140 spdk_bdev_close(desc); 1141 return rc; 1142 } 1143 1144 SPDK_DEBUGLOG(bdev_raid, "bdev %s is claimed\n", bdev_name); 1145 1146 assert(raid_bdev->state != RAID_BDEV_STATE_ONLINE); 1147 assert(base_bdev_slot < raid_bdev->num_base_bdevs); 1148 1149 raid_bdev->base_bdev_info[base_bdev_slot].thread = spdk_get_thread(); 1150 raid_bdev->base_bdev_info[base_bdev_slot].bdev = bdev; 1151 raid_bdev->base_bdev_info[base_bdev_slot].desc = desc; 1152 raid_bdev->num_base_bdevs_discovered++; 1153 assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs); 1154 1155 return 0; 1156 } 1157 1158 /* 1159 * brief: 1160 * If raid bdev config is complete, then only register the raid bdev to 1161 * bdev layer and remove this raid bdev from configuring list and 1162 * insert the raid bdev to configured list 1163 * params: 1164 * raid_bdev - pointer to raid bdev 1165 * returns: 1166 * 0 - success 1167 * non zero - failure 1168 */ 1169 static int 1170 raid_bdev_configure(struct raid_bdev *raid_bdev) 1171 { 1172 uint32_t blocklen = 0; 1173 struct spdk_bdev *raid_bdev_gen; 1174 struct raid_base_bdev_info *base_info; 
1175 int rc = 0; 1176 1177 assert(raid_bdev->state == RAID_BDEV_STATE_CONFIGURING); 1178 assert(raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs); 1179 1180 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1181 /* Check blocklen for all base bdevs that it should be same */ 1182 if (blocklen == 0) { 1183 blocklen = base_info->bdev->blocklen; 1184 } else if (blocklen != base_info->bdev->blocklen) { 1185 /* 1186 * Assumption is that all the base bdevs for any raid bdev should 1187 * have same blocklen 1188 */ 1189 SPDK_ERRLOG("Blocklen of various bdevs not matching\n"); 1190 return -EINVAL; 1191 } 1192 } 1193 assert(blocklen > 0); 1194 1195 /* The strip_size_kb is read in from user in KB. Convert to blocks here for 1196 * internal use. 1197 */ 1198 raid_bdev->strip_size = (raid_bdev->strip_size_kb * 1024) / blocklen; 1199 raid_bdev->strip_size_shift = spdk_u32log2(raid_bdev->strip_size); 1200 raid_bdev->blocklen_shift = spdk_u32log2(blocklen); 1201 1202 raid_bdev_gen = &raid_bdev->bdev; 1203 raid_bdev_gen->blocklen = blocklen; 1204 1205 rc = raid_bdev->module->start(raid_bdev); 1206 if (rc != 0) { 1207 SPDK_ERRLOG("raid module startup callback failed\n"); 1208 return rc; 1209 } 1210 raid_bdev->state = RAID_BDEV_STATE_ONLINE; 1211 SPDK_DEBUGLOG(bdev_raid, "io device register %p\n", raid_bdev); 1212 SPDK_DEBUGLOG(bdev_raid, "blockcnt %" PRIu64 ", blocklen %u\n", 1213 raid_bdev_gen->blockcnt, raid_bdev_gen->blocklen); 1214 spdk_io_device_register(raid_bdev, raid_bdev_create_cb, raid_bdev_destroy_cb, 1215 sizeof(struct raid_bdev_io_channel), 1216 raid_bdev->bdev.name); 1217 rc = spdk_bdev_register(raid_bdev_gen); 1218 if (rc != 0) { 1219 SPDK_ERRLOG("Unable to register raid bdev and stay at configuring state\n"); 1220 if (raid_bdev->module->stop != NULL) { 1221 raid_bdev->module->stop(raid_bdev); 1222 } 1223 spdk_io_device_unregister(raid_bdev, NULL); 1224 raid_bdev->state = RAID_BDEV_STATE_CONFIGURING; 1225 return rc; 1226 } 1227 SPDK_DEBUGLOG(bdev_raid, 
"raid bdev generic %p\n", raid_bdev_gen); 1228 TAILQ_REMOVE(&g_raid_bdev_configuring_list, raid_bdev, state_link); 1229 TAILQ_INSERT_TAIL(&g_raid_bdev_configured_list, raid_bdev, state_link); 1230 SPDK_DEBUGLOG(bdev_raid, "raid bdev is created with name %s, raid_bdev %p\n", 1231 raid_bdev_gen->name, raid_bdev); 1232 1233 return 0; 1234 } 1235 1236 /* 1237 * brief: 1238 * If raid bdev is online and registered, change the bdev state to 1239 * configuring and unregister this raid device. Queue this raid device 1240 * in configuring list 1241 * params: 1242 * raid_bdev - pointer to raid bdev 1243 * cb_fn - callback function 1244 * cb_arg - argument to callback function 1245 * returns: 1246 * none 1247 */ 1248 static void 1249 raid_bdev_deconfigure(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn, 1250 void *cb_arg) 1251 { 1252 if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) { 1253 if (cb_fn) { 1254 cb_fn(cb_arg, 0); 1255 } 1256 return; 1257 } 1258 1259 assert(raid_bdev->num_base_bdevs == raid_bdev->num_base_bdevs_discovered); 1260 TAILQ_REMOVE(&g_raid_bdev_configured_list, raid_bdev, state_link); 1261 raid_bdev->state = RAID_BDEV_STATE_OFFLINE; 1262 assert(raid_bdev->num_base_bdevs_discovered); 1263 TAILQ_INSERT_TAIL(&g_raid_bdev_offline_list, raid_bdev, state_link); 1264 SPDK_DEBUGLOG(bdev_raid, "raid bdev state changing from online to offline\n"); 1265 1266 spdk_bdev_unregister(&raid_bdev->bdev, cb_fn, cb_arg); 1267 } 1268 1269 /* 1270 * brief: 1271 * raid_bdev_find_by_base_bdev function finds the raid bdev which has 1272 * claimed the base bdev. 1273 * params: 1274 * base_bdev - pointer to base bdev pointer 1275 * _raid_bdev - Reference to pointer to raid bdev 1276 * _base_info - Reference to the raid base bdev info. 1277 * returns: 1278 * true - if the raid bdev is found. 1279 * false - if the raid bdev is not found. 
1280 */ 1281 static bool 1282 raid_bdev_find_by_base_bdev(struct spdk_bdev *base_bdev, struct raid_bdev **_raid_bdev, 1283 struct raid_base_bdev_info **_base_info) 1284 { 1285 struct raid_bdev *raid_bdev; 1286 struct raid_base_bdev_info *base_info; 1287 1288 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { 1289 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1290 if (base_info->bdev == base_bdev) { 1291 *_raid_bdev = raid_bdev; 1292 *_base_info = base_info; 1293 return true; 1294 } 1295 } 1296 } 1297 1298 return false; 1299 } 1300 1301 /* 1302 * brief: 1303 * raid_bdev_remove_base_bdev function is called by below layers when base_bdev 1304 * is removed. This function checks if this base bdev is part of any raid bdev 1305 * or not. If yes, it takes necessary action on that particular raid bdev. 1306 * params: 1307 * base_bdev - pointer to base bdev pointer which got removed 1308 * returns: 1309 * none 1310 */ 1311 static void 1312 raid_bdev_remove_base_bdev(struct spdk_bdev *base_bdev) 1313 { 1314 struct raid_bdev *raid_bdev = NULL; 1315 struct raid_base_bdev_info *base_info; 1316 1317 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_remove_base_bdev\n"); 1318 1319 /* Find the raid_bdev which has claimed this base_bdev */ 1320 if (!raid_bdev_find_by_base_bdev(base_bdev, &raid_bdev, &base_info)) { 1321 SPDK_ERRLOG("bdev to remove '%s' not found\n", base_bdev->name); 1322 return; 1323 } 1324 1325 assert(base_info->desc); 1326 base_info->remove_scheduled = true; 1327 1328 if (raid_bdev->destruct_called == true || 1329 raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) { 1330 /* 1331 * As raid bdev is not registered yet or already unregistered, 1332 * so cleanup should be done here itself. 1333 */ 1334 raid_bdev_free_base_bdev_resource(raid_bdev, base_info); 1335 if (raid_bdev->num_base_bdevs_discovered == 0) { 1336 /* There is no base bdev for this raid, so free the raid device. 
*/ 1337 raid_bdev_cleanup(raid_bdev); 1338 return; 1339 } 1340 } 1341 1342 raid_bdev_deconfigure(raid_bdev, NULL, NULL); 1343 } 1344 1345 /* 1346 * brief: 1347 * raid_bdev_event_base_bdev function is called by below layers when base_bdev 1348 * triggers asynchronous event. 1349 * params: 1350 * type - event details. 1351 * bdev - bdev that triggered event. 1352 * event_ctx - context for event. 1353 * returns: 1354 * none 1355 */ 1356 static void 1357 raid_bdev_event_base_bdev(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, 1358 void *event_ctx) 1359 { 1360 switch (type) { 1361 case SPDK_BDEV_EVENT_REMOVE: 1362 raid_bdev_remove_base_bdev(bdev); 1363 break; 1364 default: 1365 SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type); 1366 break; 1367 } 1368 } 1369 1370 /* 1371 * brief: 1372 * Remove base bdevs from the raid bdev one by one. Skip any base bdev which 1373 * doesn't exist. 1374 * params: 1375 * raid_cfg - pointer to raid bdev config. 1376 * cb_fn - callback function 1377 * cb_ctx - argument to callback function 1378 */ 1379 void 1380 raid_bdev_remove_base_devices(struct raid_bdev_config *raid_cfg, 1381 raid_bdev_destruct_cb cb_fn, void *cb_arg) 1382 { 1383 struct raid_bdev *raid_bdev; 1384 struct raid_base_bdev_info *base_info; 1385 1386 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_remove_base_devices\n"); 1387 1388 raid_bdev = raid_cfg->raid_bdev; 1389 if (raid_bdev == NULL) { 1390 SPDK_DEBUGLOG(bdev_raid, "raid bdev %s doesn't exist now\n", raid_cfg->name); 1391 if (cb_fn) { 1392 cb_fn(cb_arg, 0); 1393 } 1394 return; 1395 } 1396 1397 if (raid_bdev->destroy_started) { 1398 SPDK_DEBUGLOG(bdev_raid, "destroying raid bdev %s is already started\n", 1399 raid_cfg->name); 1400 if (cb_fn) { 1401 cb_fn(cb_arg, -EALREADY); 1402 } 1403 return; 1404 } 1405 1406 raid_bdev->destroy_started = true; 1407 1408 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1409 if (base_info->bdev == NULL) { 1410 continue; 1411 } 1412 1413 assert(base_info->desc); 1414 
base_info->remove_scheduled = true; 1415 1416 if (raid_bdev->destruct_called == true || 1417 raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) { 1418 /* 1419 * As raid bdev is not registered yet or already unregistered, 1420 * so cleanup should be done here itself. 1421 */ 1422 raid_bdev_free_base_bdev_resource(raid_bdev, base_info); 1423 } 1424 } 1425 1426 if (raid_bdev->num_base_bdevs_discovered == 0) { 1427 /* There is no base bdev for this raid, so free the raid device. */ 1428 raid_bdev_cleanup(raid_bdev); 1429 if (cb_fn) { 1430 cb_fn(cb_arg, 0); 1431 } 1432 } else { 1433 raid_bdev_deconfigure(raid_bdev, cb_fn, cb_arg); 1434 } 1435 } 1436 1437 /* 1438 * brief: 1439 * raid_bdev_add_base_device function is the actual function which either adds 1440 * the nvme base device to existing raid bdev or create a new raid bdev. It also claims 1441 * the base device and keep the open descriptor. 1442 * params: 1443 * raid_cfg - pointer to raid bdev config 1444 * bdev - pointer to base bdev 1445 * base_bdev_slot - position to add base bdev 1446 * returns: 1447 * 0 - success 1448 * non zero - failure 1449 */ 1450 static int 1451 raid_bdev_add_base_device(struct raid_bdev_config *raid_cfg, const char *bdev_name, 1452 uint8_t base_bdev_slot) 1453 { 1454 struct raid_bdev *raid_bdev; 1455 int rc; 1456 1457 raid_bdev = raid_cfg->raid_bdev; 1458 if (!raid_bdev) { 1459 SPDK_ERRLOG("Raid bdev '%s' is not created yet\n", raid_cfg->name); 1460 return -ENODEV; 1461 } 1462 1463 rc = raid_bdev_alloc_base_bdev_resource(raid_bdev, bdev_name, base_bdev_slot); 1464 if (rc != 0) { 1465 if (rc != -ENODEV) { 1466 SPDK_ERRLOG("Failed to allocate resource for bdev '%s'\n", bdev_name); 1467 } 1468 return rc; 1469 } 1470 1471 assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs); 1472 1473 if (raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs) { 1474 rc = raid_bdev_configure(raid_bdev); 1475 if (rc != 0) { 1476 SPDK_ERRLOG("Failed to configure raid bdev\n"); 1477 
return rc; 1478 } 1479 } 1480 1481 return 0; 1482 } 1483 1484 /* 1485 * brief: 1486 * Add base bdevs to the raid bdev one by one. Skip any base bdev which doesn't 1487 * exist or fails to add. If all base bdevs are successfully added, the raid bdev 1488 * moves to the configured state and becomes available. Otherwise, the raid bdev 1489 * stays at the configuring state with added base bdevs. 1490 * params: 1491 * raid_cfg - pointer to raid bdev config 1492 * returns: 1493 * 0 - The raid bdev moves to the configured state or stays at the configuring 1494 * state with added base bdevs due to any nonexistent base bdev. 1495 * non zero - Failed to add any base bdev and stays at the configuring state with 1496 * added base bdevs. 1497 */ 1498 int 1499 raid_bdev_add_base_devices(struct raid_bdev_config *raid_cfg) 1500 { 1501 uint8_t i; 1502 int rc = 0, _rc; 1503 1504 for (i = 0; i < raid_cfg->num_base_bdevs; i++) { 1505 _rc = raid_bdev_add_base_device(raid_cfg, raid_cfg->base_bdev[i].name, i); 1506 if (_rc == -ENODEV) { 1507 SPDK_DEBUGLOG(bdev_raid, "base bdev %s doesn't exist now\n", 1508 raid_cfg->base_bdev[i].name); 1509 } else if (_rc != 0) { 1510 SPDK_ERRLOG("Failed to add base bdev %s to RAID bdev %s: %s\n", 1511 raid_cfg->base_bdev[i].name, raid_cfg->name, 1512 spdk_strerror(-_rc)); 1513 if (rc == 0) { 1514 rc = _rc; 1515 } 1516 } 1517 } 1518 1519 return rc; 1520 } 1521 1522 /* 1523 * brief: 1524 * raid_bdev_examine function is the examine function call by the below layers 1525 * like bdev_nvme layer. This function will check if this base bdev can be 1526 * claimed by this raid bdev or not. 
1527 * params: 1528 * bdev - pointer to base bdev 1529 * returns: 1530 * none 1531 */ 1532 static void 1533 raid_bdev_examine(struct spdk_bdev *bdev) 1534 { 1535 struct raid_bdev_config *raid_cfg; 1536 uint8_t base_bdev_slot; 1537 1538 if (raid_bdev_can_claim_bdev(bdev->name, &raid_cfg, &base_bdev_slot)) { 1539 raid_bdev_add_base_device(raid_cfg, bdev->name, base_bdev_slot); 1540 } else { 1541 SPDK_DEBUGLOG(bdev_raid, "bdev %s can't be claimed\n", 1542 bdev->name); 1543 } 1544 1545 spdk_bdev_module_examine_done(&g_raid_if); 1546 } 1547 1548 /* Log component for bdev raid bdev module */ 1549 SPDK_LOG_REGISTER_COMPONENT(bdev_raid) 1550