1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (c) Intel Corporation. 3 * All rights reserved. 4 * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 5 */ 6 7 #include "bdev_raid.h" 8 #include "spdk/env.h" 9 #include "spdk/thread.h" 10 #include "spdk/log.h" 11 #include "spdk/string.h" 12 #include "spdk/util.h" 13 #include "spdk/json.h" 14 #include "spdk/string.h" 15 16 static bool g_shutdown_started = false; 17 18 /* raid bdev config as read from config file */ 19 struct raid_config g_raid_config = { 20 .raid_bdev_config_head = TAILQ_HEAD_INITIALIZER(g_raid_config.raid_bdev_config_head), 21 }; 22 23 /* 24 * List of raid bdev in configured list, these raid bdevs are registered with 25 * bdev layer 26 */ 27 struct raid_configured_tailq g_raid_bdev_configured_list = TAILQ_HEAD_INITIALIZER( 28 g_raid_bdev_configured_list); 29 30 /* List of raid bdev in configuring list */ 31 struct raid_configuring_tailq g_raid_bdev_configuring_list = TAILQ_HEAD_INITIALIZER( 32 g_raid_bdev_configuring_list); 33 34 /* List of all raid bdevs */ 35 struct raid_all_tailq g_raid_bdev_list = TAILQ_HEAD_INITIALIZER(g_raid_bdev_list); 36 37 /* List of all raid bdevs that are offline */ 38 struct raid_offline_tailq g_raid_bdev_offline_list = TAILQ_HEAD_INITIALIZER( 39 g_raid_bdev_offline_list); 40 41 static TAILQ_HEAD(, raid_bdev_module) g_raid_modules = TAILQ_HEAD_INITIALIZER(g_raid_modules); 42 43 static struct raid_bdev_module * 44 raid_bdev_module_find(enum raid_level level) 45 { 46 struct raid_bdev_module *raid_module; 47 48 TAILQ_FOREACH(raid_module, &g_raid_modules, link) { 49 if (raid_module->level == level) { 50 return raid_module; 51 } 52 } 53 54 return NULL; 55 } 56 57 void 58 raid_bdev_module_list_add(struct raid_bdev_module *raid_module) 59 { 60 if (raid_bdev_module_find(raid_module->level) != NULL) { 61 SPDK_ERRLOG("module for raid level '%s' already registered.\n", 62 raid_bdev_level_to_str(raid_module->level)); 63 assert(false); 64 } else { 65 TAILQ_INSERT_TAIL(&g_raid_modules, raid_module, link); 66 } 67 } 68 69 /* Function declarations */ 70 static void raid_bdev_examine(struct spdk_bdev *bdev); 71 static int raid_bdev_init(void); 72 static void raid_bdev_deconfigure(struct raid_bdev *raid_bdev, 73 raid_bdev_destruct_cb cb_fn, void *cb_arg); 74 static void raid_bdev_event_base_bdev(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, 75 void *event_ctx); 76 77 /* 78 * brief: 79 * raid_bdev_create_cb function is a cb function for raid bdev which creates the 80 * hierarchy from raid bdev to base bdev io channels. It will be called per core 81 * params: 82 * io_device - pointer to raid bdev io device represented by raid_bdev 83 * ctx_buf - pointer to context buffer for raid bdev io channel 84 * returns: 85 * 0 - success 86 * non zero - failure 87 */ 88 static int 89 raid_bdev_create_cb(void *io_device, void *ctx_buf) 90 { 91 struct raid_bdev *raid_bdev = io_device; 92 struct raid_bdev_io_channel *raid_ch = ctx_buf; 93 uint8_t i; 94 95 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_create_cb, %p\n", raid_ch); 96 97 assert(raid_bdev != NULL); 98 assert(raid_bdev->state == RAID_BDEV_STATE_ONLINE); 99 100 raid_ch->num_channels = raid_bdev->num_base_bdevs; 101 102 raid_ch->base_channel = calloc(raid_ch->num_channels, 103 sizeof(struct spdk_io_channel *)); 104 if (!raid_ch->base_channel) { 105 SPDK_ERRLOG("Unable to allocate base bdevs io channel\n"); 106 return -ENOMEM; 107 } 108 for (i = 0; i < raid_ch->num_channels; i++) { 109 /* 110 * Get the spdk_io_channel for all the base bdevs. This is used during 111 * split logic to send the respective child bdev ios to respective base 112 * bdev io channel. 113 */ 114 raid_ch->base_channel[i] = spdk_bdev_get_io_channel( 115 raid_bdev->base_bdev_info[i].desc); 116 if (!raid_ch->base_channel[i]) { 117 uint8_t j; 118 119 for (j = 0; j < i; j++) { 120 spdk_put_io_channel(raid_ch->base_channel[j]); 121 } 122 free(raid_ch->base_channel); 123 raid_ch->base_channel = NULL; 124 SPDK_ERRLOG("Unable to create io channel for base bdev\n"); 125 return -ENOMEM; 126 } 127 } 128 129 return 0; 130 } 131 132 /* 133 * brief: 134 * raid_bdev_destroy_cb function is a cb function for raid bdev which deletes the 135 * hierarchy from raid bdev to base bdev io channels. It will be called per core 136 * params: 137 * io_device - pointer to raid bdev io device represented by raid_bdev 138 * ctx_buf - pointer to context buffer for raid bdev io channel 139 * returns: 140 * none 141 */ 142 static void 143 raid_bdev_destroy_cb(void *io_device, void *ctx_buf) 144 { 145 struct raid_bdev_io_channel *raid_ch = ctx_buf; 146 uint8_t i; 147 148 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_destroy_cb\n"); 149 150 assert(raid_ch != NULL); 151 assert(raid_ch->base_channel); 152 for (i = 0; i < raid_ch->num_channels; i++) { 153 /* Free base bdev channels */ 154 assert(raid_ch->base_channel[i] != NULL); 155 spdk_put_io_channel(raid_ch->base_channel[i]); 156 } 157 free(raid_ch->base_channel); 158 raid_ch->base_channel = NULL; 159 } 160 161 /* 162 * brief: 163 * raid_bdev_cleanup is used to cleanup and free raid_bdev related data 164 * structures. 165 * params: 166 * raid_bdev - pointer to raid_bdev 167 * returns: 168 * none 169 */ 170 static void 171 raid_bdev_cleanup(struct raid_bdev *raid_bdev) 172 { 173 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_cleanup, %p name %s, state %u, config %p\n", 174 raid_bdev, 175 raid_bdev->bdev.name, raid_bdev->state, raid_bdev->config); 176 if (raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) { 177 TAILQ_REMOVE(&g_raid_bdev_configuring_list, raid_bdev, state_link); 178 } else if (raid_bdev->state == RAID_BDEV_STATE_OFFLINE) { 179 TAILQ_REMOVE(&g_raid_bdev_offline_list, raid_bdev, state_link); 180 } else { 181 assert(0); 182 } 183 TAILQ_REMOVE(&g_raid_bdev_list, raid_bdev, global_link); 184 free(raid_bdev->bdev.name); 185 free(raid_bdev->base_bdev_info); 186 if (raid_bdev->config) { 187 raid_bdev->config->raid_bdev = NULL; 188 } 189 free(raid_bdev); 190 } 191 192 /* 193 * brief: 194 * wrapper for the bdev close operation 195 * params: 196 * base_info - raid base bdev info 197 * returns: 198 */ 199 static void 200 _raid_bdev_free_base_bdev_resource(void *ctx) 201 { 202 struct spdk_bdev_desc *desc = ctx; 203 204 spdk_bdev_close(desc); 205 } 206 207 208 /* 209 * brief: 210 * free resource of base bdev for raid bdev 211 * params: 212 * raid_bdev - pointer to raid bdev 213 * base_info - raid base bdev info 214 * returns: 215 * 0 - success 216 * non zero - failure 217 */ 218 static void 219 raid_bdev_free_base_bdev_resource(struct raid_bdev *raid_bdev, 220 struct raid_base_bdev_info *base_info) 221 { 222 spdk_bdev_module_release_bdev(base_info->bdev); 223 if (base_info->thread && base_info->thread != spdk_get_thread()) { 224 spdk_thread_send_msg(base_info->thread, _raid_bdev_free_base_bdev_resource, base_info->desc); 225 } else { 226 spdk_bdev_close(base_info->desc); 227 } 228 base_info->desc = NULL; 229 base_info->bdev = NULL; 230 231 assert(raid_bdev->num_base_bdevs_discovered); 232 raid_bdev->num_base_bdevs_discovered--; 233 } 234 235 /* 236 * brief: 237 * raid_bdev_destruct is the destruct function table pointer for raid bdev 238 * params: 239 * ctxt - pointer to raid_bdev 240 * returns: 241 * 0 - success 242 * non zero - failure 243 */ 244 static int 245 raid_bdev_destruct(void *ctxt) 246 { 247 struct raid_bdev *raid_bdev = ctxt; 248 struct raid_base_bdev_info *base_info; 249 250 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_destruct\n"); 251 252 raid_bdev->destruct_called = true; 253 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 254 /* 255 * Close all base bdev descriptors for which call has come from below 256 * layers. Also close the descriptors if we have started shutdown. 257 */ 258 if (g_shutdown_started || 259 ((base_info->remove_scheduled == true) && 260 (base_info->bdev != NULL))) { 261 raid_bdev_free_base_bdev_resource(raid_bdev, base_info); 262 } 263 } 264 265 if (g_shutdown_started) { 266 TAILQ_REMOVE(&g_raid_bdev_configured_list, raid_bdev, state_link); 267 raid_bdev->state = RAID_BDEV_STATE_OFFLINE; 268 TAILQ_INSERT_TAIL(&g_raid_bdev_offline_list, raid_bdev, state_link); 269 } 270 271 if (raid_bdev->module->stop != NULL) { 272 raid_bdev->module->stop(raid_bdev); 273 } 274 275 spdk_io_device_unregister(raid_bdev, NULL); 276 277 if (raid_bdev->num_base_bdevs_discovered == 0) { 278 /* Free raid_bdev when there are no base bdevs left */ 279 SPDK_DEBUGLOG(bdev_raid, "raid bdev base bdevs is 0, going to free all in destruct\n"); 280 raid_bdev_cleanup(raid_bdev); 281 } 282 283 return 0; 284 } 285 286 void 287 raid_bdev_io_complete(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status) 288 { 289 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); 290 291 spdk_bdev_io_complete(bdev_io, status); 292 } 293 294 /* 295 * brief: 296 * raid_bdev_io_complete_part - signal the completion of a part of the expected 297 * base bdev IOs and complete the raid_io if this is the final expected IO. 298 * The caller should first set raid_io->base_bdev_io_remaining. This function 299 * will decrement this counter by the value of the 'completed' parameter and 300 * complete the raid_io if the counter reaches 0. The caller is free to 301 * interpret the 'base_bdev_io_remaining' and 'completed' values as needed, 302 * it can represent e.g. blocks or IOs. 303 * params: 304 * raid_io - pointer to raid_bdev_io 305 * completed - the part of the raid_io that has been completed 306 * status - status of the base IO 307 * returns: 308 * true - if the raid_io is completed 309 * false - otherwise 310 */ 311 bool 312 raid_bdev_io_complete_part(struct raid_bdev_io *raid_io, uint64_t completed, 313 enum spdk_bdev_io_status status) 314 { 315 assert(raid_io->base_bdev_io_remaining >= completed); 316 raid_io->base_bdev_io_remaining -= completed; 317 318 if (status != SPDK_BDEV_IO_STATUS_SUCCESS) { 319 raid_io->base_bdev_io_status = status; 320 } 321 322 if (raid_io->base_bdev_io_remaining == 0) { 323 raid_bdev_io_complete(raid_io, raid_io->base_bdev_io_status); 324 return true; 325 } else { 326 return false; 327 } 328 } 329 330 /* 331 * brief: 332 * raid_bdev_queue_io_wait function processes the IO which failed to submit. 333 * It will try to queue the IOs after storing the context to bdev wait queue logic. 334 * params: 335 * raid_io - pointer to raid_bdev_io 336 * bdev - the block device that the IO is submitted to 337 * ch - io channel 338 * cb_fn - callback when the spdk_bdev_io for bdev becomes available 339 * returns: 340 * none 341 */ 342 void 343 raid_bdev_queue_io_wait(struct raid_bdev_io *raid_io, struct spdk_bdev *bdev, 344 struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn) 345 { 346 raid_io->waitq_entry.bdev = bdev; 347 raid_io->waitq_entry.cb_fn = cb_fn; 348 raid_io->waitq_entry.cb_arg = raid_io; 349 spdk_bdev_queue_io_wait(bdev, ch, &raid_io->waitq_entry); 350 } 351 352 static void 353 raid_base_bdev_reset_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 354 { 355 struct raid_bdev_io *raid_io = cb_arg; 356 357 spdk_bdev_free_io(bdev_io); 358 359 raid_bdev_io_complete_part(raid_io, 1, success ? 360 SPDK_BDEV_IO_STATUS_SUCCESS : 361 SPDK_BDEV_IO_STATUS_FAILED); 362 } 363 364 static void raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io); 365 366 static void 367 _raid_bdev_submit_reset_request(void *_raid_io) 368 { 369 struct raid_bdev_io *raid_io = _raid_io; 370 371 raid_bdev_submit_reset_request(raid_io); 372 } 373 374 /* 375 * brief: 376 * raid_bdev_submit_reset_request function submits reset requests 377 * to member disks; it will submit as many as possible unless a reset fails with -ENOMEM, in 378 * which case it will queue it for later submission 379 * params: 380 * raid_io 381 * returns: 382 * none 383 */ 384 static void 385 raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io) 386 { 387 struct raid_bdev *raid_bdev; 388 int ret; 389 uint8_t i; 390 struct raid_base_bdev_info *base_info; 391 struct spdk_io_channel *base_ch; 392 393 raid_bdev = raid_io->raid_bdev; 394 395 if (raid_io->base_bdev_io_remaining == 0) { 396 raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs; 397 } 398 399 while (raid_io->base_bdev_io_submitted < raid_bdev->num_base_bdevs) { 400 i = raid_io->base_bdev_io_submitted; 401 base_info = &raid_bdev->base_bdev_info[i]; 402 base_ch = raid_io->raid_ch->base_channel[i]; 403 ret = spdk_bdev_reset(base_info->desc, base_ch, 404 raid_base_bdev_reset_complete, raid_io); 405 if (ret == 0) { 406 raid_io->base_bdev_io_submitted++; 407 } else if (ret == -ENOMEM) { 408 raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch, 409 _raid_bdev_submit_reset_request); 410 return; 411 } else { 412 SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); 413 assert(false); 414 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 415 return; 416 } 417 } 418 } 419 420 /* 421 * brief: 422 * Callback function to spdk_bdev_io_get_buf. 423 * params: 424 * ch - pointer to raid bdev io channel 425 * bdev_io - pointer to parent bdev_io on raid bdev device 426 * success - True if buffer is allocated or false otherwise. 427 * returns: 428 * none 429 */ 430 static void 431 raid_bdev_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, 432 bool success) 433 { 434 struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 435 436 if (!success) { 437 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 438 return; 439 } 440 441 raid_io->raid_bdev->module->submit_rw_request(raid_io); 442 } 443 444 /* 445 * brief: 446 * raid_bdev_submit_request function is the submit_request function pointer of 447 * raid bdev function table. This is used to submit the io on raid_bdev to below 448 * layers. 449 * params: 450 * ch - pointer to raid bdev io channel 451 * bdev_io - pointer to parent bdev_io on raid bdev device 452 * returns: 453 * none 454 */ 455 static void 456 raid_bdev_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 457 { 458 struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 459 460 raid_io->raid_bdev = bdev_io->bdev->ctxt; 461 raid_io->raid_ch = spdk_io_channel_get_ctx(ch); 462 raid_io->base_bdev_io_remaining = 0; 463 raid_io->base_bdev_io_submitted = 0; 464 raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS; 465 466 switch (bdev_io->type) { 467 case SPDK_BDEV_IO_TYPE_READ: 468 spdk_bdev_io_get_buf(bdev_io, raid_bdev_get_buf_cb, 469 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); 470 break; 471 case SPDK_BDEV_IO_TYPE_WRITE: 472 raid_io->raid_bdev->module->submit_rw_request(raid_io); 473 break; 474 475 case SPDK_BDEV_IO_TYPE_RESET: 476 raid_bdev_submit_reset_request(raid_io); 477 break; 478 479 case SPDK_BDEV_IO_TYPE_FLUSH: 480 case SPDK_BDEV_IO_TYPE_UNMAP: 481 raid_io->raid_bdev->module->submit_null_payload_request(raid_io); 482 break; 483 484 default: 485 SPDK_ERRLOG("submit request, invalid io type %u\n", bdev_io->type); 486 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 487 break; 488 } 489 } 490 491 /* 492 * brief: 493 * _raid_bdev_io_type_supported checks whether io_type is supported in 494 * all base bdev modules of raid bdev module. If anyone among the base_bdevs 495 * doesn't support, the raid device doesn't supports. 496 * 497 * params: 498 * raid_bdev - pointer to raid bdev context 499 * io_type - io type 500 * returns: 501 * true - io_type is supported 502 * false - io_type is not supported 503 */ 504 inline static bool 505 _raid_bdev_io_type_supported(struct raid_bdev *raid_bdev, enum spdk_bdev_io_type io_type) 506 { 507 struct raid_base_bdev_info *base_info; 508 509 if (io_type == SPDK_BDEV_IO_TYPE_FLUSH || 510 io_type == SPDK_BDEV_IO_TYPE_UNMAP) { 511 if (raid_bdev->module->submit_null_payload_request == NULL) { 512 return false; 513 } 514 } 515 516 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 517 if (base_info->bdev == NULL) { 518 assert(false); 519 continue; 520 } 521 522 if (spdk_bdev_io_type_supported(base_info->bdev, io_type) == false) { 523 return false; 524 } 525 } 526 527 return true; 528 } 529 530 /* 531 * brief: 532 * raid_bdev_io_type_supported is the io_supported function for bdev function 533 * table which returns whether the particular io type is supported or not by 534 * raid bdev module 535 * params: 536 * ctx - pointer to raid bdev context 537 * type - io type 538 * returns: 539 * true - io_type is supported 540 * false - io_type is not supported 541 */ 542 static bool 543 raid_bdev_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 544 { 545 switch (io_type) { 546 case SPDK_BDEV_IO_TYPE_READ: 547 case SPDK_BDEV_IO_TYPE_WRITE: 548 return true; 549 550 case SPDK_BDEV_IO_TYPE_FLUSH: 551 case SPDK_BDEV_IO_TYPE_RESET: 552 case SPDK_BDEV_IO_TYPE_UNMAP: 553 return _raid_bdev_io_type_supported(ctx, io_type); 554 555 default: 556 return false; 557 } 558 559 return false; 560 } 561 562 /* 563 * brief: 564 * raid_bdev_get_io_channel is the get_io_channel function table pointer for 565 * raid bdev. This is used to return the io channel for this raid bdev 566 * params: 567 * ctxt - pointer to raid_bdev 568 * returns: 569 * pointer to io channel for raid bdev 570 */ 571 static struct spdk_io_channel * 572 raid_bdev_get_io_channel(void *ctxt) 573 { 574 struct raid_bdev *raid_bdev = ctxt; 575 576 return spdk_get_io_channel(raid_bdev); 577 } 578 579 /* 580 * brief: 581 * raid_bdev_dump_info_json is the function table pointer for raid bdev 582 * params: 583 * ctx - pointer to raid_bdev 584 * w - pointer to json context 585 * returns: 586 * 0 - success 587 * non zero - failure 588 */ 589 static int 590 raid_bdev_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) 591 { 592 struct raid_bdev *raid_bdev = ctx; 593 struct raid_base_bdev_info *base_info; 594 595 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_dump_config_json\n"); 596 assert(raid_bdev != NULL); 597 598 /* Dump the raid bdev configuration related information */ 599 spdk_json_write_named_object_begin(w, "raid"); 600 spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb); 601 spdk_json_write_named_uint32(w, "state", raid_bdev->state); 602 spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level)); 603 spdk_json_write_named_uint32(w, "destruct_called", raid_bdev->destruct_called); 604 spdk_json_write_named_uint32(w, "num_base_bdevs", raid_bdev->num_base_bdevs); 605 spdk_json_write_named_uint32(w, "num_base_bdevs_discovered", raid_bdev->num_base_bdevs_discovered); 606 spdk_json_write_name(w, "base_bdevs_list"); 607 spdk_json_write_array_begin(w); 608 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 609 if (base_info->bdev) { 610 spdk_json_write_string(w, base_info->bdev->name); 611 } else { 612 spdk_json_write_null(w); 613 } 614 } 615 spdk_json_write_array_end(w); 616 spdk_json_write_object_end(w); 617 618 return 0; 619 } 620 621 /* 622 * brief: 623 * raid_bdev_write_config_json is the function table pointer for raid bdev 624 * params: 625 * bdev - pointer to spdk_bdev 626 * w - pointer to json context 627 * returns: 628 * none 629 */ 630 static void 631 raid_bdev_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 632 { 633 struct raid_bdev *raid_bdev = bdev->ctxt; 634 struct raid_base_bdev_info *base_info; 635 636 spdk_json_write_object_begin(w); 637 638 spdk_json_write_named_string(w, "method", "bdev_raid_create"); 639 640 spdk_json_write_named_object_begin(w, "params"); 641 spdk_json_write_named_string(w, "name", bdev->name); 642 spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb); 643 spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level)); 644 645 spdk_json_write_named_array_begin(w, "base_bdevs"); 646 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 647 if (base_info->bdev) { 648 spdk_json_write_string(w, base_info->bdev->name); 649 } 650 } 651 spdk_json_write_array_end(w); 652 spdk_json_write_object_end(w); 653 654 spdk_json_write_object_end(w); 655 } 656 657 static int 658 raid_bdev_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int array_size) 659 { 660 struct raid_bdev *raid_bdev = ctx; 661 struct spdk_bdev *base_bdev; 662 uint32_t i; 663 int domains_count = 0, rc; 664 665 /* First loop to get the number of memory domains */ 666 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 667 base_bdev = raid_bdev->base_bdev_info[i].bdev; 668 rc = spdk_bdev_get_memory_domains(base_bdev, NULL, 0); 669 if (rc < 0) { 670 return rc; 671 } 672 domains_count += rc; 673 } 674 675 if (!domains || array_size < domains_count) { 676 return domains_count; 677 } 678 679 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 680 base_bdev = raid_bdev->base_bdev_info[i].bdev; 681 rc = spdk_bdev_get_memory_domains(base_bdev, domains, array_size); 682 if (rc < 0) { 683 return rc; 684 } 685 domains += rc; 686 array_size -= rc; 687 } 688 689 return domains_count; 690 } 691 692 /* g_raid_bdev_fn_table is the function table for raid bdev */ 693 static const struct spdk_bdev_fn_table g_raid_bdev_fn_table = { 694 .destruct = raid_bdev_destruct, 695 .submit_request = raid_bdev_submit_request, 696 .io_type_supported = raid_bdev_io_type_supported, 697 .get_io_channel = raid_bdev_get_io_channel, 698 .dump_info_json = raid_bdev_dump_info_json, 699 .write_config_json = raid_bdev_write_config_json, 700 .get_memory_domains = raid_bdev_get_memory_domains, 701 }; 702 703 /* 704 * brief: 705 * raid_bdev_config_cleanup function is used to free memory for one raid_bdev in configuration 706 * params: 707 * raid_cfg - pointer to raid_bdev_config structure 708 * returns: 709 * none 710 */ 711 void 712 raid_bdev_config_cleanup(struct raid_bdev_config *raid_cfg) 713 { 714 uint8_t i; 715 716 TAILQ_REMOVE(&g_raid_config.raid_bdev_config_head, raid_cfg, link); 717 g_raid_config.total_raid_bdev--; 718 719 if (raid_cfg->base_bdev) { 720 for (i = 0; i < raid_cfg->num_base_bdevs; i++) { 721 free(raid_cfg->base_bdev[i].name); 722 } 723 free(raid_cfg->base_bdev); 724 } 725 free(raid_cfg->name); 726 free(raid_cfg); 727 } 728 729 /* 730 * brief: 731 * raid_bdev_free is the raid bdev function table function pointer. This is 732 * called on bdev free path 733 * params: 734 * none 735 * returns: 736 * none 737 */ 738 static void 739 raid_bdev_free(void) 740 { 741 struct raid_bdev_config *raid_cfg, *tmp; 742 743 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_free\n"); 744 TAILQ_FOREACH_SAFE(raid_cfg, &g_raid_config.raid_bdev_config_head, link, tmp) { 745 raid_bdev_config_cleanup(raid_cfg); 746 } 747 } 748 749 /* brief 750 * raid_bdev_config_find_by_name is a helper function to find raid bdev config 751 * by name as key. 752 * 753 * params: 754 * raid_name - name for raid bdev. 755 */ 756 struct raid_bdev_config * 757 raid_bdev_config_find_by_name(const char *raid_name) 758 { 759 struct raid_bdev_config *raid_cfg; 760 761 TAILQ_FOREACH(raid_cfg, &g_raid_config.raid_bdev_config_head, link) { 762 if (!strcmp(raid_cfg->name, raid_name)) { 763 return raid_cfg; 764 } 765 } 766 767 return raid_cfg; 768 } 769 770 /* 771 * brief 772 * raid_bdev_config_add function adds config for newly created raid bdev. 773 * 774 * params: 775 * raid_name - name for raid bdev. 776 * strip_size - strip size in KB 777 * num_base_bdevs - number of base bdevs. 778 * level - raid level. 779 * _raid_cfg - Pointer to newly added configuration 780 */ 781 int 782 raid_bdev_config_add(const char *raid_name, uint32_t strip_size, uint8_t num_base_bdevs, 783 enum raid_level level, struct raid_bdev_config **_raid_cfg) 784 { 785 struct raid_bdev_config *raid_cfg; 786 787 raid_cfg = raid_bdev_config_find_by_name(raid_name); 788 if (raid_cfg != NULL) { 789 SPDK_ERRLOG("Duplicate raid bdev name found in config file %s\n", 790 raid_name); 791 return -EEXIST; 792 } 793 794 if (spdk_u32_is_pow2(strip_size) == false) { 795 SPDK_ERRLOG("Invalid strip size %" PRIu32 "\n", strip_size); 796 return -EINVAL; 797 } 798 799 if (num_base_bdevs == 0) { 800 SPDK_ERRLOG("Invalid base device count %u\n", num_base_bdevs); 801 return -EINVAL; 802 } 803 804 raid_cfg = calloc(1, sizeof(*raid_cfg)); 805 if (raid_cfg == NULL) { 806 SPDK_ERRLOG("unable to allocate memory\n"); 807 return -ENOMEM; 808 } 809 810 raid_cfg->name = strdup(raid_name); 811 if (!raid_cfg->name) { 812 free(raid_cfg); 813 SPDK_ERRLOG("unable to allocate memory\n"); 814 return -ENOMEM; 815 } 816 raid_cfg->strip_size = strip_size; 817 raid_cfg->num_base_bdevs = num_base_bdevs; 818 raid_cfg->level = level; 819 820 raid_cfg->base_bdev = calloc(num_base_bdevs, sizeof(*raid_cfg->base_bdev)); 821 if (raid_cfg->base_bdev == NULL) { 822 free(raid_cfg->name); 823 free(raid_cfg); 824 SPDK_ERRLOG("unable to allocate memory\n"); 825 return -ENOMEM; 826 } 827 828 TAILQ_INSERT_TAIL(&g_raid_config.raid_bdev_config_head, raid_cfg, link); 829 g_raid_config.total_raid_bdev++; 830 831 *_raid_cfg = raid_cfg; 832 return 0; 833 } 834 835 /* 836 * brief: 837 * raid_bdev_config_add_base_bdev function add base bdev to raid bdev config. 838 * 839 * params: 840 * raid_cfg - pointer to raid bdev configuration 841 * base_bdev_name - name of base bdev 842 * slot - Position to add base bdev 843 */ 844 int 845 raid_bdev_config_add_base_bdev(struct raid_bdev_config *raid_cfg, const char *base_bdev_name, 846 uint8_t slot) 847 { 848 uint8_t i; 849 struct raid_bdev_config *tmp; 850 851 if (slot >= raid_cfg->num_base_bdevs) { 852 return -EINVAL; 853 } 854 855 TAILQ_FOREACH(tmp, &g_raid_config.raid_bdev_config_head, link) { 856 for (i = 0; i < tmp->num_base_bdevs; i++) { 857 if (tmp->base_bdev[i].name != NULL) { 858 if (!strcmp(tmp->base_bdev[i].name, base_bdev_name)) { 859 SPDK_ERRLOG("duplicate base bdev name %s mentioned\n", 860 base_bdev_name); 861 return -EEXIST; 862 } 863 } 864 } 865 } 866 867 raid_cfg->base_bdev[slot].name = strdup(base_bdev_name); 868 if (raid_cfg->base_bdev[slot].name == NULL) { 869 SPDK_ERRLOG("unable to allocate memory\n"); 870 return -ENOMEM; 871 } 872 873 return 0; 874 } 875 876 static struct { 877 const char *name; 878 enum raid_level value; 879 } g_raid_level_names[] = { 880 { "raid0", RAID0 }, 881 { "0", RAID0 }, 882 { "raid5", RAID5 }, 883 { "5", RAID5 }, 884 { "concat", CONCAT }, 885 { } 886 }; 887 888 /* We have to use the typedef in the function declaration to appease astyle. */ 889 typedef enum raid_level raid_level_t; 890 891 raid_level_t 892 raid_bdev_parse_raid_level(const char *str) 893 { 894 unsigned int i; 895 896 assert(str != NULL); 897 898 for (i = 0; g_raid_level_names[i].name != NULL; i++) { 899 if (strcasecmp(g_raid_level_names[i].name, str) == 0) { 900 return g_raid_level_names[i].value; 901 } 902 } 903 904 return INVALID_RAID_LEVEL; 905 } 906 907 const char * 908 raid_bdev_level_to_str(enum raid_level level) 909 { 910 unsigned int i; 911 912 for (i = 0; g_raid_level_names[i].name != NULL; i++) { 913 if (g_raid_level_names[i].value == level) { 914 return g_raid_level_names[i].name; 915 } 916 } 917 918 return ""; 919 } 920 921 /* 922 * brief: 923 * raid_bdev_fini_start is called when bdev layer is starting the 924 * shutdown process 925 * params: 926 * none 927 * returns: 928 * none 929 */ 930 static void 931 raid_bdev_fini_start(void) 932 { 933 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_fini_start\n"); 934 g_shutdown_started = true; 935 } 936 937 /* 938 * brief: 939 * raid_bdev_exit is called on raid bdev module exit time by bdev layer 940 * params: 941 * none 942 * returns: 943 * none 944 */ 945 static void 946 raid_bdev_exit(void) 947 { 948 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_exit\n"); 949 raid_bdev_free(); 950 } 951 952 /* 953 * brief: 954 * raid_bdev_get_ctx_size is used to return the context size of bdev_io for raid 955 * module 956 * params: 957 * none 958 * returns: 959 * size of spdk_bdev_io context for raid 960 */ 961 static int 962 raid_bdev_get_ctx_size(void) 963 { 964 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_get_ctx_size\n"); 965 return sizeof(struct raid_bdev_io); 966 } 967 968 /* 969 * brief: 970 * raid_bdev_can_claim_bdev is the function to check if this base_bdev can be 971 * claimed by raid bdev or not. 972 * params: 973 * bdev_name - represents base bdev name 974 * _raid_cfg - pointer to raid bdev config parsed from config file 975 * base_bdev_slot - if bdev can be claimed, it represents the base_bdev correct 976 * slot. This field is only valid if return value of this function is true 977 * returns: 978 * true - if bdev can be claimed 979 * false - if bdev can't be claimed 980 */ 981 static bool 982 raid_bdev_can_claim_bdev(const char *bdev_name, struct raid_bdev_config **_raid_cfg, 983 uint8_t *base_bdev_slot) 984 { 985 struct raid_bdev_config *raid_cfg; 986 uint8_t i; 987 988 TAILQ_FOREACH(raid_cfg, &g_raid_config.raid_bdev_config_head, link) { 989 for (i = 0; i < raid_cfg->num_base_bdevs; i++) { 990 /* 991 * Check if the base bdev name is part of raid bdev configuration. 992 * If match is found then return true and the slot information where 993 * this base bdev should be inserted in raid bdev 994 */ 995 if (!strcmp(bdev_name, raid_cfg->base_bdev[i].name)) { 996 *_raid_cfg = raid_cfg; 997 *base_bdev_slot = i; 998 return true; 999 } 1000 } 1001 } 1002 1003 return false; 1004 } 1005 1006 1007 static struct spdk_bdev_module g_raid_if = { 1008 .name = "raid", 1009 .module_init = raid_bdev_init, 1010 .fini_start = raid_bdev_fini_start, 1011 .module_fini = raid_bdev_exit, 1012 .get_ctx_size = raid_bdev_get_ctx_size, 1013 .examine_config = raid_bdev_examine, 1014 .async_init = false, 1015 .async_fini = false, 1016 }; 1017 SPDK_BDEV_MODULE_REGISTER(raid, &g_raid_if) 1018 1019 /* 1020 * brief: 1021 * raid_bdev_init is the initialization function for raid bdev module 1022 * params: 1023 * none 1024 * returns: 1025 * 0 - success 1026 * non zero - failure 1027 */ 1028 static int 1029 raid_bdev_init(void) 1030 { 1031 return 0; 1032 } 1033 1034 /* 1035 * brief: 1036 * raid_bdev_create allocates raid bdev based on passed configuration 1037 * params: 1038 * raid_cfg - configuration of raid bdev 1039 * returns: 1040 * 0 - success 1041 * non zero - failure 1042 */ 1043 int 1044 raid_bdev_create(struct raid_bdev_config *raid_cfg) 1045 { 1046 struct raid_bdev *raid_bdev; 1047 struct spdk_bdev *raid_bdev_gen; 1048 struct raid_bdev_module *module; 1049 1050 module = raid_bdev_module_find(raid_cfg->level); 1051 if (module == NULL) { 1052 SPDK_ERRLOG("Unsupported raid level '%d'\n", raid_cfg->level); 1053 return -EINVAL; 1054 } 1055 1056 assert(module->base_bdevs_min != 0); 1057 if (raid_cfg->num_base_bdevs < module->base_bdevs_min) { 1058 SPDK_ERRLOG("At least %u base devices required for %s\n", 1059 module->base_bdevs_min, 1060 raid_bdev_level_to_str(raid_cfg->level)); 1061 return -EINVAL; 1062 } 1063 1064 raid_bdev = calloc(1, sizeof(*raid_bdev)); 1065 if (!raid_bdev) { 1066 SPDK_ERRLOG("Unable to allocate memory for raid bdev\n"); 1067 return -ENOMEM; 1068 } 1069 1070 raid_bdev->module = module; 1071 raid_bdev->num_base_bdevs = raid_cfg->num_base_bdevs; 1072 raid_bdev->base_bdev_info = calloc(raid_bdev->num_base_bdevs, 1073 sizeof(struct raid_base_bdev_info)); 1074 if (!raid_bdev->base_bdev_info) { 1075 SPDK_ERRLOG("Unable able to allocate base bdev info\n"); 1076 free(raid_bdev); 1077 return -ENOMEM; 1078 } 1079 1080 /* strip_size_kb is from the rpc param. strip_size is in blocks and used 1081 * internally and set later. 1082 */ 1083 raid_bdev->strip_size = 0; 1084 raid_bdev->strip_size_kb = raid_cfg->strip_size; 1085 raid_bdev->state = RAID_BDEV_STATE_CONFIGURING; 1086 raid_bdev->config = raid_cfg; 1087 raid_bdev->level = raid_cfg->level; 1088 1089 raid_bdev_gen = &raid_bdev->bdev; 1090 1091 raid_bdev_gen->name = strdup(raid_cfg->name); 1092 if (!raid_bdev_gen->name) { 1093 SPDK_ERRLOG("Unable to allocate name for raid\n"); 1094 free(raid_bdev->base_bdev_info); 1095 free(raid_bdev); 1096 return -ENOMEM; 1097 } 1098 1099 raid_bdev_gen->product_name = "Raid Volume"; 1100 raid_bdev_gen->ctxt = raid_bdev; 1101 raid_bdev_gen->fn_table = &g_raid_bdev_fn_table; 1102 raid_bdev_gen->module = &g_raid_if; 1103 raid_bdev_gen->write_cache = 0; 1104 1105 TAILQ_INSERT_TAIL(&g_raid_bdev_configuring_list, raid_bdev, state_link); 1106 TAILQ_INSERT_TAIL(&g_raid_bdev_list, raid_bdev, global_link); 1107 1108 raid_cfg->raid_bdev = raid_bdev; 1109 1110 return 0; 1111 } 1112 1113 /* 1114 * brief 1115 * raid_bdev_alloc_base_bdev_resource allocates resource of base bdev. 1116 * params: 1117 * raid_bdev - pointer to raid bdev 1118 * bdev_name - base bdev name 1119 * base_bdev_slot - position to add base bdev 1120 * returns: 1121 * 0 - success 1122 * non zero - failure 1123 */ 1124 static int 1125 raid_bdev_alloc_base_bdev_resource(struct raid_bdev *raid_bdev, const char *bdev_name, 1126 uint8_t base_bdev_slot) 1127 { 1128 struct spdk_bdev_desc *desc; 1129 struct spdk_bdev *bdev; 1130 int rc; 1131 1132 rc = spdk_bdev_open_ext(bdev_name, true, raid_bdev_event_base_bdev, NULL, &desc); 1133 if (rc != 0) { 1134 if (rc != -ENODEV) { 1135 SPDK_ERRLOG("Unable to create desc on bdev '%s'\n", bdev_name); 1136 } 1137 return rc; 1138 } 1139 1140 bdev = spdk_bdev_desc_get_bdev(desc); 1141 1142 rc = spdk_bdev_module_claim_bdev(bdev, NULL, &g_raid_if); 1143 if (rc != 0) { 1144 SPDK_ERRLOG("Unable to claim this bdev as it is already claimed\n"); 1145 spdk_bdev_close(desc); 1146 return rc; 1147 } 1148 1149 SPDK_DEBUGLOG(bdev_raid, "bdev %s is claimed\n", bdev_name); 1150 1151 assert(raid_bdev->state != RAID_BDEV_STATE_ONLINE); 1152 assert(base_bdev_slot < raid_bdev->num_base_bdevs); 1153 1154 raid_bdev->base_bdev_info[base_bdev_slot].thread = spdk_get_thread(); 1155 raid_bdev->base_bdev_info[base_bdev_slot].bdev = bdev; 1156 raid_bdev->base_bdev_info[base_bdev_slot].desc = desc; 1157 raid_bdev->num_base_bdevs_discovered++; 1158 assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs); 1159 1160 return 0; 1161 } 1162 1163 /* 1164 * brief: 1165 * If raid bdev config is complete, then only register the raid bdev to 1166 * bdev layer and remove this raid bdev from configuring list and 1167 * insert the raid bdev to configured list 1168 * params: 1169 * raid_bdev - pointer to raid bdev 1170 * returns: 1171 * 0 - success 1172 * non zero - failure 1173 */ 1174 static int 1175 raid_bdev_configure(struct raid_bdev *raid_bdev) 1176 { 1177 uint32_t blocklen = 0; 1178 struct spdk_bdev *raid_bdev_gen; 1179 struct raid_base_bdev_info *base_info; 1180 int rc = 0; 1181 1182 assert(raid_bdev->state == RAID_BDEV_STATE_CONFIGURING); 1183 assert(raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs); 1184 1185 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1186 /* Check blocklen for all base bdevs that it should be same */ 1187 if (blocklen == 0) { 1188 blocklen = base_info->bdev->blocklen; 1189 } else if (blocklen != base_info->bdev->blocklen) { 1190 /* 1191 * Assumption is that all the base bdevs for any raid bdev should 1192 * have same blocklen 1193 */ 1194 SPDK_ERRLOG("Blocklen of various bdevs not matching\n"); 1195 return -EINVAL; 1196 } 1197 } 1198 assert(blocklen > 0); 1199 1200 /* The strip_size_kb is read in from user in KB. Convert to blocks here for 1201 * internal use. 1202 */ 1203 raid_bdev->strip_size = (raid_bdev->strip_size_kb * 1024) / blocklen; 1204 raid_bdev->strip_size_shift = spdk_u32log2(raid_bdev->strip_size); 1205 raid_bdev->blocklen_shift = spdk_u32log2(blocklen); 1206 1207 raid_bdev_gen = &raid_bdev->bdev; 1208 raid_bdev_gen->blocklen = blocklen; 1209 1210 rc = raid_bdev->module->start(raid_bdev); 1211 if (rc != 0) { 1212 SPDK_ERRLOG("raid module startup callback failed\n"); 1213 return rc; 1214 } 1215 raid_bdev->state = RAID_BDEV_STATE_ONLINE; 1216 SPDK_DEBUGLOG(bdev_raid, "io device register %p\n", raid_bdev); 1217 SPDK_DEBUGLOG(bdev_raid, "blockcnt %" PRIu64 ", blocklen %u\n", 1218 raid_bdev_gen->blockcnt, raid_bdev_gen->blocklen); 1219 spdk_io_device_register(raid_bdev, raid_bdev_create_cb, raid_bdev_destroy_cb, 1220 sizeof(struct raid_bdev_io_channel), 1221 raid_bdev->bdev.name); 1222 rc = spdk_bdev_register(raid_bdev_gen); 1223 if (rc != 0) { 1224 SPDK_ERRLOG("Unable to register raid bdev and stay at configuring state\n"); 1225 if (raid_bdev->module->stop != NULL) { 1226 raid_bdev->module->stop(raid_bdev); 1227 } 1228 spdk_io_device_unregister(raid_bdev, NULL); 1229 raid_bdev->state = RAID_BDEV_STATE_CONFIGURING; 1230 return rc; 1231 } 1232 SPDK_DEBUGLOG(bdev_raid, "raid bdev generic %p\n", raid_bdev_gen); 1233 TAILQ_REMOVE(&g_raid_bdev_configuring_list, raid_bdev, state_link); 1234 TAILQ_INSERT_TAIL(&g_raid_bdev_configured_list, raid_bdev, state_link); 1235 SPDK_DEBUGLOG(bdev_raid, "raid bdev is created with name %s, raid_bdev %p\n", 1236 raid_bdev_gen->name, raid_bdev); 1237 1238 return 0; 1239 } 1240 1241 /* 1242 * brief: 1243 * If raid bdev is online and registered, change the bdev state to 1244 * configuring and unregister this raid device. Queue this raid device 1245 * in configuring list 1246 * params: 1247 * raid_bdev - pointer to raid bdev 1248 * cb_fn - callback function 1249 * cb_arg - argument to callback function 1250 * returns: 1251 * none 1252 */ 1253 static void 1254 raid_bdev_deconfigure(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn, 1255 void *cb_arg) 1256 { 1257 if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) { 1258 if (cb_fn) { 1259 cb_fn(cb_arg, 0); 1260 } 1261 return; 1262 } 1263 1264 assert(raid_bdev->num_base_bdevs == raid_bdev->num_base_bdevs_discovered); 1265 TAILQ_REMOVE(&g_raid_bdev_configured_list, raid_bdev, state_link); 1266 raid_bdev->state = RAID_BDEV_STATE_OFFLINE; 1267 assert(raid_bdev->num_base_bdevs_discovered); 1268 TAILQ_INSERT_TAIL(&g_raid_bdev_offline_list, raid_bdev, state_link); 1269 SPDK_DEBUGLOG(bdev_raid, "raid bdev state changing from online to offline\n"); 1270 1271 spdk_bdev_unregister(&raid_bdev->bdev, cb_fn, cb_arg); 1272 } 1273 1274 /* 1275 * brief: 1276 * raid_bdev_find_by_base_bdev function finds the raid bdev which has 1277 * claimed the base bdev. 1278 * params: 1279 * base_bdev - pointer to base bdev pointer 1280 * _raid_bdev - Reference to pointer to raid bdev 1281 * _base_info - Reference to the raid base bdev info. 1282 * returns: 1283 * true - if the raid bdev is found. 1284 * false - if the raid bdev is not found. 1285 */ 1286 static bool 1287 raid_bdev_find_by_base_bdev(struct spdk_bdev *base_bdev, struct raid_bdev **_raid_bdev, 1288 struct raid_base_bdev_info **_base_info) 1289 { 1290 struct raid_bdev *raid_bdev; 1291 struct raid_base_bdev_info *base_info; 1292 1293 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { 1294 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1295 if (base_info->bdev == base_bdev) { 1296 *_raid_bdev = raid_bdev; 1297 *_base_info = base_info; 1298 return true; 1299 } 1300 } 1301 } 1302 1303 return false; 1304 } 1305 1306 /* 1307 * brief: 1308 * raid_bdev_remove_base_bdev function is called by below layers when base_bdev 1309 * is removed. This function checks if this base bdev is part of any raid bdev 1310 * or not. If yes, it takes necessary action on that particular raid bdev. 1311 * params: 1312 * base_bdev - pointer to base bdev pointer which got removed 1313 * returns: 1314 * none 1315 */ 1316 static void 1317 raid_bdev_remove_base_bdev(struct spdk_bdev *base_bdev) 1318 { 1319 struct raid_bdev *raid_bdev = NULL; 1320 struct raid_base_bdev_info *base_info; 1321 1322 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_remove_base_bdev\n"); 1323 1324 /* Find the raid_bdev which has claimed this base_bdev */ 1325 if (!raid_bdev_find_by_base_bdev(base_bdev, &raid_bdev, &base_info)) { 1326 SPDK_ERRLOG("bdev to remove '%s' not found\n", base_bdev->name); 1327 return; 1328 } 1329 1330 assert(base_info->desc); 1331 base_info->remove_scheduled = true; 1332 1333 if (raid_bdev->destruct_called == true || 1334 raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) { 1335 /* 1336 * As raid bdev is not registered yet or already unregistered, 1337 * so cleanup should be done here itself. 1338 */ 1339 raid_bdev_free_base_bdev_resource(raid_bdev, base_info); 1340 if (raid_bdev->num_base_bdevs_discovered == 0) { 1341 /* There is no base bdev for this raid, so free the raid device. */ 1342 raid_bdev_cleanup(raid_bdev); 1343 return; 1344 } 1345 } 1346 1347 raid_bdev_deconfigure(raid_bdev, NULL, NULL); 1348 } 1349 1350 /* 1351 * brief: 1352 * raid_bdev_event_base_bdev function is called by below layers when base_bdev 1353 * triggers asynchronous event. 1354 * params: 1355 * type - event details. 1356 * bdev - bdev that triggered event. 1357 * event_ctx - context for event. 1358 * returns: 1359 * none 1360 */ 1361 static void 1362 raid_bdev_event_base_bdev(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, 1363 void *event_ctx) 1364 { 1365 switch (type) { 1366 case SPDK_BDEV_EVENT_REMOVE: 1367 raid_bdev_remove_base_bdev(bdev); 1368 break; 1369 default: 1370 SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type); 1371 break; 1372 } 1373 } 1374 1375 /* 1376 * brief: 1377 * Remove base bdevs from the raid bdev one by one. Skip any base bdev which 1378 * doesn't exist. 1379 * params: 1380 * raid_cfg - pointer to raid bdev config. 1381 * cb_fn - callback function 1382 * cb_ctx - argument to callback function 1383 */ 1384 void 1385 raid_bdev_remove_base_devices(struct raid_bdev_config *raid_cfg, 1386 raid_bdev_destruct_cb cb_fn, void *cb_arg) 1387 { 1388 struct raid_bdev *raid_bdev; 1389 struct raid_base_bdev_info *base_info; 1390 1391 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_remove_base_devices\n"); 1392 1393 raid_bdev = raid_cfg->raid_bdev; 1394 if (raid_bdev == NULL) { 1395 SPDK_DEBUGLOG(bdev_raid, "raid bdev %s doesn't exist now\n", raid_cfg->name); 1396 if (cb_fn) { 1397 cb_fn(cb_arg, 0); 1398 } 1399 return; 1400 } 1401 1402 if (raid_bdev->destroy_started) { 1403 SPDK_DEBUGLOG(bdev_raid, "destroying raid bdev %s is already started\n", 1404 raid_cfg->name); 1405 if (cb_fn) { 1406 cb_fn(cb_arg, -EALREADY); 1407 } 1408 return; 1409 } 1410 1411 raid_bdev->destroy_started = true; 1412 1413 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1414 if (base_info->bdev == NULL) { 1415 continue; 1416 } 1417 1418 assert(base_info->desc); 1419 base_info->remove_scheduled = true; 1420 1421 if (raid_bdev->destruct_called == true || 1422 raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) { 1423 /* 1424 * As raid bdev is not registered yet or already unregistered, 1425 * so cleanup should be done here itself. 1426 */ 1427 raid_bdev_free_base_bdev_resource(raid_bdev, base_info); 1428 } 1429 } 1430 1431 if (raid_bdev->num_base_bdevs_discovered == 0) { 1432 /* There is no base bdev for this raid, so free the raid device. */ 1433 raid_bdev_cleanup(raid_bdev); 1434 if (cb_fn) { 1435 cb_fn(cb_arg, 0); 1436 } 1437 } else { 1438 raid_bdev_deconfigure(raid_bdev, cb_fn, cb_arg); 1439 } 1440 } 1441 1442 /* 1443 * brief: 1444 * raid_bdev_add_base_device function is the actual function which either adds 1445 * the nvme base device to existing raid bdev or create a new raid bdev. It also claims 1446 * the base device and keep the open descriptor. 1447 * params: 1448 * raid_cfg - pointer to raid bdev config 1449 * bdev - pointer to base bdev 1450 * base_bdev_slot - position to add base bdev 1451 * returns: 1452 * 0 - success 1453 * non zero - failure 1454 */ 1455 static int 1456 raid_bdev_add_base_device(struct raid_bdev_config *raid_cfg, const char *bdev_name, 1457 uint8_t base_bdev_slot) 1458 { 1459 struct raid_bdev *raid_bdev; 1460 int rc; 1461 1462 raid_bdev = raid_cfg->raid_bdev; 1463 if (!raid_bdev) { 1464 SPDK_ERRLOG("Raid bdev '%s' is not created yet\n", raid_cfg->name); 1465 return -ENODEV; 1466 } 1467 1468 rc = raid_bdev_alloc_base_bdev_resource(raid_bdev, bdev_name, base_bdev_slot); 1469 if (rc != 0) { 1470 if (rc != -ENODEV) { 1471 SPDK_ERRLOG("Failed to allocate resource for bdev '%s'\n", bdev_name); 1472 } 1473 return rc; 1474 } 1475 1476 assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs); 1477 1478 if (raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs) { 1479 rc = raid_bdev_configure(raid_bdev); 1480 if (rc != 0) { 1481 SPDK_ERRLOG("Failed to configure raid bdev\n"); 1482 return rc; 1483 } 1484 } 1485 1486 return 0; 1487 } 1488 1489 /* 1490 * brief: 1491 * Add base bdevs to the raid bdev one by one. Skip any base bdev which doesn't 1492 * exist or fails to add. If all base bdevs are successfully added, the raid bdev 1493 * moves to the configured state and becomes available. Otherwise, the raid bdev 1494 * stays at the configuring state with added base bdevs. 1495 * params: 1496 * raid_cfg - pointer to raid bdev config 1497 * returns: 1498 * 0 - The raid bdev moves to the configured state or stays at the configuring 1499 * state with added base bdevs due to any nonexistent base bdev. 1500 * non zero - Failed to add any base bdev and stays at the configuring state with 1501 * added base bdevs. 1502 */ 1503 int 1504 raid_bdev_add_base_devices(struct raid_bdev_config *raid_cfg) 1505 { 1506 uint8_t i; 1507 int rc = 0, _rc; 1508 1509 for (i = 0; i < raid_cfg->num_base_bdevs; i++) { 1510 _rc = raid_bdev_add_base_device(raid_cfg, raid_cfg->base_bdev[i].name, i); 1511 if (_rc == -ENODEV) { 1512 SPDK_DEBUGLOG(bdev_raid, "base bdev %s doesn't exist now\n", 1513 raid_cfg->base_bdev[i].name); 1514 } else if (_rc != 0) { 1515 SPDK_ERRLOG("Failed to add base bdev %s to RAID bdev %s: %s\n", 1516 raid_cfg->base_bdev[i].name, raid_cfg->name, 1517 spdk_strerror(-_rc)); 1518 if (rc == 0) { 1519 rc = _rc; 1520 } 1521 } 1522 } 1523 1524 return rc; 1525 } 1526 1527 /* 1528 * brief: 1529 * raid_bdev_examine function is the examine function call by the below layers 1530 * like bdev_nvme layer. This function will check if this base bdev can be 1531 * claimed by this raid bdev or not. 1532 * params: 1533 * bdev - pointer to base bdev 1534 * returns: 1535 * none 1536 */ 1537 static void 1538 raid_bdev_examine(struct spdk_bdev *bdev) 1539 { 1540 struct raid_bdev_config *raid_cfg; 1541 uint8_t base_bdev_slot; 1542 1543 if (raid_bdev_can_claim_bdev(bdev->name, &raid_cfg, &base_bdev_slot)) { 1544 raid_bdev_add_base_device(raid_cfg, bdev->name, base_bdev_slot); 1545 } else { 1546 SPDK_DEBUGLOG(bdev_raid, "bdev %s can't be claimed\n", 1547 bdev->name); 1548 } 1549 1550 spdk_bdev_module_examine_done(&g_raid_if); 1551 } 1552 1553 /* Log component for bdev raid bdev module */ 1554 SPDK_LOG_REGISTER_COMPONENT(bdev_raid) 1555