1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2018 Intel Corporation. 3 * All rights reserved. 4 * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 5 */ 6 7 #include "bdev_raid.h" 8 #include "spdk/env.h" 9 #include "spdk/thread.h" 10 #include "spdk/log.h" 11 #include "spdk/string.h" 12 #include "spdk/util.h" 13 #include "spdk/json.h" 14 15 static bool g_shutdown_started = false; 16 17 /* List of all raid bdevs */ 18 struct raid_all_tailq g_raid_bdev_list = TAILQ_HEAD_INITIALIZER(g_raid_bdev_list); 19 20 static TAILQ_HEAD(, raid_bdev_module) g_raid_modules = TAILQ_HEAD_INITIALIZER(g_raid_modules); 21 22 static struct raid_bdev_module * 23 raid_bdev_module_find(enum raid_level level) 24 { 25 struct raid_bdev_module *raid_module; 26 27 TAILQ_FOREACH(raid_module, &g_raid_modules, link) { 28 if (raid_module->level == level) { 29 return raid_module; 30 } 31 } 32 33 return NULL; 34 } 35 36 void 37 raid_bdev_module_list_add(struct raid_bdev_module *raid_module) 38 { 39 if (raid_bdev_module_find(raid_module->level) != NULL) { 40 SPDK_ERRLOG("module for raid level '%s' already registered.\n", 41 raid_bdev_level_to_str(raid_module->level)); 42 assert(false); 43 } else { 44 TAILQ_INSERT_TAIL(&g_raid_modules, raid_module, link); 45 } 46 } 47 48 /* Function declarations */ 49 static void raid_bdev_examine(struct spdk_bdev *bdev); 50 static int raid_bdev_init(void); 51 static void raid_bdev_deconfigure(struct raid_bdev *raid_bdev, 52 raid_bdev_destruct_cb cb_fn, void *cb_arg); 53 54 /* 55 * brief: 56 * raid_bdev_create_cb function is a cb function for raid bdev which creates the 57 * hierarchy from raid bdev to base bdev io channels. 
It will be called per core
 * params:
 * io_device - pointer to raid bdev io device represented by raid_bdev
 * ctx_buf - pointer to context buffer for raid bdev io channel
 * returns:
 * 0 - success
 * non zero - failure
 */
static int
raid_bdev_create_cb(void *io_device, void *ctx_buf)
{
	struct raid_bdev *raid_bdev = io_device;
	struct raid_bdev_io_channel *raid_ch = ctx_buf;
	uint8_t idx;
	int rc = 0;

	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_create_cb, %p\n", raid_ch);

	assert(raid_bdev != NULL);
	assert(raid_bdev->state == RAID_BDEV_STATE_ONLINE);

	/* One slot per base bdev; calloc leaves the slots that are never filled as NULL. */
	raid_ch->num_channels = raid_bdev->num_base_bdevs;
	raid_ch->base_channel = calloc(raid_ch->num_channels,
				       sizeof(struct spdk_io_channel *));
	if (raid_ch->base_channel == NULL) {
		SPDK_ERRLOG("Unable to allocate base bdevs io channel\n");
		return -ENOMEM;
	}

	spdk_spin_lock(&raid_bdev->base_bdev_lock);
	for (idx = 0; idx < raid_ch->num_channels; idx++) {
		struct raid_base_bdev_info *base_info = &raid_bdev->base_bdev_info[idx];

		/* A base bdev without an open descriptor keeps a NULL channel. */
		if (base_info->desc == NULL) {
			continue;
		}

		/*
		 * Fetch the io channel of the base bdev. Child IOs that result
		 * from splitting a raid IO are sent through these channels.
		 */
		raid_ch->base_channel[idx] = spdk_bdev_get_io_channel(base_info->desc);
		if (raid_ch->base_channel[idx] == NULL) {
			SPDK_ERRLOG("Unable to create io channel for base bdev\n");
			rc = -ENOMEM;
			break;
		}
	}
	spdk_spin_unlock(&raid_bdev->base_bdev_lock);

	/* The module channel is optional and only requested once all base channels exist. */
	if (rc == 0 && raid_bdev->module->get_io_channel != NULL) {
		raid_ch->module_channel = raid_bdev->module->get_io_channel(raid_bdev);
		if (raid_ch->module_channel == NULL) {
			SPDK_ERRLOG("Unable to create io channel for raid module\n");
			rc = -ENOMEM;
		}
	}

	if (rc == 0) {
		return 0;
	}

	/* Failure: release every base channel obtained so far and drop the array. */
	for (idx = 0; idx < raid_ch->num_channels; idx++) {
		if (raid_ch->base_channel[idx] != NULL) {
			spdk_put_io_channel(raid_ch->base_channel[idx]);
		}
	}
	free(raid_ch->base_channel);
	raid_ch->base_channel = NULL;

	return rc;
}

/*
 * brief:
 * raid_bdev_destroy_cb function is a cb function for raid bdev which deletes the
 * hierarchy from raid bdev to base bdev io channels.
It will be called per core 131 * params: 132 * io_device - pointer to raid bdev io device represented by raid_bdev 133 * ctx_buf - pointer to context buffer for raid bdev io channel 134 * returns: 135 * none 136 */ 137 static void 138 raid_bdev_destroy_cb(void *io_device, void *ctx_buf) 139 { 140 struct raid_bdev_io_channel *raid_ch = ctx_buf; 141 uint8_t i; 142 143 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_destroy_cb\n"); 144 145 assert(raid_ch != NULL); 146 assert(raid_ch->base_channel); 147 148 if (raid_ch->module_channel) { 149 spdk_put_io_channel(raid_ch->module_channel); 150 } 151 152 for (i = 0; i < raid_ch->num_channels; i++) { 153 /* Free base bdev channels */ 154 if (raid_ch->base_channel[i] != NULL) { 155 spdk_put_io_channel(raid_ch->base_channel[i]); 156 } 157 } 158 free(raid_ch->base_channel); 159 raid_ch->base_channel = NULL; 160 } 161 162 /* 163 * brief: 164 * raid_bdev_cleanup is used to cleanup raid_bdev related data 165 * structures. 166 * params: 167 * raid_bdev - pointer to raid_bdev 168 * returns: 169 * none 170 */ 171 static void 172 raid_bdev_cleanup(struct raid_bdev *raid_bdev) 173 { 174 struct raid_base_bdev_info *base_info; 175 176 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_cleanup, %p name %s, state %s\n", 177 raid_bdev, raid_bdev->bdev.name, raid_bdev_state_to_str(raid_bdev->state)); 178 assert(raid_bdev->state != RAID_BDEV_STATE_ONLINE); 179 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 180 181 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 182 assert(base_info->desc == NULL); 183 free(base_info->name); 184 } 185 186 TAILQ_REMOVE(&g_raid_bdev_list, raid_bdev, global_link); 187 free(raid_bdev->base_bdev_info); 188 } 189 190 static void 191 raid_bdev_free(struct raid_bdev *raid_bdev) 192 { 193 spdk_spin_destroy(&raid_bdev->base_bdev_lock); 194 free(raid_bdev->bdev.name); 195 free(raid_bdev); 196 } 197 198 static void 199 raid_bdev_cleanup_and_free(struct raid_bdev *raid_bdev) 200 { 201 raid_bdev_cleanup(raid_bdev); 202 
raid_bdev_free(raid_bdev); 203 } 204 205 /* 206 * brief: 207 * free resource of base bdev for raid bdev 208 * params: 209 * base_info - raid base bdev info 210 * returns: 211 * 0 - success 212 * non zero - failure 213 */ 214 static void 215 raid_bdev_free_base_bdev_resource(struct raid_base_bdev_info *base_info) 216 { 217 struct raid_bdev *raid_bdev = base_info->raid_bdev; 218 219 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 220 221 free(base_info->name); 222 base_info->name = NULL; 223 224 if (base_info->desc == NULL) { 225 return; 226 } 227 228 spdk_bdev_module_release_bdev(spdk_bdev_desc_get_bdev(base_info->desc)); 229 spdk_bdev_close(base_info->desc); 230 base_info->desc = NULL; 231 232 assert(raid_bdev->num_base_bdevs_discovered); 233 raid_bdev->num_base_bdevs_discovered--; 234 } 235 236 static void 237 raid_bdev_io_device_unregister_cb(void *io_device) 238 { 239 struct raid_bdev *raid_bdev = io_device; 240 241 if (raid_bdev->num_base_bdevs_discovered == 0) { 242 /* Free raid_bdev when there are no base bdevs left */ 243 SPDK_DEBUGLOG(bdev_raid, "raid bdev base bdevs is 0, going to free all in destruct\n"); 244 raid_bdev_cleanup(raid_bdev); 245 spdk_bdev_destruct_done(&raid_bdev->bdev, 0); 246 raid_bdev_free(raid_bdev); 247 } else { 248 spdk_bdev_destruct_done(&raid_bdev->bdev, 0); 249 } 250 } 251 252 void 253 raid_bdev_module_stop_done(struct raid_bdev *raid_bdev) 254 { 255 if (raid_bdev->state != RAID_BDEV_STATE_CONFIGURING) { 256 spdk_io_device_unregister(raid_bdev, raid_bdev_io_device_unregister_cb); 257 } 258 } 259 260 static void 261 _raid_bdev_destruct(void *ctxt) 262 { 263 struct raid_bdev *raid_bdev = ctxt; 264 struct raid_base_bdev_info *base_info; 265 266 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_destruct\n"); 267 268 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 269 /* 270 * Close all base bdev descriptors for which call has come from below 271 * layers. Also close the descriptors if we have started shutdown. 
272 */ 273 if (g_shutdown_started || base_info->remove_scheduled == true) { 274 raid_bdev_free_base_bdev_resource(base_info); 275 } 276 } 277 278 if (g_shutdown_started) { 279 raid_bdev->state = RAID_BDEV_STATE_OFFLINE; 280 } 281 282 if (raid_bdev->module->stop != NULL) { 283 if (raid_bdev->module->stop(raid_bdev) == false) { 284 return; 285 } 286 } 287 288 raid_bdev_module_stop_done(raid_bdev); 289 } 290 291 static int 292 raid_bdev_destruct(void *ctx) 293 { 294 spdk_thread_exec_msg(spdk_thread_get_app_thread(), _raid_bdev_destruct, ctx); 295 296 return 1; 297 } 298 299 void 300 raid_bdev_io_complete(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status) 301 { 302 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); 303 304 spdk_bdev_io_complete(bdev_io, status); 305 } 306 307 /* 308 * brief: 309 * raid_bdev_io_complete_part - signal the completion of a part of the expected 310 * base bdev IOs and complete the raid_io if this is the final expected IO. 311 * The caller should first set raid_io->base_bdev_io_remaining. This function 312 * will decrement this counter by the value of the 'completed' parameter and 313 * complete the raid_io if the counter reaches 0. The caller is free to 314 * interpret the 'base_bdev_io_remaining' and 'completed' values as needed, 315 * it can represent e.g. blocks or IOs. 
316 * params: 317 * raid_io - pointer to raid_bdev_io 318 * completed - the part of the raid_io that has been completed 319 * status - status of the base IO 320 * returns: 321 * true - if the raid_io is completed 322 * false - otherwise 323 */ 324 bool 325 raid_bdev_io_complete_part(struct raid_bdev_io *raid_io, uint64_t completed, 326 enum spdk_bdev_io_status status) 327 { 328 assert(raid_io->base_bdev_io_remaining >= completed); 329 raid_io->base_bdev_io_remaining -= completed; 330 331 if (status != SPDK_BDEV_IO_STATUS_SUCCESS) { 332 raid_io->base_bdev_io_status = status; 333 } 334 335 if (raid_io->base_bdev_io_remaining == 0) { 336 raid_bdev_io_complete(raid_io, raid_io->base_bdev_io_status); 337 return true; 338 } else { 339 return false; 340 } 341 } 342 343 /* 344 * brief: 345 * raid_bdev_queue_io_wait function processes the IO which failed to submit. 346 * It will try to queue the IOs after storing the context to bdev wait queue logic. 347 * params: 348 * raid_io - pointer to raid_bdev_io 349 * bdev - the block device that the IO is submitted to 350 * ch - io channel 351 * cb_fn - callback when the spdk_bdev_io for bdev becomes available 352 * returns: 353 * none 354 */ 355 void 356 raid_bdev_queue_io_wait(struct raid_bdev_io *raid_io, struct spdk_bdev *bdev, 357 struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn) 358 { 359 raid_io->waitq_entry.bdev = bdev; 360 raid_io->waitq_entry.cb_fn = cb_fn; 361 raid_io->waitq_entry.cb_arg = raid_io; 362 spdk_bdev_queue_io_wait(bdev, ch, &raid_io->waitq_entry); 363 } 364 365 static void 366 raid_base_bdev_reset_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 367 { 368 struct raid_bdev_io *raid_io = cb_arg; 369 370 spdk_bdev_free_io(bdev_io); 371 372 raid_bdev_io_complete_part(raid_io, 1, success ? 
373 SPDK_BDEV_IO_STATUS_SUCCESS : 374 SPDK_BDEV_IO_STATUS_FAILED); 375 } 376 377 static void raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io); 378 379 static void 380 _raid_bdev_submit_reset_request(void *_raid_io) 381 { 382 struct raid_bdev_io *raid_io = _raid_io; 383 384 raid_bdev_submit_reset_request(raid_io); 385 } 386 387 /* 388 * brief: 389 * raid_bdev_submit_reset_request function submits reset requests 390 * to member disks; it will submit as many as possible unless a reset fails with -ENOMEM, in 391 * which case it will queue it for later submission 392 * params: 393 * raid_io 394 * returns: 395 * none 396 */ 397 static void 398 raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io) 399 { 400 struct raid_bdev *raid_bdev; 401 int ret; 402 uint8_t i; 403 struct raid_base_bdev_info *base_info; 404 struct spdk_io_channel *base_ch; 405 406 raid_bdev = raid_io->raid_bdev; 407 408 if (raid_io->base_bdev_io_remaining == 0) { 409 raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs; 410 } 411 412 for (i = raid_io->base_bdev_io_submitted; i < raid_bdev->num_base_bdevs; i++) { 413 base_info = &raid_bdev->base_bdev_info[i]; 414 base_ch = raid_io->raid_ch->base_channel[i]; 415 if (base_ch == NULL) { 416 raid_io->base_bdev_io_submitted++; 417 raid_bdev_io_complete_part(raid_io, 1, SPDK_BDEV_IO_STATUS_SUCCESS); 418 continue; 419 } 420 ret = spdk_bdev_reset(base_info->desc, base_ch, 421 raid_base_bdev_reset_complete, raid_io); 422 if (ret == 0) { 423 raid_io->base_bdev_io_submitted++; 424 } else if (ret == -ENOMEM) { 425 raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), 426 base_ch, _raid_bdev_submit_reset_request); 427 return; 428 } else { 429 SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); 430 assert(false); 431 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 432 return; 433 } 434 } 435 } 436 437 /* 438 * brief: 439 * Callback function to spdk_bdev_io_get_buf. 
440 * params: 441 * ch - pointer to raid bdev io channel 442 * bdev_io - pointer to parent bdev_io on raid bdev device 443 * success - True if buffer is allocated or false otherwise. 444 * returns: 445 * none 446 */ 447 static void 448 raid_bdev_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, 449 bool success) 450 { 451 struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 452 453 if (!success) { 454 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 455 return; 456 } 457 458 raid_io->raid_bdev->module->submit_rw_request(raid_io); 459 } 460 461 /* 462 * brief: 463 * raid_bdev_submit_request function is the submit_request function pointer of 464 * raid bdev function table. This is used to submit the io on raid_bdev to below 465 * layers. 466 * params: 467 * ch - pointer to raid bdev io channel 468 * bdev_io - pointer to parent bdev_io on raid bdev device 469 * returns: 470 * none 471 */ 472 static void 473 raid_bdev_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 474 { 475 struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 476 477 raid_io->raid_bdev = bdev_io->bdev->ctxt; 478 raid_io->raid_ch = spdk_io_channel_get_ctx(ch); 479 raid_io->base_bdev_io_remaining = 0; 480 raid_io->base_bdev_io_submitted = 0; 481 raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS; 482 483 switch (bdev_io->type) { 484 case SPDK_BDEV_IO_TYPE_READ: 485 spdk_bdev_io_get_buf(bdev_io, raid_bdev_get_buf_cb, 486 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); 487 break; 488 case SPDK_BDEV_IO_TYPE_WRITE: 489 raid_io->raid_bdev->module->submit_rw_request(raid_io); 490 break; 491 492 case SPDK_BDEV_IO_TYPE_RESET: 493 raid_bdev_submit_reset_request(raid_io); 494 break; 495 496 case SPDK_BDEV_IO_TYPE_FLUSH: 497 case SPDK_BDEV_IO_TYPE_UNMAP: 498 raid_io->raid_bdev->module->submit_null_payload_request(raid_io); 499 break; 500 501 default: 502 SPDK_ERRLOG("submit request, invalid io type 
%u\n", bdev_io->type); 503 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 504 break; 505 } 506 } 507 508 /* 509 * brief: 510 * _raid_bdev_io_type_supported checks whether io_type is supported in 511 * all base bdev modules of raid bdev module. If anyone among the base_bdevs 512 * doesn't support, the raid device doesn't supports. 513 * 514 * params: 515 * raid_bdev - pointer to raid bdev context 516 * io_type - io type 517 * returns: 518 * true - io_type is supported 519 * false - io_type is not supported 520 */ 521 inline static bool 522 _raid_bdev_io_type_supported(struct raid_bdev *raid_bdev, enum spdk_bdev_io_type io_type) 523 { 524 struct raid_base_bdev_info *base_info; 525 526 if (io_type == SPDK_BDEV_IO_TYPE_FLUSH || 527 io_type == SPDK_BDEV_IO_TYPE_UNMAP) { 528 if (raid_bdev->module->submit_null_payload_request == NULL) { 529 return false; 530 } 531 } 532 533 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 534 if (base_info->desc == NULL) { 535 continue; 536 } 537 538 if (spdk_bdev_io_type_supported(spdk_bdev_desc_get_bdev(base_info->desc), io_type) == false) { 539 return false; 540 } 541 } 542 543 return true; 544 } 545 546 /* 547 * brief: 548 * raid_bdev_io_type_supported is the io_supported function for bdev function 549 * table which returns whether the particular io type is supported or not by 550 * raid bdev module 551 * params: 552 * ctx - pointer to raid bdev context 553 * type - io type 554 * returns: 555 * true - io_type is supported 556 * false - io_type is not supported 557 */ 558 static bool 559 raid_bdev_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 560 { 561 switch (io_type) { 562 case SPDK_BDEV_IO_TYPE_READ: 563 case SPDK_BDEV_IO_TYPE_WRITE: 564 return true; 565 566 case SPDK_BDEV_IO_TYPE_FLUSH: 567 case SPDK_BDEV_IO_TYPE_RESET: 568 case SPDK_BDEV_IO_TYPE_UNMAP: 569 return _raid_bdev_io_type_supported(ctx, io_type); 570 571 default: 572 return false; 573 } 574 575 return false; 576 } 577 578 /* 579 * brief: 580 * 
raid_bdev_get_io_channel is the get_io_channel function table pointer for 581 * raid bdev. This is used to return the io channel for this raid bdev 582 * params: 583 * ctxt - pointer to raid_bdev 584 * returns: 585 * pointer to io channel for raid bdev 586 */ 587 static struct spdk_io_channel * 588 raid_bdev_get_io_channel(void *ctxt) 589 { 590 struct raid_bdev *raid_bdev = ctxt; 591 592 return spdk_get_io_channel(raid_bdev); 593 } 594 595 void 596 raid_bdev_write_info_json(struct raid_bdev *raid_bdev, struct spdk_json_write_ctx *w) 597 { 598 struct raid_base_bdev_info *base_info; 599 600 assert(raid_bdev != NULL); 601 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 602 603 spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb); 604 spdk_json_write_named_string(w, "state", raid_bdev_state_to_str(raid_bdev->state)); 605 spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level)); 606 spdk_json_write_named_uint32(w, "num_base_bdevs", raid_bdev->num_base_bdevs); 607 spdk_json_write_named_uint32(w, "num_base_bdevs_discovered", raid_bdev->num_base_bdevs_discovered); 608 spdk_json_write_name(w, "base_bdevs_list"); 609 spdk_json_write_array_begin(w); 610 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 611 if (base_info->desc) { 612 spdk_json_write_string(w, spdk_bdev_desc_get_bdev(base_info->desc)->name); 613 } else { 614 spdk_json_write_null(w); 615 } 616 } 617 spdk_json_write_array_end(w); 618 } 619 620 /* 621 * brief: 622 * raid_bdev_dump_info_json is the function table pointer for raid bdev 623 * params: 624 * ctx - pointer to raid_bdev 625 * w - pointer to json context 626 * returns: 627 * 0 - success 628 * non zero - failure 629 */ 630 static int 631 raid_bdev_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) 632 { 633 struct raid_bdev *raid_bdev = ctx; 634 635 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_dump_config_json\n"); 636 637 /* Dump the raid bdev configuration related information */ 638 
spdk_json_write_named_object_begin(w, "raid"); 639 raid_bdev_write_info_json(raid_bdev, w); 640 spdk_json_write_object_end(w); 641 642 return 0; 643 } 644 645 /* 646 * brief: 647 * raid_bdev_write_config_json is the function table pointer for raid bdev 648 * params: 649 * bdev - pointer to spdk_bdev 650 * w - pointer to json context 651 * returns: 652 * none 653 */ 654 static void 655 raid_bdev_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 656 { 657 struct raid_bdev *raid_bdev = bdev->ctxt; 658 struct raid_base_bdev_info *base_info; 659 660 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 661 662 spdk_json_write_object_begin(w); 663 664 spdk_json_write_named_string(w, "method", "bdev_raid_create"); 665 666 spdk_json_write_named_object_begin(w, "params"); 667 spdk_json_write_named_string(w, "name", bdev->name); 668 spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb); 669 spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level)); 670 671 spdk_json_write_named_array_begin(w, "base_bdevs"); 672 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 673 if (base_info->desc) { 674 spdk_json_write_string(w, spdk_bdev_desc_get_bdev(base_info->desc)->name); 675 } 676 } 677 spdk_json_write_array_end(w); 678 spdk_json_write_object_end(w); 679 680 spdk_json_write_object_end(w); 681 } 682 683 static int 684 raid_bdev_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int array_size) 685 { 686 struct raid_bdev *raid_bdev = ctx; 687 struct raid_base_bdev_info *base_info; 688 int domains_count = 0, rc = 0; 689 690 if (raid_bdev->module->memory_domains_supported == false) { 691 return 0; 692 } 693 694 spdk_spin_lock(&raid_bdev->base_bdev_lock); 695 696 /* First loop to get the number of memory domains */ 697 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 698 if (base_info->desc == NULL) { 699 continue; 700 } 701 rc = 
spdk_bdev_get_memory_domains(spdk_bdev_desc_get_bdev(base_info->desc), NULL, 0); 702 if (rc < 0) { 703 goto out; 704 } 705 domains_count += rc; 706 } 707 708 if (!domains || array_size < domains_count) { 709 goto out; 710 } 711 712 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 713 if (base_info->desc == NULL) { 714 continue; 715 } 716 rc = spdk_bdev_get_memory_domains(spdk_bdev_desc_get_bdev(base_info->desc), domains, array_size); 717 if (rc < 0) { 718 goto out; 719 } 720 domains += rc; 721 array_size -= rc; 722 } 723 out: 724 spdk_spin_unlock(&raid_bdev->base_bdev_lock); 725 726 if (rc < 0) { 727 return rc; 728 } 729 730 return domains_count; 731 } 732 733 /* g_raid_bdev_fn_table is the function table for raid bdev */ 734 static const struct spdk_bdev_fn_table g_raid_bdev_fn_table = { 735 .destruct = raid_bdev_destruct, 736 .submit_request = raid_bdev_submit_request, 737 .io_type_supported = raid_bdev_io_type_supported, 738 .get_io_channel = raid_bdev_get_io_channel, 739 .dump_info_json = raid_bdev_dump_info_json, 740 .write_config_json = raid_bdev_write_config_json, 741 .get_memory_domains = raid_bdev_get_memory_domains, 742 }; 743 744 struct raid_bdev * 745 raid_bdev_find_by_name(const char *name) 746 { 747 struct raid_bdev *raid_bdev; 748 749 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { 750 if (strcmp(raid_bdev->bdev.name, name) == 0) { 751 return raid_bdev; 752 } 753 } 754 755 return NULL; 756 } 757 758 static struct { 759 const char *name; 760 enum raid_level value; 761 } g_raid_level_names[] = { 762 { "raid0", RAID0 }, 763 { "0", RAID0 }, 764 { "raid1", RAID1 }, 765 { "1", RAID1 }, 766 { "raid5f", RAID5F }, 767 { "5f", RAID5F }, 768 { "concat", CONCAT }, 769 { } 770 }; 771 772 static struct { 773 const char *name; 774 enum raid_bdev_state value; 775 } g_raid_state_names[] = { 776 { "online", RAID_BDEV_STATE_ONLINE }, 777 { "configuring", RAID_BDEV_STATE_CONFIGURING }, 778 { "offline", RAID_BDEV_STATE_OFFLINE }, 779 { } 780 }; 781 782 /* We 
have to use the typedef in the function declaration to appease astyle. */ 783 typedef enum raid_level raid_level_t; 784 typedef enum raid_bdev_state raid_bdev_state_t; 785 786 raid_level_t 787 raid_bdev_str_to_level(const char *str) 788 { 789 unsigned int i; 790 791 assert(str != NULL); 792 793 for (i = 0; g_raid_level_names[i].name != NULL; i++) { 794 if (strcasecmp(g_raid_level_names[i].name, str) == 0) { 795 return g_raid_level_names[i].value; 796 } 797 } 798 799 return INVALID_RAID_LEVEL; 800 } 801 802 const char * 803 raid_bdev_level_to_str(enum raid_level level) 804 { 805 unsigned int i; 806 807 for (i = 0; g_raid_level_names[i].name != NULL; i++) { 808 if (g_raid_level_names[i].value == level) { 809 return g_raid_level_names[i].name; 810 } 811 } 812 813 return ""; 814 } 815 816 raid_bdev_state_t 817 raid_bdev_str_to_state(const char *str) 818 { 819 unsigned int i; 820 821 assert(str != NULL); 822 823 for (i = 0; g_raid_state_names[i].name != NULL; i++) { 824 if (strcasecmp(g_raid_state_names[i].name, str) == 0) { 825 return g_raid_state_names[i].value; 826 } 827 } 828 829 return RAID_BDEV_STATE_MAX; 830 } 831 832 const char * 833 raid_bdev_state_to_str(enum raid_bdev_state state) 834 { 835 unsigned int i; 836 837 for (i = 0; g_raid_state_names[i].name != NULL; i++) { 838 if (g_raid_state_names[i].value == state) { 839 return g_raid_state_names[i].name; 840 } 841 } 842 843 assert(false); 844 return ""; 845 } 846 847 /* 848 * brief: 849 * raid_bdev_fini_start is called when bdev layer is starting the 850 * shutdown process 851 * params: 852 * none 853 * returns: 854 * none 855 */ 856 static void 857 raid_bdev_fini_start(void) 858 { 859 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_fini_start\n"); 860 g_shutdown_started = true; 861 } 862 863 /* 864 * brief: 865 * raid_bdev_exit is called on raid bdev module exit time by bdev layer 866 * params: 867 * none 868 * returns: 869 * none 870 */ 871 static void 872 raid_bdev_exit(void) 873 { 874 struct raid_bdev *raid_bdev, 
*tmp; 875 876 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_exit\n"); 877 878 TAILQ_FOREACH_SAFE(raid_bdev, &g_raid_bdev_list, global_link, tmp) { 879 raid_bdev_cleanup_and_free(raid_bdev); 880 } 881 } 882 883 /* 884 * brief: 885 * raid_bdev_get_ctx_size is used to return the context size of bdev_io for raid 886 * module 887 * params: 888 * none 889 * returns: 890 * size of spdk_bdev_io context for raid 891 */ 892 static int 893 raid_bdev_get_ctx_size(void) 894 { 895 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_get_ctx_size\n"); 896 return sizeof(struct raid_bdev_io); 897 } 898 899 static struct spdk_bdev_module g_raid_if = { 900 .name = "raid", 901 .module_init = raid_bdev_init, 902 .fini_start = raid_bdev_fini_start, 903 .module_fini = raid_bdev_exit, 904 .get_ctx_size = raid_bdev_get_ctx_size, 905 .examine_config = raid_bdev_examine, 906 .async_init = false, 907 .async_fini = false, 908 }; 909 SPDK_BDEV_MODULE_REGISTER(raid, &g_raid_if) 910 911 /* 912 * brief: 913 * raid_bdev_init is the initialization function for raid bdev module 914 * params: 915 * none 916 * returns: 917 * 0 - success 918 * non zero - failure 919 */ 920 static int 921 raid_bdev_init(void) 922 { 923 return 0; 924 } 925 926 /* 927 * brief: 928 * raid_bdev_create allocates raid bdev based on passed configuration 929 * params: 930 * name - name for raid bdev 931 * strip_size - strip size in KB 932 * num_base_bdevs - number of base bdevs 933 * level - raid level 934 * raid_bdev_out - the created raid bdev 935 * returns: 936 * 0 - success 937 * non zero - failure 938 */ 939 int 940 raid_bdev_create(const char *name, uint32_t strip_size, uint8_t num_base_bdevs, 941 enum raid_level level, struct raid_bdev **raid_bdev_out, const struct spdk_uuid *uuid) 942 { 943 struct raid_bdev *raid_bdev; 944 struct spdk_bdev *raid_bdev_gen; 945 struct raid_bdev_module *module; 946 struct raid_base_bdev_info *base_info; 947 uint8_t min_operational; 948 949 if (raid_bdev_find_by_name(name) != NULL) { 950 SPDK_ERRLOG("Duplicate raid 
bdev name found: %s\n", name); 951 return -EEXIST; 952 } 953 954 if (level == RAID1) { 955 if (strip_size != 0) { 956 SPDK_ERRLOG("Strip size is not supported by raid1\n"); 957 return -EINVAL; 958 } 959 } else if (spdk_u32_is_pow2(strip_size) == false) { 960 SPDK_ERRLOG("Invalid strip size %" PRIu32 "\n", strip_size); 961 return -EINVAL; 962 } 963 964 module = raid_bdev_module_find(level); 965 if (module == NULL) { 966 SPDK_ERRLOG("Unsupported raid level '%d'\n", level); 967 return -EINVAL; 968 } 969 970 assert(module->base_bdevs_min != 0); 971 if (num_base_bdevs < module->base_bdevs_min) { 972 SPDK_ERRLOG("At least %u base devices required for %s\n", 973 module->base_bdevs_min, 974 raid_bdev_level_to_str(level)); 975 return -EINVAL; 976 } 977 978 switch (module->base_bdevs_constraint.type) { 979 case CONSTRAINT_MAX_BASE_BDEVS_REMOVED: 980 min_operational = num_base_bdevs - module->base_bdevs_constraint.value; 981 break; 982 case CONSTRAINT_MIN_BASE_BDEVS_OPERATIONAL: 983 min_operational = module->base_bdevs_constraint.value; 984 break; 985 case CONSTRAINT_UNSET: 986 if (module->base_bdevs_constraint.value != 0) { 987 SPDK_ERRLOG("Unexpected constraint value '%u' provided for raid bdev '%s'.\n", 988 (uint8_t)module->base_bdevs_constraint.value, name); 989 return -EINVAL; 990 } 991 min_operational = num_base_bdevs; 992 break; 993 default: 994 SPDK_ERRLOG("Unrecognised constraint type '%u' in module for raid level '%s'.\n", 995 (uint8_t)module->base_bdevs_constraint.type, 996 raid_bdev_level_to_str(module->level)); 997 return -EINVAL; 998 }; 999 1000 if (min_operational == 0 || min_operational > num_base_bdevs) { 1001 SPDK_ERRLOG("Wrong constraint value for raid level '%s'.\n", 1002 raid_bdev_level_to_str(module->level)); 1003 return -EINVAL; 1004 } 1005 1006 raid_bdev = calloc(1, sizeof(*raid_bdev)); 1007 if (!raid_bdev) { 1008 SPDK_ERRLOG("Unable to allocate memory for raid bdev\n"); 1009 return -ENOMEM; 1010 } 1011 1012 raid_bdev->module = module; 1013 
raid_bdev->num_base_bdevs = num_base_bdevs; 1014 raid_bdev->base_bdev_info = calloc(raid_bdev->num_base_bdevs, 1015 sizeof(struct raid_base_bdev_info)); 1016 if (!raid_bdev->base_bdev_info) { 1017 SPDK_ERRLOG("Unable able to allocate base bdev info\n"); 1018 free(raid_bdev); 1019 return -ENOMEM; 1020 } 1021 1022 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1023 base_info->raid_bdev = raid_bdev; 1024 } 1025 1026 /* strip_size_kb is from the rpc param. strip_size is in blocks and used 1027 * internally and set later. 1028 */ 1029 raid_bdev->strip_size = 0; 1030 raid_bdev->strip_size_kb = strip_size; 1031 raid_bdev->state = RAID_BDEV_STATE_CONFIGURING; 1032 raid_bdev->level = level; 1033 raid_bdev->min_base_bdevs_operational = min_operational; 1034 1035 raid_bdev_gen = &raid_bdev->bdev; 1036 1037 raid_bdev_gen->name = strdup(name); 1038 if (!raid_bdev_gen->name) { 1039 SPDK_ERRLOG("Unable to allocate name for raid\n"); 1040 free(raid_bdev->base_bdev_info); 1041 free(raid_bdev); 1042 return -ENOMEM; 1043 } 1044 1045 spdk_spin_init(&raid_bdev->base_bdev_lock); 1046 1047 raid_bdev_gen->product_name = "Raid Volume"; 1048 raid_bdev_gen->ctxt = raid_bdev; 1049 raid_bdev_gen->fn_table = &g_raid_bdev_fn_table; 1050 raid_bdev_gen->module = &g_raid_if; 1051 raid_bdev_gen->write_cache = 0; 1052 1053 if (uuid) { 1054 spdk_uuid_copy(&raid_bdev_gen->uuid, uuid); 1055 } 1056 1057 TAILQ_INSERT_TAIL(&g_raid_bdev_list, raid_bdev, global_link); 1058 1059 *raid_bdev_out = raid_bdev; 1060 1061 return 0; 1062 } 1063 1064 /* 1065 * brief: 1066 * Check underlying block devices against support for metadata. Do not configure 1067 * md support when parameters from block devices are inconsistent. 1068 * params: 1069 * raid_bdev - pointer to raid bdev 1070 * returns: 1071 * 0 - The raid bdev md parameters were successfully configured. 1072 * non zero - Failed to configure md. 
1073 */ 1074 static int 1075 raid_bdev_configure_md(struct raid_bdev *raid_bdev) 1076 { 1077 struct spdk_bdev *base_bdev; 1078 uint8_t i; 1079 1080 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 1081 base_bdev = spdk_bdev_desc_get_bdev(raid_bdev->base_bdev_info[i].desc); 1082 1083 if (i == 0) { 1084 raid_bdev->bdev.md_len = spdk_bdev_get_md_size(base_bdev); 1085 raid_bdev->bdev.md_interleave = spdk_bdev_is_md_interleaved(base_bdev); 1086 raid_bdev->bdev.dif_type = spdk_bdev_get_dif_type(base_bdev); 1087 raid_bdev->bdev.dif_is_head_of_md = spdk_bdev_is_dif_head_of_md(base_bdev); 1088 raid_bdev->bdev.dif_check_flags = base_bdev->dif_check_flags; 1089 continue; 1090 } 1091 1092 if (raid_bdev->bdev.md_len != spdk_bdev_get_md_size(base_bdev) || 1093 raid_bdev->bdev.md_interleave != spdk_bdev_is_md_interleaved(base_bdev) || 1094 raid_bdev->bdev.dif_type != spdk_bdev_get_dif_type(base_bdev) || 1095 raid_bdev->bdev.dif_is_head_of_md != spdk_bdev_is_dif_head_of_md(base_bdev) || 1096 raid_bdev->bdev.dif_check_flags != base_bdev->dif_check_flags) { 1097 SPDK_ERRLOG("base bdevs are configured with different metadata formats\n"); 1098 return -EPERM; 1099 } 1100 } 1101 1102 return 0; 1103 } 1104 1105 /* 1106 * brief: 1107 * If raid bdev config is complete, then only register the raid bdev to 1108 * bdev layer and remove this raid bdev from configuring list and 1109 * insert the raid bdev to configured list 1110 * params: 1111 * raid_bdev - pointer to raid bdev 1112 * returns: 1113 * 0 - success 1114 * non zero - failure 1115 */ 1116 static int 1117 raid_bdev_configure(struct raid_bdev *raid_bdev) 1118 { 1119 uint32_t blocklen = 0; 1120 struct spdk_bdev *raid_bdev_gen; 1121 struct raid_base_bdev_info *base_info; 1122 struct spdk_bdev *base_bdev; 1123 int rc = 0; 1124 1125 assert(raid_bdev->state == RAID_BDEV_STATE_CONFIGURING); 1126 assert(raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs); 1127 1128 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1129 
assert(base_info->desc != NULL); 1130 base_bdev = spdk_bdev_desc_get_bdev(base_info->desc); 1131 /* Check blocklen for all base bdevs that it should be same */ 1132 if (blocklen == 0) { 1133 blocklen = base_bdev->blocklen; 1134 } else if (blocklen != base_bdev->blocklen) { 1135 /* 1136 * Assumption is that all the base bdevs for any raid bdev should 1137 * have same blocklen 1138 */ 1139 SPDK_ERRLOG("Blocklen of various bdevs not matching\n"); 1140 return -EINVAL; 1141 } 1142 } 1143 assert(blocklen > 0); 1144 1145 /* The strip_size_kb is read in from user in KB. Convert to blocks here for 1146 * internal use. 1147 */ 1148 raid_bdev->strip_size = (raid_bdev->strip_size_kb * 1024) / blocklen; 1149 if (raid_bdev->strip_size == 0 && raid_bdev->level != RAID1) { 1150 SPDK_ERRLOG("Strip size cannot be smaller than the device block size\n"); 1151 return -EINVAL; 1152 } 1153 raid_bdev->strip_size_shift = spdk_u32log2(raid_bdev->strip_size); 1154 raid_bdev->blocklen_shift = spdk_u32log2(blocklen); 1155 1156 raid_bdev_gen = &raid_bdev->bdev; 1157 raid_bdev_gen->blocklen = blocklen; 1158 1159 rc = raid_bdev_configure_md(raid_bdev); 1160 if (rc != 0) { 1161 SPDK_ERRLOG("raid metadata configuration failed\n"); 1162 return rc; 1163 } 1164 1165 rc = raid_bdev->module->start(raid_bdev); 1166 if (rc != 0) { 1167 SPDK_ERRLOG("raid module startup callback failed\n"); 1168 return rc; 1169 } 1170 raid_bdev->state = RAID_BDEV_STATE_ONLINE; 1171 SPDK_DEBUGLOG(bdev_raid, "io device register %p\n", raid_bdev); 1172 SPDK_DEBUGLOG(bdev_raid, "blockcnt %" PRIu64 ", blocklen %u\n", 1173 raid_bdev_gen->blockcnt, raid_bdev_gen->blocklen); 1174 spdk_io_device_register(raid_bdev, raid_bdev_create_cb, raid_bdev_destroy_cb, 1175 sizeof(struct raid_bdev_io_channel), 1176 raid_bdev->bdev.name); 1177 rc = spdk_bdev_register(raid_bdev_gen); 1178 if (rc != 0) { 1179 SPDK_ERRLOG("Unable to register raid bdev and stay at configuring state\n"); 1180 if (raid_bdev->module->stop != NULL) { 1181 
raid_bdev->module->stop(raid_bdev); 1182 } 1183 spdk_io_device_unregister(raid_bdev, NULL); 1184 raid_bdev->state = RAID_BDEV_STATE_CONFIGURING; 1185 return rc; 1186 } 1187 SPDK_DEBUGLOG(bdev_raid, "raid bdev generic %p\n", raid_bdev_gen); 1188 SPDK_DEBUGLOG(bdev_raid, "raid bdev is created with name %s, raid_bdev %p\n", 1189 raid_bdev_gen->name, raid_bdev); 1190 1191 return 0; 1192 } 1193 1194 /* 1195 * brief: 1196 * If raid bdev is online and registered, change the bdev state to 1197 * configuring and unregister this raid device. Queue this raid device 1198 * in configuring list 1199 * params: 1200 * raid_bdev - pointer to raid bdev 1201 * cb_fn - callback function 1202 * cb_arg - argument to callback function 1203 * returns: 1204 * none 1205 */ 1206 static void 1207 raid_bdev_deconfigure(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn, 1208 void *cb_arg) 1209 { 1210 if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) { 1211 if (cb_fn) { 1212 cb_fn(cb_arg, 0); 1213 } 1214 return; 1215 } 1216 1217 raid_bdev->state = RAID_BDEV_STATE_OFFLINE; 1218 assert(raid_bdev->num_base_bdevs_discovered); 1219 SPDK_DEBUGLOG(bdev_raid, "raid bdev state changing from online to offline\n"); 1220 1221 spdk_bdev_unregister(&raid_bdev->bdev, cb_fn, cb_arg); 1222 } 1223 1224 /* 1225 * brief: 1226 * raid_bdev_find_base_info_by_bdev function finds the base bdev info by bdev. 1227 * params: 1228 * base_bdev - pointer to base bdev 1229 * returns: 1230 * base bdev info if found, otherwise NULL. 
1231 */ 1232 static struct raid_base_bdev_info * 1233 raid_bdev_find_base_info_by_bdev(struct spdk_bdev *base_bdev) 1234 { 1235 struct raid_bdev *raid_bdev; 1236 struct raid_base_bdev_info *base_info; 1237 1238 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { 1239 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1240 if (base_info->desc != NULL && 1241 spdk_bdev_desc_get_bdev(base_info->desc) == base_bdev) { 1242 return base_info; 1243 } 1244 } 1245 } 1246 1247 return NULL; 1248 } 1249 1250 static void 1251 raid_bdev_remove_base_bdev_on_unquiesced(void *ctx, int status) 1252 { 1253 struct raid_base_bdev_info *base_info = ctx; 1254 struct raid_bdev *raid_bdev = base_info->raid_bdev; 1255 1256 base_info->remove_scheduled = false; 1257 1258 if (status != 0) { 1259 SPDK_ERRLOG("Failed to unquiesce raid bdev %s: %s\n", 1260 raid_bdev->bdev.name, spdk_strerror(-status)); 1261 goto out; 1262 } 1263 1264 spdk_spin_lock(&raid_bdev->base_bdev_lock); 1265 raid_bdev_free_base_bdev_resource(base_info); 1266 spdk_spin_unlock(&raid_bdev->base_bdev_lock); 1267 out: 1268 if (base_info->remove_cb != NULL) { 1269 base_info->remove_cb(base_info->remove_cb_ctx, status); 1270 } 1271 } 1272 1273 static void 1274 raid_bdev_channel_remove_base_bdev(struct spdk_io_channel_iter *i) 1275 { 1276 struct raid_base_bdev_info *base_info = spdk_io_channel_iter_get_ctx(i); 1277 struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); 1278 struct raid_bdev_io_channel *raid_ch = spdk_io_channel_get_ctx(ch); 1279 uint8_t idx = base_info - base_info->raid_bdev->base_bdev_info; 1280 1281 SPDK_DEBUGLOG(bdev_raid, "slot: %u raid_ch: %p\n", idx, raid_ch); 1282 1283 if (raid_ch->base_channel[idx] != NULL) { 1284 spdk_put_io_channel(raid_ch->base_channel[idx]); 1285 raid_ch->base_channel[idx] = NULL; 1286 } 1287 1288 spdk_for_each_channel_continue(i, 0); 1289 } 1290 1291 static void 1292 raid_bdev_channels_remove_base_bdev_done(struct spdk_io_channel_iter *i, int status) 1293 { 1294 struct 
raid_base_bdev_info *base_info = spdk_io_channel_iter_get_ctx(i); 1295 struct raid_bdev *raid_bdev = base_info->raid_bdev; 1296 1297 spdk_bdev_unquiesce(&raid_bdev->bdev, &g_raid_if, raid_bdev_remove_base_bdev_on_unquiesced, 1298 base_info); 1299 } 1300 1301 static void 1302 raid_bdev_remove_base_bdev_on_quiesced(void *ctx, int status) 1303 { 1304 struct raid_base_bdev_info *base_info = ctx; 1305 struct raid_bdev *raid_bdev = base_info->raid_bdev; 1306 1307 if (status != 0) { 1308 SPDK_ERRLOG("Failed to quiesce raid bdev %s: %s\n", 1309 raid_bdev->bdev.name, spdk_strerror(-status)); 1310 base_info->remove_scheduled = false; 1311 if (base_info->remove_cb != NULL) { 1312 base_info->remove_cb(base_info->remove_cb_ctx, status); 1313 } 1314 return; 1315 } 1316 1317 spdk_for_each_channel(raid_bdev, raid_bdev_channel_remove_base_bdev, base_info, 1318 raid_bdev_channels_remove_base_bdev_done); 1319 } 1320 1321 /* 1322 * brief: 1323 * raid_bdev_remove_base_bdev function is called by below layers when base_bdev 1324 * is removed. This function checks if this base bdev is part of any raid bdev 1325 * or not. If yes, it takes necessary action on that particular raid bdev. 
1326 * params: 1327 * base_bdev - pointer to base bdev which got removed 1328 * cb_fn - callback function 1329 * cb_arg - argument to callback function 1330 * returns: 1331 * 0 - success 1332 * non zero - failure 1333 */ 1334 int 1335 raid_bdev_remove_base_bdev(struct spdk_bdev *base_bdev, raid_bdev_remove_base_bdev_cb cb_fn, 1336 void *cb_ctx) 1337 { 1338 struct raid_bdev *raid_bdev; 1339 struct raid_base_bdev_info *base_info; 1340 1341 SPDK_DEBUGLOG(bdev_raid, "%s\n", base_bdev->name); 1342 1343 /* Find the raid_bdev which has claimed this base_bdev */ 1344 base_info = raid_bdev_find_base_info_by_bdev(base_bdev); 1345 if (!base_info) { 1346 SPDK_ERRLOG("bdev to remove '%s' not found\n", base_bdev->name); 1347 return -ENODEV; 1348 } 1349 raid_bdev = base_info->raid_bdev; 1350 1351 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 1352 1353 if (base_info->remove_scheduled) { 1354 return 0; 1355 } 1356 1357 assert(base_info->desc); 1358 base_info->remove_scheduled = true; 1359 base_info->remove_cb = cb_fn; 1360 base_info->remove_cb_ctx = cb_ctx; 1361 1362 if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) { 1363 /* 1364 * As raid bdev is not registered yet or already unregistered, 1365 * so cleanup should be done here itself. 1366 */ 1367 raid_bdev_free_base_bdev_resource(base_info); 1368 if (raid_bdev->num_base_bdevs_discovered == 0) { 1369 /* There is no base bdev for this raid, so free the raid device. */ 1370 raid_bdev_cleanup_and_free(raid_bdev); 1371 } 1372 } else if (raid_bdev->num_base_bdevs_discovered == raid_bdev->min_base_bdevs_operational) { 1373 /* 1374 * After this base bdev is removed there will not be enough base bdevs 1375 * to keep the raid bdev operational. 
1376 */ 1377 raid_bdev_deconfigure(raid_bdev, cb_fn, cb_ctx); 1378 } else { 1379 int ret; 1380 1381 ret = spdk_bdev_quiesce(&raid_bdev->bdev, &g_raid_if, 1382 raid_bdev_remove_base_bdev_on_quiesced, base_info); 1383 if (ret != 0) { 1384 base_info->remove_scheduled = false; 1385 } 1386 } 1387 1388 return 0; 1389 } 1390 1391 /* 1392 * brief: 1393 * raid_bdev_resize_base_bdev function is called by below layers when base_bdev 1394 * is resized. This function checks if the smallest size of the base_bdevs is changed. 1395 * If yes, call module handler to resize the raid_bdev if implemented. 1396 * params: 1397 * base_bdev - pointer to base bdev which got resized. 1398 * returns: 1399 * none 1400 */ 1401 static void 1402 raid_bdev_resize_base_bdev(struct spdk_bdev *base_bdev) 1403 { 1404 struct raid_bdev *raid_bdev; 1405 struct raid_base_bdev_info *base_info; 1406 1407 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_resize_base_bdev\n"); 1408 1409 base_info = raid_bdev_find_base_info_by_bdev(base_bdev); 1410 1411 /* Find the raid_bdev which has claimed this base_bdev */ 1412 if (!base_info) { 1413 SPDK_ERRLOG("raid_bdev whose base_bdev '%s' not found\n", base_bdev->name); 1414 return; 1415 } 1416 raid_bdev = base_info->raid_bdev; 1417 1418 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 1419 1420 SPDK_NOTICELOG("base_bdev '%s' was resized: old size %" PRIu64 ", new size %" PRIu64 "\n", 1421 base_bdev->name, base_info->blockcnt, base_bdev->blockcnt); 1422 1423 if (raid_bdev->module->resize) { 1424 raid_bdev->module->resize(raid_bdev); 1425 } 1426 } 1427 1428 /* 1429 * brief: 1430 * raid_bdev_event_base_bdev function is called by below layers when base_bdev 1431 * triggers asynchronous event. 1432 * params: 1433 * type - event details. 1434 * bdev - bdev that triggered event. 1435 * event_ctx - context for event. 
1436 * returns: 1437 * none 1438 */ 1439 static void 1440 raid_bdev_event_base_bdev(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, 1441 void *event_ctx) 1442 { 1443 int rc; 1444 1445 switch (type) { 1446 case SPDK_BDEV_EVENT_REMOVE: 1447 rc = raid_bdev_remove_base_bdev(bdev, NULL, NULL); 1448 if (rc != 0) { 1449 SPDK_ERRLOG("Failed to remove base bdev %s: %s\n", 1450 spdk_bdev_get_name(bdev), spdk_strerror(-rc)); 1451 } 1452 break; 1453 case SPDK_BDEV_EVENT_RESIZE: 1454 raid_bdev_resize_base_bdev(bdev); 1455 break; 1456 default: 1457 SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type); 1458 break; 1459 } 1460 } 1461 1462 /* 1463 * brief: 1464 * Deletes the specified raid bdev 1465 * params: 1466 * raid_bdev - pointer to raid bdev 1467 * cb_fn - callback function 1468 * cb_arg - argument to callback function 1469 */ 1470 void 1471 raid_bdev_delete(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn, void *cb_arg) 1472 { 1473 struct raid_base_bdev_info *base_info; 1474 1475 SPDK_DEBUGLOG(bdev_raid, "delete raid bdev: %s\n", raid_bdev->bdev.name); 1476 1477 if (raid_bdev->destroy_started) { 1478 SPDK_DEBUGLOG(bdev_raid, "destroying raid bdev %s is already started\n", 1479 raid_bdev->bdev.name); 1480 if (cb_fn) { 1481 cb_fn(cb_arg, -EALREADY); 1482 } 1483 return; 1484 } 1485 1486 raid_bdev->destroy_started = true; 1487 1488 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1489 base_info->remove_scheduled = true; 1490 1491 if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) { 1492 /* 1493 * As raid bdev is not registered yet or already unregistered, 1494 * so cleanup should be done here itself. 1495 */ 1496 raid_bdev_free_base_bdev_resource(base_info); 1497 } 1498 } 1499 1500 if (raid_bdev->num_base_bdevs_discovered == 0) { 1501 /* There is no base bdev for this raid, so free the raid device. 
*/ 1502 raid_bdev_cleanup_and_free(raid_bdev); 1503 if (cb_fn) { 1504 cb_fn(cb_arg, 0); 1505 } 1506 } else { 1507 raid_bdev_deconfigure(raid_bdev, cb_fn, cb_arg); 1508 } 1509 } 1510 1511 static int 1512 raid_bdev_configure_base_bdev(struct raid_base_bdev_info *base_info) 1513 { 1514 struct raid_bdev *raid_bdev = base_info->raid_bdev; 1515 struct spdk_bdev_desc *desc; 1516 struct spdk_bdev *bdev; 1517 int rc; 1518 1519 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 1520 assert(base_info->name != NULL); 1521 assert(base_info->desc == NULL); 1522 1523 rc = spdk_bdev_open_ext(base_info->name, true, raid_bdev_event_base_bdev, NULL, &desc); 1524 if (rc != 0) { 1525 if (rc != -ENODEV) { 1526 SPDK_ERRLOG("Unable to create desc on bdev '%s'\n", base_info->name); 1527 } 1528 return rc; 1529 } 1530 1531 bdev = spdk_bdev_desc_get_bdev(desc); 1532 1533 rc = spdk_bdev_module_claim_bdev(bdev, NULL, &g_raid_if); 1534 if (rc != 0) { 1535 SPDK_ERRLOG("Unable to claim this bdev as it is already claimed\n"); 1536 spdk_bdev_close(desc); 1537 return rc; 1538 } 1539 1540 SPDK_DEBUGLOG(bdev_raid, "bdev %s is claimed\n", bdev->name); 1541 1542 assert(raid_bdev->state != RAID_BDEV_STATE_ONLINE); 1543 1544 base_info->desc = desc; 1545 base_info->blockcnt = bdev->blockcnt; 1546 raid_bdev->num_base_bdevs_discovered++; 1547 assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs); 1548 1549 if (raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs) { 1550 rc = raid_bdev_configure(raid_bdev); 1551 if (rc != 0) { 1552 SPDK_ERRLOG("Failed to configure raid bdev\n"); 1553 return rc; 1554 } 1555 } 1556 1557 return 0; 1558 } 1559 1560 /* 1561 * brief: 1562 * raid_bdev_add_base_device function is the actual function which either adds 1563 * the nvme base device to existing raid bdev or create a new raid bdev. It also claims 1564 * the base device and keep the open descriptor. 
1565 * params: 1566 * raid_bdev - pointer to raid bdev 1567 * name - name of the base bdev 1568 * slot - position to add base bdev 1569 * returns: 1570 * 0 - success 1571 * non zero - failure 1572 */ 1573 int 1574 raid_bdev_add_base_device(struct raid_bdev *raid_bdev, const char *name, uint8_t slot) 1575 { 1576 struct raid_base_bdev_info *base_info; 1577 int rc; 1578 1579 if (slot >= raid_bdev->num_base_bdevs) { 1580 return -EINVAL; 1581 } 1582 1583 base_info = &raid_bdev->base_bdev_info[slot]; 1584 1585 if (base_info->name != NULL) { 1586 SPDK_ERRLOG("Slot %u on raid bdev '%s' already assigned to bdev '%s'\n", 1587 slot, raid_bdev->bdev.name, base_info->name); 1588 return -EBUSY; 1589 } 1590 1591 base_info->name = strdup(name); 1592 if (base_info->name == NULL) { 1593 return -ENOMEM; 1594 } 1595 1596 rc = raid_bdev_configure_base_bdev(base_info); 1597 if (rc != 0) { 1598 if (rc != -ENODEV) { 1599 SPDK_ERRLOG("Failed to allocate resource for bdev '%s'\n", name); 1600 } 1601 return rc; 1602 } 1603 1604 return 0; 1605 } 1606 1607 /* 1608 * brief: 1609 * raid_bdev_examine function is the examine function call by the below layers 1610 * like bdev_nvme layer. This function will check if this base bdev can be 1611 * claimed by this raid bdev or not. 1612 * params: 1613 * bdev - pointer to base bdev 1614 * returns: 1615 * none 1616 */ 1617 static void 1618 raid_bdev_examine(struct spdk_bdev *bdev) 1619 { 1620 struct raid_bdev *raid_bdev; 1621 struct raid_base_bdev_info *base_info; 1622 1623 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { 1624 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1625 if (base_info->desc == NULL && base_info->name != NULL && 1626 strcmp(bdev->name, base_info->name) == 0) { 1627 raid_bdev_configure_base_bdev(base_info); 1628 break; 1629 } 1630 } 1631 } 1632 1633 spdk_bdev_module_examine_done(&g_raid_if); 1634 } 1635 1636 /* Log component for bdev raid bdev module */ 1637 SPDK_LOG_REGISTER_COMPONENT(bdev_raid) 1638