1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2018 Intel Corporation. 3 * All rights reserved. 4 * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 5 */ 6 7 #include "bdev_raid.h" 8 #include "spdk/env.h" 9 #include "spdk/thread.h" 10 #include "spdk/log.h" 11 #include "spdk/string.h" 12 #include "spdk/util.h" 13 #include "spdk/json.h" 14 #include "spdk/likely.h" 15 16 static bool g_shutdown_started = false; 17 18 /* List of all raid bdevs */ 19 struct raid_all_tailq g_raid_bdev_list = TAILQ_HEAD_INITIALIZER(g_raid_bdev_list); 20 21 static TAILQ_HEAD(, raid_bdev_module) g_raid_modules = TAILQ_HEAD_INITIALIZER(g_raid_modules); 22 23 /* 24 * raid_bdev_io_channel is the context of spdk_io_channel for raid bdev device. It 25 * contains the relationship of raid bdev io channel with base bdev io channels. 26 */ 27 struct raid_bdev_io_channel { 28 /* Array of IO channels of base bdevs */ 29 struct spdk_io_channel **base_channel; 30 31 /* Private raid module IO channel */ 32 struct spdk_io_channel *module_channel; 33 }; 34 35 static struct raid_bdev_module * 36 raid_bdev_module_find(enum raid_level level) 37 { 38 struct raid_bdev_module *raid_module; 39 40 TAILQ_FOREACH(raid_module, &g_raid_modules, link) { 41 if (raid_module->level == level) { 42 return raid_module; 43 } 44 } 45 46 return NULL; 47 } 48 49 void 50 raid_bdev_module_list_add(struct raid_bdev_module *raid_module) 51 { 52 if (raid_bdev_module_find(raid_module->level) != NULL) { 53 SPDK_ERRLOG("module for raid level '%s' already registered.\n", 54 raid_bdev_level_to_str(raid_module->level)); 55 assert(false); 56 } else { 57 TAILQ_INSERT_TAIL(&g_raid_modules, raid_module, link); 58 } 59 } 60 61 struct spdk_io_channel * 62 raid_bdev_channel_get_base_channel(struct raid_bdev_io_channel *raid_ch, uint8_t idx) 63 { 64 return raid_ch->base_channel[idx]; 65 } 66 67 void * 68 raid_bdev_channel_get_module_ctx(struct raid_bdev_io_channel *raid_ch) 69 { 70 
assert(raid_ch->module_channel != NULL); 71 72 return spdk_io_channel_get_ctx(raid_ch->module_channel); 73 } 74 75 /* Function declarations */ 76 static void raid_bdev_examine(struct spdk_bdev *bdev); 77 static int raid_bdev_init(void); 78 static void raid_bdev_deconfigure(struct raid_bdev *raid_bdev, 79 raid_bdev_destruct_cb cb_fn, void *cb_arg); 80 81 /* 82 * brief: 83 * raid_bdev_create_cb function is a cb function for raid bdev which creates the 84 * hierarchy from raid bdev to base bdev io channels. It will be called per core 85 * params: 86 * io_device - pointer to raid bdev io device represented by raid_bdev 87 * ctx_buf - pointer to context buffer for raid bdev io channel 88 * returns: 89 * 0 - success 90 * non zero - failure 91 */ 92 static int 93 raid_bdev_create_cb(void *io_device, void *ctx_buf) 94 { 95 struct raid_bdev *raid_bdev = io_device; 96 struct raid_bdev_io_channel *raid_ch = ctx_buf; 97 uint8_t i; 98 int ret = 0; 99 100 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_create_cb, %p\n", raid_ch); 101 102 assert(raid_bdev != NULL); 103 assert(raid_bdev->state == RAID_BDEV_STATE_ONLINE); 104 105 106 raid_ch->base_channel = calloc(raid_bdev->num_base_bdevs, sizeof(struct spdk_io_channel *)); 107 if (!raid_ch->base_channel) { 108 SPDK_ERRLOG("Unable to allocate base bdevs io channel\n"); 109 return -ENOMEM; 110 } 111 112 spdk_spin_lock(&raid_bdev->base_bdev_lock); 113 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 114 /* 115 * Get the spdk_io_channel for all the base bdevs. This is used during 116 * split logic to send the respective child bdev ios to respective base 117 * bdev io channel. 
118 */ 119 if (raid_bdev->base_bdev_info[i].desc == NULL) { 120 continue; 121 } 122 raid_ch->base_channel[i] = spdk_bdev_get_io_channel( 123 raid_bdev->base_bdev_info[i].desc); 124 if (!raid_ch->base_channel[i]) { 125 SPDK_ERRLOG("Unable to create io channel for base bdev\n"); 126 ret = -ENOMEM; 127 break; 128 } 129 } 130 spdk_spin_unlock(&raid_bdev->base_bdev_lock); 131 132 if (!ret && raid_bdev->module->get_io_channel) { 133 raid_ch->module_channel = raid_bdev->module->get_io_channel(raid_bdev); 134 if (!raid_ch->module_channel) { 135 SPDK_ERRLOG("Unable to create io channel for raid module\n"); 136 ret = -ENOMEM; 137 } 138 } 139 140 if (ret) { 141 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 142 if (raid_ch->base_channel[i] != NULL) { 143 spdk_put_io_channel(raid_ch->base_channel[i]); 144 } 145 } 146 free(raid_ch->base_channel); 147 raid_ch->base_channel = NULL; 148 } 149 return ret; 150 } 151 152 /* 153 * brief: 154 * raid_bdev_destroy_cb function is a cb function for raid bdev which deletes the 155 * hierarchy from raid bdev to base bdev io channels. 
It will be called per core
 * params:
 * io_device - pointer to raid bdev io device represented by raid_bdev
 * ctx_buf - pointer to context buffer for raid bdev io channel
 * returns:
 * none
 */
static void
raid_bdev_destroy_cb(void *io_device, void *ctx_buf)
{
	struct raid_bdev_io_channel *raid_ch = ctx_buf;
	struct raid_bdev            *raid_bdev = io_device;
	struct spdk_io_channel      *base_ch;
	uint8_t                     idx = 0;

	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_destroy_cb\n");

	assert(raid_ch != NULL);
	assert(raid_ch->base_channel);

	/* The module channel only exists if the raid module provided one */
	if (raid_ch->module_channel != NULL) {
		spdk_put_io_channel(raid_ch->module_channel);
	}

	/* Release every base bdev channel that was acquired, then the array itself */
	while (idx < raid_bdev->num_base_bdevs) {
		base_ch = raid_ch->base_channel[idx++];
		if (base_ch != NULL) {
			spdk_put_io_channel(base_ch);
		}
	}

	free(raid_ch->base_channel);
	raid_ch->base_channel = NULL;
}

/*
 * brief:
 * raid_bdev_cleanup is used to cleanup raid_bdev related data
 * structures.
192 * params: 193 * raid_bdev - pointer to raid_bdev 194 * returns: 195 * none 196 */ 197 static void 198 raid_bdev_cleanup(struct raid_bdev *raid_bdev) 199 { 200 struct raid_base_bdev_info *base_info; 201 202 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_cleanup, %p name %s, state %s\n", 203 raid_bdev, raid_bdev->bdev.name, raid_bdev_state_to_str(raid_bdev->state)); 204 assert(raid_bdev->state != RAID_BDEV_STATE_ONLINE); 205 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 206 207 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 208 assert(base_info->desc == NULL); 209 free(base_info->name); 210 } 211 212 TAILQ_REMOVE(&g_raid_bdev_list, raid_bdev, global_link); 213 } 214 215 static void 216 raid_bdev_free(struct raid_bdev *raid_bdev) 217 { 218 spdk_dma_free(raid_bdev->sb); 219 spdk_spin_destroy(&raid_bdev->base_bdev_lock); 220 free(raid_bdev->base_bdev_info); 221 free(raid_bdev->bdev.name); 222 free(raid_bdev); 223 } 224 225 static void 226 raid_bdev_cleanup_and_free(struct raid_bdev *raid_bdev) 227 { 228 raid_bdev_cleanup(raid_bdev); 229 raid_bdev_free(raid_bdev); 230 } 231 232 /* 233 * brief: 234 * free resource of base bdev for raid bdev 235 * params: 236 * base_info - raid base bdev info 237 * returns: 238 * none 239 */ 240 static void 241 raid_bdev_free_base_bdev_resource(struct raid_base_bdev_info *base_info) 242 { 243 struct raid_bdev *raid_bdev = base_info->raid_bdev; 244 245 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 246 247 free(base_info->name); 248 base_info->name = NULL; 249 if (raid_bdev->state != RAID_BDEV_STATE_CONFIGURING) { 250 spdk_uuid_set_null(&base_info->uuid); 251 } 252 253 if (base_info->desc == NULL) { 254 return; 255 } 256 257 spdk_bdev_module_release_bdev(spdk_bdev_desc_get_bdev(base_info->desc)); 258 spdk_bdev_close(base_info->desc); 259 base_info->desc = NULL; 260 spdk_put_io_channel(base_info->app_thread_ch); 261 base_info->app_thread_ch = NULL; 262 263 if (base_info->is_configured) { 264 
assert(raid_bdev->num_base_bdevs_discovered); 265 raid_bdev->num_base_bdevs_discovered--; 266 base_info->is_configured = false; 267 } 268 } 269 270 static void 271 raid_bdev_io_device_unregister_cb(void *io_device) 272 { 273 struct raid_bdev *raid_bdev = io_device; 274 275 if (raid_bdev->num_base_bdevs_discovered == 0) { 276 /* Free raid_bdev when there are no base bdevs left */ 277 SPDK_DEBUGLOG(bdev_raid, "raid bdev base bdevs is 0, going to free all in destruct\n"); 278 raid_bdev_cleanup(raid_bdev); 279 spdk_bdev_destruct_done(&raid_bdev->bdev, 0); 280 raid_bdev_free(raid_bdev); 281 } else { 282 spdk_bdev_destruct_done(&raid_bdev->bdev, 0); 283 } 284 } 285 286 void 287 raid_bdev_module_stop_done(struct raid_bdev *raid_bdev) 288 { 289 if (raid_bdev->state != RAID_BDEV_STATE_CONFIGURING) { 290 spdk_io_device_unregister(raid_bdev, raid_bdev_io_device_unregister_cb); 291 } 292 } 293 294 static void 295 _raid_bdev_destruct(void *ctxt) 296 { 297 struct raid_bdev *raid_bdev = ctxt; 298 struct raid_base_bdev_info *base_info; 299 300 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_destruct\n"); 301 302 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 303 /* 304 * Close all base bdev descriptors for which call has come from below 305 * layers. Also close the descriptors if we have started shutdown. 
306 */ 307 if (g_shutdown_started || base_info->remove_scheduled == true) { 308 raid_bdev_free_base_bdev_resource(base_info); 309 } 310 } 311 312 if (g_shutdown_started) { 313 raid_bdev->state = RAID_BDEV_STATE_OFFLINE; 314 } 315 316 if (raid_bdev->module->stop != NULL) { 317 if (raid_bdev->module->stop(raid_bdev) == false) { 318 return; 319 } 320 } 321 322 raid_bdev_module_stop_done(raid_bdev); 323 } 324 325 static int 326 raid_bdev_destruct(void *ctx) 327 { 328 spdk_thread_exec_msg(spdk_thread_get_app_thread(), _raid_bdev_destruct, ctx); 329 330 return 1; 331 } 332 333 void 334 raid_bdev_io_complete(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status) 335 { 336 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); 337 338 if (spdk_unlikely(raid_io->completion_cb != NULL)) { 339 raid_io->completion_cb(raid_io, status); 340 } else { 341 spdk_bdev_io_complete(bdev_io, status); 342 } 343 } 344 345 /* 346 * brief: 347 * raid_bdev_io_complete_part - signal the completion of a part of the expected 348 * base bdev IOs and complete the raid_io if this is the final expected IO. 349 * The caller should first set raid_io->base_bdev_io_remaining. This function 350 * will decrement this counter by the value of the 'completed' parameter and 351 * complete the raid_io if the counter reaches 0. The caller is free to 352 * interpret the 'base_bdev_io_remaining' and 'completed' values as needed, 353 * it can represent e.g. blocks or IOs. 
354 * params: 355 * raid_io - pointer to raid_bdev_io 356 * completed - the part of the raid_io that has been completed 357 * status - status of the base IO 358 * returns: 359 * true - if the raid_io is completed 360 * false - otherwise 361 */ 362 bool 363 raid_bdev_io_complete_part(struct raid_bdev_io *raid_io, uint64_t completed, 364 enum spdk_bdev_io_status status) 365 { 366 assert(raid_io->base_bdev_io_remaining >= completed); 367 raid_io->base_bdev_io_remaining -= completed; 368 369 if (status != SPDK_BDEV_IO_STATUS_SUCCESS) { 370 raid_io->base_bdev_io_status = status; 371 } 372 373 if (raid_io->base_bdev_io_remaining == 0) { 374 raid_bdev_io_complete(raid_io, raid_io->base_bdev_io_status); 375 return true; 376 } else { 377 return false; 378 } 379 } 380 381 /* 382 * brief: 383 * raid_bdev_queue_io_wait function processes the IO which failed to submit. 384 * It will try to queue the IOs after storing the context to bdev wait queue logic. 385 * params: 386 * raid_io - pointer to raid_bdev_io 387 * bdev - the block device that the IO is submitted to 388 * ch - io channel 389 * cb_fn - callback when the spdk_bdev_io for bdev becomes available 390 * returns: 391 * none 392 */ 393 void 394 raid_bdev_queue_io_wait(struct raid_bdev_io *raid_io, struct spdk_bdev *bdev, 395 struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn) 396 { 397 raid_io->waitq_entry.bdev = bdev; 398 raid_io->waitq_entry.cb_fn = cb_fn; 399 raid_io->waitq_entry.cb_arg = raid_io; 400 spdk_bdev_queue_io_wait(bdev, ch, &raid_io->waitq_entry); 401 } 402 403 static void 404 raid_base_bdev_reset_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 405 { 406 struct raid_bdev_io *raid_io = cb_arg; 407 408 spdk_bdev_free_io(bdev_io); 409 410 raid_bdev_io_complete_part(raid_io, 1, success ? 
411 SPDK_BDEV_IO_STATUS_SUCCESS : 412 SPDK_BDEV_IO_STATUS_FAILED); 413 } 414 415 static void raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io); 416 417 static void 418 _raid_bdev_submit_reset_request(void *_raid_io) 419 { 420 struct raid_bdev_io *raid_io = _raid_io; 421 422 raid_bdev_submit_reset_request(raid_io); 423 } 424 425 /* 426 * brief: 427 * raid_bdev_submit_reset_request function submits reset requests 428 * to member disks; it will submit as many as possible unless a reset fails with -ENOMEM, in 429 * which case it will queue it for later submission 430 * params: 431 * raid_io 432 * returns: 433 * none 434 */ 435 static void 436 raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io) 437 { 438 struct raid_bdev *raid_bdev; 439 int ret; 440 uint8_t i; 441 struct raid_base_bdev_info *base_info; 442 struct spdk_io_channel *base_ch; 443 444 raid_bdev = raid_io->raid_bdev; 445 446 if (raid_io->base_bdev_io_remaining == 0) { 447 raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs; 448 } 449 450 for (i = raid_io->base_bdev_io_submitted; i < raid_bdev->num_base_bdevs; i++) { 451 base_info = &raid_bdev->base_bdev_info[i]; 452 base_ch = raid_io->raid_ch->base_channel[i]; 453 if (base_ch == NULL) { 454 raid_io->base_bdev_io_submitted++; 455 raid_bdev_io_complete_part(raid_io, 1, SPDK_BDEV_IO_STATUS_SUCCESS); 456 continue; 457 } 458 ret = spdk_bdev_reset(base_info->desc, base_ch, 459 raid_base_bdev_reset_complete, raid_io); 460 if (ret == 0) { 461 raid_io->base_bdev_io_submitted++; 462 } else if (ret == -ENOMEM) { 463 raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), 464 base_ch, _raid_bdev_submit_reset_request); 465 return; 466 } else { 467 SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); 468 assert(false); 469 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 470 return; 471 } 472 } 473 } 474 475 /* 476 * brief: 477 * Callback function to spdk_bdev_io_get_buf. 
478 * params: 479 * ch - pointer to raid bdev io channel 480 * bdev_io - pointer to parent bdev_io on raid bdev device 481 * success - True if buffer is allocated or false otherwise. 482 * returns: 483 * none 484 */ 485 static void 486 raid_bdev_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, 487 bool success) 488 { 489 struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 490 491 if (!success) { 492 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 493 return; 494 } 495 496 raid_io->raid_bdev->module->submit_rw_request(raid_io); 497 } 498 499 void 500 raid_bdev_io_init(struct raid_bdev_io *raid_io, struct raid_bdev_io_channel *raid_ch, 501 enum spdk_bdev_io_type type, uint64_t offset_blocks, 502 uint64_t num_blocks, struct iovec *iovs, int iovcnt, void *md_buf, 503 struct spdk_memory_domain *memory_domain, void *memory_domain_ctx) 504 { 505 struct spdk_io_channel *ch = spdk_io_channel_from_ctx(raid_ch); 506 struct raid_bdev *raid_bdev = spdk_io_channel_get_io_device(ch); 507 508 raid_io->type = type; 509 raid_io->offset_blocks = offset_blocks; 510 raid_io->num_blocks = num_blocks; 511 raid_io->iovs = iovs; 512 raid_io->iovcnt = iovcnt; 513 raid_io->memory_domain = memory_domain; 514 raid_io->memory_domain_ctx = memory_domain_ctx; 515 raid_io->md_buf = md_buf; 516 517 raid_io->raid_bdev = raid_bdev; 518 raid_io->raid_ch = raid_ch; 519 raid_io->base_bdev_io_remaining = 0; 520 raid_io->base_bdev_io_submitted = 0; 521 raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS; 522 raid_io->completion_cb = NULL; 523 } 524 525 /* 526 * brief: 527 * raid_bdev_submit_request function is the submit_request function pointer of 528 * raid bdev function table. This is used to submit the io on raid_bdev to below 529 * layers. 
530 * params: 531 * ch - pointer to raid bdev io channel 532 * bdev_io - pointer to parent bdev_io on raid bdev device 533 * returns: 534 * none 535 */ 536 static void 537 raid_bdev_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 538 { 539 struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 540 541 raid_bdev_io_init(raid_io, spdk_io_channel_get_ctx(ch), bdev_io->type, 542 bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks, 543 bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, bdev_io->u.bdev.md_buf, 544 bdev_io->u.bdev.memory_domain, bdev_io->u.bdev.memory_domain_ctx); 545 546 switch (bdev_io->type) { 547 case SPDK_BDEV_IO_TYPE_READ: 548 spdk_bdev_io_get_buf(bdev_io, raid_bdev_get_buf_cb, 549 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); 550 break; 551 case SPDK_BDEV_IO_TYPE_WRITE: 552 raid_io->raid_bdev->module->submit_rw_request(raid_io); 553 break; 554 555 case SPDK_BDEV_IO_TYPE_RESET: 556 raid_bdev_submit_reset_request(raid_io); 557 break; 558 559 case SPDK_BDEV_IO_TYPE_FLUSH: 560 case SPDK_BDEV_IO_TYPE_UNMAP: 561 raid_io->raid_bdev->module->submit_null_payload_request(raid_io); 562 break; 563 564 default: 565 SPDK_ERRLOG("submit request, invalid io type %u\n", bdev_io->type); 566 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 567 break; 568 } 569 } 570 571 /* 572 * brief: 573 * _raid_bdev_io_type_supported checks whether io_type is supported in 574 * all base bdev modules of raid bdev module. If anyone among the base_bdevs 575 * doesn't support, the raid device doesn't supports. 
576 * 577 * params: 578 * raid_bdev - pointer to raid bdev context 579 * io_type - io type 580 * returns: 581 * true - io_type is supported 582 * false - io_type is not supported 583 */ 584 inline static bool 585 _raid_bdev_io_type_supported(struct raid_bdev *raid_bdev, enum spdk_bdev_io_type io_type) 586 { 587 struct raid_base_bdev_info *base_info; 588 589 if (io_type == SPDK_BDEV_IO_TYPE_FLUSH || 590 io_type == SPDK_BDEV_IO_TYPE_UNMAP) { 591 if (raid_bdev->module->submit_null_payload_request == NULL) { 592 return false; 593 } 594 } 595 596 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 597 if (base_info->desc == NULL) { 598 continue; 599 } 600 601 if (spdk_bdev_io_type_supported(spdk_bdev_desc_get_bdev(base_info->desc), io_type) == false) { 602 return false; 603 } 604 } 605 606 return true; 607 } 608 609 /* 610 * brief: 611 * raid_bdev_io_type_supported is the io_supported function for bdev function 612 * table which returns whether the particular io type is supported or not by 613 * raid bdev module 614 * params: 615 * ctx - pointer to raid bdev context 616 * type - io type 617 * returns: 618 * true - io_type is supported 619 * false - io_type is not supported 620 */ 621 static bool 622 raid_bdev_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 623 { 624 switch (io_type) { 625 case SPDK_BDEV_IO_TYPE_READ: 626 case SPDK_BDEV_IO_TYPE_WRITE: 627 return true; 628 629 case SPDK_BDEV_IO_TYPE_FLUSH: 630 case SPDK_BDEV_IO_TYPE_RESET: 631 case SPDK_BDEV_IO_TYPE_UNMAP: 632 return _raid_bdev_io_type_supported(ctx, io_type); 633 634 default: 635 return false; 636 } 637 638 return false; 639 } 640 641 /* 642 * brief: 643 * raid_bdev_get_io_channel is the get_io_channel function table pointer for 644 * raid bdev. 
This is used to return the io channel for this raid bdev 645 * params: 646 * ctxt - pointer to raid_bdev 647 * returns: 648 * pointer to io channel for raid bdev 649 */ 650 static struct spdk_io_channel * 651 raid_bdev_get_io_channel(void *ctxt) 652 { 653 struct raid_bdev *raid_bdev = ctxt; 654 655 return spdk_get_io_channel(raid_bdev); 656 } 657 658 void 659 raid_bdev_write_info_json(struct raid_bdev *raid_bdev, struct spdk_json_write_ctx *w) 660 { 661 struct raid_base_bdev_info *base_info; 662 char uuid_str[SPDK_UUID_STRING_LEN]; 663 664 assert(raid_bdev != NULL); 665 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 666 667 spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &raid_bdev->bdev.uuid); 668 spdk_json_write_named_string(w, "uuid", uuid_str); 669 spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb); 670 spdk_json_write_named_string(w, "state", raid_bdev_state_to_str(raid_bdev->state)); 671 spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level)); 672 spdk_json_write_named_bool(w, "superblock", raid_bdev->sb != NULL); 673 spdk_json_write_named_uint32(w, "num_base_bdevs", raid_bdev->num_base_bdevs); 674 spdk_json_write_named_uint32(w, "num_base_bdevs_discovered", raid_bdev->num_base_bdevs_discovered); 675 spdk_json_write_named_uint32(w, "num_base_bdevs_operational", 676 raid_bdev->num_base_bdevs_operational); 677 spdk_json_write_name(w, "base_bdevs_list"); 678 spdk_json_write_array_begin(w); 679 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 680 spdk_json_write_object_begin(w); 681 spdk_json_write_name(w, "name"); 682 if (base_info->name) { 683 spdk_json_write_string(w, base_info->name); 684 } else { 685 spdk_json_write_null(w); 686 } 687 spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &base_info->uuid); 688 spdk_json_write_named_string(w, "uuid", uuid_str); 689 spdk_json_write_named_bool(w, "is_configured", base_info->is_configured); 690 spdk_json_write_named_uint64(w, "data_offset", 
base_info->data_offset); 691 spdk_json_write_named_uint64(w, "data_size", base_info->data_size); 692 spdk_json_write_object_end(w); 693 } 694 spdk_json_write_array_end(w); 695 } 696 697 /* 698 * brief: 699 * raid_bdev_dump_info_json is the function table pointer for raid bdev 700 * params: 701 * ctx - pointer to raid_bdev 702 * w - pointer to json context 703 * returns: 704 * 0 - success 705 * non zero - failure 706 */ 707 static int 708 raid_bdev_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) 709 { 710 struct raid_bdev *raid_bdev = ctx; 711 712 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_dump_config_json\n"); 713 714 /* Dump the raid bdev configuration related information */ 715 spdk_json_write_named_object_begin(w, "raid"); 716 raid_bdev_write_info_json(raid_bdev, w); 717 spdk_json_write_object_end(w); 718 719 return 0; 720 } 721 722 /* 723 * brief: 724 * raid_bdev_write_config_json is the function table pointer for raid bdev 725 * params: 726 * bdev - pointer to spdk_bdev 727 * w - pointer to json context 728 * returns: 729 * none 730 */ 731 static void 732 raid_bdev_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 733 { 734 struct raid_bdev *raid_bdev = bdev->ctxt; 735 struct raid_base_bdev_info *base_info; 736 char uuid_str[SPDK_UUID_STRING_LEN]; 737 738 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 739 740 if (raid_bdev->sb != NULL) { 741 /* raid bdev configuration is stored in the superblock */ 742 return; 743 } 744 745 spdk_json_write_object_begin(w); 746 747 spdk_json_write_named_string(w, "method", "bdev_raid_create"); 748 749 spdk_json_write_named_object_begin(w, "params"); 750 spdk_json_write_named_string(w, "name", bdev->name); 751 spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &raid_bdev->bdev.uuid); 752 spdk_json_write_named_string(w, "uuid", uuid_str); 753 spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb); 754 spdk_json_write_named_string(w, "raid_level", 
raid_bdev_level_to_str(raid_bdev->level)); 755 spdk_json_write_named_bool(w, "superblock", raid_bdev->sb != NULL); 756 757 spdk_json_write_named_array_begin(w, "base_bdevs"); 758 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 759 if (base_info->desc) { 760 spdk_json_write_string(w, spdk_bdev_desc_get_bdev(base_info->desc)->name); 761 } 762 } 763 spdk_json_write_array_end(w); 764 spdk_json_write_object_end(w); 765 766 spdk_json_write_object_end(w); 767 } 768 769 static int 770 raid_bdev_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int array_size) 771 { 772 struct raid_bdev *raid_bdev = ctx; 773 struct raid_base_bdev_info *base_info; 774 int domains_count = 0, rc = 0; 775 776 if (raid_bdev->module->memory_domains_supported == false) { 777 return 0; 778 } 779 780 spdk_spin_lock(&raid_bdev->base_bdev_lock); 781 782 /* First loop to get the number of memory domains */ 783 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 784 if (base_info->desc == NULL) { 785 continue; 786 } 787 rc = spdk_bdev_get_memory_domains(spdk_bdev_desc_get_bdev(base_info->desc), NULL, 0); 788 if (rc < 0) { 789 goto out; 790 } 791 domains_count += rc; 792 } 793 794 if (!domains || array_size < domains_count) { 795 goto out; 796 } 797 798 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 799 if (base_info->desc == NULL) { 800 continue; 801 } 802 rc = spdk_bdev_get_memory_domains(spdk_bdev_desc_get_bdev(base_info->desc), domains, array_size); 803 if (rc < 0) { 804 goto out; 805 } 806 domains += rc; 807 array_size -= rc; 808 } 809 out: 810 spdk_spin_unlock(&raid_bdev->base_bdev_lock); 811 812 if (rc < 0) { 813 return rc; 814 } 815 816 return domains_count; 817 } 818 819 /* g_raid_bdev_fn_table is the function table for raid bdev */ 820 static const struct spdk_bdev_fn_table g_raid_bdev_fn_table = { 821 .destruct = raid_bdev_destruct, 822 .submit_request = raid_bdev_submit_request, 823 .io_type_supported = raid_bdev_io_type_supported, 824 .get_io_channel = raid_bdev_get_io_channel, 
825 .dump_info_json = raid_bdev_dump_info_json, 826 .write_config_json = raid_bdev_write_config_json, 827 .get_memory_domains = raid_bdev_get_memory_domains, 828 }; 829 830 struct raid_bdev * 831 raid_bdev_find_by_name(const char *name) 832 { 833 struct raid_bdev *raid_bdev; 834 835 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { 836 if (strcmp(raid_bdev->bdev.name, name) == 0) { 837 return raid_bdev; 838 } 839 } 840 841 return NULL; 842 } 843 844 static struct { 845 const char *name; 846 enum raid_level value; 847 } g_raid_level_names[] = { 848 { "raid0", RAID0 }, 849 { "0", RAID0 }, 850 { "raid1", RAID1 }, 851 { "1", RAID1 }, 852 { "raid5f", RAID5F }, 853 { "5f", RAID5F }, 854 { "concat", CONCAT }, 855 { } 856 }; 857 858 static struct { 859 const char *name; 860 enum raid_bdev_state value; 861 } g_raid_state_names[] = { 862 { "online", RAID_BDEV_STATE_ONLINE }, 863 { "configuring", RAID_BDEV_STATE_CONFIGURING }, 864 { "offline", RAID_BDEV_STATE_OFFLINE }, 865 { } 866 }; 867 868 /* We have to use the typedef in the function declaration to appease astyle. 
*/ 869 typedef enum raid_level raid_level_t; 870 typedef enum raid_bdev_state raid_bdev_state_t; 871 872 raid_level_t 873 raid_bdev_str_to_level(const char *str) 874 { 875 unsigned int i; 876 877 assert(str != NULL); 878 879 for (i = 0; g_raid_level_names[i].name != NULL; i++) { 880 if (strcasecmp(g_raid_level_names[i].name, str) == 0) { 881 return g_raid_level_names[i].value; 882 } 883 } 884 885 return INVALID_RAID_LEVEL; 886 } 887 888 const char * 889 raid_bdev_level_to_str(enum raid_level level) 890 { 891 unsigned int i; 892 893 for (i = 0; g_raid_level_names[i].name != NULL; i++) { 894 if (g_raid_level_names[i].value == level) { 895 return g_raid_level_names[i].name; 896 } 897 } 898 899 return ""; 900 } 901 902 raid_bdev_state_t 903 raid_bdev_str_to_state(const char *str) 904 { 905 unsigned int i; 906 907 assert(str != NULL); 908 909 for (i = 0; g_raid_state_names[i].name != NULL; i++) { 910 if (strcasecmp(g_raid_state_names[i].name, str) == 0) { 911 return g_raid_state_names[i].value; 912 } 913 } 914 915 return RAID_BDEV_STATE_MAX; 916 } 917 918 const char * 919 raid_bdev_state_to_str(enum raid_bdev_state state) 920 { 921 unsigned int i; 922 923 for (i = 0; g_raid_state_names[i].name != NULL; i++) { 924 if (g_raid_state_names[i].value == state) { 925 return g_raid_state_names[i].name; 926 } 927 } 928 929 assert(false); 930 return ""; 931 } 932 933 /* 934 * brief: 935 * raid_bdev_fini_start is called when bdev layer is starting the 936 * shutdown process 937 * params: 938 * none 939 * returns: 940 * none 941 */ 942 static void 943 raid_bdev_fini_start(void) 944 { 945 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_fini_start\n"); 946 g_shutdown_started = true; 947 } 948 949 /* 950 * brief: 951 * raid_bdev_exit is called on raid bdev module exit time by bdev layer 952 * params: 953 * none 954 * returns: 955 * none 956 */ 957 static void 958 raid_bdev_exit(void) 959 { 960 struct raid_bdev *raid_bdev, *tmp; 961 962 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_exit\n"); 963 964 
TAILQ_FOREACH_SAFE(raid_bdev, &g_raid_bdev_list, global_link, tmp) { 965 raid_bdev_cleanup_and_free(raid_bdev); 966 } 967 } 968 969 /* 970 * brief: 971 * raid_bdev_get_ctx_size is used to return the context size of bdev_io for raid 972 * module 973 * params: 974 * none 975 * returns: 976 * size of spdk_bdev_io context for raid 977 */ 978 static int 979 raid_bdev_get_ctx_size(void) 980 { 981 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_get_ctx_size\n"); 982 return sizeof(struct raid_bdev_io); 983 } 984 985 static struct spdk_bdev_module g_raid_if = { 986 .name = "raid", 987 .module_init = raid_bdev_init, 988 .fini_start = raid_bdev_fini_start, 989 .module_fini = raid_bdev_exit, 990 .get_ctx_size = raid_bdev_get_ctx_size, 991 .examine_disk = raid_bdev_examine, 992 .async_init = false, 993 .async_fini = false, 994 }; 995 SPDK_BDEV_MODULE_REGISTER(raid, &g_raid_if) 996 997 /* 998 * brief: 999 * raid_bdev_init is the initialization function for raid bdev module 1000 * params: 1001 * none 1002 * returns: 1003 * 0 - success 1004 * non zero - failure 1005 */ 1006 static int 1007 raid_bdev_init(void) 1008 { 1009 return 0; 1010 } 1011 1012 static int 1013 _raid_bdev_create(const char *name, uint32_t strip_size, uint8_t num_base_bdevs, 1014 enum raid_level level, bool superblock_enabled, const struct spdk_uuid *uuid, 1015 struct raid_bdev **raid_bdev_out) 1016 { 1017 struct raid_bdev *raid_bdev; 1018 struct spdk_bdev *raid_bdev_gen; 1019 struct raid_bdev_module *module; 1020 struct raid_base_bdev_info *base_info; 1021 uint8_t min_operational; 1022 1023 if (strnlen(name, RAID_BDEV_SB_NAME_SIZE) == RAID_BDEV_SB_NAME_SIZE) { 1024 SPDK_ERRLOG("Raid bdev name '%s' exceeds %d characters\n", name, RAID_BDEV_SB_NAME_SIZE - 1); 1025 return -EINVAL; 1026 } 1027 1028 if (raid_bdev_find_by_name(name) != NULL) { 1029 SPDK_ERRLOG("Duplicate raid bdev name found: %s\n", name); 1030 return -EEXIST; 1031 } 1032 1033 if (level == RAID1) { 1034 if (strip_size != 0) { 1035 SPDK_ERRLOG("Strip size is not 
supported by raid1\n"); 1036 return -EINVAL; 1037 } 1038 } else if (spdk_u32_is_pow2(strip_size) == false) { 1039 SPDK_ERRLOG("Invalid strip size %" PRIu32 "\n", strip_size); 1040 return -EINVAL; 1041 } 1042 1043 module = raid_bdev_module_find(level); 1044 if (module == NULL) { 1045 SPDK_ERRLOG("Unsupported raid level '%d'\n", level); 1046 return -EINVAL; 1047 } 1048 1049 assert(module->base_bdevs_min != 0); 1050 if (num_base_bdevs < module->base_bdevs_min) { 1051 SPDK_ERRLOG("At least %u base devices required for %s\n", 1052 module->base_bdevs_min, 1053 raid_bdev_level_to_str(level)); 1054 return -EINVAL; 1055 } 1056 1057 switch (module->base_bdevs_constraint.type) { 1058 case CONSTRAINT_MAX_BASE_BDEVS_REMOVED: 1059 min_operational = num_base_bdevs - module->base_bdevs_constraint.value; 1060 break; 1061 case CONSTRAINT_MIN_BASE_BDEVS_OPERATIONAL: 1062 min_operational = module->base_bdevs_constraint.value; 1063 break; 1064 case CONSTRAINT_UNSET: 1065 if (module->base_bdevs_constraint.value != 0) { 1066 SPDK_ERRLOG("Unexpected constraint value '%u' provided for raid bdev '%s'.\n", 1067 (uint8_t)module->base_bdevs_constraint.value, name); 1068 return -EINVAL; 1069 } 1070 min_operational = num_base_bdevs; 1071 break; 1072 default: 1073 SPDK_ERRLOG("Unrecognised constraint type '%u' in module for raid level '%s'.\n", 1074 (uint8_t)module->base_bdevs_constraint.type, 1075 raid_bdev_level_to_str(module->level)); 1076 return -EINVAL; 1077 }; 1078 1079 if (min_operational == 0 || min_operational > num_base_bdevs) { 1080 SPDK_ERRLOG("Wrong constraint value for raid level '%s'.\n", 1081 raid_bdev_level_to_str(module->level)); 1082 return -EINVAL; 1083 } 1084 1085 raid_bdev = calloc(1, sizeof(*raid_bdev)); 1086 if (!raid_bdev) { 1087 SPDK_ERRLOG("Unable to allocate memory for raid bdev\n"); 1088 return -ENOMEM; 1089 } 1090 1091 spdk_spin_init(&raid_bdev->base_bdev_lock); 1092 raid_bdev->module = module; 1093 raid_bdev->num_base_bdevs = num_base_bdevs; 1094 
raid_bdev->base_bdev_info = calloc(raid_bdev->num_base_bdevs, 1095 sizeof(struct raid_base_bdev_info)); 1096 if (!raid_bdev->base_bdev_info) { 1097 SPDK_ERRLOG("Unable able to allocate base bdev info\n"); 1098 raid_bdev_free(raid_bdev); 1099 return -ENOMEM; 1100 } 1101 1102 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1103 base_info->raid_bdev = raid_bdev; 1104 } 1105 1106 /* strip_size_kb is from the rpc param. strip_size is in blocks and used 1107 * internally and set later. 1108 */ 1109 raid_bdev->strip_size = 0; 1110 raid_bdev->strip_size_kb = strip_size; 1111 raid_bdev->state = RAID_BDEV_STATE_CONFIGURING; 1112 raid_bdev->level = level; 1113 raid_bdev->min_base_bdevs_operational = min_operational; 1114 1115 if (superblock_enabled) { 1116 raid_bdev->sb = spdk_dma_zmalloc(RAID_BDEV_SB_MAX_LENGTH, 0x1000, NULL); 1117 if (!raid_bdev->sb) { 1118 SPDK_ERRLOG("Failed to allocate raid bdev sb buffer\n"); 1119 raid_bdev_free(raid_bdev); 1120 return -ENOMEM; 1121 } 1122 } 1123 1124 raid_bdev_gen = &raid_bdev->bdev; 1125 1126 raid_bdev_gen->name = strdup(name); 1127 if (!raid_bdev_gen->name) { 1128 SPDK_ERRLOG("Unable to allocate name for raid\n"); 1129 raid_bdev_free(raid_bdev); 1130 return -ENOMEM; 1131 } 1132 1133 raid_bdev_gen->product_name = "Raid Volume"; 1134 raid_bdev_gen->ctxt = raid_bdev; 1135 raid_bdev_gen->fn_table = &g_raid_bdev_fn_table; 1136 raid_bdev_gen->module = &g_raid_if; 1137 raid_bdev_gen->write_cache = 0; 1138 spdk_uuid_copy(&raid_bdev_gen->uuid, uuid); 1139 1140 TAILQ_INSERT_TAIL(&g_raid_bdev_list, raid_bdev, global_link); 1141 1142 *raid_bdev_out = raid_bdev; 1143 1144 return 0; 1145 } 1146 1147 /* 1148 * brief: 1149 * raid_bdev_create allocates raid bdev based on passed configuration 1150 * params: 1151 * name - name for raid bdev 1152 * strip_size - strip size in KB 1153 * num_base_bdevs - number of base bdevs 1154 * level - raid level 1155 * superblock_enabled - true if raid should have superblock 1156 * uuid - uuid to set for the bdev 1157 
* raid_bdev_out - the created raid bdev
 * returns:
 * 0 - success
 * non zero - failure
 */
int
raid_bdev_create(const char *name, uint32_t strip_size, uint8_t num_base_bdevs,
		 enum raid_level level, bool superblock_enabled, const struct spdk_uuid *uuid,
		 struct raid_bdev **raid_bdev_out)
{
	struct raid_bdev *created = NULL;
	int status;

	assert(uuid != NULL);

	status = _raid_bdev_create(name, strip_size, num_base_bdevs, level, superblock_enabled,
				   uuid, &created);
	if (status == 0) {
		/* The superblock stores the uuid, so it must exist before the bdev is registered */
		if (superblock_enabled && spdk_uuid_is_null(uuid)) {
			spdk_uuid_generate(&created->bdev.uuid);
		}

		/* A freshly created array starts with every base bdev counted as operational */
		created->num_base_bdevs_operational = num_base_bdevs;

		*raid_bdev_out = created;
	}

	return status;
}

/*
 * brief:
 * Check underlying block devices against support for metadata. Do not configure
 * md support when parameters from block devices are inconsistent.
 * params:
 * raid_bdev - pointer to raid bdev
 * returns:
 * 0 - The raid bdev md parameters were successfully configured.
 * non zero - Failed to configure md.
1199 */ 1200 static int 1201 raid_bdev_configure_md(struct raid_bdev *raid_bdev) 1202 { 1203 struct spdk_bdev *base_bdev; 1204 uint8_t i; 1205 1206 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 1207 if (raid_bdev->base_bdev_info[i].desc == NULL) { 1208 continue; 1209 } 1210 base_bdev = spdk_bdev_desc_get_bdev(raid_bdev->base_bdev_info[i].desc); 1211 1212 /* Currently, RAID bdevs do not support DIF or DIX, so a RAID bdev cannot 1213 * be created on top of any bdev which supports it */ 1214 if (spdk_bdev_get_dif_type(base_bdev) != SPDK_DIF_DISABLE) { 1215 SPDK_ERRLOG("at least one base bdev has DIF or DIX enabled " 1216 "- unsupported RAID configuration\n"); 1217 return -EPERM; 1218 } 1219 1220 if (i == 0) { 1221 raid_bdev->bdev.md_len = spdk_bdev_get_md_size(base_bdev); 1222 raid_bdev->bdev.md_interleave = spdk_bdev_is_md_interleaved(base_bdev); 1223 continue; 1224 } 1225 1226 if (raid_bdev->bdev.md_len != spdk_bdev_get_md_size(base_bdev) || 1227 raid_bdev->bdev.md_interleave != spdk_bdev_is_md_interleaved(base_bdev)) { 1228 SPDK_ERRLOG("base bdevs are configured with different metadata formats\n"); 1229 return -EPERM; 1230 } 1231 } 1232 1233 return 0; 1234 } 1235 1236 static void 1237 raid_bdev_configure_cont(struct raid_bdev *raid_bdev) 1238 { 1239 struct spdk_bdev *raid_bdev_gen = &raid_bdev->bdev; 1240 int rc; 1241 1242 raid_bdev->state = RAID_BDEV_STATE_ONLINE; 1243 SPDK_DEBUGLOG(bdev_raid, "io device register %p\n", raid_bdev); 1244 SPDK_DEBUGLOG(bdev_raid, "blockcnt %" PRIu64 ", blocklen %u\n", 1245 raid_bdev_gen->blockcnt, raid_bdev_gen->blocklen); 1246 spdk_io_device_register(raid_bdev, raid_bdev_create_cb, raid_bdev_destroy_cb, 1247 sizeof(struct raid_bdev_io_channel), 1248 raid_bdev_gen->name); 1249 rc = spdk_bdev_register(raid_bdev_gen); 1250 if (rc != 0) { 1251 SPDK_ERRLOG("Unable to register raid bdev and stay at configuring state\n"); 1252 if (raid_bdev->module->stop != NULL) { 1253 raid_bdev->module->stop(raid_bdev); 1254 } 1255 
spdk_io_device_unregister(raid_bdev, NULL); 1256 raid_bdev->state = RAID_BDEV_STATE_CONFIGURING; 1257 return; 1258 } 1259 SPDK_DEBUGLOG(bdev_raid, "raid bdev generic %p\n", raid_bdev_gen); 1260 SPDK_DEBUGLOG(bdev_raid, "raid bdev is created with name %s, raid_bdev %p\n", 1261 raid_bdev_gen->name, raid_bdev); 1262 } 1263 1264 static void 1265 raid_bdev_configure_write_sb_cb(int status, struct raid_bdev *raid_bdev, void *ctx) 1266 { 1267 if (status == 0) { 1268 raid_bdev_configure_cont(raid_bdev); 1269 } else { 1270 SPDK_ERRLOG("Failed to write raid bdev '%s' superblock: %s\n", 1271 raid_bdev->bdev.name, spdk_strerror(-status)); 1272 if (raid_bdev->module->stop != NULL) { 1273 raid_bdev->module->stop(raid_bdev); 1274 } 1275 } 1276 } 1277 1278 /* 1279 * brief: 1280 * If raid bdev config is complete, then only register the raid bdev to 1281 * bdev layer and remove this raid bdev from configuring list and 1282 * insert the raid bdev to configured list 1283 * params: 1284 * raid_bdev - pointer to raid bdev 1285 * returns: 1286 * 0 - success 1287 * non zero - failure 1288 */ 1289 static int 1290 raid_bdev_configure(struct raid_bdev *raid_bdev) 1291 { 1292 uint32_t blocklen = 0; 1293 struct raid_base_bdev_info *base_info; 1294 struct spdk_bdev *base_bdev; 1295 int rc = 0; 1296 1297 assert(raid_bdev->state == RAID_BDEV_STATE_CONFIGURING); 1298 assert(raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs_operational); 1299 1300 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1301 if (base_info->desc == NULL) { 1302 continue; 1303 } 1304 base_bdev = spdk_bdev_desc_get_bdev(base_info->desc); 1305 1306 /* Check blocklen for all base bdevs that it should be same */ 1307 if (blocklen == 0) { 1308 blocklen = base_bdev->blocklen; 1309 } else if (blocklen != base_bdev->blocklen) { 1310 /* 1311 * Assumption is that all the base bdevs for any raid bdev should 1312 * have same blocklen 1313 */ 1314 SPDK_ERRLOG("Blocklen of various bdevs not matching\n"); 1315 return 
-EINVAL; 1316 } 1317 } 1318 assert(blocklen > 0); 1319 1320 /* The strip_size_kb is read in from user in KB. Convert to blocks here for 1321 * internal use. 1322 */ 1323 raid_bdev->strip_size = (raid_bdev->strip_size_kb * 1024) / blocklen; 1324 if (raid_bdev->strip_size == 0 && raid_bdev->level != RAID1) { 1325 SPDK_ERRLOG("Strip size cannot be smaller than the device block size\n"); 1326 return -EINVAL; 1327 } 1328 raid_bdev->strip_size_shift = spdk_u32log2(raid_bdev->strip_size); 1329 raid_bdev->blocklen_shift = spdk_u32log2(blocklen); 1330 raid_bdev->bdev.blocklen = blocklen; 1331 1332 rc = raid_bdev_configure_md(raid_bdev); 1333 if (rc != 0) { 1334 SPDK_ERRLOG("raid metadata configuration failed\n"); 1335 return rc; 1336 } 1337 1338 rc = raid_bdev->module->start(raid_bdev); 1339 if (rc != 0) { 1340 SPDK_ERRLOG("raid module startup callback failed\n"); 1341 return rc; 1342 } 1343 1344 if (raid_bdev->sb != NULL) { 1345 if (spdk_uuid_is_null(&raid_bdev->sb->uuid)) { 1346 /* NULL UUID is not valid in the sb so it means that we are creating a new 1347 * raid bdev and should initialize the superblock. 
1348 */ 1349 raid_bdev_init_superblock(raid_bdev); 1350 } else { 1351 assert(spdk_uuid_compare(&raid_bdev->sb->uuid, &raid_bdev->bdev.uuid) == 0); 1352 if (raid_bdev->sb->block_size != blocklen) { 1353 SPDK_ERRLOG("blocklen does not match value in superblock\n"); 1354 rc = -EINVAL; 1355 } 1356 if (raid_bdev->sb->raid_size != raid_bdev->bdev.blockcnt) { 1357 SPDK_ERRLOG("blockcnt does not match value in superblock\n"); 1358 rc = -EINVAL; 1359 } 1360 if (rc != 0) { 1361 if (raid_bdev->module->stop != NULL) { 1362 raid_bdev->module->stop(raid_bdev); 1363 } 1364 return rc; 1365 } 1366 } 1367 1368 raid_bdev_write_superblock(raid_bdev, raid_bdev_configure_write_sb_cb, NULL); 1369 } else { 1370 raid_bdev_configure_cont(raid_bdev); 1371 } 1372 1373 return 0; 1374 } 1375 1376 /* 1377 * brief: 1378 * If raid bdev is online and registered, change the bdev state to 1379 * configuring and unregister this raid device. Queue this raid device 1380 * in configuring list 1381 * params: 1382 * raid_bdev - pointer to raid bdev 1383 * cb_fn - callback function 1384 * cb_arg - argument to callback function 1385 * returns: 1386 * none 1387 */ 1388 static void 1389 raid_bdev_deconfigure(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn, 1390 void *cb_arg) 1391 { 1392 if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) { 1393 if (cb_fn) { 1394 cb_fn(cb_arg, 0); 1395 } 1396 return; 1397 } 1398 1399 raid_bdev->state = RAID_BDEV_STATE_OFFLINE; 1400 assert(raid_bdev->num_base_bdevs_discovered); 1401 SPDK_DEBUGLOG(bdev_raid, "raid bdev state changing from online to offline\n"); 1402 1403 spdk_bdev_unregister(&raid_bdev->bdev, cb_fn, cb_arg); 1404 } 1405 1406 /* 1407 * brief: 1408 * raid_bdev_find_base_info_by_bdev function finds the base bdev info by bdev. 1409 * params: 1410 * base_bdev - pointer to base bdev 1411 * returns: 1412 * base bdev info if found, otherwise NULL. 
1413 */ 1414 static struct raid_base_bdev_info * 1415 raid_bdev_find_base_info_by_bdev(struct spdk_bdev *base_bdev) 1416 { 1417 struct raid_bdev *raid_bdev; 1418 struct raid_base_bdev_info *base_info; 1419 1420 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { 1421 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1422 if (base_info->desc != NULL && 1423 spdk_bdev_desc_get_bdev(base_info->desc) == base_bdev) { 1424 return base_info; 1425 } 1426 } 1427 } 1428 1429 return NULL; 1430 } 1431 1432 static void 1433 raid_bdev_remove_base_bdev_done(struct raid_base_bdev_info *base_info, int status) 1434 { 1435 assert(base_info->remove_scheduled); 1436 1437 base_info->remove_scheduled = false; 1438 if (base_info->remove_cb != NULL) { 1439 base_info->remove_cb(base_info->remove_cb_ctx, status); 1440 } 1441 } 1442 1443 static void 1444 raid_bdev_remove_base_bdev_write_sb_cb(int status, struct raid_bdev *raid_bdev, void *ctx) 1445 { 1446 struct raid_base_bdev_info *base_info = ctx; 1447 1448 if (status != 0) { 1449 SPDK_ERRLOG("Failed to write raid bdev '%s' superblock: %s\n", 1450 raid_bdev->bdev.name, spdk_strerror(-status)); 1451 } 1452 1453 raid_bdev_remove_base_bdev_done(base_info, status); 1454 } 1455 1456 static void 1457 raid_bdev_remove_base_bdev_on_unquiesced(void *ctx, int status) 1458 { 1459 struct raid_base_bdev_info *base_info = ctx; 1460 struct raid_bdev *raid_bdev = base_info->raid_bdev; 1461 1462 if (status != 0) { 1463 SPDK_ERRLOG("Failed to unquiesce raid bdev %s: %s\n", 1464 raid_bdev->bdev.name, spdk_strerror(-status)); 1465 goto out; 1466 } 1467 1468 spdk_spin_lock(&raid_bdev->base_bdev_lock); 1469 raid_bdev_free_base_bdev_resource(base_info); 1470 spdk_spin_unlock(&raid_bdev->base_bdev_lock); 1471 1472 if (raid_bdev->sb) { 1473 struct raid_bdev_superblock *sb = raid_bdev->sb; 1474 struct raid_bdev_sb_base_bdev *sb_base_bdev = NULL; 1475 uint8_t slot = raid_bdev_base_bdev_slot(base_info); 1476 uint8_t i; 1477 1478 for (i = 0; i < 
sb->base_bdevs_size; i++) { 1479 sb_base_bdev = &sb->base_bdevs[i]; 1480 1481 if (sb_base_bdev->state == RAID_SB_BASE_BDEV_CONFIGURED && 1482 sb_base_bdev->slot == slot) { 1483 break; 1484 } 1485 } 1486 1487 assert(i < sb->base_bdevs_size); 1488 1489 /* TODO: distinguish between failure and intentional removal */ 1490 sb_base_bdev->state = RAID_SB_BASE_BDEV_FAILED; 1491 1492 raid_bdev_write_superblock(raid_bdev, raid_bdev_remove_base_bdev_write_sb_cb, base_info); 1493 return; 1494 } 1495 out: 1496 raid_bdev_remove_base_bdev_done(base_info, status); 1497 } 1498 1499 static void 1500 raid_bdev_channel_remove_base_bdev(struct spdk_io_channel_iter *i) 1501 { 1502 struct raid_base_bdev_info *base_info = spdk_io_channel_iter_get_ctx(i); 1503 struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); 1504 struct raid_bdev_io_channel *raid_ch = spdk_io_channel_get_ctx(ch); 1505 uint8_t idx = raid_bdev_base_bdev_slot(base_info); 1506 1507 SPDK_DEBUGLOG(bdev_raid, "slot: %u raid_ch: %p\n", idx, raid_ch); 1508 1509 if (raid_ch->base_channel[idx] != NULL) { 1510 spdk_put_io_channel(raid_ch->base_channel[idx]); 1511 raid_ch->base_channel[idx] = NULL; 1512 } 1513 1514 spdk_for_each_channel_continue(i, 0); 1515 } 1516 1517 static void 1518 raid_bdev_channels_remove_base_bdev_done(struct spdk_io_channel_iter *i, int status) 1519 { 1520 struct raid_base_bdev_info *base_info = spdk_io_channel_iter_get_ctx(i); 1521 struct raid_bdev *raid_bdev = base_info->raid_bdev; 1522 1523 spdk_bdev_unquiesce(&raid_bdev->bdev, &g_raid_if, raid_bdev_remove_base_bdev_on_unquiesced, 1524 base_info); 1525 } 1526 1527 static void 1528 raid_bdev_remove_base_bdev_on_quiesced(void *ctx, int status) 1529 { 1530 struct raid_base_bdev_info *base_info = ctx; 1531 struct raid_bdev *raid_bdev = base_info->raid_bdev; 1532 1533 if (status != 0) { 1534 SPDK_ERRLOG("Failed to quiesce raid bdev %s: %s\n", 1535 raid_bdev->bdev.name, spdk_strerror(-status)); 1536 raid_bdev_remove_base_bdev_done(base_info, 
status); 1537 return; 1538 } 1539 1540 spdk_for_each_channel(raid_bdev, raid_bdev_channel_remove_base_bdev, base_info, 1541 raid_bdev_channels_remove_base_bdev_done); 1542 } 1543 1544 static int 1545 _raid_bdev_remove_base_bdev(struct raid_base_bdev_info *base_info, 1546 raid_bdev_remove_base_bdev_cb cb_fn, void *cb_ctx) 1547 { 1548 struct raid_bdev *raid_bdev = base_info->raid_bdev; 1549 1550 SPDK_DEBUGLOG(bdev_raid, "%s\n", base_info->name); 1551 1552 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 1553 1554 if (base_info->remove_scheduled) { 1555 return 0; 1556 } 1557 1558 assert(base_info->desc); 1559 base_info->remove_scheduled = true; 1560 base_info->remove_cb = cb_fn; 1561 base_info->remove_cb_ctx = cb_ctx; 1562 1563 if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) { 1564 /* 1565 * As raid bdev is not registered yet or already unregistered, 1566 * so cleanup should be done here itself. 1567 * 1568 * Removing a base bdev at this stage does not change the number of operational 1569 * base bdevs, only the number of discovered base bdevs. 1570 */ 1571 raid_bdev_free_base_bdev_resource(base_info); 1572 if (raid_bdev->num_base_bdevs_discovered == 0) { 1573 /* There is no base bdev for this raid, so free the raid device. */ 1574 raid_bdev_cleanup_and_free(raid_bdev); 1575 } 1576 } else if (raid_bdev->num_base_bdevs_operational-- == raid_bdev->min_base_bdevs_operational) { 1577 /* 1578 * After this base bdev is removed there will not be enough base bdevs 1579 * to keep the raid bdev operational. 1580 */ 1581 raid_bdev_deconfigure(raid_bdev, cb_fn, cb_ctx); 1582 } else { 1583 int ret; 1584 1585 ret = spdk_bdev_quiesce(&raid_bdev->bdev, &g_raid_if, 1586 raid_bdev_remove_base_bdev_on_quiesced, base_info); 1587 if (ret != 0) { 1588 base_info->remove_scheduled = false; 1589 } 1590 } 1591 1592 return 0; 1593 } 1594 1595 /* 1596 * brief: 1597 * raid_bdev_remove_base_bdev function is called by below layers when base_bdev 1598 * is removed. 
This function checks if this base bdev is part of any raid bdev 1599 * or not. If yes, it takes necessary action on that particular raid bdev. 1600 * params: 1601 * base_bdev - pointer to base bdev which got removed 1602 * cb_fn - callback function 1603 * cb_arg - argument to callback function 1604 * returns: 1605 * 0 - success 1606 * non zero - failure 1607 */ 1608 int 1609 raid_bdev_remove_base_bdev(struct spdk_bdev *base_bdev, raid_bdev_remove_base_bdev_cb cb_fn, 1610 void *cb_ctx) 1611 { 1612 struct raid_base_bdev_info *base_info; 1613 1614 /* Find the raid_bdev which has claimed this base_bdev */ 1615 base_info = raid_bdev_find_base_info_by_bdev(base_bdev); 1616 if (!base_info) { 1617 SPDK_ERRLOG("bdev to remove '%s' not found\n", base_bdev->name); 1618 return -ENODEV; 1619 } 1620 1621 return _raid_bdev_remove_base_bdev(base_info, cb_fn, cb_ctx); 1622 } 1623 1624 /* 1625 * brief: 1626 * raid_bdev_resize_base_bdev function is called by below layers when base_bdev 1627 * is resized. This function checks if the smallest size of the base_bdevs is changed. 1628 * If yes, call module handler to resize the raid_bdev if implemented. 1629 * params: 1630 * base_bdev - pointer to base bdev which got resized. 
1631 * returns: 1632 * none 1633 */ 1634 static void 1635 raid_bdev_resize_base_bdev(struct spdk_bdev *base_bdev) 1636 { 1637 struct raid_bdev *raid_bdev; 1638 struct raid_base_bdev_info *base_info; 1639 1640 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_resize_base_bdev\n"); 1641 1642 base_info = raid_bdev_find_base_info_by_bdev(base_bdev); 1643 1644 /* Find the raid_bdev which has claimed this base_bdev */ 1645 if (!base_info) { 1646 SPDK_ERRLOG("raid_bdev whose base_bdev '%s' not found\n", base_bdev->name); 1647 return; 1648 } 1649 raid_bdev = base_info->raid_bdev; 1650 1651 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 1652 1653 SPDK_NOTICELOG("base_bdev '%s' was resized: old size %" PRIu64 ", new size %" PRIu64 "\n", 1654 base_bdev->name, base_info->blockcnt, base_bdev->blockcnt); 1655 1656 if (raid_bdev->module->resize) { 1657 raid_bdev->module->resize(raid_bdev); 1658 } 1659 } 1660 1661 /* 1662 * brief: 1663 * raid_bdev_event_base_bdev function is called by below layers when base_bdev 1664 * triggers asynchronous event. 1665 * params: 1666 * type - event details. 1667 * bdev - bdev that triggered event. 1668 * event_ctx - context for event. 
1669 * returns: 1670 * none 1671 */ 1672 static void 1673 raid_bdev_event_base_bdev(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, 1674 void *event_ctx) 1675 { 1676 int rc; 1677 1678 switch (type) { 1679 case SPDK_BDEV_EVENT_REMOVE: 1680 rc = raid_bdev_remove_base_bdev(bdev, NULL, NULL); 1681 if (rc != 0) { 1682 SPDK_ERRLOG("Failed to remove base bdev %s: %s\n", 1683 spdk_bdev_get_name(bdev), spdk_strerror(-rc)); 1684 } 1685 break; 1686 case SPDK_BDEV_EVENT_RESIZE: 1687 raid_bdev_resize_base_bdev(bdev); 1688 break; 1689 default: 1690 SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type); 1691 break; 1692 } 1693 } 1694 1695 /* 1696 * brief: 1697 * Deletes the specified raid bdev 1698 * params: 1699 * raid_bdev - pointer to raid bdev 1700 * cb_fn - callback function 1701 * cb_arg - argument to callback function 1702 */ 1703 void 1704 raid_bdev_delete(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn, void *cb_arg) 1705 { 1706 struct raid_base_bdev_info *base_info; 1707 1708 SPDK_DEBUGLOG(bdev_raid, "delete raid bdev: %s\n", raid_bdev->bdev.name); 1709 1710 if (raid_bdev->destroy_started) { 1711 SPDK_DEBUGLOG(bdev_raid, "destroying raid bdev %s is already started\n", 1712 raid_bdev->bdev.name); 1713 if (cb_fn) { 1714 cb_fn(cb_arg, -EALREADY); 1715 } 1716 return; 1717 } 1718 1719 raid_bdev->destroy_started = true; 1720 1721 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1722 base_info->remove_scheduled = true; 1723 1724 if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) { 1725 /* 1726 * As raid bdev is not registered yet or already unregistered, 1727 * so cleanup should be done here itself. 1728 */ 1729 raid_bdev_free_base_bdev_resource(base_info); 1730 } 1731 } 1732 1733 if (raid_bdev->num_base_bdevs_discovered == 0) { 1734 /* There is no base bdev for this raid, so free the raid device. 
*/ 1735 raid_bdev_cleanup_and_free(raid_bdev); 1736 if (cb_fn) { 1737 cb_fn(cb_arg, 0); 1738 } 1739 } else { 1740 raid_bdev_deconfigure(raid_bdev, cb_fn, cb_arg); 1741 } 1742 } 1743 1744 static void 1745 raid_bdev_configure_base_bdev_cont(struct raid_base_bdev_info *base_info) 1746 { 1747 struct raid_bdev *raid_bdev = base_info->raid_bdev; 1748 int rc; 1749 1750 base_info->is_configured = true; 1751 1752 raid_bdev->num_base_bdevs_discovered++; 1753 assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs); 1754 assert(raid_bdev->num_base_bdevs_operational <= raid_bdev->num_base_bdevs); 1755 assert(raid_bdev->num_base_bdevs_operational >= raid_bdev->min_base_bdevs_operational); 1756 1757 /* 1758 * Configure the raid bdev when the number of discovered base bdevs reaches the number 1759 * of base bdevs we know to be operational members of the array. Usually this is equal 1760 * to the total number of base bdevs (num_base_bdevs) but can be less - when the array is 1761 * degraded. 
1762 */ 1763 if (raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs_operational) { 1764 rc = raid_bdev_configure(raid_bdev); 1765 if (rc != 0) { 1766 SPDK_ERRLOG("Failed to configure raid bdev: %s\n", spdk_strerror(-rc)); 1767 } 1768 } 1769 } 1770 1771 static void 1772 raid_bdev_configure_base_bdev_check_sb_cb(const struct raid_bdev_superblock *sb, int status, 1773 void *ctx) 1774 { 1775 struct raid_base_bdev_info *base_info = ctx; 1776 1777 switch (status) { 1778 case 0: 1779 /* valid superblock found */ 1780 SPDK_ERRLOG("Existing raid superblock found on bdev %s\n", base_info->name); 1781 raid_bdev_free_base_bdev_resource(base_info); 1782 break; 1783 case -EINVAL: 1784 /* no valid superblock */ 1785 raid_bdev_configure_base_bdev_cont(base_info); 1786 break; 1787 default: 1788 SPDK_ERRLOG("Failed to examine bdev %s: %s\n", 1789 base_info->name, spdk_strerror(-status)); 1790 break; 1791 } 1792 } 1793 1794 static int 1795 raid_bdev_configure_base_bdev(struct raid_base_bdev_info *base_info, bool existing) 1796 { 1797 struct raid_bdev *raid_bdev = base_info->raid_bdev; 1798 struct spdk_bdev_desc *desc; 1799 struct spdk_bdev *bdev; 1800 const struct spdk_uuid *bdev_uuid; 1801 int rc; 1802 1803 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 1804 assert(base_info->desc == NULL); 1805 1806 /* 1807 * Base bdev can be added by name or uuid. Here we assure both properties are set and valid 1808 * before claiming the bdev. 
1809 */ 1810 1811 if (!spdk_uuid_is_null(&base_info->uuid)) { 1812 char uuid_str[SPDK_UUID_STRING_LEN]; 1813 const char *bdev_name; 1814 1815 spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &base_info->uuid); 1816 1817 /* UUID of a bdev is registered as its alias */ 1818 bdev = spdk_bdev_get_by_name(uuid_str); 1819 if (bdev == NULL) { 1820 return -ENODEV; 1821 } 1822 1823 bdev_name = spdk_bdev_get_name(bdev); 1824 1825 if (base_info->name == NULL) { 1826 assert(existing == true); 1827 base_info->name = strdup(bdev_name); 1828 if (base_info->name == NULL) { 1829 return -ENOMEM; 1830 } 1831 } else if (strcmp(base_info->name, bdev_name) != 0) { 1832 SPDK_ERRLOG("Name mismatch for base bdev '%s' - expected '%s'\n", 1833 bdev_name, base_info->name); 1834 return -EINVAL; 1835 } 1836 } 1837 1838 assert(base_info->name != NULL); 1839 1840 rc = spdk_bdev_open_ext(base_info->name, true, raid_bdev_event_base_bdev, NULL, &desc); 1841 if (rc != 0) { 1842 if (rc != -ENODEV) { 1843 SPDK_ERRLOG("Unable to create desc on bdev '%s'\n", base_info->name); 1844 } 1845 return rc; 1846 } 1847 1848 bdev = spdk_bdev_desc_get_bdev(desc); 1849 bdev_uuid = spdk_bdev_get_uuid(bdev); 1850 1851 if (spdk_uuid_is_null(&base_info->uuid)) { 1852 spdk_uuid_copy(&base_info->uuid, bdev_uuid); 1853 } else if (spdk_uuid_compare(&base_info->uuid, bdev_uuid) != 0) { 1854 SPDK_ERRLOG("UUID mismatch for base bdev '%s'\n", base_info->name); 1855 spdk_bdev_close(desc); 1856 return -EINVAL; 1857 } 1858 1859 rc = spdk_bdev_module_claim_bdev(bdev, NULL, &g_raid_if); 1860 if (rc != 0) { 1861 SPDK_ERRLOG("Unable to claim this bdev as it is already claimed\n"); 1862 spdk_bdev_close(desc); 1863 return rc; 1864 } 1865 1866 SPDK_DEBUGLOG(bdev_raid, "bdev %s is claimed\n", bdev->name); 1867 1868 assert(raid_bdev->state != RAID_BDEV_STATE_ONLINE); 1869 1870 base_info->app_thread_ch = spdk_bdev_get_io_channel(desc); 1871 if (base_info->app_thread_ch == NULL) { 1872 SPDK_ERRLOG("Failed to get io channel\n"); 1873 
spdk_bdev_module_release_bdev(bdev); 1874 spdk_bdev_close(desc); 1875 return -ENOMEM; 1876 } 1877 1878 base_info->desc = desc; 1879 base_info->blockcnt = bdev->blockcnt; 1880 1881 if (raid_bdev->sb != NULL) { 1882 uint64_t data_offset; 1883 1884 if (base_info->data_offset == 0) { 1885 assert((RAID_BDEV_MIN_DATA_OFFSET_SIZE % bdev->blocklen) == 0); 1886 data_offset = RAID_BDEV_MIN_DATA_OFFSET_SIZE / bdev->blocklen; 1887 } else { 1888 data_offset = base_info->data_offset; 1889 } 1890 1891 if (bdev->optimal_io_boundary != 0) { 1892 data_offset = spdk_divide_round_up(data_offset, 1893 bdev->optimal_io_boundary) * bdev->optimal_io_boundary; 1894 if (base_info->data_offset != 0 && base_info->data_offset != data_offset) { 1895 SPDK_WARNLOG("Data offset %lu on bdev '%s' is different than optimal value %lu\n", 1896 base_info->data_offset, base_info->name, data_offset); 1897 data_offset = base_info->data_offset; 1898 } 1899 } 1900 1901 base_info->data_offset = data_offset; 1902 } 1903 1904 if (base_info->data_offset >= bdev->blockcnt) { 1905 SPDK_ERRLOG("Data offset %lu exceeds base bdev capacity %lu on bdev '%s'\n", 1906 base_info->data_offset, bdev->blockcnt, base_info->name); 1907 rc = -EINVAL; 1908 goto out; 1909 } 1910 1911 if (base_info->data_size == 0) { 1912 base_info->data_size = bdev->blockcnt - base_info->data_offset; 1913 } else if (base_info->data_offset + base_info->data_size > bdev->blockcnt) { 1914 SPDK_ERRLOG("Data offset and size exceeds base bdev capacity %lu on bdev '%s'\n", 1915 bdev->blockcnt, base_info->name); 1916 rc = -EINVAL; 1917 goto out; 1918 } 1919 1920 if (existing) { 1921 raid_bdev_configure_base_bdev_cont(base_info); 1922 } else { 1923 /* check for existing superblock when using a new bdev */ 1924 rc = raid_bdev_load_base_bdev_superblock(desc, base_info->app_thread_ch, 1925 raid_bdev_configure_base_bdev_check_sb_cb, base_info); 1926 if (rc) { 1927 SPDK_ERRLOG("Failed to read bdev %s superblock: %s\n", 1928 bdev->name, spdk_strerror(-rc)); 
1929 } 1930 } 1931 out: 1932 if (rc != 0) { 1933 raid_bdev_free_base_bdev_resource(base_info); 1934 } 1935 return rc; 1936 } 1937 1938 /* 1939 * brief: 1940 * raid_bdev_add_base_device function is the actual function which either adds 1941 * the nvme base device to existing raid bdev or create a new raid bdev. It also claims 1942 * the base device and keep the open descriptor. 1943 * params: 1944 * raid_bdev - pointer to raid bdev 1945 * name - name of the base bdev 1946 * slot - position to add base bdev 1947 * returns: 1948 * 0 - success 1949 * non zero - failure 1950 */ 1951 int 1952 raid_bdev_add_base_device(struct raid_bdev *raid_bdev, const char *name, uint8_t slot) 1953 { 1954 struct raid_base_bdev_info *base_info; 1955 int rc; 1956 1957 if (slot >= raid_bdev->num_base_bdevs) { 1958 return -EINVAL; 1959 } 1960 1961 base_info = &raid_bdev->base_bdev_info[slot]; 1962 1963 if (base_info->name != NULL) { 1964 SPDK_ERRLOG("Slot %u on raid bdev '%s' already assigned to bdev '%s'\n", 1965 slot, raid_bdev->bdev.name, base_info->name); 1966 return -EBUSY; 1967 } 1968 1969 if (!spdk_uuid_is_null(&base_info->uuid)) { 1970 char uuid_str[SPDK_UUID_STRING_LEN]; 1971 1972 spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &base_info->uuid); 1973 SPDK_ERRLOG("Slot %u on raid bdev '%s' already assigned to bdev with uuid %s\n", 1974 slot, raid_bdev->bdev.name, uuid_str); 1975 return -EBUSY; 1976 } 1977 1978 base_info->name = strdup(name); 1979 if (base_info->name == NULL) { 1980 return -ENOMEM; 1981 } 1982 1983 rc = raid_bdev_configure_base_bdev(base_info, false); 1984 if (rc != 0) { 1985 if (rc != -ENODEV) { 1986 SPDK_ERRLOG("Failed to allocate resource for bdev '%s'\n", name); 1987 } 1988 return rc; 1989 } 1990 1991 return 0; 1992 } 1993 1994 static int 1995 raid_bdev_create_from_sb(const struct raid_bdev_superblock *sb, struct raid_bdev **raid_bdev_out) 1996 { 1997 struct raid_bdev *raid_bdev; 1998 uint8_t i; 1999 int rc; 2000 2001 rc = _raid_bdev_create(sb->name, 
(sb->strip_size * sb->block_size) / 1024, sb->num_base_bdevs, 2002 sb->level, true, &sb->uuid, &raid_bdev); 2003 if (rc != 0) { 2004 return rc; 2005 } 2006 2007 assert(sb->length <= RAID_BDEV_SB_MAX_LENGTH); 2008 memcpy(raid_bdev->sb, sb, sb->length); 2009 2010 for (i = 0; i < sb->base_bdevs_size; i++) { 2011 const struct raid_bdev_sb_base_bdev *sb_base_bdev = &sb->base_bdevs[i]; 2012 struct raid_base_bdev_info *base_info = &raid_bdev->base_bdev_info[sb_base_bdev->slot]; 2013 2014 if (sb_base_bdev->state == RAID_SB_BASE_BDEV_CONFIGURED) { 2015 spdk_uuid_copy(&base_info->uuid, &sb_base_bdev->uuid); 2016 raid_bdev->num_base_bdevs_operational++; 2017 } 2018 2019 base_info->data_offset = sb_base_bdev->data_offset; 2020 base_info->data_size = sb_base_bdev->data_size; 2021 } 2022 2023 *raid_bdev_out = raid_bdev; 2024 return 0; 2025 } 2026 2027 static void 2028 raid_bdev_examine_no_sb(struct spdk_bdev *bdev) 2029 { 2030 struct raid_bdev *raid_bdev; 2031 struct raid_base_bdev_info *base_info; 2032 2033 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { 2034 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 2035 if (base_info->desc == NULL && base_info->name != NULL && 2036 strcmp(bdev->name, base_info->name) == 0) { 2037 raid_bdev_configure_base_bdev(base_info, true); 2038 break; 2039 } 2040 } 2041 } 2042 } 2043 2044 static void 2045 raid_bdev_examine_sb(const struct raid_bdev_superblock *sb, struct spdk_bdev *bdev) 2046 { 2047 const struct raid_bdev_sb_base_bdev *sb_base_bdev; 2048 struct raid_bdev *raid_bdev; 2049 struct raid_base_bdev_info *iter, *base_info; 2050 uint8_t i; 2051 int rc; 2052 2053 if (sb->block_size != bdev->blocklen) { 2054 SPDK_WARNLOG("Bdev %s block size (%u) does not match the value in superblock (%u)\n", 2055 bdev->name, sb->block_size, bdev->blocklen); 2056 return; 2057 } 2058 2059 if (spdk_uuid_is_null(&sb->uuid)) { 2060 SPDK_WARNLOG("NULL raid bdev UUID in superblock on bdev %s\n", bdev->name); 2061 return; 2062 } 2063 2064 
TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { 2065 if (spdk_uuid_compare(&raid_bdev->bdev.uuid, &sb->uuid) == 0) { 2066 break; 2067 } 2068 } 2069 2070 if (raid_bdev) { 2071 if (sb->seq_number > raid_bdev->sb->seq_number) { 2072 SPDK_DEBUGLOG(bdev_raid, 2073 "raid superblock seq_number on bdev %s (%lu) greater than existing raid bdev %s (%lu)\n", 2074 bdev->name, sb->seq_number, raid_bdev->bdev.name, raid_bdev->sb->seq_number); 2075 2076 if (raid_bdev->state != RAID_BDEV_STATE_CONFIGURING) { 2077 SPDK_WARNLOG("Newer version of raid bdev %s superblock found on bdev %s but raid bdev is not in configuring state.\n", 2078 raid_bdev->bdev.name, bdev->name); 2079 return; 2080 } 2081 2082 /* remove and then recreate the raid bdev using the newer superblock */ 2083 raid_bdev_delete(raid_bdev, NULL, NULL); 2084 raid_bdev = NULL; 2085 } else if (sb->seq_number < raid_bdev->sb->seq_number) { 2086 SPDK_DEBUGLOG(bdev_raid, 2087 "raid superblock seq_number on bdev %s (%lu) smaller than existing raid bdev %s (%lu)\n", 2088 bdev->name, sb->seq_number, raid_bdev->bdev.name, raid_bdev->sb->seq_number); 2089 /* use the current raid bdev superblock */ 2090 sb = raid_bdev->sb; 2091 } 2092 } 2093 2094 for (i = 0; i < sb->base_bdevs_size; i++) { 2095 sb_base_bdev = &sb->base_bdevs[i]; 2096 2097 assert(spdk_uuid_is_null(&sb_base_bdev->uuid) == false); 2098 2099 if (spdk_uuid_compare(&sb_base_bdev->uuid, spdk_bdev_get_uuid(bdev)) == 0) { 2100 break; 2101 } 2102 } 2103 2104 if (i == sb->base_bdevs_size) { 2105 SPDK_DEBUGLOG(bdev_raid, "raid superblock does not contain this bdev's uuid\n"); 2106 return; 2107 } 2108 2109 if (!raid_bdev) { 2110 rc = raid_bdev_create_from_sb(sb, &raid_bdev); 2111 if (rc != 0) { 2112 SPDK_ERRLOG("Failed to create raid bdev %s: %s\n", 2113 sb->name, spdk_strerror(-rc)); 2114 } 2115 } 2116 2117 if (sb_base_bdev->state != RAID_SB_BASE_BDEV_CONFIGURED) { 2118 SPDK_NOTICELOG("Bdev %s is not an active member of raid bdev %s. 
Ignoring.\n", 2119 bdev->name, raid_bdev->bdev.name); 2120 return; 2121 } 2122 2123 base_info = NULL; 2124 RAID_FOR_EACH_BASE_BDEV(raid_bdev, iter) { 2125 if (spdk_uuid_compare(&iter->uuid, spdk_bdev_get_uuid(bdev)) == 0) { 2126 base_info = iter; 2127 break; 2128 } 2129 } 2130 2131 if (base_info == NULL) { 2132 SPDK_ERRLOG("Bdev %s is not a member of raid bdev %s\n", 2133 bdev->name, raid_bdev->bdev.name); 2134 return; 2135 } 2136 2137 rc = raid_bdev_configure_base_bdev(base_info, true); 2138 if (rc != 0) { 2139 SPDK_ERRLOG("Failed to configure bdev %s as base bdev of raid %s: %s\n", 2140 bdev->name, raid_bdev->bdev.name, spdk_strerror(-rc)); 2141 } 2142 } 2143 2144 struct raid_bdev_examine_ctx { 2145 struct spdk_bdev_desc *desc; 2146 struct spdk_io_channel *ch; 2147 }; 2148 2149 static void 2150 raid_bdev_examine_ctx_free(struct raid_bdev_examine_ctx *ctx) 2151 { 2152 if (!ctx) { 2153 return; 2154 } 2155 2156 if (ctx->ch) { 2157 spdk_put_io_channel(ctx->ch); 2158 } 2159 2160 if (ctx->desc) { 2161 spdk_bdev_close(ctx->desc); 2162 } 2163 2164 free(ctx); 2165 } 2166 2167 static void 2168 raid_bdev_examine_load_sb_cb(const struct raid_bdev_superblock *sb, int status, void *_ctx) 2169 { 2170 struct raid_bdev_examine_ctx *ctx = _ctx; 2171 struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(ctx->desc); 2172 2173 switch (status) { 2174 case 0: 2175 /* valid superblock found */ 2176 SPDK_DEBUGLOG(bdev_raid, "raid superblock found on bdev %s\n", bdev->name); 2177 raid_bdev_examine_sb(sb, bdev); 2178 break; 2179 case -EINVAL: 2180 /* no valid superblock, check if it can be claimed anyway */ 2181 raid_bdev_examine_no_sb(bdev); 2182 break; 2183 default: 2184 SPDK_ERRLOG("Failed to examine bdev %s: %s\n", 2185 bdev->name, spdk_strerror(-status)); 2186 break; 2187 } 2188 2189 raid_bdev_examine_ctx_free(ctx); 2190 spdk_bdev_module_examine_done(&g_raid_if); 2191 } 2192 2193 static void 2194 raid_bdev_examine_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void 
*event_ctx) 2195 { 2196 } 2197 2198 /* 2199 * brief: 2200 * raid_bdev_examine function is the examine function call by the below layers 2201 * like bdev_nvme layer. This function will check if this base bdev can be 2202 * claimed by this raid bdev or not. 2203 * params: 2204 * bdev - pointer to base bdev 2205 * returns: 2206 * none 2207 */ 2208 static void 2209 raid_bdev_examine(struct spdk_bdev *bdev) 2210 { 2211 struct raid_bdev_examine_ctx *ctx; 2212 int rc; 2213 2214 ctx = calloc(1, sizeof(*ctx)); 2215 if (!ctx) { 2216 SPDK_ERRLOG("Failed to examine bdev %s: %s\n", 2217 bdev->name, spdk_strerror(ENOMEM)); 2218 goto err; 2219 } 2220 2221 rc = spdk_bdev_open_ext(spdk_bdev_get_name(bdev), false, raid_bdev_examine_event_cb, NULL, 2222 &ctx->desc); 2223 if (rc) { 2224 SPDK_ERRLOG("Failed to open bdev %s: %s\n", 2225 bdev->name, spdk_strerror(-rc)); 2226 goto err; 2227 } 2228 2229 ctx->ch = spdk_bdev_get_io_channel(ctx->desc); 2230 if (!ctx->ch) { 2231 SPDK_ERRLOG("Failed to get io channel for bdev %s\n", bdev->name); 2232 goto err; 2233 } 2234 2235 rc = raid_bdev_load_base_bdev_superblock(ctx->desc, ctx->ch, raid_bdev_examine_load_sb_cb, ctx); 2236 if (rc) { 2237 SPDK_ERRLOG("Failed to read bdev %s superblock: %s\n", 2238 bdev->name, spdk_strerror(-rc)); 2239 goto err; 2240 } 2241 2242 return; 2243 err: 2244 raid_bdev_examine_ctx_free(ctx); 2245 spdk_bdev_module_examine_done(&g_raid_if); 2246 } 2247 2248 /* Log component for bdev raid bdev module */ 2249 SPDK_LOG_REGISTER_COMPONENT(bdev_raid) 2250