1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2018 Intel Corporation. 3 * All rights reserved. 4 * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 5 */ 6 7 #include "bdev_raid.h" 8 #include "spdk/env.h" 9 #include "spdk/thread.h" 10 #include "spdk/log.h" 11 #include "spdk/string.h" 12 #include "spdk/util.h" 13 #include "spdk/json.h" 14 #include "spdk/likely.h" 15 16 static bool g_shutdown_started = false; 17 18 /* List of all raid bdevs */ 19 struct raid_all_tailq g_raid_bdev_list = TAILQ_HEAD_INITIALIZER(g_raid_bdev_list); 20 21 static TAILQ_HEAD(, raid_bdev_module) g_raid_modules = TAILQ_HEAD_INITIALIZER(g_raid_modules); 22 23 /* 24 * raid_bdev_io_channel is the context of spdk_io_channel for raid bdev device. It 25 * contains the relationship of raid bdev io channel with base bdev io channels. 26 */ 27 struct raid_bdev_io_channel { 28 /* Array of IO channels of base bdevs */ 29 struct spdk_io_channel **base_channel; 30 31 /* Private raid module IO channel */ 32 struct spdk_io_channel *module_channel; 33 }; 34 35 static struct raid_bdev_module * 36 raid_bdev_module_find(enum raid_level level) 37 { 38 struct raid_bdev_module *raid_module; 39 40 TAILQ_FOREACH(raid_module, &g_raid_modules, link) { 41 if (raid_module->level == level) { 42 return raid_module; 43 } 44 } 45 46 return NULL; 47 } 48 49 void 50 raid_bdev_module_list_add(struct raid_bdev_module *raid_module) 51 { 52 if (raid_bdev_module_find(raid_module->level) != NULL) { 53 SPDK_ERRLOG("module for raid level '%s' already registered.\n", 54 raid_bdev_level_to_str(raid_module->level)); 55 assert(false); 56 } else { 57 TAILQ_INSERT_TAIL(&g_raid_modules, raid_module, link); 58 } 59 } 60 61 struct spdk_io_channel * 62 raid_bdev_channel_get_base_channel(struct raid_bdev_io_channel *raid_ch, uint8_t idx) 63 { 64 return raid_ch->base_channel[idx]; 65 } 66 67 void * 68 raid_bdev_channel_get_module_ctx(struct raid_bdev_io_channel *raid_ch) 69 { 70 
assert(raid_ch->module_channel != NULL); 71 72 return spdk_io_channel_get_ctx(raid_ch->module_channel); 73 } 74 75 /* Function declarations */ 76 static void raid_bdev_examine(struct spdk_bdev *bdev); 77 static int raid_bdev_init(void); 78 static void raid_bdev_deconfigure(struct raid_bdev *raid_bdev, 79 raid_bdev_destruct_cb cb_fn, void *cb_arg); 80 81 /* 82 * brief: 83 * raid_bdev_create_cb function is a cb function for raid bdev which creates the 84 * hierarchy from raid bdev to base bdev io channels. It will be called per core 85 * params: 86 * io_device - pointer to raid bdev io device represented by raid_bdev 87 * ctx_buf - pointer to context buffer for raid bdev io channel 88 * returns: 89 * 0 - success 90 * non zero - failure 91 */ 92 static int 93 raid_bdev_create_cb(void *io_device, void *ctx_buf) 94 { 95 struct raid_bdev *raid_bdev = io_device; 96 struct raid_bdev_io_channel *raid_ch = ctx_buf; 97 uint8_t i; 98 int ret = 0; 99 100 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_create_cb, %p\n", raid_ch); 101 102 assert(raid_bdev != NULL); 103 assert(raid_bdev->state == RAID_BDEV_STATE_ONLINE); 104 105 106 raid_ch->base_channel = calloc(raid_bdev->num_base_bdevs, sizeof(struct spdk_io_channel *)); 107 if (!raid_ch->base_channel) { 108 SPDK_ERRLOG("Unable to allocate base bdevs io channel\n"); 109 return -ENOMEM; 110 } 111 112 spdk_spin_lock(&raid_bdev->base_bdev_lock); 113 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 114 /* 115 * Get the spdk_io_channel for all the base bdevs. This is used during 116 * split logic to send the respective child bdev ios to respective base 117 * bdev io channel. 
118 */ 119 if (raid_bdev->base_bdev_info[i].desc == NULL) { 120 continue; 121 } 122 raid_ch->base_channel[i] = spdk_bdev_get_io_channel( 123 raid_bdev->base_bdev_info[i].desc); 124 if (!raid_ch->base_channel[i]) { 125 SPDK_ERRLOG("Unable to create io channel for base bdev\n"); 126 ret = -ENOMEM; 127 break; 128 } 129 } 130 spdk_spin_unlock(&raid_bdev->base_bdev_lock); 131 132 if (!ret && raid_bdev->module->get_io_channel) { 133 raid_ch->module_channel = raid_bdev->module->get_io_channel(raid_bdev); 134 if (!raid_ch->module_channel) { 135 SPDK_ERRLOG("Unable to create io channel for raid module\n"); 136 ret = -ENOMEM; 137 } 138 } 139 140 if (ret) { 141 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 142 if (raid_ch->base_channel[i] != NULL) { 143 spdk_put_io_channel(raid_ch->base_channel[i]); 144 } 145 } 146 free(raid_ch->base_channel); 147 raid_ch->base_channel = NULL; 148 } 149 return ret; 150 } 151 152 /* 153 * brief: 154 * raid_bdev_destroy_cb function is a cb function for raid bdev which deletes the 155 * hierarchy from raid bdev to base bdev io channels. 
It will be called per core
 * params:
 * io_device - pointer to raid bdev io device represented by raid_bdev
 * ctx_buf - pointer to context buffer for raid bdev io channel
 * returns:
 * none
 */
static void
raid_bdev_destroy_cb(void *io_device, void *ctx_buf)
{
	struct raid_bdev *raid_bdev = io_device;
	struct raid_bdev_io_channel *raid_ch = ctx_buf;
	uint8_t idx;

	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_destroy_cb\n");

	assert(raid_ch != NULL);
	assert(raid_ch->base_channel);

	/* Release the raid module's private channel first, when there is one */
	if (raid_ch->module_channel != NULL) {
		spdk_put_io_channel(raid_ch->module_channel);
	}

	/* Free base bdev channels; empty (NULL) slots are skipped */
	for (idx = 0; idx < raid_bdev->num_base_bdevs; idx++) {
		struct spdk_io_channel *base_ch = raid_ch->base_channel[idx];

		if (base_ch == NULL) {
			continue;
		}
		spdk_put_io_channel(base_ch);
	}

	free(raid_ch->base_channel);
	raid_ch->base_channel = NULL;
}

/*
 * brief:
 * raid_bdev_cleanup is used to cleanup raid_bdev related data
 * structures.
192 * params: 193 * raid_bdev - pointer to raid_bdev 194 * returns: 195 * none 196 */ 197 static void 198 raid_bdev_cleanup(struct raid_bdev *raid_bdev) 199 { 200 struct raid_base_bdev_info *base_info; 201 202 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_cleanup, %p name %s, state %s\n", 203 raid_bdev, raid_bdev->bdev.name, raid_bdev_state_to_str(raid_bdev->state)); 204 assert(raid_bdev->state != RAID_BDEV_STATE_ONLINE); 205 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 206 207 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 208 assert(base_info->desc == NULL); 209 free(base_info->name); 210 } 211 212 TAILQ_REMOVE(&g_raid_bdev_list, raid_bdev, global_link); 213 } 214 215 static void 216 raid_bdev_free(struct raid_bdev *raid_bdev) 217 { 218 spdk_dma_free(raid_bdev->sb); 219 spdk_spin_destroy(&raid_bdev->base_bdev_lock); 220 free(raid_bdev->base_bdev_info); 221 free(raid_bdev->bdev.name); 222 free(raid_bdev); 223 } 224 225 static void 226 raid_bdev_cleanup_and_free(struct raid_bdev *raid_bdev) 227 { 228 raid_bdev_cleanup(raid_bdev); 229 raid_bdev_free(raid_bdev); 230 } 231 232 /* 233 * brief: 234 * free resource of base bdev for raid bdev 235 * params: 236 * base_info - raid base bdev info 237 * returns: 238 * none 239 */ 240 static void 241 raid_bdev_free_base_bdev_resource(struct raid_base_bdev_info *base_info) 242 { 243 struct raid_bdev *raid_bdev = base_info->raid_bdev; 244 245 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 246 247 free(base_info->name); 248 base_info->name = NULL; 249 if (raid_bdev->state != RAID_BDEV_STATE_CONFIGURING) { 250 spdk_uuid_set_null(&base_info->uuid); 251 } 252 253 if (base_info->desc == NULL) { 254 return; 255 } 256 257 spdk_bdev_module_release_bdev(spdk_bdev_desc_get_bdev(base_info->desc)); 258 spdk_bdev_close(base_info->desc); 259 base_info->desc = NULL; 260 spdk_put_io_channel(base_info->app_thread_ch); 261 base_info->app_thread_ch = NULL; 262 263 if (base_info->is_configured) { 264 
assert(raid_bdev->num_base_bdevs_discovered); 265 raid_bdev->num_base_bdevs_discovered--; 266 base_info->is_configured = false; 267 } 268 } 269 270 static void 271 raid_bdev_io_device_unregister_cb(void *io_device) 272 { 273 struct raid_bdev *raid_bdev = io_device; 274 275 if (raid_bdev->num_base_bdevs_discovered == 0) { 276 /* Free raid_bdev when there are no base bdevs left */ 277 SPDK_DEBUGLOG(bdev_raid, "raid bdev base bdevs is 0, going to free all in destruct\n"); 278 raid_bdev_cleanup(raid_bdev); 279 spdk_bdev_destruct_done(&raid_bdev->bdev, 0); 280 raid_bdev_free(raid_bdev); 281 } else { 282 spdk_bdev_destruct_done(&raid_bdev->bdev, 0); 283 } 284 } 285 286 void 287 raid_bdev_module_stop_done(struct raid_bdev *raid_bdev) 288 { 289 if (raid_bdev->state != RAID_BDEV_STATE_CONFIGURING) { 290 spdk_io_device_unregister(raid_bdev, raid_bdev_io_device_unregister_cb); 291 } 292 } 293 294 static void 295 _raid_bdev_destruct(void *ctxt) 296 { 297 struct raid_bdev *raid_bdev = ctxt; 298 struct raid_base_bdev_info *base_info; 299 300 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_destruct\n"); 301 302 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 303 /* 304 * Close all base bdev descriptors for which call has come from below 305 * layers. Also close the descriptors if we have started shutdown. 
306 */ 307 if (g_shutdown_started || base_info->remove_scheduled == true) { 308 raid_bdev_free_base_bdev_resource(base_info); 309 } 310 } 311 312 if (g_shutdown_started) { 313 raid_bdev->state = RAID_BDEV_STATE_OFFLINE; 314 } 315 316 if (raid_bdev->module->stop != NULL) { 317 if (raid_bdev->module->stop(raid_bdev) == false) { 318 return; 319 } 320 } 321 322 raid_bdev_module_stop_done(raid_bdev); 323 } 324 325 static int 326 raid_bdev_destruct(void *ctx) 327 { 328 spdk_thread_exec_msg(spdk_thread_get_app_thread(), _raid_bdev_destruct, ctx); 329 330 return 1; 331 } 332 333 void 334 raid_bdev_io_complete(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status) 335 { 336 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); 337 338 if (spdk_unlikely(raid_io->completion_cb != NULL)) { 339 raid_io->completion_cb(raid_io, status); 340 } else { 341 spdk_bdev_io_complete(bdev_io, status); 342 } 343 } 344 345 /* 346 * brief: 347 * raid_bdev_io_complete_part - signal the completion of a part of the expected 348 * base bdev IOs and complete the raid_io if this is the final expected IO. 349 * The caller should first set raid_io->base_bdev_io_remaining. This function 350 * will decrement this counter by the value of the 'completed' parameter and 351 * complete the raid_io if the counter reaches 0. The caller is free to 352 * interpret the 'base_bdev_io_remaining' and 'completed' values as needed, 353 * it can represent e.g. blocks or IOs. 
354 * params: 355 * raid_io - pointer to raid_bdev_io 356 * completed - the part of the raid_io that has been completed 357 * status - status of the base IO 358 * returns: 359 * true - if the raid_io is completed 360 * false - otherwise 361 */ 362 bool 363 raid_bdev_io_complete_part(struct raid_bdev_io *raid_io, uint64_t completed, 364 enum spdk_bdev_io_status status) 365 { 366 assert(raid_io->base_bdev_io_remaining >= completed); 367 raid_io->base_bdev_io_remaining -= completed; 368 369 if (status != SPDK_BDEV_IO_STATUS_SUCCESS) { 370 raid_io->base_bdev_io_status = status; 371 } 372 373 if (raid_io->base_bdev_io_remaining == 0) { 374 raid_bdev_io_complete(raid_io, raid_io->base_bdev_io_status); 375 return true; 376 } else { 377 return false; 378 } 379 } 380 381 /* 382 * brief: 383 * raid_bdev_queue_io_wait function processes the IO which failed to submit. 384 * It will try to queue the IOs after storing the context to bdev wait queue logic. 385 * params: 386 * raid_io - pointer to raid_bdev_io 387 * bdev - the block device that the IO is submitted to 388 * ch - io channel 389 * cb_fn - callback when the spdk_bdev_io for bdev becomes available 390 * returns: 391 * none 392 */ 393 void 394 raid_bdev_queue_io_wait(struct raid_bdev_io *raid_io, struct spdk_bdev *bdev, 395 struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn) 396 { 397 raid_io->waitq_entry.bdev = bdev; 398 raid_io->waitq_entry.cb_fn = cb_fn; 399 raid_io->waitq_entry.cb_arg = raid_io; 400 spdk_bdev_queue_io_wait(bdev, ch, &raid_io->waitq_entry); 401 } 402 403 static void 404 raid_base_bdev_reset_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 405 { 406 struct raid_bdev_io *raid_io = cb_arg; 407 408 spdk_bdev_free_io(bdev_io); 409 410 raid_bdev_io_complete_part(raid_io, 1, success ? 
411 SPDK_BDEV_IO_STATUS_SUCCESS : 412 SPDK_BDEV_IO_STATUS_FAILED); 413 } 414 415 static void raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io); 416 417 static void 418 _raid_bdev_submit_reset_request(void *_raid_io) 419 { 420 struct raid_bdev_io *raid_io = _raid_io; 421 422 raid_bdev_submit_reset_request(raid_io); 423 } 424 425 /* 426 * brief: 427 * raid_bdev_submit_reset_request function submits reset requests 428 * to member disks; it will submit as many as possible unless a reset fails with -ENOMEM, in 429 * which case it will queue it for later submission 430 * params: 431 * raid_io 432 * returns: 433 * none 434 */ 435 static void 436 raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io) 437 { 438 struct raid_bdev *raid_bdev; 439 int ret; 440 uint8_t i; 441 struct raid_base_bdev_info *base_info; 442 struct spdk_io_channel *base_ch; 443 444 raid_bdev = raid_io->raid_bdev; 445 446 if (raid_io->base_bdev_io_remaining == 0) { 447 raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs; 448 } 449 450 for (i = raid_io->base_bdev_io_submitted; i < raid_bdev->num_base_bdevs; i++) { 451 base_info = &raid_bdev->base_bdev_info[i]; 452 base_ch = raid_io->raid_ch->base_channel[i]; 453 if (base_ch == NULL) { 454 raid_io->base_bdev_io_submitted++; 455 raid_bdev_io_complete_part(raid_io, 1, SPDK_BDEV_IO_STATUS_SUCCESS); 456 continue; 457 } 458 ret = spdk_bdev_reset(base_info->desc, base_ch, 459 raid_base_bdev_reset_complete, raid_io); 460 if (ret == 0) { 461 raid_io->base_bdev_io_submitted++; 462 } else if (ret == -ENOMEM) { 463 raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), 464 base_ch, _raid_bdev_submit_reset_request); 465 return; 466 } else { 467 SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); 468 assert(false); 469 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 470 return; 471 } 472 } 473 } 474 475 /* 476 * brief: 477 * Callback function to spdk_bdev_io_get_buf. 
478 * params: 479 * ch - pointer to raid bdev io channel 480 * bdev_io - pointer to parent bdev_io on raid bdev device 481 * success - True if buffer is allocated or false otherwise. 482 * returns: 483 * none 484 */ 485 static void 486 raid_bdev_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, 487 bool success) 488 { 489 struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 490 491 if (!success) { 492 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 493 return; 494 } 495 496 raid_io->raid_bdev->module->submit_rw_request(raid_io); 497 } 498 499 void 500 raid_bdev_io_init(struct raid_bdev_io *raid_io, struct raid_bdev_io_channel *raid_ch, 501 enum spdk_bdev_io_type type, uint64_t offset_blocks, 502 uint64_t num_blocks, struct iovec *iovs, int iovcnt, void *md_buf, 503 struct spdk_memory_domain *memory_domain, void *memory_domain_ctx) 504 { 505 struct spdk_io_channel *ch = spdk_io_channel_from_ctx(raid_ch); 506 struct raid_bdev *raid_bdev = spdk_io_channel_get_io_device(ch); 507 508 raid_io->type = type; 509 raid_io->offset_blocks = offset_blocks; 510 raid_io->num_blocks = num_blocks; 511 raid_io->iovs = iovs; 512 raid_io->iovcnt = iovcnt; 513 raid_io->memory_domain = memory_domain; 514 raid_io->memory_domain_ctx = memory_domain_ctx; 515 raid_io->md_buf = md_buf; 516 517 raid_io->raid_bdev = raid_bdev; 518 raid_io->raid_ch = raid_ch; 519 raid_io->base_bdev_io_remaining = 0; 520 raid_io->base_bdev_io_submitted = 0; 521 raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS; 522 raid_io->completion_cb = NULL; 523 } 524 525 /* 526 * brief: 527 * raid_bdev_submit_request function is the submit_request function pointer of 528 * raid bdev function table. This is used to submit the io on raid_bdev to below 529 * layers. 
530 * params: 531 * ch - pointer to raid bdev io channel 532 * bdev_io - pointer to parent bdev_io on raid bdev device 533 * returns: 534 * none 535 */ 536 static void 537 raid_bdev_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 538 { 539 struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 540 541 raid_bdev_io_init(raid_io, spdk_io_channel_get_ctx(ch), bdev_io->type, 542 bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks, 543 bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, bdev_io->u.bdev.md_buf, 544 bdev_io->u.bdev.memory_domain, bdev_io->u.bdev.memory_domain_ctx); 545 546 switch (bdev_io->type) { 547 case SPDK_BDEV_IO_TYPE_READ: 548 spdk_bdev_io_get_buf(bdev_io, raid_bdev_get_buf_cb, 549 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); 550 break; 551 case SPDK_BDEV_IO_TYPE_WRITE: 552 raid_io->raid_bdev->module->submit_rw_request(raid_io); 553 break; 554 555 case SPDK_BDEV_IO_TYPE_RESET: 556 raid_bdev_submit_reset_request(raid_io); 557 break; 558 559 case SPDK_BDEV_IO_TYPE_FLUSH: 560 case SPDK_BDEV_IO_TYPE_UNMAP: 561 raid_io->raid_bdev->module->submit_null_payload_request(raid_io); 562 break; 563 564 default: 565 SPDK_ERRLOG("submit request, invalid io type %u\n", bdev_io->type); 566 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 567 break; 568 } 569 } 570 571 /* 572 * brief: 573 * _raid_bdev_io_type_supported checks whether io_type is supported in 574 * all base bdev modules of raid bdev module. If anyone among the base_bdevs 575 * doesn't support, the raid device doesn't supports. 
576 * 577 * params: 578 * raid_bdev - pointer to raid bdev context 579 * io_type - io type 580 * returns: 581 * true - io_type is supported 582 * false - io_type is not supported 583 */ 584 inline static bool 585 _raid_bdev_io_type_supported(struct raid_bdev *raid_bdev, enum spdk_bdev_io_type io_type) 586 { 587 struct raid_base_bdev_info *base_info; 588 589 if (io_type == SPDK_BDEV_IO_TYPE_FLUSH || 590 io_type == SPDK_BDEV_IO_TYPE_UNMAP) { 591 if (raid_bdev->module->submit_null_payload_request == NULL) { 592 return false; 593 } 594 } 595 596 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 597 if (base_info->desc == NULL) { 598 continue; 599 } 600 601 if (spdk_bdev_io_type_supported(spdk_bdev_desc_get_bdev(base_info->desc), io_type) == false) { 602 return false; 603 } 604 } 605 606 return true; 607 } 608 609 /* 610 * brief: 611 * raid_bdev_io_type_supported is the io_supported function for bdev function 612 * table which returns whether the particular io type is supported or not by 613 * raid bdev module 614 * params: 615 * ctx - pointer to raid bdev context 616 * type - io type 617 * returns: 618 * true - io_type is supported 619 * false - io_type is not supported 620 */ 621 static bool 622 raid_bdev_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 623 { 624 switch (io_type) { 625 case SPDK_BDEV_IO_TYPE_READ: 626 case SPDK_BDEV_IO_TYPE_WRITE: 627 return true; 628 629 case SPDK_BDEV_IO_TYPE_FLUSH: 630 case SPDK_BDEV_IO_TYPE_RESET: 631 case SPDK_BDEV_IO_TYPE_UNMAP: 632 return _raid_bdev_io_type_supported(ctx, io_type); 633 634 default: 635 return false; 636 } 637 638 return false; 639 } 640 641 /* 642 * brief: 643 * raid_bdev_get_io_channel is the get_io_channel function table pointer for 644 * raid bdev. 
This is used to return the io channel for this raid bdev 645 * params: 646 * ctxt - pointer to raid_bdev 647 * returns: 648 * pointer to io channel for raid bdev 649 */ 650 static struct spdk_io_channel * 651 raid_bdev_get_io_channel(void *ctxt) 652 { 653 struct raid_bdev *raid_bdev = ctxt; 654 655 return spdk_get_io_channel(raid_bdev); 656 } 657 658 void 659 raid_bdev_write_info_json(struct raid_bdev *raid_bdev, struct spdk_json_write_ctx *w) 660 { 661 struct raid_base_bdev_info *base_info; 662 char uuid_str[SPDK_UUID_STRING_LEN]; 663 664 assert(raid_bdev != NULL); 665 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 666 667 spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &raid_bdev->bdev.uuid); 668 spdk_json_write_named_string(w, "uuid", uuid_str); 669 spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb); 670 spdk_json_write_named_string(w, "state", raid_bdev_state_to_str(raid_bdev->state)); 671 spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level)); 672 spdk_json_write_named_bool(w, "superblock", raid_bdev->sb != NULL); 673 spdk_json_write_named_uint32(w, "num_base_bdevs", raid_bdev->num_base_bdevs); 674 spdk_json_write_named_uint32(w, "num_base_bdevs_discovered", raid_bdev->num_base_bdevs_discovered); 675 spdk_json_write_named_uint32(w, "num_base_bdevs_operational", 676 raid_bdev->num_base_bdevs_operational); 677 spdk_json_write_name(w, "base_bdevs_list"); 678 spdk_json_write_array_begin(w); 679 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 680 spdk_json_write_object_begin(w); 681 spdk_json_write_name(w, "name"); 682 if (base_info->name) { 683 spdk_json_write_string(w, base_info->name); 684 } else { 685 spdk_json_write_null(w); 686 } 687 spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &base_info->uuid); 688 spdk_json_write_named_string(w, "uuid", uuid_str); 689 spdk_json_write_named_bool(w, "is_configured", base_info->is_configured); 690 spdk_json_write_named_uint64(w, "data_offset", 
base_info->data_offset); 691 spdk_json_write_named_uint64(w, "data_size", base_info->data_size); 692 spdk_json_write_object_end(w); 693 } 694 spdk_json_write_array_end(w); 695 } 696 697 /* 698 * brief: 699 * raid_bdev_dump_info_json is the function table pointer for raid bdev 700 * params: 701 * ctx - pointer to raid_bdev 702 * w - pointer to json context 703 * returns: 704 * 0 - success 705 * non zero - failure 706 */ 707 static int 708 raid_bdev_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) 709 { 710 struct raid_bdev *raid_bdev = ctx; 711 712 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_dump_config_json\n"); 713 714 /* Dump the raid bdev configuration related information */ 715 spdk_json_write_named_object_begin(w, "raid"); 716 raid_bdev_write_info_json(raid_bdev, w); 717 spdk_json_write_object_end(w); 718 719 return 0; 720 } 721 722 /* 723 * brief: 724 * raid_bdev_write_config_json is the function table pointer for raid bdev 725 * params: 726 * bdev - pointer to spdk_bdev 727 * w - pointer to json context 728 * returns: 729 * none 730 */ 731 static void 732 raid_bdev_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 733 { 734 struct raid_bdev *raid_bdev = bdev->ctxt; 735 struct raid_base_bdev_info *base_info; 736 char uuid_str[SPDK_UUID_STRING_LEN]; 737 738 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 739 740 if (raid_bdev->sb != NULL) { 741 /* raid bdev configuration is stored in the superblock */ 742 return; 743 } 744 745 spdk_json_write_object_begin(w); 746 747 spdk_json_write_named_string(w, "method", "bdev_raid_create"); 748 749 spdk_json_write_named_object_begin(w, "params"); 750 spdk_json_write_named_string(w, "name", bdev->name); 751 spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &raid_bdev->bdev.uuid); 752 spdk_json_write_named_string(w, "uuid", uuid_str); 753 spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb); 754 spdk_json_write_named_string(w, "raid_level", 
raid_bdev_level_to_str(raid_bdev->level)); 755 spdk_json_write_named_bool(w, "superblock", raid_bdev->sb != NULL); 756 757 spdk_json_write_named_array_begin(w, "base_bdevs"); 758 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 759 if (base_info->desc) { 760 spdk_json_write_string(w, spdk_bdev_desc_get_bdev(base_info->desc)->name); 761 } 762 } 763 spdk_json_write_array_end(w); 764 spdk_json_write_object_end(w); 765 766 spdk_json_write_object_end(w); 767 } 768 769 static int 770 raid_bdev_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int array_size) 771 { 772 struct raid_bdev *raid_bdev = ctx; 773 struct raid_base_bdev_info *base_info; 774 int domains_count = 0, rc = 0; 775 776 if (raid_bdev->module->memory_domains_supported == false) { 777 return 0; 778 } 779 780 spdk_spin_lock(&raid_bdev->base_bdev_lock); 781 782 /* First loop to get the number of memory domains */ 783 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 784 if (base_info->desc == NULL) { 785 continue; 786 } 787 rc = spdk_bdev_get_memory_domains(spdk_bdev_desc_get_bdev(base_info->desc), NULL, 0); 788 if (rc < 0) { 789 goto out; 790 } 791 domains_count += rc; 792 } 793 794 if (!domains || array_size < domains_count) { 795 goto out; 796 } 797 798 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 799 if (base_info->desc == NULL) { 800 continue; 801 } 802 rc = spdk_bdev_get_memory_domains(spdk_bdev_desc_get_bdev(base_info->desc), domains, array_size); 803 if (rc < 0) { 804 goto out; 805 } 806 domains += rc; 807 array_size -= rc; 808 } 809 out: 810 spdk_spin_unlock(&raid_bdev->base_bdev_lock); 811 812 if (rc < 0) { 813 return rc; 814 } 815 816 return domains_count; 817 } 818 819 /* g_raid_bdev_fn_table is the function table for raid bdev */ 820 static const struct spdk_bdev_fn_table g_raid_bdev_fn_table = { 821 .destruct = raid_bdev_destruct, 822 .submit_request = raid_bdev_submit_request, 823 .io_type_supported = raid_bdev_io_type_supported, 824 .get_io_channel = raid_bdev_get_io_channel, 
825 .dump_info_json = raid_bdev_dump_info_json, 826 .write_config_json = raid_bdev_write_config_json, 827 .get_memory_domains = raid_bdev_get_memory_domains, 828 }; 829 830 struct raid_bdev * 831 raid_bdev_find_by_name(const char *name) 832 { 833 struct raid_bdev *raid_bdev; 834 835 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { 836 if (strcmp(raid_bdev->bdev.name, name) == 0) { 837 return raid_bdev; 838 } 839 } 840 841 return NULL; 842 } 843 844 static struct { 845 const char *name; 846 enum raid_level value; 847 } g_raid_level_names[] = { 848 { "raid0", RAID0 }, 849 { "0", RAID0 }, 850 { "raid1", RAID1 }, 851 { "1", RAID1 }, 852 { "raid5f", RAID5F }, 853 { "5f", RAID5F }, 854 { "concat", CONCAT }, 855 { } 856 }; 857 858 static struct { 859 const char *name; 860 enum raid_bdev_state value; 861 } g_raid_state_names[] = { 862 { "online", RAID_BDEV_STATE_ONLINE }, 863 { "configuring", RAID_BDEV_STATE_CONFIGURING }, 864 { "offline", RAID_BDEV_STATE_OFFLINE }, 865 { } 866 }; 867 868 /* We have to use the typedef in the function declaration to appease astyle. 
*/ 869 typedef enum raid_level raid_level_t; 870 typedef enum raid_bdev_state raid_bdev_state_t; 871 872 raid_level_t 873 raid_bdev_str_to_level(const char *str) 874 { 875 unsigned int i; 876 877 assert(str != NULL); 878 879 for (i = 0; g_raid_level_names[i].name != NULL; i++) { 880 if (strcasecmp(g_raid_level_names[i].name, str) == 0) { 881 return g_raid_level_names[i].value; 882 } 883 } 884 885 return INVALID_RAID_LEVEL; 886 } 887 888 const char * 889 raid_bdev_level_to_str(enum raid_level level) 890 { 891 unsigned int i; 892 893 for (i = 0; g_raid_level_names[i].name != NULL; i++) { 894 if (g_raid_level_names[i].value == level) { 895 return g_raid_level_names[i].name; 896 } 897 } 898 899 return ""; 900 } 901 902 raid_bdev_state_t 903 raid_bdev_str_to_state(const char *str) 904 { 905 unsigned int i; 906 907 assert(str != NULL); 908 909 for (i = 0; g_raid_state_names[i].name != NULL; i++) { 910 if (strcasecmp(g_raid_state_names[i].name, str) == 0) { 911 return g_raid_state_names[i].value; 912 } 913 } 914 915 return RAID_BDEV_STATE_MAX; 916 } 917 918 const char * 919 raid_bdev_state_to_str(enum raid_bdev_state state) 920 { 921 unsigned int i; 922 923 for (i = 0; g_raid_state_names[i].name != NULL; i++) { 924 if (g_raid_state_names[i].value == state) { 925 return g_raid_state_names[i].name; 926 } 927 } 928 929 assert(false); 930 return ""; 931 } 932 933 /* 934 * brief: 935 * raid_bdev_fini_start is called when bdev layer is starting the 936 * shutdown process 937 * params: 938 * none 939 * returns: 940 * none 941 */ 942 static void 943 raid_bdev_fini_start(void) 944 { 945 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_fini_start\n"); 946 g_shutdown_started = true; 947 } 948 949 /* 950 * brief: 951 * raid_bdev_exit is called on raid bdev module exit time by bdev layer 952 * params: 953 * none 954 * returns: 955 * none 956 */ 957 static void 958 raid_bdev_exit(void) 959 { 960 struct raid_bdev *raid_bdev, *tmp; 961 962 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_exit\n"); 963 964 
TAILQ_FOREACH_SAFE(raid_bdev, &g_raid_bdev_list, global_link, tmp) { 965 raid_bdev_cleanup_and_free(raid_bdev); 966 } 967 } 968 969 /* 970 * brief: 971 * raid_bdev_get_ctx_size is used to return the context size of bdev_io for raid 972 * module 973 * params: 974 * none 975 * returns: 976 * size of spdk_bdev_io context for raid 977 */ 978 static int 979 raid_bdev_get_ctx_size(void) 980 { 981 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_get_ctx_size\n"); 982 return sizeof(struct raid_bdev_io); 983 } 984 985 static struct spdk_bdev_module g_raid_if = { 986 .name = "raid", 987 .module_init = raid_bdev_init, 988 .fini_start = raid_bdev_fini_start, 989 .module_fini = raid_bdev_exit, 990 .get_ctx_size = raid_bdev_get_ctx_size, 991 .examine_disk = raid_bdev_examine, 992 .async_init = false, 993 .async_fini = false, 994 }; 995 SPDK_BDEV_MODULE_REGISTER(raid, &g_raid_if) 996 997 /* 998 * brief: 999 * raid_bdev_init is the initialization function for raid bdev module 1000 * params: 1001 * none 1002 * returns: 1003 * 0 - success 1004 * non zero - failure 1005 */ 1006 static int 1007 raid_bdev_init(void) 1008 { 1009 return 0; 1010 } 1011 1012 static int 1013 _raid_bdev_create(const char *name, uint32_t strip_size, uint8_t num_base_bdevs, 1014 enum raid_level level, bool superblock_enabled, const struct spdk_uuid *uuid, 1015 struct raid_bdev **raid_bdev_out) 1016 { 1017 struct raid_bdev *raid_bdev; 1018 struct spdk_bdev *raid_bdev_gen; 1019 struct raid_bdev_module *module; 1020 struct raid_base_bdev_info *base_info; 1021 uint8_t min_operational; 1022 1023 if (strnlen(name, RAID_BDEV_SB_NAME_SIZE) == RAID_BDEV_SB_NAME_SIZE) { 1024 SPDK_ERRLOG("Raid bdev name '%s' exceeds %d characters\n", name, RAID_BDEV_SB_NAME_SIZE - 1); 1025 return -EINVAL; 1026 } 1027 1028 if (raid_bdev_find_by_name(name) != NULL) { 1029 SPDK_ERRLOG("Duplicate raid bdev name found: %s\n", name); 1030 return -EEXIST; 1031 } 1032 1033 if (level == RAID1) { 1034 if (strip_size != 0) { 1035 SPDK_ERRLOG("Strip size is not 
supported by raid1\n"); 1036 return -EINVAL; 1037 } 1038 } else if (spdk_u32_is_pow2(strip_size) == false) { 1039 SPDK_ERRLOG("Invalid strip size %" PRIu32 "\n", strip_size); 1040 return -EINVAL; 1041 } 1042 1043 module = raid_bdev_module_find(level); 1044 if (module == NULL) { 1045 SPDK_ERRLOG("Unsupported raid level '%d'\n", level); 1046 return -EINVAL; 1047 } 1048 1049 assert(module->base_bdevs_min != 0); 1050 if (num_base_bdevs < module->base_bdevs_min) { 1051 SPDK_ERRLOG("At least %u base devices required for %s\n", 1052 module->base_bdevs_min, 1053 raid_bdev_level_to_str(level)); 1054 return -EINVAL; 1055 } 1056 1057 switch (module->base_bdevs_constraint.type) { 1058 case CONSTRAINT_MAX_BASE_BDEVS_REMOVED: 1059 min_operational = num_base_bdevs - module->base_bdevs_constraint.value; 1060 break; 1061 case CONSTRAINT_MIN_BASE_BDEVS_OPERATIONAL: 1062 min_operational = module->base_bdevs_constraint.value; 1063 break; 1064 case CONSTRAINT_UNSET: 1065 if (module->base_bdevs_constraint.value != 0) { 1066 SPDK_ERRLOG("Unexpected constraint value '%u' provided for raid bdev '%s'.\n", 1067 (uint8_t)module->base_bdevs_constraint.value, name); 1068 return -EINVAL; 1069 } 1070 min_operational = num_base_bdevs; 1071 break; 1072 default: 1073 SPDK_ERRLOG("Unrecognised constraint type '%u' in module for raid level '%s'.\n", 1074 (uint8_t)module->base_bdevs_constraint.type, 1075 raid_bdev_level_to_str(module->level)); 1076 return -EINVAL; 1077 }; 1078 1079 if (min_operational == 0 || min_operational > num_base_bdevs) { 1080 SPDK_ERRLOG("Wrong constraint value for raid level '%s'.\n", 1081 raid_bdev_level_to_str(module->level)); 1082 return -EINVAL; 1083 } 1084 1085 raid_bdev = calloc(1, sizeof(*raid_bdev)); 1086 if (!raid_bdev) { 1087 SPDK_ERRLOG("Unable to allocate memory for raid bdev\n"); 1088 return -ENOMEM; 1089 } 1090 1091 spdk_spin_init(&raid_bdev->base_bdev_lock); 1092 raid_bdev->module = module; 1093 raid_bdev->num_base_bdevs = num_base_bdevs; 1094 
raid_bdev->base_bdev_info = calloc(raid_bdev->num_base_bdevs, 1095 sizeof(struct raid_base_bdev_info)); 1096 if (!raid_bdev->base_bdev_info) { 1097 SPDK_ERRLOG("Unable able to allocate base bdev info\n"); 1098 raid_bdev_free(raid_bdev); 1099 return -ENOMEM; 1100 } 1101 1102 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1103 base_info->raid_bdev = raid_bdev; 1104 } 1105 1106 /* strip_size_kb is from the rpc param. strip_size is in blocks and used 1107 * internally and set later. 1108 */ 1109 raid_bdev->strip_size = 0; 1110 raid_bdev->strip_size_kb = strip_size; 1111 raid_bdev->state = RAID_BDEV_STATE_CONFIGURING; 1112 raid_bdev->level = level; 1113 raid_bdev->min_base_bdevs_operational = min_operational; 1114 1115 if (superblock_enabled) { 1116 raid_bdev->sb = spdk_dma_zmalloc(RAID_BDEV_SB_MAX_LENGTH, 0x1000, NULL); 1117 if (!raid_bdev->sb) { 1118 SPDK_ERRLOG("Failed to allocate raid bdev sb buffer\n"); 1119 raid_bdev_free(raid_bdev); 1120 return -ENOMEM; 1121 } 1122 } 1123 1124 raid_bdev_gen = &raid_bdev->bdev; 1125 1126 raid_bdev_gen->name = strdup(name); 1127 if (!raid_bdev_gen->name) { 1128 SPDK_ERRLOG("Unable to allocate name for raid\n"); 1129 raid_bdev_free(raid_bdev); 1130 return -ENOMEM; 1131 } 1132 1133 raid_bdev_gen->product_name = "Raid Volume"; 1134 raid_bdev_gen->ctxt = raid_bdev; 1135 raid_bdev_gen->fn_table = &g_raid_bdev_fn_table; 1136 raid_bdev_gen->module = &g_raid_if; 1137 raid_bdev_gen->write_cache = 0; 1138 spdk_uuid_copy(&raid_bdev_gen->uuid, uuid); 1139 1140 TAILQ_INSERT_TAIL(&g_raid_bdev_list, raid_bdev, global_link); 1141 1142 *raid_bdev_out = raid_bdev; 1143 1144 return 0; 1145 } 1146 1147 /* 1148 * brief: 1149 * raid_bdev_create allocates raid bdev based on passed configuration 1150 * params: 1151 * name - name for raid bdev 1152 * strip_size - strip size in KB 1153 * num_base_bdevs - number of base bdevs 1154 * level - raid level 1155 * superblock_enabled - true if raid should have superblock 1156 * uuid - uuid to set for the bdev 1157 
* raid_bdev_out - the created raid bdev
 * returns:
 * 0 - success
 * non zero - failure
 */
int
raid_bdev_create(const char *name, uint32_t strip_size, uint8_t num_base_bdevs,
		 enum raid_level level, bool superblock_enabled, const struct spdk_uuid *uuid,
		 struct raid_bdev **raid_bdev_out)
{
	struct raid_bdev *created;
	int status;

	assert(uuid != NULL);

	status = _raid_bdev_create(name, strip_size, num_base_bdevs, level, superblock_enabled,
				   uuid, &created);
	if (status == 0) {
		if (superblock_enabled && spdk_uuid_is_null(uuid)) {
			/* the superblock stores the uuid, so it must exist before the bdev is registered */
			spdk_uuid_generate(&created->bdev.uuid);
		}

		/* a freshly created array expects every base bdev to be present */
		created->num_base_bdevs_operational = num_base_bdevs;

		*raid_bdev_out = created;
	}

	return status;
}

/*
 * brief:
 * Check underlying block devices against support for metadata. Do not configure
 * md support when parameters from block devices are inconsistent.
 * params:
 * raid_bdev - pointer to raid bdev
 * returns:
 * 0 - The raid bdev md parameters were successfully configured.
 * non zero - Failed to configure md.
1199 */ 1200 static int 1201 raid_bdev_configure_md(struct raid_bdev *raid_bdev) 1202 { 1203 struct spdk_bdev *base_bdev; 1204 uint8_t i; 1205 1206 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 1207 if (raid_bdev->base_bdev_info[i].desc == NULL) { 1208 continue; 1209 } 1210 base_bdev = spdk_bdev_desc_get_bdev(raid_bdev->base_bdev_info[i].desc); 1211 1212 /* Currently, RAID bdevs do not support DIF or DIX, so a RAID bdev cannot 1213 * be created on top of any bdev which supports it */ 1214 if (spdk_bdev_get_dif_type(base_bdev) != SPDK_DIF_DISABLE) { 1215 SPDK_ERRLOG("at least one base bdev has DIF or DIX enabled " 1216 "- unsupported RAID configuration\n"); 1217 return -EPERM; 1218 } 1219 1220 if (i == 0) { 1221 raid_bdev->bdev.md_len = spdk_bdev_get_md_size(base_bdev); 1222 raid_bdev->bdev.md_interleave = spdk_bdev_is_md_interleaved(base_bdev); 1223 continue; 1224 } 1225 1226 if (raid_bdev->bdev.md_len != spdk_bdev_get_md_size(base_bdev) || 1227 raid_bdev->bdev.md_interleave != spdk_bdev_is_md_interleaved(base_bdev)) { 1228 SPDK_ERRLOG("base bdevs are configured with different metadata formats\n"); 1229 return -EPERM; 1230 } 1231 } 1232 1233 return 0; 1234 } 1235 1236 static void 1237 raid_bdev_configure_cont(struct raid_bdev *raid_bdev) 1238 { 1239 struct spdk_bdev *raid_bdev_gen = &raid_bdev->bdev; 1240 int rc; 1241 1242 raid_bdev->state = RAID_BDEV_STATE_ONLINE; 1243 SPDK_DEBUGLOG(bdev_raid, "io device register %p\n", raid_bdev); 1244 SPDK_DEBUGLOG(bdev_raid, "blockcnt %" PRIu64 ", blocklen %u\n", 1245 raid_bdev_gen->blockcnt, raid_bdev_gen->blocklen); 1246 spdk_io_device_register(raid_bdev, raid_bdev_create_cb, raid_bdev_destroy_cb, 1247 sizeof(struct raid_bdev_io_channel), 1248 raid_bdev_gen->name); 1249 rc = spdk_bdev_register(raid_bdev_gen); 1250 if (rc != 0) { 1251 SPDK_ERRLOG("Unable to register raid bdev and stay at configuring state\n"); 1252 if (raid_bdev->module->stop != NULL) { 1253 raid_bdev->module->stop(raid_bdev); 1254 } 1255 
spdk_io_device_unregister(raid_bdev, NULL); 1256 raid_bdev->state = RAID_BDEV_STATE_CONFIGURING; 1257 return; 1258 } 1259 SPDK_DEBUGLOG(bdev_raid, "raid bdev generic %p\n", raid_bdev_gen); 1260 SPDK_DEBUGLOG(bdev_raid, "raid bdev is created with name %s, raid_bdev %p\n", 1261 raid_bdev_gen->name, raid_bdev); 1262 } 1263 1264 static void 1265 raid_bdev_configure_write_sb_cb(int status, struct raid_bdev *raid_bdev, void *ctx) 1266 { 1267 if (status == 0) { 1268 raid_bdev_configure_cont(raid_bdev); 1269 } else { 1270 SPDK_ERRLOG("Failed to write raid bdev '%s' superblock: %s\n", 1271 raid_bdev->bdev.name, spdk_strerror(-status)); 1272 if (raid_bdev->module->stop != NULL) { 1273 raid_bdev->module->stop(raid_bdev); 1274 } 1275 } 1276 } 1277 1278 /* 1279 * brief: 1280 * If raid bdev config is complete, then only register the raid bdev to 1281 * bdev layer and remove this raid bdev from configuring list and 1282 * insert the raid bdev to configured list 1283 * params: 1284 * raid_bdev - pointer to raid bdev 1285 * returns: 1286 * 0 - success 1287 * non zero - failure 1288 */ 1289 static int 1290 raid_bdev_configure(struct raid_bdev *raid_bdev) 1291 { 1292 uint32_t blocklen = 0; 1293 struct raid_base_bdev_info *base_info; 1294 struct spdk_bdev *base_bdev; 1295 int rc = 0; 1296 1297 assert(raid_bdev->state == RAID_BDEV_STATE_CONFIGURING); 1298 assert(raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs_operational); 1299 1300 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1301 if (base_info->desc == NULL) { 1302 continue; 1303 } 1304 base_bdev = spdk_bdev_desc_get_bdev(base_info->desc); 1305 1306 /* Check blocklen for all base bdevs that it should be same */ 1307 if (blocklen == 0) { 1308 blocklen = base_bdev->blocklen; 1309 } else if (blocklen != base_bdev->blocklen) { 1310 /* 1311 * Assumption is that all the base bdevs for any raid bdev should 1312 * have same blocklen 1313 */ 1314 SPDK_ERRLOG("Blocklen of various bdevs not matching\n"); 1315 return 
-EINVAL; 1316 } 1317 } 1318 assert(blocklen > 0); 1319 1320 /* The strip_size_kb is read in from user in KB. Convert to blocks here for 1321 * internal use. 1322 */ 1323 raid_bdev->strip_size = (raid_bdev->strip_size_kb * 1024) / blocklen; 1324 if (raid_bdev->strip_size == 0 && raid_bdev->level != RAID1) { 1325 SPDK_ERRLOG("Strip size cannot be smaller than the device block size\n"); 1326 return -EINVAL; 1327 } 1328 raid_bdev->strip_size_shift = spdk_u32log2(raid_bdev->strip_size); 1329 raid_bdev->blocklen_shift = spdk_u32log2(blocklen); 1330 raid_bdev->bdev.blocklen = blocklen; 1331 1332 rc = raid_bdev_configure_md(raid_bdev); 1333 if (rc != 0) { 1334 SPDK_ERRLOG("raid metadata configuration failed\n"); 1335 return rc; 1336 } 1337 1338 rc = raid_bdev->module->start(raid_bdev); 1339 if (rc != 0) { 1340 SPDK_ERRLOG("raid module startup callback failed\n"); 1341 return rc; 1342 } 1343 1344 if (raid_bdev->sb != NULL) { 1345 if (spdk_uuid_is_null(&raid_bdev->sb->uuid)) { 1346 /* NULL UUID is not valid in the sb so it means that we are creating a new 1347 * raid bdev and should initialize the superblock. 
1348 */ 1349 raid_bdev_init_superblock(raid_bdev); 1350 } else { 1351 assert(spdk_uuid_compare(&raid_bdev->sb->uuid, &raid_bdev->bdev.uuid) == 0); 1352 if (raid_bdev->sb->block_size != blocklen) { 1353 SPDK_ERRLOG("blocklen does not match value in superblock\n"); 1354 rc = -EINVAL; 1355 } 1356 if (raid_bdev->sb->raid_size != raid_bdev->bdev.blockcnt) { 1357 SPDK_ERRLOG("blockcnt does not match value in superblock\n"); 1358 rc = -EINVAL; 1359 } 1360 if (rc != 0) { 1361 if (raid_bdev->module->stop != NULL) { 1362 raid_bdev->module->stop(raid_bdev); 1363 } 1364 return rc; 1365 } 1366 } 1367 1368 raid_bdev_write_superblock(raid_bdev, raid_bdev_configure_write_sb_cb, NULL); 1369 } else { 1370 raid_bdev_configure_cont(raid_bdev); 1371 } 1372 1373 return 0; 1374 } 1375 1376 /* 1377 * brief: 1378 * If raid bdev is online and registered, change the bdev state to 1379 * configuring and unregister this raid device. Queue this raid device 1380 * in configuring list 1381 * params: 1382 * raid_bdev - pointer to raid bdev 1383 * cb_fn - callback function 1384 * cb_arg - argument to callback function 1385 * returns: 1386 * none 1387 */ 1388 static void 1389 raid_bdev_deconfigure(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn, 1390 void *cb_arg) 1391 { 1392 if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) { 1393 if (cb_fn) { 1394 cb_fn(cb_arg, 0); 1395 } 1396 return; 1397 } 1398 1399 raid_bdev->state = RAID_BDEV_STATE_OFFLINE; 1400 assert(raid_bdev->num_base_bdevs_discovered); 1401 SPDK_DEBUGLOG(bdev_raid, "raid bdev state changing from online to offline\n"); 1402 1403 spdk_bdev_unregister(&raid_bdev->bdev, cb_fn, cb_arg); 1404 } 1405 1406 /* 1407 * brief: 1408 * raid_bdev_find_base_info_by_bdev function finds the base bdev info by bdev. 1409 * params: 1410 * base_bdev - pointer to base bdev 1411 * returns: 1412 * base bdev info if found, otherwise NULL. 
1413 */ 1414 static struct raid_base_bdev_info * 1415 raid_bdev_find_base_info_by_bdev(struct spdk_bdev *base_bdev) 1416 { 1417 struct raid_bdev *raid_bdev; 1418 struct raid_base_bdev_info *base_info; 1419 1420 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { 1421 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1422 if (base_info->desc != NULL && 1423 spdk_bdev_desc_get_bdev(base_info->desc) == base_bdev) { 1424 return base_info; 1425 } 1426 } 1427 } 1428 1429 return NULL; 1430 } 1431 1432 static void 1433 raid_bdev_remove_base_bdev_done(struct raid_base_bdev_info *base_info, int status) 1434 { 1435 assert(base_info->remove_scheduled); 1436 1437 base_info->remove_scheduled = false; 1438 if (base_info->remove_cb != NULL) { 1439 base_info->remove_cb(base_info->remove_cb_ctx, status); 1440 } 1441 } 1442 1443 static void 1444 raid_bdev_remove_base_bdev_write_sb_cb(int status, struct raid_bdev *raid_bdev, void *ctx) 1445 { 1446 struct raid_base_bdev_info *base_info = ctx; 1447 1448 if (status != 0) { 1449 SPDK_ERRLOG("Failed to write raid bdev '%s' superblock: %s\n", 1450 raid_bdev->bdev.name, spdk_strerror(-status)); 1451 } 1452 1453 raid_bdev_remove_base_bdev_done(base_info, status); 1454 } 1455 1456 static void 1457 raid_bdev_remove_base_bdev_on_unquiesced(void *ctx, int status) 1458 { 1459 struct raid_base_bdev_info *base_info = ctx; 1460 struct raid_bdev *raid_bdev = base_info->raid_bdev; 1461 1462 if (status != 0) { 1463 SPDK_ERRLOG("Failed to unquiesce raid bdev %s: %s\n", 1464 raid_bdev->bdev.name, spdk_strerror(-status)); 1465 goto out; 1466 } 1467 1468 spdk_spin_lock(&raid_bdev->base_bdev_lock); 1469 raid_bdev_free_base_bdev_resource(base_info); 1470 spdk_spin_unlock(&raid_bdev->base_bdev_lock); 1471 1472 if (raid_bdev->sb) { 1473 struct raid_bdev_superblock *sb = raid_bdev->sb; 1474 struct raid_bdev_sb_base_bdev *sb_base_bdev = NULL; 1475 uint8_t slot = raid_bdev_base_bdev_slot(base_info); 1476 uint8_t i; 1477 1478 for (i = 0; i < 
sb->base_bdevs_size; i++) { 1479 sb_base_bdev = &sb->base_bdevs[i]; 1480 1481 if (sb_base_bdev->state == RAID_SB_BASE_BDEV_CONFIGURED && 1482 sb_base_bdev->slot == slot) { 1483 break; 1484 } 1485 } 1486 1487 assert(i < sb->base_bdevs_size); 1488 1489 /* TODO: distinguish between failure and intentional removal */ 1490 sb_base_bdev->state = RAID_SB_BASE_BDEV_FAILED; 1491 1492 raid_bdev_write_superblock(raid_bdev, raid_bdev_remove_base_bdev_write_sb_cb, base_info); 1493 return; 1494 } 1495 out: 1496 raid_bdev_remove_base_bdev_done(base_info, status); 1497 } 1498 1499 static void 1500 raid_bdev_channel_remove_base_bdev(struct spdk_io_channel_iter *i) 1501 { 1502 struct raid_base_bdev_info *base_info = spdk_io_channel_iter_get_ctx(i); 1503 struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); 1504 struct raid_bdev_io_channel *raid_ch = spdk_io_channel_get_ctx(ch); 1505 uint8_t idx = raid_bdev_base_bdev_slot(base_info); 1506 1507 SPDK_DEBUGLOG(bdev_raid, "slot: %u raid_ch: %p\n", idx, raid_ch); 1508 1509 if (raid_ch->base_channel[idx] != NULL) { 1510 spdk_put_io_channel(raid_ch->base_channel[idx]); 1511 raid_ch->base_channel[idx] = NULL; 1512 } 1513 1514 spdk_for_each_channel_continue(i, 0); 1515 } 1516 1517 static void 1518 raid_bdev_channels_remove_base_bdev_done(struct spdk_io_channel_iter *i, int status) 1519 { 1520 struct raid_base_bdev_info *base_info = spdk_io_channel_iter_get_ctx(i); 1521 struct raid_bdev *raid_bdev = base_info->raid_bdev; 1522 1523 spdk_bdev_unquiesce(&raid_bdev->bdev, &g_raid_if, raid_bdev_remove_base_bdev_on_unquiesced, 1524 base_info); 1525 } 1526 1527 static void 1528 raid_bdev_remove_base_bdev_on_quiesced(void *ctx, int status) 1529 { 1530 struct raid_base_bdev_info *base_info = ctx; 1531 struct raid_bdev *raid_bdev = base_info->raid_bdev; 1532 1533 if (status != 0) { 1534 SPDK_ERRLOG("Failed to quiesce raid bdev %s: %s\n", 1535 raid_bdev->bdev.name, spdk_strerror(-status)); 1536 raid_bdev_remove_base_bdev_done(base_info, 
status); 1537 return; 1538 } 1539 1540 spdk_for_each_channel(raid_bdev, raid_bdev_channel_remove_base_bdev, base_info, 1541 raid_bdev_channels_remove_base_bdev_done); 1542 } 1543 1544 /* 1545 * brief: 1546 * raid_bdev_remove_base_bdev function is called by below layers when base_bdev 1547 * is removed. This function checks if this base bdev is part of any raid bdev 1548 * or not. If yes, it takes necessary action on that particular raid bdev. 1549 * params: 1550 * base_bdev - pointer to base bdev which got removed 1551 * cb_fn - callback function 1552 * cb_arg - argument to callback function 1553 * returns: 1554 * 0 - success 1555 * non zero - failure 1556 */ 1557 int 1558 raid_bdev_remove_base_bdev(struct spdk_bdev *base_bdev, raid_bdev_remove_base_bdev_cb cb_fn, 1559 void *cb_ctx) 1560 { 1561 struct raid_bdev *raid_bdev; 1562 struct raid_base_bdev_info *base_info; 1563 1564 SPDK_DEBUGLOG(bdev_raid, "%s\n", base_bdev->name); 1565 1566 /* Find the raid_bdev which has claimed this base_bdev */ 1567 base_info = raid_bdev_find_base_info_by_bdev(base_bdev); 1568 if (!base_info) { 1569 SPDK_ERRLOG("bdev to remove '%s' not found\n", base_bdev->name); 1570 return -ENODEV; 1571 } 1572 raid_bdev = base_info->raid_bdev; 1573 1574 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 1575 1576 if (base_info->remove_scheduled) { 1577 return 0; 1578 } 1579 1580 assert(base_info->desc); 1581 base_info->remove_scheduled = true; 1582 base_info->remove_cb = cb_fn; 1583 base_info->remove_cb_ctx = cb_ctx; 1584 1585 if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) { 1586 /* 1587 * As raid bdev is not registered yet or already unregistered, 1588 * so cleanup should be done here itself. 1589 * 1590 * Removing a base bdev at this stage does not change the number of operational 1591 * base bdevs, only the number of discovered base bdevs. 
1592 */ 1593 raid_bdev_free_base_bdev_resource(base_info); 1594 if (raid_bdev->num_base_bdevs_discovered == 0) { 1595 /* There is no base bdev for this raid, so free the raid device. */ 1596 raid_bdev_cleanup_and_free(raid_bdev); 1597 } 1598 } else if (raid_bdev->num_base_bdevs_operational-- == raid_bdev->min_base_bdevs_operational) { 1599 /* 1600 * After this base bdev is removed there will not be enough base bdevs 1601 * to keep the raid bdev operational. 1602 */ 1603 raid_bdev_deconfigure(raid_bdev, cb_fn, cb_ctx); 1604 } else { 1605 int ret; 1606 1607 ret = spdk_bdev_quiesce(&raid_bdev->bdev, &g_raid_if, 1608 raid_bdev_remove_base_bdev_on_quiesced, base_info); 1609 if (ret != 0) { 1610 base_info->remove_scheduled = false; 1611 } 1612 } 1613 1614 return 0; 1615 } 1616 1617 /* 1618 * brief: 1619 * raid_bdev_resize_base_bdev function is called by below layers when base_bdev 1620 * is resized. This function checks if the smallest size of the base_bdevs is changed. 1621 * If yes, call module handler to resize the raid_bdev if implemented. 1622 * params: 1623 * base_bdev - pointer to base bdev which got resized. 
1624 * returns: 1625 * none 1626 */ 1627 static void 1628 raid_bdev_resize_base_bdev(struct spdk_bdev *base_bdev) 1629 { 1630 struct raid_bdev *raid_bdev; 1631 struct raid_base_bdev_info *base_info; 1632 1633 SPDK_DEBUGLOG(bdev_raid, "raid_bdev_resize_base_bdev\n"); 1634 1635 base_info = raid_bdev_find_base_info_by_bdev(base_bdev); 1636 1637 /* Find the raid_bdev which has claimed this base_bdev */ 1638 if (!base_info) { 1639 SPDK_ERRLOG("raid_bdev whose base_bdev '%s' not found\n", base_bdev->name); 1640 return; 1641 } 1642 raid_bdev = base_info->raid_bdev; 1643 1644 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 1645 1646 SPDK_NOTICELOG("base_bdev '%s' was resized: old size %" PRIu64 ", new size %" PRIu64 "\n", 1647 base_bdev->name, base_info->blockcnt, base_bdev->blockcnt); 1648 1649 if (raid_bdev->module->resize) { 1650 raid_bdev->module->resize(raid_bdev); 1651 } 1652 } 1653 1654 /* 1655 * brief: 1656 * raid_bdev_event_base_bdev function is called by below layers when base_bdev 1657 * triggers asynchronous event. 1658 * params: 1659 * type - event details. 1660 * bdev - bdev that triggered event. 1661 * event_ctx - context for event. 
1662 * returns: 1663 * none 1664 */ 1665 static void 1666 raid_bdev_event_base_bdev(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, 1667 void *event_ctx) 1668 { 1669 int rc; 1670 1671 switch (type) { 1672 case SPDK_BDEV_EVENT_REMOVE: 1673 rc = raid_bdev_remove_base_bdev(bdev, NULL, NULL); 1674 if (rc != 0) { 1675 SPDK_ERRLOG("Failed to remove base bdev %s: %s\n", 1676 spdk_bdev_get_name(bdev), spdk_strerror(-rc)); 1677 } 1678 break; 1679 case SPDK_BDEV_EVENT_RESIZE: 1680 raid_bdev_resize_base_bdev(bdev); 1681 break; 1682 default: 1683 SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type); 1684 break; 1685 } 1686 } 1687 1688 /* 1689 * brief: 1690 * Deletes the specified raid bdev 1691 * params: 1692 * raid_bdev - pointer to raid bdev 1693 * cb_fn - callback function 1694 * cb_arg - argument to callback function 1695 */ 1696 void 1697 raid_bdev_delete(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn, void *cb_arg) 1698 { 1699 struct raid_base_bdev_info *base_info; 1700 1701 SPDK_DEBUGLOG(bdev_raid, "delete raid bdev: %s\n", raid_bdev->bdev.name); 1702 1703 if (raid_bdev->destroy_started) { 1704 SPDK_DEBUGLOG(bdev_raid, "destroying raid bdev %s is already started\n", 1705 raid_bdev->bdev.name); 1706 if (cb_fn) { 1707 cb_fn(cb_arg, -EALREADY); 1708 } 1709 return; 1710 } 1711 1712 raid_bdev->destroy_started = true; 1713 1714 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 1715 base_info->remove_scheduled = true; 1716 1717 if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) { 1718 /* 1719 * As raid bdev is not registered yet or already unregistered, 1720 * so cleanup should be done here itself. 1721 */ 1722 raid_bdev_free_base_bdev_resource(base_info); 1723 } 1724 } 1725 1726 if (raid_bdev->num_base_bdevs_discovered == 0) { 1727 /* There is no base bdev for this raid, so free the raid device. 
*/ 1728 raid_bdev_cleanup_and_free(raid_bdev); 1729 if (cb_fn) { 1730 cb_fn(cb_arg, 0); 1731 } 1732 } else { 1733 raid_bdev_deconfigure(raid_bdev, cb_fn, cb_arg); 1734 } 1735 } 1736 1737 static void 1738 raid_bdev_configure_base_bdev_cont(struct raid_base_bdev_info *base_info) 1739 { 1740 struct raid_bdev *raid_bdev = base_info->raid_bdev; 1741 int rc; 1742 1743 base_info->is_configured = true; 1744 1745 raid_bdev->num_base_bdevs_discovered++; 1746 assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs); 1747 assert(raid_bdev->num_base_bdevs_operational <= raid_bdev->num_base_bdevs); 1748 assert(raid_bdev->num_base_bdevs_operational >= raid_bdev->min_base_bdevs_operational); 1749 1750 /* 1751 * Configure the raid bdev when the number of discovered base bdevs reaches the number 1752 * of base bdevs we know to be operational members of the array. Usually this is equal 1753 * to the total number of base bdevs (num_base_bdevs) but can be less - when the array is 1754 * degraded. 
1755 */ 1756 if (raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs_operational) { 1757 rc = raid_bdev_configure(raid_bdev); 1758 if (rc != 0) { 1759 SPDK_ERRLOG("Failed to configure raid bdev: %s\n", spdk_strerror(-rc)); 1760 } 1761 } 1762 } 1763 1764 static void 1765 raid_bdev_configure_base_bdev_check_sb_cb(const struct raid_bdev_superblock *sb, int status, 1766 void *ctx) 1767 { 1768 struct raid_base_bdev_info *base_info = ctx; 1769 1770 switch (status) { 1771 case 0: 1772 /* valid superblock found */ 1773 SPDK_ERRLOG("Existing raid superblock found on bdev %s\n", base_info->name); 1774 raid_bdev_free_base_bdev_resource(base_info); 1775 break; 1776 case -EINVAL: 1777 /* no valid superblock */ 1778 raid_bdev_configure_base_bdev_cont(base_info); 1779 break; 1780 default: 1781 SPDK_ERRLOG("Failed to examine bdev %s: %s\n", 1782 base_info->name, spdk_strerror(-status)); 1783 break; 1784 } 1785 } 1786 1787 static int 1788 raid_bdev_configure_base_bdev(struct raid_base_bdev_info *base_info, bool existing) 1789 { 1790 struct raid_bdev *raid_bdev = base_info->raid_bdev; 1791 struct spdk_bdev_desc *desc; 1792 struct spdk_bdev *bdev; 1793 const struct spdk_uuid *bdev_uuid; 1794 int rc; 1795 1796 assert(spdk_get_thread() == spdk_thread_get_app_thread()); 1797 assert(base_info->desc == NULL); 1798 1799 /* 1800 * Base bdev can be added by name or uuid. Here we assure both properties are set and valid 1801 * before claiming the bdev. 
1802 */ 1803 1804 if (!spdk_uuid_is_null(&base_info->uuid)) { 1805 char uuid_str[SPDK_UUID_STRING_LEN]; 1806 const char *bdev_name; 1807 1808 spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &base_info->uuid); 1809 1810 /* UUID of a bdev is registered as its alias */ 1811 bdev = spdk_bdev_get_by_name(uuid_str); 1812 if (bdev == NULL) { 1813 return -ENODEV; 1814 } 1815 1816 bdev_name = spdk_bdev_get_name(bdev); 1817 1818 if (base_info->name == NULL) { 1819 assert(existing == true); 1820 base_info->name = strdup(bdev_name); 1821 if (base_info->name == NULL) { 1822 return -ENOMEM; 1823 } 1824 } else if (strcmp(base_info->name, bdev_name) != 0) { 1825 SPDK_ERRLOG("Name mismatch for base bdev '%s' - expected '%s'\n", 1826 bdev_name, base_info->name); 1827 return -EINVAL; 1828 } 1829 } 1830 1831 assert(base_info->name != NULL); 1832 1833 rc = spdk_bdev_open_ext(base_info->name, true, raid_bdev_event_base_bdev, NULL, &desc); 1834 if (rc != 0) { 1835 if (rc != -ENODEV) { 1836 SPDK_ERRLOG("Unable to create desc on bdev '%s'\n", base_info->name); 1837 } 1838 return rc; 1839 } 1840 1841 bdev = spdk_bdev_desc_get_bdev(desc); 1842 bdev_uuid = spdk_bdev_get_uuid(bdev); 1843 1844 if (spdk_uuid_is_null(&base_info->uuid)) { 1845 spdk_uuid_copy(&base_info->uuid, bdev_uuid); 1846 } else if (spdk_uuid_compare(&base_info->uuid, bdev_uuid) != 0) { 1847 SPDK_ERRLOG("UUID mismatch for base bdev '%s'\n", base_info->name); 1848 spdk_bdev_close(desc); 1849 return -EINVAL; 1850 } 1851 1852 rc = spdk_bdev_module_claim_bdev(bdev, NULL, &g_raid_if); 1853 if (rc != 0) { 1854 SPDK_ERRLOG("Unable to claim this bdev as it is already claimed\n"); 1855 spdk_bdev_close(desc); 1856 return rc; 1857 } 1858 1859 SPDK_DEBUGLOG(bdev_raid, "bdev %s is claimed\n", bdev->name); 1860 1861 assert(raid_bdev->state != RAID_BDEV_STATE_ONLINE); 1862 1863 base_info->app_thread_ch = spdk_bdev_get_io_channel(desc); 1864 if (base_info->app_thread_ch == NULL) { 1865 SPDK_ERRLOG("Failed to get io channel\n"); 1866 
spdk_bdev_module_release_bdev(bdev); 1867 spdk_bdev_close(desc); 1868 return -ENOMEM; 1869 } 1870 1871 base_info->desc = desc; 1872 base_info->blockcnt = bdev->blockcnt; 1873 1874 if (raid_bdev->sb != NULL) { 1875 uint64_t data_offset; 1876 1877 if (base_info->data_offset == 0) { 1878 assert((RAID_BDEV_MIN_DATA_OFFSET_SIZE % bdev->blocklen) == 0); 1879 data_offset = RAID_BDEV_MIN_DATA_OFFSET_SIZE / bdev->blocklen; 1880 } else { 1881 data_offset = base_info->data_offset; 1882 } 1883 1884 if (bdev->optimal_io_boundary != 0) { 1885 data_offset = spdk_divide_round_up(data_offset, 1886 bdev->optimal_io_boundary) * bdev->optimal_io_boundary; 1887 if (base_info->data_offset != 0 && base_info->data_offset != data_offset) { 1888 SPDK_WARNLOG("Data offset %lu on bdev '%s' is different than optimal value %lu\n", 1889 base_info->data_offset, base_info->name, data_offset); 1890 data_offset = base_info->data_offset; 1891 } 1892 } 1893 1894 base_info->data_offset = data_offset; 1895 } 1896 1897 if (base_info->data_offset >= bdev->blockcnt) { 1898 SPDK_ERRLOG("Data offset %lu exceeds base bdev capacity %lu on bdev '%s'\n", 1899 base_info->data_offset, bdev->blockcnt, base_info->name); 1900 rc = -EINVAL; 1901 goto out; 1902 } 1903 1904 if (base_info->data_size == 0) { 1905 base_info->data_size = bdev->blockcnt - base_info->data_offset; 1906 } else if (base_info->data_offset + base_info->data_size > bdev->blockcnt) { 1907 SPDK_ERRLOG("Data offset and size exceeds base bdev capacity %lu on bdev '%s'\n", 1908 bdev->blockcnt, base_info->name); 1909 rc = -EINVAL; 1910 goto out; 1911 } 1912 1913 if (existing) { 1914 raid_bdev_configure_base_bdev_cont(base_info); 1915 } else { 1916 /* check for existing superblock when using a new bdev */ 1917 rc = raid_bdev_load_base_bdev_superblock(desc, base_info->app_thread_ch, 1918 raid_bdev_configure_base_bdev_check_sb_cb, base_info); 1919 if (rc) { 1920 SPDK_ERRLOG("Failed to read bdev %s superblock: %s\n", 1921 bdev->name, spdk_strerror(-rc)); 
1922 } 1923 } 1924 out: 1925 if (rc != 0) { 1926 raid_bdev_free_base_bdev_resource(base_info); 1927 } 1928 return rc; 1929 } 1930 1931 /* 1932 * brief: 1933 * raid_bdev_add_base_device function is the actual function which either adds 1934 * the nvme base device to existing raid bdev or create a new raid bdev. It also claims 1935 * the base device and keep the open descriptor. 1936 * params: 1937 * raid_bdev - pointer to raid bdev 1938 * name - name of the base bdev 1939 * slot - position to add base bdev 1940 * returns: 1941 * 0 - success 1942 * non zero - failure 1943 */ 1944 int 1945 raid_bdev_add_base_device(struct raid_bdev *raid_bdev, const char *name, uint8_t slot) 1946 { 1947 struct raid_base_bdev_info *base_info; 1948 int rc; 1949 1950 if (slot >= raid_bdev->num_base_bdevs) { 1951 return -EINVAL; 1952 } 1953 1954 base_info = &raid_bdev->base_bdev_info[slot]; 1955 1956 if (base_info->name != NULL) { 1957 SPDK_ERRLOG("Slot %u on raid bdev '%s' already assigned to bdev '%s'\n", 1958 slot, raid_bdev->bdev.name, base_info->name); 1959 return -EBUSY; 1960 } 1961 1962 if (!spdk_uuid_is_null(&base_info->uuid)) { 1963 char uuid_str[SPDK_UUID_STRING_LEN]; 1964 1965 spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &base_info->uuid); 1966 SPDK_ERRLOG("Slot %u on raid bdev '%s' already assigned to bdev with uuid %s\n", 1967 slot, raid_bdev->bdev.name, uuid_str); 1968 return -EBUSY; 1969 } 1970 1971 base_info->name = strdup(name); 1972 if (base_info->name == NULL) { 1973 return -ENOMEM; 1974 } 1975 1976 rc = raid_bdev_configure_base_bdev(base_info, false); 1977 if (rc != 0) { 1978 if (rc != -ENODEV) { 1979 SPDK_ERRLOG("Failed to allocate resource for bdev '%s'\n", name); 1980 } 1981 return rc; 1982 } 1983 1984 return 0; 1985 } 1986 1987 static int 1988 raid_bdev_create_from_sb(const struct raid_bdev_superblock *sb, struct raid_bdev **raid_bdev_out) 1989 { 1990 struct raid_bdev *raid_bdev; 1991 uint8_t i; 1992 int rc; 1993 1994 rc = _raid_bdev_create(sb->name, 
(sb->strip_size * sb->block_size) / 1024, sb->num_base_bdevs, 1995 sb->level, true, &sb->uuid, &raid_bdev); 1996 if (rc != 0) { 1997 return rc; 1998 } 1999 2000 assert(sb->length <= RAID_BDEV_SB_MAX_LENGTH); 2001 memcpy(raid_bdev->sb, sb, sb->length); 2002 2003 for (i = 0; i < sb->base_bdevs_size; i++) { 2004 const struct raid_bdev_sb_base_bdev *sb_base_bdev = &sb->base_bdevs[i]; 2005 struct raid_base_bdev_info *base_info = &raid_bdev->base_bdev_info[sb_base_bdev->slot]; 2006 2007 if (sb_base_bdev->state == RAID_SB_BASE_BDEV_CONFIGURED) { 2008 spdk_uuid_copy(&base_info->uuid, &sb_base_bdev->uuid); 2009 raid_bdev->num_base_bdevs_operational++; 2010 } 2011 2012 base_info->data_offset = sb_base_bdev->data_offset; 2013 base_info->data_size = sb_base_bdev->data_size; 2014 } 2015 2016 *raid_bdev_out = raid_bdev; 2017 return 0; 2018 } 2019 2020 static void 2021 raid_bdev_examine_no_sb(struct spdk_bdev *bdev) 2022 { 2023 struct raid_bdev *raid_bdev; 2024 struct raid_base_bdev_info *base_info; 2025 2026 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { 2027 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 2028 if (base_info->desc == NULL && base_info->name != NULL && 2029 strcmp(bdev->name, base_info->name) == 0) { 2030 raid_bdev_configure_base_bdev(base_info, true); 2031 break; 2032 } 2033 } 2034 } 2035 } 2036 2037 static void 2038 raid_bdev_examine_sb(const struct raid_bdev_superblock *sb, struct spdk_bdev *bdev) 2039 { 2040 const struct raid_bdev_sb_base_bdev *sb_base_bdev; 2041 struct raid_bdev *raid_bdev; 2042 struct raid_base_bdev_info *iter, *base_info; 2043 uint8_t i; 2044 int rc; 2045 2046 if (sb->block_size != bdev->blocklen) { 2047 SPDK_WARNLOG("Bdev %s block size (%u) does not match the value in superblock (%u)\n", 2048 bdev->name, sb->block_size, bdev->blocklen); 2049 return; 2050 } 2051 2052 if (spdk_uuid_is_null(&sb->uuid)) { 2053 SPDK_WARNLOG("NULL raid bdev UUID in superblock on bdev %s\n", bdev->name); 2054 return; 2055 } 2056 2057 
TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { 2058 if (spdk_uuid_compare(&raid_bdev->bdev.uuid, &sb->uuid) == 0) { 2059 break; 2060 } 2061 } 2062 2063 if (raid_bdev) { 2064 if (sb->seq_number > raid_bdev->sb->seq_number) { 2065 SPDK_DEBUGLOG(bdev_raid, 2066 "raid superblock seq_number on bdev %s (%lu) greater than existing raid bdev %s (%lu)\n", 2067 bdev->name, sb->seq_number, raid_bdev->bdev.name, raid_bdev->sb->seq_number); 2068 2069 if (raid_bdev->state != RAID_BDEV_STATE_CONFIGURING) { 2070 SPDK_WARNLOG("Newer version of raid bdev %s superblock found on bdev %s but raid bdev is not in configuring state.\n", 2071 raid_bdev->bdev.name, bdev->name); 2072 return; 2073 } 2074 2075 /* remove and then recreate the raid bdev using the newer superblock */ 2076 raid_bdev_delete(raid_bdev, NULL, NULL); 2077 raid_bdev = NULL; 2078 } else if (sb->seq_number < raid_bdev->sb->seq_number) { 2079 SPDK_DEBUGLOG(bdev_raid, 2080 "raid superblock seq_number on bdev %s (%lu) smaller than existing raid bdev %s (%lu)\n", 2081 bdev->name, sb->seq_number, raid_bdev->bdev.name, raid_bdev->sb->seq_number); 2082 /* use the current raid bdev superblock */ 2083 sb = raid_bdev->sb; 2084 } 2085 } 2086 2087 for (i = 0; i < sb->base_bdevs_size; i++) { 2088 sb_base_bdev = &sb->base_bdevs[i]; 2089 2090 assert(spdk_uuid_is_null(&sb_base_bdev->uuid) == false); 2091 2092 if (spdk_uuid_compare(&sb_base_bdev->uuid, spdk_bdev_get_uuid(bdev)) == 0) { 2093 break; 2094 } 2095 } 2096 2097 if (i == sb->base_bdevs_size) { 2098 SPDK_DEBUGLOG(bdev_raid, "raid superblock does not contain this bdev's uuid\n"); 2099 return; 2100 } 2101 2102 if (!raid_bdev) { 2103 rc = raid_bdev_create_from_sb(sb, &raid_bdev); 2104 if (rc != 0) { 2105 SPDK_ERRLOG("Failed to create raid bdev %s: %s\n", 2106 sb->name, spdk_strerror(-rc)); 2107 } 2108 } 2109 2110 if (sb_base_bdev->state != RAID_SB_BASE_BDEV_CONFIGURED) { 2111 SPDK_NOTICELOG("Bdev %s is not an active member of raid bdev %s. 
Ignoring.\n", 2112 bdev->name, raid_bdev->bdev.name); 2113 return; 2114 } 2115 2116 base_info = NULL; 2117 RAID_FOR_EACH_BASE_BDEV(raid_bdev, iter) { 2118 if (spdk_uuid_compare(&iter->uuid, spdk_bdev_get_uuid(bdev)) == 0) { 2119 base_info = iter; 2120 break; 2121 } 2122 } 2123 2124 if (base_info == NULL) { 2125 SPDK_ERRLOG("Bdev %s is not a member of raid bdev %s\n", 2126 bdev->name, raid_bdev->bdev.name); 2127 return; 2128 } 2129 2130 rc = raid_bdev_configure_base_bdev(base_info, true); 2131 if (rc != 0) { 2132 SPDK_ERRLOG("Failed to configure bdev %s as base bdev of raid %s: %s\n", 2133 bdev->name, raid_bdev->bdev.name, spdk_strerror(-rc)); 2134 } 2135 } 2136 2137 struct raid_bdev_examine_ctx { 2138 struct spdk_bdev_desc *desc; 2139 struct spdk_io_channel *ch; 2140 }; 2141 2142 static void 2143 raid_bdev_examine_ctx_free(struct raid_bdev_examine_ctx *ctx) 2144 { 2145 if (!ctx) { 2146 return; 2147 } 2148 2149 if (ctx->ch) { 2150 spdk_put_io_channel(ctx->ch); 2151 } 2152 2153 if (ctx->desc) { 2154 spdk_bdev_close(ctx->desc); 2155 } 2156 2157 free(ctx); 2158 } 2159 2160 static void 2161 raid_bdev_examine_load_sb_cb(const struct raid_bdev_superblock *sb, int status, void *_ctx) 2162 { 2163 struct raid_bdev_examine_ctx *ctx = _ctx; 2164 struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(ctx->desc); 2165 2166 switch (status) { 2167 case 0: 2168 /* valid superblock found */ 2169 SPDK_DEBUGLOG(bdev_raid, "raid superblock found on bdev %s\n", bdev->name); 2170 raid_bdev_examine_sb(sb, bdev); 2171 break; 2172 case -EINVAL: 2173 /* no valid superblock, check if it can be claimed anyway */ 2174 raid_bdev_examine_no_sb(bdev); 2175 break; 2176 default: 2177 SPDK_ERRLOG("Failed to examine bdev %s: %s\n", 2178 bdev->name, spdk_strerror(-status)); 2179 break; 2180 } 2181 2182 raid_bdev_examine_ctx_free(ctx); 2183 spdk_bdev_module_examine_done(&g_raid_if); 2184 } 2185 2186 static void 2187 raid_bdev_examine_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void 
*event_ctx) 2188 { 2189 } 2190 2191 /* 2192 * brief: 2193 * raid_bdev_examine function is the examine function call by the below layers 2194 * like bdev_nvme layer. This function will check if this base bdev can be 2195 * claimed by this raid bdev or not. 2196 * params: 2197 * bdev - pointer to base bdev 2198 * returns: 2199 * none 2200 */ 2201 static void 2202 raid_bdev_examine(struct spdk_bdev *bdev) 2203 { 2204 struct raid_bdev_examine_ctx *ctx; 2205 int rc; 2206 2207 ctx = calloc(1, sizeof(*ctx)); 2208 if (!ctx) { 2209 SPDK_ERRLOG("Failed to examine bdev %s: %s\n", 2210 bdev->name, spdk_strerror(ENOMEM)); 2211 goto err; 2212 } 2213 2214 rc = spdk_bdev_open_ext(spdk_bdev_get_name(bdev), false, raid_bdev_examine_event_cb, NULL, 2215 &ctx->desc); 2216 if (rc) { 2217 SPDK_ERRLOG("Failed to open bdev %s: %s\n", 2218 bdev->name, spdk_strerror(-rc)); 2219 goto err; 2220 } 2221 2222 ctx->ch = spdk_bdev_get_io_channel(ctx->desc); 2223 if (!ctx->ch) { 2224 SPDK_ERRLOG("Failed to get io channel for bdev %s\n", bdev->name); 2225 goto err; 2226 } 2227 2228 rc = raid_bdev_load_base_bdev_superblock(ctx->desc, ctx->ch, raid_bdev_examine_load_sb_cb, ctx); 2229 if (rc) { 2230 SPDK_ERRLOG("Failed to read bdev %s superblock: %s\n", 2231 bdev->name, spdk_strerror(-rc)); 2232 goto err; 2233 } 2234 2235 return; 2236 err: 2237 raid_bdev_examine_ctx_free(ctx); 2238 spdk_bdev_module_examine_done(&g_raid_if); 2239 } 2240 2241 /* Log component for bdev raid bdev module */ 2242 SPDK_LOG_REGISTER_COMPONENT(bdev_raid) 2243