1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 */

#include "bdev_raid.h"
#include "spdk/env.h"
#include "spdk/io_channel.h"
#include "spdk/conf.h"
#include "spdk_internal/log.h"
#include "spdk/string.h"
#include "spdk/util.h"
#include "spdk/json.h"
#include "spdk/string.h"

/*
 * Shutdown flag checked by raid_bdev_destruct(); while it is true, destruct
 * releases the base bdevs and moves the raid bdev onto the offline list.
 */
static bool g_shutdown_started = false;

/* raid bdev config as read from config file; its list head starts out empty */
struct raid_config g_raid_config = {
	.raid_bdev_config_head = TAILQ_HEAD_INITIALIZER(g_raid_config.raid_bdev_config_head),
};

/*
 * List of raid bdevs in the configured list; these raid bdevs are registered
 * with the bdev layer. Entries are linked through raid_bdev::state_link.
 */
struct raid_configured_tailq g_raid_bdev_configured_list = TAILQ_HEAD_INITIALIZER(
			g_raid_bdev_configured_list);

/* List of raid bdevs in the configuring list (linked through state_link) */
struct raid_configuring_tailq g_raid_bdev_configuring_list = TAILQ_HEAD_INITIALIZER(
			g_raid_bdev_configuring_list);

/* List of all raid bdevs regardless of state (linked through global_link) */
struct raid_all_tailq g_raid_bdev_list = TAILQ_HEAD_INITIALIZER(g_raid_bdev_list);

/* List of all raid bdevs that are offline (linked through state_link) */
struct raid_offline_tailq g_raid_bdev_offline_list = TAILQ_HEAD_INITIALIZER(
			g_raid_bdev_offline_list);

/* Forward declarations for functions referenced before their definitions */
static void raid_bdev_examine(struct spdk_bdev *bdev);
static int raid_bdev_init(void);
static void raid0_waitq_io_process(void *ctx);
static void raid_bdev_deconfigure(struct raid_bdev *raid_bdev,
				  raid_bdev_destruct_cb cb_fn, void *cb_arg);
static void raid_bdev_remove_base_bdev(void *ctx);

/*
 * brief:
 * raid_bdev_create_cb function is a cb function for raid bdev which creates the
 * hierarchy from raid bdev to base bdev io channels.
It will be called per core 81 * params: 82 * io_device - pointer to raid bdev io device represented by raid_bdev 83 * ctx_buf - pointer to context buffer for raid bdev io channel 84 * returns: 85 * 0 - success 86 * non zero - failure 87 */ 88 static int 89 raid_bdev_create_cb(void *io_device, void *ctx_buf) 90 { 91 struct raid_bdev *raid_bdev = io_device; 92 struct raid_bdev_io_channel *raid_ch = ctx_buf; 93 94 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_create_cb, %p\n", raid_ch); 95 96 assert(raid_bdev != NULL); 97 assert(raid_bdev->state == RAID_BDEV_STATE_ONLINE); 98 99 raid_ch->num_channels = raid_bdev->num_base_bdevs; 100 101 raid_ch->base_channel = calloc(raid_ch->num_channels, 102 sizeof(struct spdk_io_channel *)); 103 if (!raid_ch->base_channel) { 104 SPDK_ERRLOG("Unable to allocate base bdevs io channel\n"); 105 return -ENOMEM; 106 } 107 for (uint8_t i = 0; i < raid_ch->num_channels; i++) { 108 /* 109 * Get the spdk_io_channel for all the base bdevs. This is used during 110 * split logic to send the respective child bdev ios to respective base 111 * bdev io channel. 112 */ 113 raid_ch->base_channel[i] = spdk_bdev_get_io_channel( 114 raid_bdev->base_bdev_info[i].desc); 115 if (!raid_ch->base_channel[i]) { 116 for (uint8_t j = 0; j < i; j++) { 117 spdk_put_io_channel(raid_ch->base_channel[j]); 118 } 119 free(raid_ch->base_channel); 120 raid_ch->base_channel = NULL; 121 SPDK_ERRLOG("Unable to create io channel for base bdev\n"); 122 return -ENOMEM; 123 } 124 } 125 126 return 0; 127 } 128 129 /* 130 * brief: 131 * raid_bdev_destroy_cb function is a cb function for raid bdev which deletes the 132 * hierarchy from raid bdev to base bdev io channels. 
It will be called per core 133 * params: 134 * io_device - pointer to raid bdev io device represented by raid_bdev 135 * ctx_buf - pointer to context buffer for raid bdev io channel 136 * returns: 137 * none 138 */ 139 static void 140 raid_bdev_destroy_cb(void *io_device, void *ctx_buf) 141 { 142 struct raid_bdev_io_channel *raid_ch = ctx_buf; 143 144 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_destroy_cb\n"); 145 146 assert(raid_ch != NULL); 147 assert(raid_ch->base_channel); 148 for (uint8_t i = 0; i < raid_ch->num_channels; i++) { 149 /* Free base bdev channels */ 150 assert(raid_ch->base_channel[i] != NULL); 151 spdk_put_io_channel(raid_ch->base_channel[i]); 152 } 153 free(raid_ch->base_channel); 154 raid_ch->base_channel = NULL; 155 } 156 157 /* 158 * brief: 159 * raid_bdev_cleanup is used to cleanup and free raid_bdev related data 160 * structures. 161 * params: 162 * raid_bdev - pointer to raid_bdev 163 * returns: 164 * none 165 */ 166 static void 167 raid_bdev_cleanup(struct raid_bdev *raid_bdev) 168 { 169 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_cleanup, %p name %s, state %u, config %p\n", 170 raid_bdev, 171 raid_bdev->bdev.name, raid_bdev->state, raid_bdev->config); 172 if (raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) { 173 TAILQ_REMOVE(&g_raid_bdev_configuring_list, raid_bdev, state_link); 174 } else if (raid_bdev->state == RAID_BDEV_STATE_OFFLINE) { 175 TAILQ_REMOVE(&g_raid_bdev_offline_list, raid_bdev, state_link); 176 } else { 177 assert(0); 178 } 179 TAILQ_REMOVE(&g_raid_bdev_list, raid_bdev, global_link); 180 free(raid_bdev->bdev.name); 181 free(raid_bdev->base_bdev_info); 182 if (raid_bdev->config) { 183 raid_bdev->config->raid_bdev = NULL; 184 } 185 free(raid_bdev); 186 } 187 188 /* 189 * brief: 190 * free resource of base bdev for raid bdev 191 * params: 192 * raid_bdev - pointer to raid bdev 193 * base_bdev_slot - position to base bdev in raid bdev 194 * returns: 195 * 0 - success 196 * non zero - failure 197 */ 198 static void 199 
raid_bdev_free_base_bdev_resource(struct raid_bdev *raid_bdev, uint8_t base_bdev_slot) 200 { 201 struct raid_base_bdev_info *info; 202 203 info = &raid_bdev->base_bdev_info[base_bdev_slot]; 204 205 spdk_bdev_module_release_bdev(info->bdev); 206 spdk_bdev_close(info->desc); 207 info->desc = NULL; 208 info->bdev = NULL; 209 210 assert(raid_bdev->num_base_bdevs_discovered); 211 raid_bdev->num_base_bdevs_discovered--; 212 } 213 214 /* 215 * brief: 216 * raid_bdev_destruct is the destruct function table pointer for raid bdev 217 * params: 218 * ctxt - pointer to raid_bdev 219 * returns: 220 * 0 - success 221 * non zero - failure 222 */ 223 static int 224 raid_bdev_destruct(void *ctxt) 225 { 226 struct raid_bdev *raid_bdev = ctxt; 227 228 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_destruct\n"); 229 230 raid_bdev->destruct_called = true; 231 for (uint8_t i = 0; i < raid_bdev->num_base_bdevs; i++) { 232 /* 233 * Close all base bdev descriptors for which call has come from below 234 * layers. Also close the descriptors if we have started shutdown. 235 */ 236 if (g_shutdown_started || 237 ((raid_bdev->base_bdev_info[i].remove_scheduled == true) && 238 (raid_bdev->base_bdev_info[i].bdev != NULL))) { 239 raid_bdev_free_base_bdev_resource(raid_bdev, i); 240 } 241 } 242 243 if (g_shutdown_started) { 244 TAILQ_REMOVE(&g_raid_bdev_configured_list, raid_bdev, state_link); 245 raid_bdev->state = RAID_BDEV_STATE_OFFLINE; 246 TAILQ_INSERT_TAIL(&g_raid_bdev_offline_list, raid_bdev, state_link); 247 } 248 249 spdk_io_device_unregister(raid_bdev, NULL); 250 251 if (raid_bdev->num_base_bdevs_discovered == 0) { 252 /* Free raid_bdev when there are no base bdevs left */ 253 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev base bdevs is 0, going to free all in destruct\n"); 254 raid_bdev_cleanup(raid_bdev); 255 } 256 257 return 0; 258 } 259 260 /* 261 * brief: 262 * raid_bdev_io_completion function is called by lower layers to notify raid 263 * module that particular bdev_io is completed. 
264 * params: 265 * bdev_io - pointer to bdev io submitted to lower layers, like child io 266 * success - bdev_io status 267 * cb_arg - function callback context, like parent io pointer 268 * returns: 269 * none 270 */ 271 static void 272 raid_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 273 { 274 struct spdk_bdev_io *parent_io = cb_arg; 275 276 spdk_bdev_free_io(bdev_io); 277 278 if (success) { 279 spdk_bdev_io_complete(parent_io, SPDK_BDEV_IO_STATUS_SUCCESS); 280 } else { 281 spdk_bdev_io_complete(parent_io, SPDK_BDEV_IO_STATUS_FAILED); 282 } 283 } 284 285 /* 286 * brief: 287 * raid0_submit_rw_request function is used to submit I/O to the correct 288 * member disk for raid0 bdevs. 289 * params: 290 * bdev_io - parent bdev io 291 * start_strip - start strip number of this io 292 * returns: 293 * 0 - success 294 * non zero - failure 295 */ 296 static int 297 raid0_submit_rw_request(struct spdk_bdev_io *bdev_io, uint64_t start_strip) 298 { 299 struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 300 struct raid_bdev_io_channel *raid_ch = spdk_io_channel_get_ctx(raid_io->ch); 301 struct raid_bdev *raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt; 302 uint64_t pd_strip; 303 uint32_t offset_in_strip; 304 uint64_t pd_lba; 305 uint64_t pd_blocks; 306 uint8_t pd_idx; 307 int ret = 0; 308 309 pd_strip = start_strip / raid_bdev->num_base_bdevs; 310 pd_idx = start_strip % raid_bdev->num_base_bdevs; 311 offset_in_strip = bdev_io->u.bdev.offset_blocks & (raid_bdev->strip_size - 1); 312 pd_lba = (pd_strip << raid_bdev->strip_size_shift) + offset_in_strip; 313 pd_blocks = bdev_io->u.bdev.num_blocks; 314 if (raid_bdev->base_bdev_info[pd_idx].desc == NULL) { 315 SPDK_ERRLOG("base bdev desc null for pd_idx %u\n", pd_idx); 316 assert(0); 317 } 318 319 /* 320 * Submit child io to bdev layer with using base bdev descriptors, base 321 * bdev lba, base bdev child io length in blocks, buffer, completion 322 * function and function 
callback context 323 */ 324 assert(raid_ch != NULL); 325 assert(raid_ch->base_channel); 326 if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) { 327 ret = spdk_bdev_readv_blocks(raid_bdev->base_bdev_info[pd_idx].desc, 328 raid_ch->base_channel[pd_idx], 329 bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, 330 pd_lba, pd_blocks, raid_bdev_io_completion, 331 bdev_io); 332 } else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) { 333 ret = spdk_bdev_writev_blocks(raid_bdev->base_bdev_info[pd_idx].desc, 334 raid_ch->base_channel[pd_idx], 335 bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, 336 pd_lba, pd_blocks, raid_bdev_io_completion, 337 bdev_io); 338 } else { 339 SPDK_ERRLOG("Recvd not supported io type %u\n", bdev_io->type); 340 assert(0); 341 } 342 343 return ret; 344 } 345 346 /* 347 * brief: 348 * raid0_get_curr_base_bdev_index function calculates the base bdev index 349 * for raid0 bdevs. 350 * params: 351 * raid_bdev - pointer to raid bdev 352 * raid_io - pointer to parent io context 353 * returns: 354 * base bdev index 355 */ 356 static uint8_t 357 raid0_get_curr_base_bdev_index(struct raid_bdev *raid_bdev, struct raid_bdev_io *raid_io) 358 { 359 struct spdk_bdev_io *bdev_io; 360 uint64_t start_strip; 361 362 bdev_io = SPDK_CONTAINEROF(raid_io, struct spdk_bdev_io, driver_ctx); 363 start_strip = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift; 364 365 return (start_strip % raid_bdev->num_base_bdevs); 366 } 367 368 /* 369 * brief: 370 * raid_bdev_io_submit_fail_process function processes the IO which failed to submit. 371 * It will try to queue the IOs after storing the context to bdev wait queue logic. 
372 * params: 373 * bdev_io - pointer to bdev_io 374 * raid_io - pointer to raid bdev io 375 * ret - return code 376 * returns: 377 * none 378 */ 379 static void 380 raid_bdev_io_submit_fail_process(struct raid_bdev *raid_bdev, struct spdk_bdev_io *bdev_io, 381 struct raid_bdev_io *raid_io, int ret) 382 { 383 struct raid_bdev_io_channel *raid_ch; 384 uint8_t pd_idx; 385 386 if (ret != -ENOMEM) { 387 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 388 } else { 389 /* Queue the IO to bdev layer wait queue */ 390 pd_idx = raid_bdev->fn_table->get_curr_base_index(raid_bdev, raid_io); 391 raid_io->waitq_entry.bdev = raid_bdev->base_bdev_info[pd_idx].bdev; 392 raid_io->waitq_entry.cb_fn = raid_bdev->fn_table->waitq_io_process; 393 raid_io->waitq_entry.cb_arg = raid_io; 394 raid_ch = spdk_io_channel_get_ctx(raid_io->ch); 395 if (spdk_bdev_queue_io_wait(raid_bdev->base_bdev_info[pd_idx].bdev, 396 raid_ch->base_channel[pd_idx], 397 &raid_io->waitq_entry) != 0) { 398 SPDK_ERRLOG("bdev io waitq error, it should not happen\n"); 399 assert(0); 400 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 401 } 402 } 403 } 404 405 /* 406 * brief: 407 * raid0_waitq_io_process function is the callback function 408 * registered by raid bdev module to bdev when bdev_io was unavailable 409 * for raid0 bdevs. 410 * params: 411 * ctx - pointer to raid_bdev_io 412 * returns: 413 * none 414 */ 415 static void 416 raid0_waitq_io_process(void *ctx) 417 { 418 struct raid_bdev_io *raid_io = ctx; 419 struct spdk_bdev_io *bdev_io; 420 struct raid_bdev *raid_bdev; 421 int ret; 422 uint64_t start_strip; 423 424 bdev_io = SPDK_CONTAINEROF(raid_io, struct spdk_bdev_io, driver_ctx); 425 /* 426 * Try to submit childs of parent bdev io. If failed due to resource 427 * crunch then break the loop and don't try to process other queued IOs. 
428 */ 429 raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt; 430 start_strip = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift; 431 ret = raid0_submit_rw_request(bdev_io, start_strip); 432 if (ret != 0) { 433 raid_bdev_io_submit_fail_process(raid_bdev, bdev_io, raid_io, ret); 434 } 435 } 436 437 /* 438 * brief: 439 * raid0_start_rw_request function is the submit_request function for 440 * read/write requests for raid0 bdevs. 441 * params: 442 * ch - pointer to raid bdev io channel 443 * bdev_io - pointer to parent bdev_io on raid bdev device 444 * returns: 445 * none 446 */ 447 static void 448 raid0_start_rw_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 449 { 450 struct raid_bdev_io *raid_io; 451 struct raid_bdev *raid_bdev; 452 uint64_t start_strip = 0; 453 uint64_t end_strip = 0; 454 int ret; 455 456 raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt; 457 raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 458 raid_io->ch = ch; 459 start_strip = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift; 460 end_strip = (bdev_io->u.bdev.offset_blocks + bdev_io->u.bdev.num_blocks - 1) >> 461 raid_bdev->strip_size_shift; 462 if (start_strip != end_strip && raid_bdev->num_base_bdevs > 1) { 463 assert(false); 464 SPDK_ERRLOG("I/O spans strip boundary!\n"); 465 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 466 return; 467 } 468 ret = raid0_submit_rw_request(bdev_io, start_strip); 469 if (ret != 0) { 470 raid_bdev_io_submit_fail_process(raid_bdev, bdev_io, raid_io, ret); 471 } 472 } 473 474 /* 475 * brief: 476 * raid_bdev_base_io_completion is the completion callback for member disk requests 477 * params: 478 * bdev_io - pointer to member disk requested bdev_io 479 * success - true if successful, false if unsuccessful 480 * cb_arg - callback argument (parent raid bdev_io) 481 * returns: 482 * none 483 */ 484 static void 485 raid_bdev_base_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 486 { 
487 struct spdk_bdev_io *parent_io = cb_arg; 488 struct raid_bdev_io *raid_io = (struct raid_bdev_io *)parent_io->driver_ctx; 489 490 spdk_bdev_free_io(bdev_io); 491 492 if (!success) { 493 raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; 494 } 495 496 raid_io->base_bdev_io_completed++; 497 if (raid_io->base_bdev_io_completed == raid_io->base_bdev_io_expected) { 498 spdk_bdev_io_complete(parent_io, raid_io->base_bdev_io_status); 499 } 500 } 501 502 /* 503 * brief: 504 * raid_bdev_base_io_submit_fail_process processes IO requests for member disk 505 * which failed to submit 506 * params: 507 * raid_bdev_io - pointer to raid bdev_io 508 * pd_idx - base_dev index in raid_bdev 509 * cb_fn - callback when the spdk_bdev_io for base_bdev becomes available 510 * ret - return code 511 * returns: 512 * none 513 */ 514 static void 515 raid_bdev_base_io_submit_fail_process(struct spdk_bdev_io *raid_bdev_io, uint8_t pd_idx, 516 spdk_bdev_io_wait_cb cb_fn, int ret) 517 { 518 struct raid_bdev_io *raid_io = (struct raid_bdev_io *)raid_bdev_io->driver_ctx; 519 struct raid_bdev_io_channel *raid_ch = spdk_io_channel_get_ctx(raid_io->ch); 520 struct raid_bdev *raid_bdev = (struct raid_bdev *)raid_bdev_io->bdev->ctxt; 521 522 assert(ret != 0); 523 524 if (ret == -ENOMEM) { 525 raid_io->waitq_entry.bdev = raid_bdev->base_bdev_info[pd_idx].bdev; 526 raid_io->waitq_entry.cb_fn = cb_fn; 527 raid_io->waitq_entry.cb_arg = raid_bdev_io; 528 spdk_bdev_queue_io_wait(raid_bdev->base_bdev_info[pd_idx].bdev, 529 raid_ch->base_channel[pd_idx], 530 &raid_io->waitq_entry); 531 return; 532 } 533 534 SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); 535 assert(false); 536 spdk_bdev_io_complete(raid_bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 537 } 538 539 /* 540 * brief: 541 * _raid_bdev_submit_reset_request_next function submits the next batch of reset requests 542 * to member disks; it will submit as many as possible unless a reset fails with -ENOMEM, in 543 * which 
case it will queue it for later submission 544 * params: 545 * bdev_io - pointer to parent bdev_io on raid bdev device 546 * returns: 547 * none 548 */ 549 static void 550 _raid_bdev_submit_reset_request_next(void *_bdev_io) 551 { 552 struct spdk_bdev_io *bdev_io = _bdev_io; 553 struct raid_bdev_io *raid_io; 554 struct raid_bdev *raid_bdev; 555 struct raid_bdev_io_channel *raid_ch; 556 int ret; 557 uint8_t i; 558 559 raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt; 560 raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 561 raid_ch = spdk_io_channel_get_ctx(raid_io->ch); 562 563 while (raid_io->base_bdev_io_submitted < raid_bdev->num_base_bdevs) { 564 i = raid_io->base_bdev_io_submitted; 565 ret = spdk_bdev_reset(raid_bdev->base_bdev_info[i].desc, 566 raid_ch->base_channel[i], 567 raid_bdev_base_io_completion, bdev_io); 568 if (ret == 0) { 569 raid_io->base_bdev_io_submitted++; 570 } else { 571 raid_bdev_base_io_submit_fail_process(bdev_io, i, 572 _raid_bdev_submit_reset_request_next, ret); 573 return; 574 } 575 } 576 } 577 578 /* 579 * brief: 580 * _raid_bdev_submit_reset_request function is the submit_request function for 581 * reset requests 582 * params: 583 * ch - pointer to raid bdev io channel 584 * bdev_io - pointer to parent bdev_io on raid bdev device 585 * returns: 586 * none 587 */ 588 static void 589 _raid_bdev_submit_reset_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 590 { 591 struct raid_bdev_io *raid_io; 592 struct raid_bdev *raid_bdev; 593 594 raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt; 595 raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 596 raid_io->ch = ch; 597 raid_io->base_bdev_io_submitted = 0; 598 raid_io->base_bdev_io_completed = 0; 599 raid_io->base_bdev_io_expected = raid_bdev->num_base_bdevs; 600 raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS; 601 _raid_bdev_submit_reset_request_next(bdev_io); 602 } 603 604 static inline void 605 _raid0_get_io_range(struct raid_bdev_io_range *io_range, 
606 uint8_t num_base_bdevs, uint64_t strip_size, uint64_t strip_size_shift, 607 uint64_t offset_blocks, uint64_t num_blocks) 608 { 609 uint64_t start_strip; 610 uint64_t end_strip; 611 612 io_range->strip_size = strip_size; 613 614 /* The start and end strip index in raid0 bdev scope */ 615 start_strip = offset_blocks >> strip_size_shift; 616 end_strip = (offset_blocks + num_blocks - 1) >> strip_size_shift; 617 io_range->start_strip_in_disk = start_strip / num_base_bdevs; 618 io_range->end_strip_in_disk = end_strip / num_base_bdevs; 619 620 /* The first strip may have unaligned start LBA offset. 621 * The end strip may have unaligned end LBA offset. 622 * Strips between them certainly have aligned offset and length to boundaries. 623 */ 624 io_range->start_offset_in_strip = offset_blocks % strip_size; 625 io_range->end_offset_in_strip = (offset_blocks + num_blocks - 1) % strip_size; 626 627 /* The base bdev indexes in which start and end strips are located */ 628 io_range->start_disk = start_strip % num_base_bdevs; 629 io_range->end_disk = end_strip % num_base_bdevs; 630 631 /* Calculate how many base_bdevs are involved in io operation. 632 * Number of base bdevs involved is between 1 and num_base_bdevs. 633 * It will be 1 if the first strip and last strip are the same one. 
634 */ 635 io_range->n_disks_involved = spdk_min((end_strip - start_strip + 1), num_base_bdevs); 636 } 637 638 static inline void 639 _raid0_split_io_range(struct raid_bdev_io_range *io_range, uint8_t disk_idx, 640 uint64_t *_offset_in_disk, uint64_t *_nblocks_in_disk) 641 { 642 uint64_t n_strips_in_disk; 643 uint64_t start_offset_in_disk; 644 uint64_t end_offset_in_disk; 645 uint64_t offset_in_disk; 646 uint64_t nblocks_in_disk; 647 uint64_t start_strip_in_disk; 648 uint64_t end_strip_in_disk; 649 650 start_strip_in_disk = io_range->start_strip_in_disk; 651 if (disk_idx < io_range->start_disk) { 652 start_strip_in_disk += 1; 653 } 654 655 end_strip_in_disk = io_range->end_strip_in_disk; 656 if (disk_idx > io_range->end_disk) { 657 end_strip_in_disk -= 1; 658 } 659 660 assert(end_strip_in_disk >= start_strip_in_disk); 661 n_strips_in_disk = end_strip_in_disk - start_strip_in_disk + 1; 662 663 if (disk_idx == io_range->start_disk) { 664 start_offset_in_disk = io_range->start_offset_in_strip; 665 } else { 666 start_offset_in_disk = 0; 667 } 668 669 if (disk_idx == io_range->end_disk) { 670 end_offset_in_disk = io_range->end_offset_in_strip; 671 } else { 672 end_offset_in_disk = io_range->strip_size - 1; 673 } 674 675 offset_in_disk = start_offset_in_disk + start_strip_in_disk * io_range->strip_size; 676 nblocks_in_disk = (n_strips_in_disk - 1) * io_range->strip_size 677 + end_offset_in_disk - start_offset_in_disk + 1; 678 679 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, 680 "raid_bdev (strip_size 0x%lx) splits IO to base_bdev (%u) at (0x%lx, 0x%lx).\n", 681 io_range->strip_size, disk_idx, offset_in_disk, nblocks_in_disk); 682 683 *_offset_in_disk = offset_in_disk; 684 *_nblocks_in_disk = nblocks_in_disk; 685 } 686 687 /* 688 * brief: 689 * _raid_bdev_submit_null_payload_request_next function submits the next batch of 690 * io requests with range but without payload, like FLUSH and UNMAP, to member disks; 691 * it will submit as many as possible unless one base io request fails 
with -ENOMEM, 692 * in which case it will queue itself for later submission. 693 * params: 694 * bdev_io - pointer to parent bdev_io on raid bdev device 695 * returns: 696 * none 697 */ 698 static void 699 _raid_bdev_submit_null_payload_request_next(void *_bdev_io) 700 { 701 struct spdk_bdev_io *bdev_io = _bdev_io; 702 struct raid_bdev_io *raid_io; 703 struct raid_bdev *raid_bdev; 704 struct raid_bdev_io_channel *raid_ch; 705 struct raid_bdev_io_range io_range; 706 int ret; 707 708 raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt; 709 raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 710 raid_ch = spdk_io_channel_get_ctx(raid_io->ch); 711 712 raid_bdev->fn_table->get_io_range(&io_range, raid_bdev->num_base_bdevs, 713 raid_bdev->strip_size, raid_bdev->strip_size_shift, 714 bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks); 715 716 raid_io->base_bdev_io_expected = io_range.n_disks_involved; 717 718 while (raid_io->base_bdev_io_submitted < raid_io->base_bdev_io_expected) { 719 uint8_t disk_idx; 720 uint64_t offset_in_disk; 721 uint64_t nblocks_in_disk; 722 723 /* base_bdev is started from start_disk to end_disk. 724 * It is possible that index of start_disk is larger than end_disk's. 
725 */ 726 disk_idx = (io_range.start_disk + raid_io->base_bdev_io_submitted) % raid_bdev->num_base_bdevs; 727 728 raid_bdev->fn_table->split_io_range(&io_range, disk_idx, &offset_in_disk, &nblocks_in_disk); 729 730 switch (bdev_io->type) { 731 case SPDK_BDEV_IO_TYPE_UNMAP: 732 ret = spdk_bdev_unmap_blocks(raid_bdev->base_bdev_info[disk_idx].desc, 733 raid_ch->base_channel[disk_idx], 734 offset_in_disk, nblocks_in_disk, 735 raid_bdev_base_io_completion, bdev_io); 736 break; 737 738 case SPDK_BDEV_IO_TYPE_FLUSH: 739 ret = spdk_bdev_flush_blocks(raid_bdev->base_bdev_info[disk_idx].desc, 740 raid_ch->base_channel[disk_idx], 741 offset_in_disk, nblocks_in_disk, 742 raid_bdev_base_io_completion, bdev_io); 743 break; 744 745 default: 746 SPDK_ERRLOG("submit request, invalid io type with null payload %u\n", bdev_io->type); 747 assert(false); 748 ret = -EIO; 749 } 750 751 if (ret == 0) { 752 raid_io->base_bdev_io_submitted++; 753 } else { 754 raid_bdev_base_io_submit_fail_process(bdev_io, disk_idx, 755 _raid_bdev_submit_null_payload_request_next, ret); 756 return; 757 } 758 } 759 } 760 761 /* 762 * brief: 763 * _raid_bdev_submit_null_payload_request function is the submit_request function 764 * for io requests with range but without payload, like UNMAP and FLUSH. 
765 * params: 766 * ch - pointer to raid bdev io channel 767 * bdev_io - pointer to parent bdev_io on raid bdev device 768 * returns: 769 * none 770 */ 771 static void 772 _raid_bdev_submit_null_payload_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 773 { 774 struct raid_bdev_io *raid_io; 775 776 raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 777 raid_io->ch = ch; 778 raid_io->base_bdev_io_submitted = 0; 779 raid_io->base_bdev_io_completed = 0; 780 raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS; 781 782 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev: type %d, range (0x%lx, 0x%lx)\n", 783 bdev_io->type, bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks); 784 785 _raid_bdev_submit_null_payload_request_next(bdev_io); 786 } 787 788 /* 789 * brief: 790 * Callback function to spdk_bdev_io_get_buf. 791 * params: 792 * ch - pointer to raid bdev io channel 793 * bdev_io - pointer to parent bdev_io on raid bdev device 794 * success - True if buffer is allocated or false otherwise. 795 * returns: 796 * none 797 */ 798 static void 799 raid_bdev_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, 800 bool success) 801 { 802 struct raid_bdev *raid_bdev; 803 804 raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt; 805 806 if (!success) { 807 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 808 return; 809 } 810 811 raid_bdev->fn_table->start_rw_request(ch, bdev_io); 812 } 813 814 /* 815 * brief: 816 * raid_bdev_submit_request function is the submit_request function pointer of 817 * raid bdev function table. This is used to submit the io on raid_bdev to below 818 * layers. 
819 * params: 820 * ch - pointer to raid bdev io channel 821 * bdev_io - pointer to parent bdev_io on raid bdev device 822 * returns: 823 * none 824 */ 825 static void 826 raid_bdev_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 827 { 828 struct raid_bdev *raid_bdev; 829 830 raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt; 831 832 switch (bdev_io->type) { 833 case SPDK_BDEV_IO_TYPE_READ: 834 spdk_bdev_io_get_buf(bdev_io, raid_bdev_get_buf_cb, 835 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); 836 break; 837 case SPDK_BDEV_IO_TYPE_WRITE: 838 raid_bdev->fn_table->start_rw_request(ch, bdev_io); 839 break; 840 841 case SPDK_BDEV_IO_TYPE_RESET: 842 _raid_bdev_submit_reset_request(ch, bdev_io); 843 break; 844 845 case SPDK_BDEV_IO_TYPE_FLUSH: 846 case SPDK_BDEV_IO_TYPE_UNMAP: 847 _raid_bdev_submit_null_payload_request(ch, bdev_io); 848 break; 849 850 default: 851 SPDK_ERRLOG("submit request, invalid io type %u\n", bdev_io->type); 852 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 853 break; 854 } 855 856 } 857 858 /* 859 * brief: 860 * _raid_bdev_io_type_supported checks whether io_type is supported in 861 * all base bdev modules of raid bdev module. If anyone among the base_bdevs 862 * doesn't support, the raid device doesn't supports. 
863 * 864 * params: 865 * raid_bdev - pointer to raid bdev context 866 * io_type - io type 867 * returns: 868 * true - io_type is supported 869 * false - io_type is not supported 870 */ 871 inline static bool 872 _raid_bdev_io_type_supported(struct raid_bdev *raid_bdev, enum spdk_bdev_io_type io_type) 873 { 874 uint8_t i; 875 876 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 877 if (raid_bdev->base_bdev_info[i].bdev == NULL) { 878 assert(false); 879 continue; 880 } 881 882 if (spdk_bdev_io_type_supported(raid_bdev->base_bdev_info[i].bdev, 883 io_type) == false) { 884 return false; 885 } 886 } 887 888 return true; 889 } 890 891 /* 892 * brief: 893 * raid_bdev_io_type_supported is the io_supported function for bdev function 894 * table which returns whether the particular io type is supported or not by 895 * raid bdev module 896 * params: 897 * ctx - pointer to raid bdev context 898 * type - io type 899 * returns: 900 * true - io_type is supported 901 * false - io_type is not supported 902 */ 903 static bool 904 raid_bdev_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 905 { 906 switch (io_type) { 907 case SPDK_BDEV_IO_TYPE_READ: 908 case SPDK_BDEV_IO_TYPE_WRITE: 909 return true; 910 911 case SPDK_BDEV_IO_TYPE_FLUSH: 912 case SPDK_BDEV_IO_TYPE_RESET: 913 case SPDK_BDEV_IO_TYPE_UNMAP: 914 return _raid_bdev_io_type_supported(ctx, io_type); 915 916 default: 917 return false; 918 } 919 920 return false; 921 } 922 923 /* 924 * brief: 925 * raid_bdev_get_io_channel is the get_io_channel function table pointer for 926 * raid bdev. 
This is used to return the io channel for this raid bdev 927 * params: 928 * ctxt - pointer to raid_bdev 929 * returns: 930 * pointer to io channel for raid bdev 931 */ 932 static struct spdk_io_channel * 933 raid_bdev_get_io_channel(void *ctxt) 934 { 935 struct raid_bdev *raid_bdev = ctxt; 936 937 return spdk_get_io_channel(raid_bdev); 938 } 939 940 /* 941 * brief: 942 * raid_bdev_dump_info_json is the function table pointer for raid bdev 943 * params: 944 * ctx - pointer to raid_bdev 945 * w - pointer to json context 946 * returns: 947 * 0 - success 948 * non zero - failure 949 */ 950 static int 951 raid_bdev_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) 952 { 953 struct raid_bdev *raid_bdev = ctx; 954 955 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_dump_config_json\n"); 956 assert(raid_bdev != NULL); 957 958 /* Dump the raid bdev configuration related information */ 959 spdk_json_write_named_object_begin(w, "raid"); 960 spdk_json_write_named_uint32(w, "strip_size", raid_bdev->strip_size); 961 spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb); 962 spdk_json_write_named_uint32(w, "state", raid_bdev->state); 963 spdk_json_write_named_uint32(w, "raid_level", raid_bdev->raid_level); 964 spdk_json_write_named_uint32(w, "destruct_called", raid_bdev->destruct_called); 965 spdk_json_write_named_uint32(w, "num_base_bdevs", raid_bdev->num_base_bdevs); 966 spdk_json_write_named_uint32(w, "num_base_bdevs_discovered", raid_bdev->num_base_bdevs_discovered); 967 spdk_json_write_name(w, "base_bdevs_list"); 968 spdk_json_write_array_begin(w); 969 for (uint8_t i = 0; i < raid_bdev->num_base_bdevs; i++) { 970 if (raid_bdev->base_bdev_info[i].bdev) { 971 spdk_json_write_string(w, raid_bdev->base_bdev_info[i].bdev->name); 972 } else { 973 spdk_json_write_null(w); 974 } 975 } 976 spdk_json_write_array_end(w); 977 spdk_json_write_object_end(w); 978 979 return 0; 980 } 981 982 /* 983 * brief: 984 * raid_bdev_write_config_json is the function table 
pointer for raid bdev 985 * params: 986 * bdev - pointer to spdk_bdev 987 * w - pointer to json context 988 * returns: 989 * none 990 */ 991 static void 992 raid_bdev_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 993 { 994 struct raid_bdev *raid_bdev = bdev->ctxt; 995 struct spdk_bdev *base; 996 uint8_t i; 997 998 spdk_json_write_object_begin(w); 999 1000 spdk_json_write_named_string(w, "method", "bdev_raid_create"); 1001 1002 spdk_json_write_named_object_begin(w, "params"); 1003 spdk_json_write_named_string(w, "name", bdev->name); 1004 spdk_json_write_named_uint32(w, "strip_size", raid_bdev->strip_size_kb); 1005 spdk_json_write_named_uint32(w, "raid_level", raid_bdev->raid_level); 1006 1007 spdk_json_write_named_array_begin(w, "base_bdevs"); 1008 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 1009 base = raid_bdev->base_bdev_info[i].bdev; 1010 if (base) { 1011 spdk_json_write_string(w, base->name); 1012 } 1013 } 1014 spdk_json_write_array_end(w); 1015 spdk_json_write_object_end(w); 1016 1017 spdk_json_write_object_end(w); 1018 } 1019 1020 /* g_raid_bdev_fn_table is the function table for raid bdev */ 1021 static const struct spdk_bdev_fn_table g_raid_bdev_fn_table = { 1022 .destruct = raid_bdev_destruct, 1023 .submit_request = raid_bdev_submit_request, 1024 .io_type_supported = raid_bdev_io_type_supported, 1025 .get_io_channel = raid_bdev_get_io_channel, 1026 .dump_info_json = raid_bdev_dump_info_json, 1027 .write_config_json = raid_bdev_write_config_json, 1028 }; 1029 1030 /* 1031 * brief: 1032 * raid_bdev_config_cleanup function is used to free memory for one raid_bdev in configuration 1033 * params: 1034 * raid_cfg - pointer to raid_bdev_config structure 1035 * returns: 1036 * none 1037 */ 1038 void 1039 raid_bdev_config_cleanup(struct raid_bdev_config *raid_cfg) 1040 { 1041 uint8_t i; 1042 1043 TAILQ_REMOVE(&g_raid_config.raid_bdev_config_head, raid_cfg, link); 1044 g_raid_config.total_raid_bdev--; 1045 1046 if 
(raid_cfg->base_bdev) { 1047 for (i = 0; i < raid_cfg->num_base_bdevs; i++) { 1048 free(raid_cfg->base_bdev[i].name); 1049 } 1050 free(raid_cfg->base_bdev); 1051 } 1052 free(raid_cfg->name); 1053 free(raid_cfg); 1054 } 1055 1056 /* 1057 * brief: 1058 * raid_bdev_free is the raid bdev function table function pointer. This is 1059 * called on bdev free path 1060 * params: 1061 * none 1062 * returns: 1063 * none 1064 */ 1065 static void 1066 raid_bdev_free(void) 1067 { 1068 struct raid_bdev_config *raid_cfg, *tmp; 1069 1070 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_free\n"); 1071 TAILQ_FOREACH_SAFE(raid_cfg, &g_raid_config.raid_bdev_config_head, link, tmp) { 1072 raid_bdev_config_cleanup(raid_cfg); 1073 } 1074 } 1075 1076 /* brief 1077 * raid_bdev_config_find_by_name is a helper function to find raid bdev config 1078 * by name as key. 1079 * 1080 * params: 1081 * raid_name - name for raid bdev. 1082 */ 1083 struct raid_bdev_config * 1084 raid_bdev_config_find_by_name(const char *raid_name) 1085 { 1086 struct raid_bdev_config *raid_cfg; 1087 1088 TAILQ_FOREACH(raid_cfg, &g_raid_config.raid_bdev_config_head, link) { 1089 if (!strcmp(raid_cfg->name, raid_name)) { 1090 return raid_cfg; 1091 } 1092 } 1093 1094 return raid_cfg; 1095 } 1096 1097 /* 1098 * brief 1099 * raid_bdev_config_add function adds config for newly created raid bdev. 1100 * 1101 * params: 1102 * raid_name - name for raid bdev. 1103 * strip_size - strip size in KB 1104 * num_base_bdevs - number of base bdevs. 1105 * raid_level - raid level, only raid level 0 is supported. 
1106 * _raid_cfg - Pointer to newly added configuration 1107 */ 1108 int 1109 raid_bdev_config_add(const char *raid_name, uint32_t strip_size, uint8_t num_base_bdevs, 1110 uint8_t raid_level, struct raid_bdev_config **_raid_cfg) 1111 { 1112 struct raid_bdev_config *raid_cfg; 1113 1114 raid_cfg = raid_bdev_config_find_by_name(raid_name); 1115 if (raid_cfg != NULL) { 1116 SPDK_ERRLOG("Duplicate raid bdev name found in config file %s\n", 1117 raid_name); 1118 return -EEXIST; 1119 } 1120 1121 if (spdk_u32_is_pow2(strip_size) == false) { 1122 SPDK_ERRLOG("Invalid strip size %" PRIu32 "\n", strip_size); 1123 return -EINVAL; 1124 } 1125 1126 if (num_base_bdevs == 0) { 1127 SPDK_ERRLOG("Invalid base device count %u\n", num_base_bdevs); 1128 return -EINVAL; 1129 } 1130 1131 if (raid_level != 0) { 1132 SPDK_ERRLOG("invalid raid level %u, only raid level 0 is supported\n", 1133 raid_level); 1134 return -EINVAL; 1135 } 1136 1137 raid_cfg = calloc(1, sizeof(*raid_cfg)); 1138 if (raid_cfg == NULL) { 1139 SPDK_ERRLOG("unable to allocate memory\n"); 1140 return -ENOMEM; 1141 } 1142 1143 raid_cfg->name = strdup(raid_name); 1144 if (!raid_cfg->name) { 1145 free(raid_cfg); 1146 SPDK_ERRLOG("unable to allocate memory\n"); 1147 return -ENOMEM; 1148 } 1149 raid_cfg->strip_size = strip_size; 1150 raid_cfg->num_base_bdevs = num_base_bdevs; 1151 raid_cfg->raid_level = raid_level; 1152 1153 raid_cfg->base_bdev = calloc(num_base_bdevs, sizeof(*raid_cfg->base_bdev)); 1154 if (raid_cfg->base_bdev == NULL) { 1155 free(raid_cfg->name); 1156 free(raid_cfg); 1157 SPDK_ERRLOG("unable to allocate memory\n"); 1158 return -ENOMEM; 1159 } 1160 1161 TAILQ_INSERT_TAIL(&g_raid_config.raid_bdev_config_head, raid_cfg, link); 1162 g_raid_config.total_raid_bdev++; 1163 1164 *_raid_cfg = raid_cfg; 1165 return 0; 1166 } 1167 1168 /* 1169 * brief: 1170 * raid_bdev_config_add_base_bdev function add base bdev to raid bdev config. 
1171 * 1172 * params: 1173 * raid_cfg - pointer to raid bdev configuration 1174 * base_bdev_name - name of base bdev 1175 * slot - Position to add base bdev 1176 */ 1177 int 1178 raid_bdev_config_add_base_bdev(struct raid_bdev_config *raid_cfg, const char *base_bdev_name, 1179 uint8_t slot) 1180 { 1181 uint8_t i; 1182 struct raid_bdev_config *tmp; 1183 1184 if (slot >= raid_cfg->num_base_bdevs) { 1185 return -EINVAL; 1186 } 1187 1188 TAILQ_FOREACH(tmp, &g_raid_config.raid_bdev_config_head, link) { 1189 for (i = 0; i < tmp->num_base_bdevs; i++) { 1190 if (tmp->base_bdev[i].name != NULL) { 1191 if (!strcmp(tmp->base_bdev[i].name, base_bdev_name)) { 1192 SPDK_ERRLOG("duplicate base bdev name %s mentioned\n", 1193 base_bdev_name); 1194 return -EEXIST; 1195 } 1196 } 1197 } 1198 } 1199 1200 raid_cfg->base_bdev[slot].name = strdup(base_bdev_name); 1201 if (raid_cfg->base_bdev[slot].name == NULL) { 1202 SPDK_ERRLOG("unable to allocate memory\n"); 1203 return -ENOMEM; 1204 } 1205 1206 return 0; 1207 } 1208 /* 1209 * brief: 1210 * raid_bdev_parse_raid is used to parse the raid bdev from config file based on 1211 * pre-defined raid bdev format in config file. 
1212 * Format of config file: 1213 * [RAID1] 1214 * Name raid1 1215 * StripSize 64 1216 * NumDevices 2 1217 * RaidLevel 0 1218 * Devices Nvme0n1 Nvme1n1 1219 * 1220 * [RAID2] 1221 * Name raid2 1222 * StripSize 64 1223 * NumDevices 3 1224 * RaidLevel 0 1225 * Devices Nvme2n1 Nvme3n1 Nvme4n1 1226 * 1227 * params: 1228 * conf_section - pointer to config section 1229 * returns: 1230 * 0 - success 1231 * non zero - failure 1232 */ 1233 static int 1234 raid_bdev_parse_raid(struct spdk_conf_section *conf_section) 1235 { 1236 const char *raid_name; 1237 uint32_t strip_size; 1238 uint8_t num_base_bdevs, raid_level; 1239 const char *base_bdev_name; 1240 struct raid_bdev_config *raid_cfg; 1241 int rc, i, val; 1242 1243 raid_name = spdk_conf_section_get_val(conf_section, "Name"); 1244 if (raid_name == NULL) { 1245 SPDK_ERRLOG("raid_name is null\n"); 1246 return -EINVAL; 1247 } 1248 1249 val = spdk_conf_section_get_intval(conf_section, "StripSize"); 1250 if (val < 0) { 1251 return -EINVAL; 1252 } 1253 strip_size = val; 1254 1255 val = spdk_conf_section_get_intval(conf_section, "NumDevices"); 1256 if (val < 0) { 1257 return -EINVAL; 1258 } 1259 num_base_bdevs = val; 1260 1261 val = spdk_conf_section_get_intval(conf_section, "RaidLevel"); 1262 if (val < 0) { 1263 return -EINVAL; 1264 } 1265 raid_level = val; 1266 1267 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "%s %" PRIu32 " %u %u\n", 1268 raid_name, strip_size, num_base_bdevs, raid_level); 1269 1270 rc = raid_bdev_config_add(raid_name, strip_size, num_base_bdevs, raid_level, 1271 &raid_cfg); 1272 if (rc != 0) { 1273 SPDK_ERRLOG("Failed to add raid bdev config\n"); 1274 return rc; 1275 } 1276 1277 for (i = 0; true; i++) { 1278 base_bdev_name = spdk_conf_section_get_nmval(conf_section, "Devices", 0, i); 1279 if (base_bdev_name == NULL) { 1280 break; 1281 } 1282 if (i >= num_base_bdevs) { 1283 raid_bdev_config_cleanup(raid_cfg); 1284 SPDK_ERRLOG("Number of devices mentioned is more than count\n"); 1285 return -EINVAL; 1286 } 1287 1288 rc = 
raid_bdev_config_add_base_bdev(raid_cfg, base_bdev_name, i); 1289 if (rc != 0) { 1290 raid_bdev_config_cleanup(raid_cfg); 1291 SPDK_ERRLOG("Failed to add base bdev to raid bdev config\n"); 1292 return rc; 1293 } 1294 } 1295 1296 if (i != raid_cfg->num_base_bdevs) { 1297 raid_bdev_config_cleanup(raid_cfg); 1298 SPDK_ERRLOG("Number of devices mentioned is less than count\n"); 1299 return -EINVAL; 1300 } 1301 1302 rc = raid_bdev_create(raid_cfg); 1303 if (rc != 0) { 1304 raid_bdev_config_cleanup(raid_cfg); 1305 SPDK_ERRLOG("Failed to create raid bdev\n"); 1306 return rc; 1307 } 1308 1309 rc = raid_bdev_add_base_devices(raid_cfg); 1310 if (rc != 0) { 1311 SPDK_ERRLOG("Failed to add any base bdev to raid bdev\n"); 1312 /* Config is not removed in this case. */ 1313 } 1314 1315 return 0; 1316 } 1317 1318 /* 1319 * brief: 1320 * raid_bdev_parse_config is used to find the raid bdev config section and parse it 1321 * Format of config file: 1322 * params: 1323 * none 1324 * returns: 1325 * 0 - success 1326 * non zero - failure 1327 */ 1328 static int 1329 raid_bdev_parse_config(void) 1330 { 1331 int ret; 1332 struct spdk_conf_section *conf_section; 1333 1334 conf_section = spdk_conf_first_section(NULL); 1335 while (conf_section != NULL) { 1336 if (spdk_conf_section_match_prefix(conf_section, "RAID")) { 1337 ret = raid_bdev_parse_raid(conf_section); 1338 if (ret < 0) { 1339 SPDK_ERRLOG("Unable to parse raid bdev section\n"); 1340 return ret; 1341 } 1342 } 1343 conf_section = spdk_conf_next_section(conf_section); 1344 } 1345 1346 return 0; 1347 } 1348 1349 /* 1350 * brief: 1351 * raid_bdev_fini_start is called when bdev layer is starting the 1352 * shutdown process 1353 * params: 1354 * none 1355 * returns: 1356 * none 1357 */ 1358 static void 1359 raid_bdev_fini_start(void) 1360 { 1361 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_fini_start\n"); 1362 g_shutdown_started = true; 1363 } 1364 1365 /* 1366 * brief: 1367 * raid_bdev_exit is called on raid bdev module exit time by 
bdev layer 1368 * params: 1369 * none 1370 * returns: 1371 * none 1372 */ 1373 static void 1374 raid_bdev_exit(void) 1375 { 1376 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_exit\n"); 1377 raid_bdev_free(); 1378 } 1379 1380 /* 1381 * brief: 1382 * raid_bdev_get_ctx_size is used to return the context size of bdev_io for raid 1383 * module 1384 * params: 1385 * none 1386 * returns: 1387 * size of spdk_bdev_io context for raid 1388 */ 1389 static int 1390 raid_bdev_get_ctx_size(void) 1391 { 1392 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_get_ctx_size\n"); 1393 return sizeof(struct raid_bdev_io); 1394 } 1395 1396 /* 1397 * brief: 1398 * raid_bdev_get_running_config is used to get the configuration options. 1399 * 1400 * params: 1401 * fp - The pointer to a file that will be written to the configuration options. 1402 * returns: 1403 * none 1404 */ 1405 static void 1406 raid_bdev_get_running_config(FILE *fp) 1407 { 1408 struct raid_bdev *raid_bdev; 1409 struct spdk_bdev *base; 1410 int index = 1; 1411 uint8_t i; 1412 1413 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_configured_list, state_link) { 1414 fprintf(fp, 1415 "\n" 1416 "[RAID%d]\n" 1417 " Name %s\n" 1418 " StripSize %" PRIu32 "\n" 1419 " NumDevices %u\n" 1420 " RaidLevel %hhu\n", 1421 index, raid_bdev->bdev.name, raid_bdev->strip_size_kb, 1422 raid_bdev->num_base_bdevs, raid_bdev->raid_level); 1423 fprintf(fp, 1424 " Devices "); 1425 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 1426 base = raid_bdev->base_bdev_info[i].bdev; 1427 if (base) { 1428 fprintf(fp, 1429 "%s ", 1430 base->name); 1431 } 1432 } 1433 fprintf(fp, 1434 "\n"); 1435 index++; 1436 } 1437 } 1438 1439 /* 1440 * brief: 1441 * raid_bdev_can_claim_bdev is the function to check if this base_bdev can be 1442 * claimed by raid bdev or not. 
1443 * params: 1444 * bdev_name - represents base bdev name 1445 * _raid_cfg - pointer to raid bdev config parsed from config file 1446 * base_bdev_slot - if bdev can be claimed, it represents the base_bdev correct 1447 * slot. This field is only valid if return value of this function is true 1448 * returns: 1449 * true - if bdev can be claimed 1450 * false - if bdev can't be claimed 1451 */ 1452 static bool 1453 raid_bdev_can_claim_bdev(const char *bdev_name, struct raid_bdev_config **_raid_cfg, 1454 uint8_t *base_bdev_slot) 1455 { 1456 struct raid_bdev_config *raid_cfg; 1457 uint8_t i; 1458 1459 TAILQ_FOREACH(raid_cfg, &g_raid_config.raid_bdev_config_head, link) { 1460 for (i = 0; i < raid_cfg->num_base_bdevs; i++) { 1461 /* 1462 * Check if the base bdev name is part of raid bdev configuration. 1463 * If match is found then return true and the slot information where 1464 * this base bdev should be inserted in raid bdev 1465 */ 1466 if (!strcmp(bdev_name, raid_cfg->base_bdev[i].name)) { 1467 *_raid_cfg = raid_cfg; 1468 *base_bdev_slot = i; 1469 return true; 1470 } 1471 } 1472 } 1473 1474 return false; 1475 } 1476 1477 1478 static struct spdk_bdev_module g_raid_if = { 1479 .name = "raid", 1480 .module_init = raid_bdev_init, 1481 .fini_start = raid_bdev_fini_start, 1482 .module_fini = raid_bdev_exit, 1483 .get_ctx_size = raid_bdev_get_ctx_size, 1484 .examine_config = raid_bdev_examine, 1485 .config_text = raid_bdev_get_running_config, 1486 .async_init = false, 1487 .async_fini = false, 1488 }; 1489 SPDK_BDEV_MODULE_REGISTER(raid, &g_raid_if) 1490 1491 /* 1492 * brief: 1493 * raid_bdev_init is the initialization function for raid bdev module 1494 * params: 1495 * none 1496 * returns: 1497 * 0 - success 1498 * non zero - failure 1499 */ 1500 static int 1501 raid_bdev_init(void) 1502 { 1503 int ret; 1504 1505 /* Parse config file for raids */ 1506 ret = raid_bdev_parse_config(); 1507 if (ret < 0) { 1508 SPDK_ERRLOG("raid bdev init failed parsing\n"); 1509 
raid_bdev_free(); 1510 return ret; 1511 } 1512 1513 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_init completed successfully\n"); 1514 1515 return 0; 1516 } 1517 1518 static const struct raid_fn_table g_raid0_fn_table = { 1519 .start_rw_request = raid0_start_rw_request, 1520 .get_curr_base_index = raid0_get_curr_base_bdev_index, 1521 .waitq_io_process = raid0_waitq_io_process, 1522 .get_io_range = _raid0_get_io_range, 1523 .split_io_range = _raid0_split_io_range, 1524 }; 1525 1526 /* 1527 * brief: 1528 * raid_bdev_create allocates raid bdev based on passed configuration 1529 * params: 1530 * raid_cfg - configuration of raid bdev 1531 * returns: 1532 * 0 - success 1533 * non zero - failure 1534 */ 1535 int 1536 raid_bdev_create(struct raid_bdev_config *raid_cfg) 1537 { 1538 struct raid_bdev *raid_bdev; 1539 struct spdk_bdev *raid_bdev_gen; 1540 1541 raid_bdev = calloc(1, sizeof(*raid_bdev)); 1542 if (!raid_bdev) { 1543 SPDK_ERRLOG("Unable to allocate memory for raid bdev\n"); 1544 return -ENOMEM; 1545 } 1546 1547 assert(raid_cfg->num_base_bdevs != 0); 1548 raid_bdev->num_base_bdevs = raid_cfg->num_base_bdevs; 1549 raid_bdev->base_bdev_info = calloc(raid_bdev->num_base_bdevs, 1550 sizeof(struct raid_base_bdev_info)); 1551 if (!raid_bdev->base_bdev_info) { 1552 SPDK_ERRLOG("Unable able to allocate base bdev info\n"); 1553 free(raid_bdev); 1554 return -ENOMEM; 1555 } 1556 1557 /* strip_size_kb is from the rpc param. strip_size is in blocks and used 1558 * intnerally and set later. 
1559 */ 1560 raid_bdev->strip_size = 0; 1561 raid_bdev->strip_size_kb = raid_cfg->strip_size; 1562 raid_bdev->state = RAID_BDEV_STATE_CONFIGURING; 1563 raid_bdev->config = raid_cfg; 1564 raid_bdev->raid_level = raid_cfg->raid_level; 1565 1566 switch (raid_bdev->raid_level) { 1567 case 0: 1568 raid_bdev->fn_table = &g_raid0_fn_table; 1569 break; 1570 default: 1571 SPDK_ERRLOG("invalid raid level %u\n", raid_bdev->raid_level); 1572 free(raid_bdev); 1573 return -EINVAL; 1574 } 1575 1576 raid_bdev_gen = &raid_bdev->bdev; 1577 1578 raid_bdev_gen->name = strdup(raid_cfg->name); 1579 if (!raid_bdev_gen->name) { 1580 SPDK_ERRLOG("Unable to allocate name for raid\n"); 1581 free(raid_bdev->base_bdev_info); 1582 free(raid_bdev); 1583 return -ENOMEM; 1584 } 1585 1586 raid_bdev_gen->product_name = "Raid Volume"; 1587 raid_bdev_gen->ctxt = raid_bdev; 1588 raid_bdev_gen->fn_table = &g_raid_bdev_fn_table; 1589 raid_bdev_gen->module = &g_raid_if; 1590 raid_bdev_gen->write_cache = 0; 1591 1592 TAILQ_INSERT_TAIL(&g_raid_bdev_configuring_list, raid_bdev, state_link); 1593 TAILQ_INSERT_TAIL(&g_raid_bdev_list, raid_bdev, global_link); 1594 1595 raid_cfg->raid_bdev = raid_bdev; 1596 1597 return 0; 1598 } 1599 1600 /* 1601 * brief 1602 * raid_bdev_alloc_base_bdev_resource allocates resource of base bdev. 
1603 * params: 1604 * raid_bdev - pointer to raid bdev 1605 * bdev - pointer to base bdev 1606 * base_bdev_slot - position to add base bdev 1607 * returns: 1608 * 0 - success 1609 * non zero - failure 1610 */ 1611 static int 1612 raid_bdev_alloc_base_bdev_resource(struct raid_bdev *raid_bdev, struct spdk_bdev *bdev, 1613 uint8_t base_bdev_slot) 1614 { 1615 struct spdk_bdev_desc *desc; 1616 int rc; 1617 1618 rc = spdk_bdev_open(bdev, true, raid_bdev_remove_base_bdev, bdev, &desc); 1619 if (rc != 0) { 1620 SPDK_ERRLOG("Unable to create desc on bdev '%s'\n", bdev->name); 1621 return rc; 1622 } 1623 1624 rc = spdk_bdev_module_claim_bdev(bdev, NULL, &g_raid_if); 1625 if (rc != 0) { 1626 SPDK_ERRLOG("Unable to claim this bdev as it is already claimed\n"); 1627 spdk_bdev_close(desc); 1628 return rc; 1629 } 1630 1631 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "bdev %s is claimed\n", bdev->name); 1632 1633 assert(raid_bdev->state != RAID_BDEV_STATE_ONLINE); 1634 assert(base_bdev_slot < raid_bdev->num_base_bdevs); 1635 1636 raid_bdev->base_bdev_info[base_bdev_slot].bdev = bdev; 1637 raid_bdev->base_bdev_info[base_bdev_slot].desc = desc; 1638 raid_bdev->num_base_bdevs_discovered++; 1639 assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs); 1640 1641 return 0; 1642 } 1643 1644 /* 1645 * brief: 1646 * If raid bdev config is complete, then only register the raid bdev to 1647 * bdev layer and remove this raid bdev from configuring list and 1648 * insert the raid bdev to configured list 1649 * params: 1650 * raid_bdev - pointer to raid bdev 1651 * returns: 1652 * 0 - success 1653 * non zero - failure 1654 */ 1655 static int 1656 raid_bdev_configure(struct raid_bdev *raid_bdev) 1657 { 1658 uint32_t blocklen; 1659 uint64_t min_blockcnt; 1660 struct spdk_bdev *raid_bdev_gen; 1661 int rc = 0; 1662 1663 blocklen = raid_bdev->base_bdev_info[0].bdev->blocklen; 1664 min_blockcnt = raid_bdev->base_bdev_info[0].bdev->blockcnt; 1665 for (uint8_t i = 1; i < 
raid_bdev->num_base_bdevs; i++) { 1666 /* Calculate minimum block count from all base bdevs */ 1667 if (raid_bdev->base_bdev_info[i].bdev->blockcnt < min_blockcnt) { 1668 min_blockcnt = raid_bdev->base_bdev_info[i].bdev->blockcnt; 1669 } 1670 1671 /* Check blocklen for all base bdevs that it should be same */ 1672 if (blocklen != raid_bdev->base_bdev_info[i].bdev->blocklen) { 1673 /* 1674 * Assumption is that all the base bdevs for any raid bdev should 1675 * have same blocklen 1676 */ 1677 SPDK_ERRLOG("Blocklen of various bdevs not matching\n"); 1678 return -EINVAL; 1679 } 1680 } 1681 1682 /* The strip_size_kb is read in from user in KB. Convert to blocks here for 1683 * internal use. 1684 */ 1685 raid_bdev->strip_size = (raid_bdev->strip_size_kb * 1024) / blocklen; 1686 raid_bdev->strip_size_shift = spdk_u32log2(raid_bdev->strip_size); 1687 raid_bdev->blocklen_shift = spdk_u32log2(blocklen); 1688 1689 raid_bdev_gen = &raid_bdev->bdev; 1690 raid_bdev_gen->blocklen = blocklen; 1691 if (raid_bdev->num_base_bdevs > 1) { 1692 raid_bdev_gen->optimal_io_boundary = raid_bdev->strip_size; 1693 raid_bdev_gen->split_on_optimal_io_boundary = true; 1694 } else { 1695 /* Do not need to split reads/writes on single bdev RAID modules. 
*/ 1696 raid_bdev_gen->optimal_io_boundary = 0; 1697 raid_bdev_gen->split_on_optimal_io_boundary = false; 1698 } 1699 1700 /* 1701 * RAID bdev logic is for striping so take the minimum block count based 1702 * approach where total block count of raid bdev is the number of base 1703 * bdev times the minimum block count of any base bdev 1704 */ 1705 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "min blockcount %lu, numbasedev %u, strip size shift %u\n", 1706 min_blockcnt, 1707 raid_bdev->num_base_bdevs, raid_bdev->strip_size_shift); 1708 raid_bdev_gen->blockcnt = ((min_blockcnt >> raid_bdev->strip_size_shift) << 1709 raid_bdev->strip_size_shift) * raid_bdev->num_base_bdevs; 1710 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "io device register %p\n", raid_bdev); 1711 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "blockcnt %lu, blocklen %u\n", raid_bdev_gen->blockcnt, 1712 raid_bdev_gen->blocklen); 1713 if (raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) { 1714 raid_bdev->state = RAID_BDEV_STATE_ONLINE; 1715 spdk_io_device_register(raid_bdev, raid_bdev_create_cb, raid_bdev_destroy_cb, 1716 sizeof(struct raid_bdev_io_channel), 1717 raid_bdev->bdev.name); 1718 rc = spdk_bdev_register(raid_bdev_gen); 1719 if (rc != 0) { 1720 SPDK_ERRLOG("Unable to register raid bdev and stay at configuring state\n"); 1721 spdk_io_device_unregister(raid_bdev, NULL); 1722 raid_bdev->state = RAID_BDEV_STATE_CONFIGURING; 1723 return rc; 1724 } 1725 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev generic %p\n", raid_bdev_gen); 1726 TAILQ_REMOVE(&g_raid_bdev_configuring_list, raid_bdev, state_link); 1727 TAILQ_INSERT_TAIL(&g_raid_bdev_configured_list, raid_bdev, state_link); 1728 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev is created with name %s, raid_bdev %p\n", 1729 raid_bdev_gen->name, raid_bdev); 1730 } 1731 1732 return 0; 1733 } 1734 1735 /* 1736 * brief: 1737 * If raid bdev is online and registered, change the bdev state to 1738 * configuring and unregister this raid device. 
Queue this raid device
 * in configuring list
 * params:
 * raid_bdev - pointer to raid bdev
 * cb_fn - callback function
 * cb_arg - argument to callback function
 * returns:
 * none
 */
static void
raid_bdev_deconfigure(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn,
		      void *cb_arg)
{
	bool online = (raid_bdev->state == RAID_BDEV_STATE_ONLINE);

	if (!online) {
		/* Nothing is registered, so complete the request immediately. */
		if (cb_fn != NULL) {
			cb_fn(cb_arg, 0);
		}
		return;
	}

	/* An online raid bdev has every base bdev present. */
	assert(raid_bdev->num_base_bdevs == raid_bdev->num_base_bdevs_discovered);
	assert(raid_bdev->num_base_bdevs_discovered);

	/* Move the raid bdev from the configured list to the offline list. */
	TAILQ_REMOVE(&g_raid_bdev_configured_list, raid_bdev, state_link);
	raid_bdev->state = RAID_BDEV_STATE_OFFLINE;
	TAILQ_INSERT_TAIL(&g_raid_bdev_offline_list, raid_bdev, state_link);
	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev state chaning from online to offline\n");

	/* cb_fn is handed on to the unregister call. */
	spdk_bdev_unregister(&raid_bdev->bdev, cb_fn, cb_arg);
}

/*
 * brief:
 * raid_bdev_find_by_base_bdev function finds the raid bdev which has
 * claimed the base bdev.
 * params:
 * base_bdev - pointer to base bdev pointer
 * _raid_bdev - Reference to pointer to raid bdev
 * _base_bdev_slot - Reference to the slot of the base bdev.
 * returns:
 * true - if the raid bdev is found.
 * false - if the raid bdev is not found.
1779 */ 1780 static bool 1781 raid_bdev_find_by_base_bdev(struct spdk_bdev *base_bdev, struct raid_bdev **_raid_bdev, 1782 uint8_t *_base_bdev_slot) 1783 { 1784 struct raid_bdev *raid_bdev; 1785 uint8_t i; 1786 1787 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { 1788 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 1789 if (raid_bdev->base_bdev_info[i].bdev == base_bdev) { 1790 *_raid_bdev = raid_bdev; 1791 *_base_bdev_slot = i; 1792 return true; 1793 } 1794 } 1795 } 1796 1797 return false; 1798 } 1799 1800 /* 1801 * brief: 1802 * raid_bdev_remove_base_bdev function is called by below layers when base_bdev 1803 * is removed. This function checks if this base bdev is part of any raid bdev 1804 * or not. If yes, it takes necessary action on that particular raid bdev. 1805 * params: 1806 * ctx - pointer to base bdev pointer which got removed 1807 * returns: 1808 * none 1809 */ 1810 static void 1811 raid_bdev_remove_base_bdev(void *ctx) 1812 { 1813 struct spdk_bdev *base_bdev = ctx; 1814 struct raid_bdev *raid_bdev = NULL; 1815 uint8_t base_bdev_slot = 0; 1816 1817 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_remove_base_bdev\n"); 1818 1819 /* Find the raid_bdev which has claimed this base_bdev */ 1820 if (!raid_bdev_find_by_base_bdev(base_bdev, &raid_bdev, &base_bdev_slot)) { 1821 SPDK_ERRLOG("bdev to remove '%s' not found\n", base_bdev->name); 1822 return; 1823 } 1824 1825 assert(raid_bdev->base_bdev_info[base_bdev_slot].desc); 1826 raid_bdev->base_bdev_info[base_bdev_slot].remove_scheduled = true; 1827 1828 if (raid_bdev->destruct_called == true || 1829 raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) { 1830 /* 1831 * As raid bdev is not registered yet or already unregistered, 1832 * so cleanup should be done here itself. 1833 */ 1834 raid_bdev_free_base_bdev_resource(raid_bdev, base_bdev_slot); 1835 if (raid_bdev->num_base_bdevs_discovered == 0) { 1836 /* There is no base bdev for this raid, so free the raid device. 
*/ 1837 raid_bdev_cleanup(raid_bdev); 1838 return; 1839 } 1840 } 1841 1842 raid_bdev_deconfigure(raid_bdev, NULL, NULL); 1843 } 1844 1845 /* 1846 * brief: 1847 * Remove base bdevs from the raid bdev one by one. Skip any base bdev which 1848 * doesn't exist. 1849 * params: 1850 * raid_cfg - pointer to raid bdev config. 1851 * cb_fn - callback function 1852 * cb_ctx - argument to callback function 1853 */ 1854 void 1855 raid_bdev_remove_base_devices(struct raid_bdev_config *raid_cfg, 1856 raid_bdev_destruct_cb cb_fn, void *cb_arg) 1857 { 1858 struct raid_bdev *raid_bdev; 1859 struct raid_base_bdev_info *info; 1860 uint8_t i; 1861 1862 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_remove_base_devices\n"); 1863 1864 raid_bdev = raid_cfg->raid_bdev; 1865 if (raid_bdev == NULL) { 1866 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev %s doesn't exist now\n", raid_cfg->name); 1867 if (cb_fn) { 1868 cb_fn(cb_arg, 0); 1869 } 1870 return; 1871 } 1872 1873 if (raid_bdev->destroy_started) { 1874 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "destroying raid bdev %s is already started\n", 1875 raid_cfg->name); 1876 if (cb_fn) { 1877 cb_fn(cb_arg, -EALREADY); 1878 } 1879 return; 1880 } 1881 1882 raid_bdev->destroy_started = true; 1883 1884 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 1885 info = &raid_bdev->base_bdev_info[i]; 1886 1887 if (info->bdev == NULL) { 1888 continue; 1889 } 1890 1891 assert(info->desc); 1892 info->remove_scheduled = true; 1893 1894 if (raid_bdev->destruct_called == true || 1895 raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) { 1896 /* 1897 * As raid bdev is not registered yet or already unregistered, 1898 * so cleanup should be done here itself. 1899 */ 1900 raid_bdev_free_base_bdev_resource(raid_bdev, i); 1901 if (raid_bdev->num_base_bdevs_discovered == 0) { 1902 /* There is no base bdev for this raid, so free the raid device. 
*/ 1903 raid_bdev_cleanup(raid_bdev); 1904 if (cb_fn) { 1905 cb_fn(cb_arg, 0); 1906 } 1907 return; 1908 } 1909 } 1910 } 1911 1912 raid_bdev_deconfigure(raid_bdev, cb_fn, cb_arg); 1913 } 1914 1915 /* 1916 * brief: 1917 * raid_bdev_add_base_device function is the actual function which either adds 1918 * the nvme base device to existing raid bdev or create a new raid bdev. It also claims 1919 * the base device and keep the open descriptor. 1920 * params: 1921 * raid_cfg - pointer to raid bdev config 1922 * bdev - pointer to base bdev 1923 * base_bdev_slot - position to add base bdev 1924 * returns: 1925 * 0 - success 1926 * non zero - failure 1927 */ 1928 static int 1929 raid_bdev_add_base_device(struct raid_bdev_config *raid_cfg, struct spdk_bdev *bdev, 1930 uint8_t base_bdev_slot) 1931 { 1932 struct raid_bdev *raid_bdev; 1933 int rc; 1934 1935 raid_bdev = raid_cfg->raid_bdev; 1936 if (!raid_bdev) { 1937 SPDK_ERRLOG("Raid bdev '%s' is not created yet\n", raid_cfg->name); 1938 return -ENODEV; 1939 } 1940 1941 rc = raid_bdev_alloc_base_bdev_resource(raid_bdev, bdev, base_bdev_slot); 1942 if (rc != 0) { 1943 SPDK_ERRLOG("Failed to allocate resource for bdev '%s'\n", bdev->name); 1944 return rc; 1945 } 1946 1947 assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs); 1948 1949 if (raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs) { 1950 rc = raid_bdev_configure(raid_bdev); 1951 if (rc != 0) { 1952 SPDK_ERRLOG("Failed to configure raid bdev\n"); 1953 return rc; 1954 } 1955 } 1956 1957 return 0; 1958 } 1959 1960 /* 1961 * brief: 1962 * Add base bdevs to the raid bdev one by one. Skip any base bdev which doesn't 1963 * exist or fails to add. If all base bdevs are successfully added, the raid bdev 1964 * moves to the configured state and becomes available. Otherwise, the raid bdev 1965 * stays at the configuring state with added base bdevs. 
1966 * params: 1967 * raid_cfg - pointer to raid bdev config 1968 * returns: 1969 * 0 - The raid bdev moves to the configured state or stays at the configuring 1970 * state with added base bdevs due to any nonexistent base bdev. 1971 * non zero - Failed to add any base bdev and stays at the configuring state with 1972 * added base bdevs. 1973 */ 1974 int 1975 raid_bdev_add_base_devices(struct raid_bdev_config *raid_cfg) 1976 { 1977 struct spdk_bdev *base_bdev; 1978 uint8_t i; 1979 int rc = 0, _rc; 1980 1981 for (i = 0; i < raid_cfg->num_base_bdevs; i++) { 1982 base_bdev = spdk_bdev_get_by_name(raid_cfg->base_bdev[i].name); 1983 if (base_bdev == NULL) { 1984 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "base bdev %s doesn't exist now\n", 1985 raid_cfg->base_bdev[i].name); 1986 continue; 1987 } 1988 1989 _rc = raid_bdev_add_base_device(raid_cfg, base_bdev, i); 1990 if (_rc != 0) { 1991 SPDK_ERRLOG("Failed to add base bdev %s to RAID bdev %s: %s\n", 1992 raid_cfg->base_bdev[i].name, raid_cfg->name, 1993 spdk_strerror(-_rc)); 1994 if (rc == 0) { 1995 rc = _rc; 1996 } 1997 } 1998 } 1999 2000 return rc; 2001 } 2002 2003 /* 2004 * brief: 2005 * raid_bdev_examine function is the examine function call by the below layers 2006 * like bdev_nvme layer. This function will check if this base bdev can be 2007 * claimed by this raid bdev or not. 
2008 * params: 2009 * bdev - pointer to base bdev 2010 * returns: 2011 * none 2012 */ 2013 static void 2014 raid_bdev_examine(struct spdk_bdev *bdev) 2015 { 2016 struct raid_bdev_config *raid_cfg; 2017 uint8_t base_bdev_slot; 2018 2019 if (raid_bdev_can_claim_bdev(bdev->name, &raid_cfg, &base_bdev_slot)) { 2020 raid_bdev_add_base_device(raid_cfg, bdev, base_bdev_slot); 2021 } else { 2022 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "bdev %s can't be claimed\n", 2023 bdev->name); 2024 } 2025 2026 spdk_bdev_module_examine_done(&g_raid_if); 2027 } 2028 2029 /* Log component for bdev raid bdev module */ 2030 SPDK_LOG_REGISTER_COMPONENT("bdev_raid", SPDK_LOG_BDEV_RAID) 2031