/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */ 33 34 #include "bdev_raid.h" 35 #include "spdk/env.h" 36 #include "spdk/io_channel.h" 37 #include "spdk/conf.h" 38 #include "spdk_internal/log.h" 39 #include "spdk/string.h" 40 #include "spdk/util.h" 41 #include "spdk/json.h" 42 #include "spdk/string.h" 43 44 static bool g_shutdown_started = false; 45 46 /* raid bdev config as read from config file */ 47 struct raid_config g_raid_config = { 48 .raid_bdev_config_head = TAILQ_HEAD_INITIALIZER(g_raid_config.raid_bdev_config_head), 49 }; 50 51 /* 52 * List of raid bdev in configured list, these raid bdevs are registered with 53 * bdev layer 54 */ 55 struct raid_configured_tailq g_raid_bdev_configured_list = TAILQ_HEAD_INITIALIZER( 56 g_raid_bdev_configured_list); 57 58 /* List of raid bdev in configuring list */ 59 struct raid_configuring_tailq g_raid_bdev_configuring_list = TAILQ_HEAD_INITIALIZER( 60 g_raid_bdev_configuring_list); 61 62 /* List of all raid bdevs */ 63 struct raid_all_tailq g_raid_bdev_list = TAILQ_HEAD_INITIALIZER(g_raid_bdev_list); 64 65 /* List of all raid bdevs that are offline */ 66 struct raid_offline_tailq g_raid_bdev_offline_list = TAILQ_HEAD_INITIALIZER( 67 g_raid_bdev_offline_list); 68 69 /* Function declarations */ 70 static void raid_bdev_examine(struct spdk_bdev *bdev); 71 static int raid_bdev_init(void); 72 static void raid_bdev_waitq_io_process(void *ctx); 73 static void raid_bdev_deconfigure(struct raid_bdev *raid_bdev, 74 raid_bdev_destruct_cb cb_fn, void *cb_arg); 75 static void raid_bdev_remove_base_bdev(void *ctx); 76 77 /* 78 * brief: 79 * raid_bdev_create_cb function is a cb function for raid bdev which creates the 80 * hierarchy from raid bdev to base bdev io channels. 
It will be called per core 81 * params: 82 * io_device - pointer to raid bdev io device represented by raid_bdev 83 * ctx_buf - pointer to context buffer for raid bdev io channel 84 * returns: 85 * 0 - success 86 * non zero - failure 87 */ 88 static int 89 raid_bdev_create_cb(void *io_device, void *ctx_buf) 90 { 91 struct raid_bdev *raid_bdev = io_device; 92 struct raid_bdev_io_channel *raid_ch = ctx_buf; 93 94 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_create_cb, %p\n", raid_ch); 95 96 assert(raid_bdev != NULL); 97 assert(raid_bdev->state == RAID_BDEV_STATE_ONLINE); 98 99 raid_ch->num_channels = raid_bdev->num_base_bdevs; 100 101 raid_ch->base_channel = calloc(raid_ch->num_channels, 102 sizeof(struct spdk_io_channel *)); 103 if (!raid_ch->base_channel) { 104 SPDK_ERRLOG("Unable to allocate base bdevs io channel\n"); 105 return -ENOMEM; 106 } 107 for (uint8_t i = 0; i < raid_ch->num_channels; i++) { 108 /* 109 * Get the spdk_io_channel for all the base bdevs. This is used during 110 * split logic to send the respective child bdev ios to respective base 111 * bdev io channel. 112 */ 113 raid_ch->base_channel[i] = spdk_bdev_get_io_channel( 114 raid_bdev->base_bdev_info[i].desc); 115 if (!raid_ch->base_channel[i]) { 116 for (uint8_t j = 0; j < i; j++) { 117 spdk_put_io_channel(raid_ch->base_channel[j]); 118 } 119 free(raid_ch->base_channel); 120 raid_ch->base_channel = NULL; 121 SPDK_ERRLOG("Unable to create io channel for base bdev\n"); 122 return -ENOMEM; 123 } 124 } 125 126 return 0; 127 } 128 129 /* 130 * brief: 131 * raid_bdev_destroy_cb function is a cb function for raid bdev which deletes the 132 * hierarchy from raid bdev to base bdev io channels. 
It will be called per core 133 * params: 134 * io_device - pointer to raid bdev io device represented by raid_bdev 135 * ctx_buf - pointer to context buffer for raid bdev io channel 136 * returns: 137 * none 138 */ 139 static void 140 raid_bdev_destroy_cb(void *io_device, void *ctx_buf) 141 { 142 struct raid_bdev_io_channel *raid_ch = ctx_buf; 143 144 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_destroy_cb\n"); 145 146 assert(raid_ch != NULL); 147 assert(raid_ch->base_channel); 148 for (uint8_t i = 0; i < raid_ch->num_channels; i++) { 149 /* Free base bdev channels */ 150 assert(raid_ch->base_channel[i] != NULL); 151 spdk_put_io_channel(raid_ch->base_channel[i]); 152 } 153 free(raid_ch->base_channel); 154 raid_ch->base_channel = NULL; 155 } 156 157 /* 158 * brief: 159 * raid_bdev_cleanup is used to cleanup and free raid_bdev related data 160 * structures. 161 * params: 162 * raid_bdev - pointer to raid_bdev 163 * returns: 164 * none 165 */ 166 static void 167 raid_bdev_cleanup(struct raid_bdev *raid_bdev) 168 { 169 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_cleanup, %p name %s, state %u, config %p\n", 170 raid_bdev, 171 raid_bdev->bdev.name, raid_bdev->state, raid_bdev->config); 172 if (raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) { 173 TAILQ_REMOVE(&g_raid_bdev_configuring_list, raid_bdev, state_link); 174 } else if (raid_bdev->state == RAID_BDEV_STATE_OFFLINE) { 175 TAILQ_REMOVE(&g_raid_bdev_offline_list, raid_bdev, state_link); 176 } else { 177 assert(0); 178 } 179 TAILQ_REMOVE(&g_raid_bdev_list, raid_bdev, global_link); 180 free(raid_bdev->bdev.name); 181 free(raid_bdev->base_bdev_info); 182 if (raid_bdev->config) { 183 raid_bdev->config->raid_bdev = NULL; 184 } 185 free(raid_bdev); 186 } 187 188 /* 189 * brief: 190 * free resource of base bdev for raid bdev 191 * params: 192 * raid_bdev - pointer to raid bdev 193 * base_bdev_slot - position to base bdev in raid bdev 194 * returns: 195 * 0 - success 196 * non zero - failure 197 */ 198 static void 199 
raid_bdev_free_base_bdev_resource(struct raid_bdev *raid_bdev, uint8_t base_bdev_slot) 200 { 201 struct raid_base_bdev_info *info; 202 203 info = &raid_bdev->base_bdev_info[base_bdev_slot]; 204 205 spdk_bdev_module_release_bdev(info->bdev); 206 spdk_bdev_close(info->desc); 207 info->desc = NULL; 208 info->bdev = NULL; 209 210 assert(raid_bdev->num_base_bdevs_discovered); 211 raid_bdev->num_base_bdevs_discovered--; 212 } 213 214 /* 215 * brief: 216 * raid_bdev_destruct is the destruct function table pointer for raid bdev 217 * params: 218 * ctxt - pointer to raid_bdev 219 * returns: 220 * 0 - success 221 * non zero - failure 222 */ 223 static int 224 raid_bdev_destruct(void *ctxt) 225 { 226 struct raid_bdev *raid_bdev = ctxt; 227 228 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_destruct\n"); 229 230 raid_bdev->destruct_called = true; 231 for (uint8_t i = 0; i < raid_bdev->num_base_bdevs; i++) { 232 /* 233 * Close all base bdev descriptors for which call has come from below 234 * layers. Also close the descriptors if we have started shutdown. 235 */ 236 if (g_shutdown_started || 237 ((raid_bdev->base_bdev_info[i].remove_scheduled == true) && 238 (raid_bdev->base_bdev_info[i].bdev != NULL))) { 239 raid_bdev_free_base_bdev_resource(raid_bdev, i); 240 } 241 } 242 243 if (g_shutdown_started) { 244 TAILQ_REMOVE(&g_raid_bdev_configured_list, raid_bdev, state_link); 245 raid_bdev->state = RAID_BDEV_STATE_OFFLINE; 246 TAILQ_INSERT_TAIL(&g_raid_bdev_offline_list, raid_bdev, state_link); 247 } 248 249 spdk_io_device_unregister(raid_bdev, NULL); 250 251 if (raid_bdev->num_base_bdevs_discovered == 0) { 252 /* Free raid_bdev when there are no base bdevs left */ 253 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev base bdevs is 0, going to free all in destruct\n"); 254 raid_bdev_cleanup(raid_bdev); 255 } 256 257 return 0; 258 } 259 260 /* 261 * brief: 262 * raid_bdev_io_completion function is called by lower layers to notify raid 263 * module that particular bdev_io is completed. 
264 * params: 265 * bdev_io - pointer to bdev io submitted to lower layers, like child io 266 * success - bdev_io status 267 * cb_arg - function callback context, like parent io pointer 268 * returns: 269 * none 270 */ 271 static void 272 raid_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 273 { 274 struct spdk_bdev_io *parent_io = cb_arg; 275 276 spdk_bdev_free_io(bdev_io); 277 278 if (success) { 279 spdk_bdev_io_complete(parent_io, SPDK_BDEV_IO_STATUS_SUCCESS); 280 } else { 281 spdk_bdev_io_complete(parent_io, SPDK_BDEV_IO_STATUS_FAILED); 282 } 283 } 284 285 /* 286 * brief: 287 * raid_bdev_submit_rw_request function is used to submit I/O to the correct 288 * member disk 289 * params: 290 * bdev_io - parent bdev io 291 * start_strip - start strip number of this io 292 * returns: 293 * 0 - success 294 * non zero - failure 295 */ 296 static int 297 raid_bdev_submit_rw_request(struct spdk_bdev_io *bdev_io, uint64_t start_strip) 298 { 299 struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 300 struct raid_bdev_io_channel *raid_ch = spdk_io_channel_get_ctx(raid_io->ch); 301 struct raid_bdev *raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt; 302 uint64_t pd_strip; 303 uint32_t offset_in_strip; 304 uint64_t pd_lba; 305 uint64_t pd_blocks; 306 uint8_t pd_idx; 307 int ret = 0; 308 309 pd_strip = start_strip / raid_bdev->num_base_bdevs; 310 pd_idx = start_strip % raid_bdev->num_base_bdevs; 311 offset_in_strip = bdev_io->u.bdev.offset_blocks & (raid_bdev->strip_size - 1); 312 pd_lba = (pd_strip << raid_bdev->strip_size_shift) + offset_in_strip; 313 pd_blocks = bdev_io->u.bdev.num_blocks; 314 if (raid_bdev->base_bdev_info[pd_idx].desc == NULL) { 315 SPDK_ERRLOG("base bdev desc null for pd_idx %u\n", pd_idx); 316 assert(0); 317 } 318 319 /* 320 * Submit child io to bdev layer with using base bdev descriptors, base 321 * bdev lba, base bdev child io length in blocks, buffer, completion 322 * function and function callback 
context 323 */ 324 assert(raid_ch != NULL); 325 assert(raid_ch->base_channel); 326 if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) { 327 ret = spdk_bdev_readv_blocks(raid_bdev->base_bdev_info[pd_idx].desc, 328 raid_ch->base_channel[pd_idx], 329 bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, 330 pd_lba, pd_blocks, raid_bdev_io_completion, 331 bdev_io); 332 } else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) { 333 ret = spdk_bdev_writev_blocks(raid_bdev->base_bdev_info[pd_idx].desc, 334 raid_ch->base_channel[pd_idx], 335 bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, 336 pd_lba, pd_blocks, raid_bdev_io_completion, 337 bdev_io); 338 } else { 339 SPDK_ERRLOG("Recvd not supported io type %u\n", bdev_io->type); 340 assert(0); 341 } 342 343 return ret; 344 } 345 346 /* 347 * brief: 348 * get_curr_base_bdev_index function calculates the base bdev index 349 * params: 350 * raid_bdev - pointer to raid bdev 351 * raid_io - pointer to parent io context 352 * returns: 353 * base bdev index 354 */ 355 static uint8_t 356 get_curr_base_bdev_index(struct raid_bdev *raid_bdev, struct raid_bdev_io *raid_io) 357 { 358 struct spdk_bdev_io *bdev_io; 359 uint64_t start_strip; 360 361 bdev_io = SPDK_CONTAINEROF(raid_io, struct spdk_bdev_io, driver_ctx); 362 start_strip = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift; 363 364 return (start_strip % raid_bdev->num_base_bdevs); 365 } 366 367 /* 368 * brief: 369 * raid_bdev_io_submit_fail_process function processes the IO which failed to submit. 370 * It will try to queue the IOs after storing the context to bdev wait queue logic. 
371 * params: 372 * bdev_io - pointer to bdev_io 373 * raid_io - pointer to raid bdev io 374 * ret - return code 375 * returns: 376 * none 377 */ 378 static void 379 raid_bdev_io_submit_fail_process(struct raid_bdev *raid_bdev, struct spdk_bdev_io *bdev_io, 380 struct raid_bdev_io *raid_io, int ret) 381 { 382 struct raid_bdev_io_channel *raid_ch; 383 uint8_t pd_idx; 384 385 if (ret != -ENOMEM) { 386 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 387 } else { 388 /* Queue the IO to bdev layer wait queue */ 389 pd_idx = get_curr_base_bdev_index(raid_bdev, raid_io); 390 raid_io->waitq_entry.bdev = raid_bdev->base_bdev_info[pd_idx].bdev; 391 raid_io->waitq_entry.cb_fn = raid_bdev_waitq_io_process; 392 raid_io->waitq_entry.cb_arg = raid_io; 393 raid_ch = spdk_io_channel_get_ctx(raid_io->ch); 394 if (spdk_bdev_queue_io_wait(raid_bdev->base_bdev_info[pd_idx].bdev, 395 raid_ch->base_channel[pd_idx], 396 &raid_io->waitq_entry) != 0) { 397 SPDK_ERRLOG("bdev io waitq error, it should not happen\n"); 398 assert(0); 399 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 400 } 401 } 402 } 403 404 /* 405 * brief: 406 * raid_bdev_waitq_io_process function is the callback function 407 * registered by raid bdev module to bdev when bdev_io was unavailable. 408 * params: 409 * ctx - pointer to raid_bdev_io 410 * returns: 411 * none 412 */ 413 static void 414 raid_bdev_waitq_io_process(void *ctx) 415 { 416 struct raid_bdev_io *raid_io = ctx; 417 struct spdk_bdev_io *bdev_io; 418 struct raid_bdev *raid_bdev; 419 int ret; 420 uint64_t start_strip; 421 422 bdev_io = SPDK_CONTAINEROF(raid_io, struct spdk_bdev_io, driver_ctx); 423 /* 424 * Try to submit childs of parent bdev io. If failed due to resource 425 * crunch then break the loop and don't try to process other queued IOs. 
426 */ 427 raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt; 428 start_strip = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift; 429 ret = raid_bdev_submit_rw_request(bdev_io, start_strip); 430 if (ret != 0) { 431 raid_bdev_io_submit_fail_process(raid_bdev, bdev_io, raid_io, ret); 432 } 433 } 434 435 /* 436 * brief: 437 * raid_bdev_start_rw_request function is the submit_request function for 438 * read/write requests 439 * params: 440 * ch - pointer to raid bdev io channel 441 * bdev_io - pointer to parent bdev_io on raid bdev device 442 * returns: 443 * none 444 */ 445 static void 446 raid_bdev_start_rw_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 447 { 448 struct raid_bdev_io *raid_io; 449 struct raid_bdev *raid_bdev; 450 uint64_t start_strip = 0; 451 uint64_t end_strip = 0; 452 int ret; 453 454 raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt; 455 raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 456 raid_io->ch = ch; 457 start_strip = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift; 458 end_strip = (bdev_io->u.bdev.offset_blocks + bdev_io->u.bdev.num_blocks - 1) >> 459 raid_bdev->strip_size_shift; 460 if (start_strip != end_strip && raid_bdev->num_base_bdevs > 1) { 461 assert(false); 462 SPDK_ERRLOG("I/O spans strip boundary!\n"); 463 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 464 return; 465 } 466 ret = raid_bdev_submit_rw_request(bdev_io, start_strip); 467 if (ret != 0) { 468 raid_bdev_io_submit_fail_process(raid_bdev, bdev_io, raid_io, ret); 469 } 470 } 471 472 /* 473 * brief: 474 * raid_bdev_base_io_completion is the completion callback for member disk requests 475 * params: 476 * bdev_io - pointer to member disk requested bdev_io 477 * success - true if successful, false if unsuccessful 478 * cb_arg - callback argument (parent raid bdev_io) 479 * returns: 480 * none 481 */ 482 static void 483 raid_bdev_base_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 484 { 
485 struct spdk_bdev_io *parent_io = cb_arg; 486 struct raid_bdev_io *raid_io = (struct raid_bdev_io *)parent_io->driver_ctx; 487 488 spdk_bdev_free_io(bdev_io); 489 490 if (!success) { 491 raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; 492 } 493 494 raid_io->base_bdev_io_completed++; 495 if (raid_io->base_bdev_io_completed == raid_io->base_bdev_io_expected) { 496 spdk_bdev_io_complete(parent_io, raid_io->base_bdev_io_status); 497 } 498 } 499 500 /* 501 * brief: 502 * raid_bdev_base_io_submit_fail_process processes IO requests for member disk 503 * which failed to submit 504 * params: 505 * raid_bdev_io - pointer to raid bdev_io 506 * pd_idx - base_dev index in raid_bdev 507 * cb_fn - callback when the spdk_bdev_io for base_bdev becomes available 508 * ret - return code 509 * returns: 510 * none 511 */ 512 static void 513 raid_bdev_base_io_submit_fail_process(struct spdk_bdev_io *raid_bdev_io, uint8_t pd_idx, 514 spdk_bdev_io_wait_cb cb_fn, int ret) 515 { 516 struct raid_bdev_io *raid_io = (struct raid_bdev_io *)raid_bdev_io->driver_ctx; 517 struct raid_bdev_io_channel *raid_ch = spdk_io_channel_get_ctx(raid_io->ch); 518 struct raid_bdev *raid_bdev = (struct raid_bdev *)raid_bdev_io->bdev->ctxt; 519 520 assert(ret != 0); 521 522 if (ret == -ENOMEM) { 523 raid_io->waitq_entry.bdev = raid_bdev->base_bdev_info[pd_idx].bdev; 524 raid_io->waitq_entry.cb_fn = cb_fn; 525 raid_io->waitq_entry.cb_arg = raid_bdev_io; 526 spdk_bdev_queue_io_wait(raid_bdev->base_bdev_info[pd_idx].bdev, 527 raid_ch->base_channel[pd_idx], 528 &raid_io->waitq_entry); 529 return; 530 } 531 532 SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); 533 assert(false); 534 spdk_bdev_io_complete(raid_bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 535 } 536 537 /* 538 * brief: 539 * _raid_bdev_submit_reset_request_next function submits the next batch of reset requests 540 * to member disks; it will submit as many as possible unless a reset fails with -ENOMEM, in 541 * which 
case it will queue it for later submission 542 * params: 543 * bdev_io - pointer to parent bdev_io on raid bdev device 544 * returns: 545 * none 546 */ 547 static void 548 _raid_bdev_submit_reset_request_next(void *_bdev_io) 549 { 550 struct spdk_bdev_io *bdev_io = _bdev_io; 551 struct raid_bdev_io *raid_io; 552 struct raid_bdev *raid_bdev; 553 struct raid_bdev_io_channel *raid_ch; 554 int ret; 555 uint8_t i; 556 557 raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt; 558 raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 559 raid_ch = spdk_io_channel_get_ctx(raid_io->ch); 560 561 while (raid_io->base_bdev_io_submitted < raid_bdev->num_base_bdevs) { 562 i = raid_io->base_bdev_io_submitted; 563 ret = spdk_bdev_reset(raid_bdev->base_bdev_info[i].desc, 564 raid_ch->base_channel[i], 565 raid_bdev_base_io_completion, bdev_io); 566 if (ret == 0) { 567 raid_io->base_bdev_io_submitted++; 568 } else { 569 raid_bdev_base_io_submit_fail_process(bdev_io, i, 570 _raid_bdev_submit_reset_request_next, ret); 571 return; 572 } 573 } 574 } 575 576 /* 577 * brief: 578 * _raid_bdev_submit_reset_request function is the submit_request function for 579 * reset requests 580 * params: 581 * ch - pointer to raid bdev io channel 582 * bdev_io - pointer to parent bdev_io on raid bdev device 583 * returns: 584 * none 585 */ 586 static void 587 _raid_bdev_submit_reset_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 588 { 589 struct raid_bdev_io *raid_io; 590 struct raid_bdev *raid_bdev; 591 592 raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt; 593 raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 594 raid_io->ch = ch; 595 raid_io->base_bdev_io_submitted = 0; 596 raid_io->base_bdev_io_completed = 0; 597 raid_io->base_bdev_io_expected = raid_bdev->num_base_bdevs; 598 raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS; 599 _raid_bdev_submit_reset_request_next(bdev_io); 600 } 601 602 static inline void 603 _raid_bdev_get_io_range(struct raid_bdev_io_range 
*io_range, 604 uint8_t num_base_bdevs, uint64_t strip_size, uint64_t strip_size_shift, 605 uint64_t offset_blocks, uint64_t num_blocks) 606 { 607 uint64_t start_strip; 608 uint64_t end_strip; 609 610 io_range->strip_size = strip_size; 611 612 /* The start and end strip index in raid0 bdev scope */ 613 start_strip = offset_blocks >> strip_size_shift; 614 end_strip = (offset_blocks + num_blocks - 1) >> strip_size_shift; 615 io_range->start_strip_in_disk = start_strip / num_base_bdevs; 616 io_range->end_strip_in_disk = end_strip / num_base_bdevs; 617 618 /* The first strip may have unaligned start LBA offset. 619 * The end strip may have unaligned end LBA offset. 620 * Strips between them certainly have aligned offset and length to boundaries. 621 */ 622 io_range->start_offset_in_strip = offset_blocks % strip_size; 623 io_range->end_offset_in_strip = (offset_blocks + num_blocks - 1) % strip_size; 624 625 /* The base bdev indexes in which start and end strips are located */ 626 io_range->start_disk = start_strip % num_base_bdevs; 627 io_range->end_disk = end_strip % num_base_bdevs; 628 629 /* Calculate how many base_bdevs are involved in io operation. 630 * Number of base bdevs involved is between 1 and num_base_bdevs. 631 * It will be 1 if the first strip and last strip are the same one. 
632 */ 633 io_range->n_disks_involved = spdk_min((end_strip - start_strip + 1), num_base_bdevs); 634 } 635 636 static inline void 637 _raid_bdev_split_io_range(struct raid_bdev_io_range *io_range, uint8_t disk_idx, 638 uint64_t *_offset_in_disk, uint64_t *_nblocks_in_disk) 639 { 640 uint64_t n_strips_in_disk; 641 uint64_t start_offset_in_disk; 642 uint64_t end_offset_in_disk; 643 uint64_t offset_in_disk; 644 uint64_t nblocks_in_disk; 645 uint64_t start_strip_in_disk; 646 uint64_t end_strip_in_disk; 647 648 start_strip_in_disk = io_range->start_strip_in_disk; 649 if (disk_idx < io_range->start_disk) { 650 start_strip_in_disk += 1; 651 } 652 653 end_strip_in_disk = io_range->end_strip_in_disk; 654 if (disk_idx > io_range->end_disk) { 655 end_strip_in_disk -= 1; 656 } 657 658 assert(end_strip_in_disk >= start_strip_in_disk); 659 n_strips_in_disk = end_strip_in_disk - start_strip_in_disk + 1; 660 661 if (disk_idx == io_range->start_disk) { 662 start_offset_in_disk = io_range->start_offset_in_strip; 663 } else { 664 start_offset_in_disk = 0; 665 } 666 667 if (disk_idx == io_range->end_disk) { 668 end_offset_in_disk = io_range->end_offset_in_strip; 669 } else { 670 end_offset_in_disk = io_range->strip_size - 1; 671 } 672 673 offset_in_disk = start_offset_in_disk + start_strip_in_disk * io_range->strip_size; 674 nblocks_in_disk = (n_strips_in_disk - 1) * io_range->strip_size 675 + end_offset_in_disk - start_offset_in_disk + 1; 676 677 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, 678 "raid_bdev (strip_size 0x%lx) splits IO to base_bdev (%u) at (0x%lx, 0x%lx).\n", 679 io_range->strip_size, disk_idx, offset_in_disk, nblocks_in_disk); 680 681 *_offset_in_disk = offset_in_disk; 682 *_nblocks_in_disk = nblocks_in_disk; 683 } 684 685 /* 686 * brief: 687 * _raid_bdev_submit_null_payload_request_next function submits the next batch of 688 * io requests with range but without payload, like FLUSH and UNMAP, to member disks; 689 * it will submit as many as possible unless one base io request 
fails with -ENOMEM, 690 * in which case it will queue itself for later submission. 691 * params: 692 * bdev_io - pointer to parent bdev_io on raid bdev device 693 * returns: 694 * none 695 */ 696 static void 697 _raid_bdev_submit_null_payload_request_next(void *_bdev_io) 698 { 699 struct spdk_bdev_io *bdev_io = _bdev_io; 700 struct raid_bdev_io *raid_io; 701 struct raid_bdev *raid_bdev; 702 struct raid_bdev_io_channel *raid_ch; 703 struct raid_bdev_io_range io_range; 704 int ret; 705 706 raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt; 707 raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 708 raid_ch = spdk_io_channel_get_ctx(raid_io->ch); 709 710 _raid_bdev_get_io_range(&io_range, raid_bdev->num_base_bdevs, 711 raid_bdev->strip_size, raid_bdev->strip_size_shift, 712 bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks); 713 714 raid_io->base_bdev_io_expected = io_range.n_disks_involved; 715 716 while (raid_io->base_bdev_io_submitted < raid_io->base_bdev_io_expected) { 717 uint8_t disk_idx; 718 uint64_t offset_in_disk; 719 uint64_t nblocks_in_disk; 720 721 /* base_bdev is started from start_disk to end_disk. 722 * It is possible that index of start_disk is larger than end_disk's. 
723 */ 724 disk_idx = (io_range.start_disk + raid_io->base_bdev_io_submitted) % raid_bdev->num_base_bdevs; 725 726 _raid_bdev_split_io_range(&io_range, disk_idx, &offset_in_disk, &nblocks_in_disk); 727 728 switch (bdev_io->type) { 729 case SPDK_BDEV_IO_TYPE_UNMAP: 730 ret = spdk_bdev_unmap_blocks(raid_bdev->base_bdev_info[disk_idx].desc, 731 raid_ch->base_channel[disk_idx], 732 offset_in_disk, nblocks_in_disk, 733 raid_bdev_base_io_completion, bdev_io); 734 break; 735 736 case SPDK_BDEV_IO_TYPE_FLUSH: 737 ret = spdk_bdev_flush_blocks(raid_bdev->base_bdev_info[disk_idx].desc, 738 raid_ch->base_channel[disk_idx], 739 offset_in_disk, nblocks_in_disk, 740 raid_bdev_base_io_completion, bdev_io); 741 break; 742 743 default: 744 SPDK_ERRLOG("submit request, invalid io type with null payload %u\n", bdev_io->type); 745 assert(false); 746 ret = -EIO; 747 } 748 749 if (ret == 0) { 750 raid_io->base_bdev_io_submitted++; 751 } else { 752 raid_bdev_base_io_submit_fail_process(bdev_io, disk_idx, 753 _raid_bdev_submit_null_payload_request_next, ret); 754 return; 755 } 756 } 757 } 758 759 /* 760 * brief: 761 * _raid_bdev_submit_null_payload_request function is the submit_request function 762 * for io requests with range but without payload, like UNMAP and FLUSH. 
763 * params: 764 * ch - pointer to raid bdev io channel 765 * bdev_io - pointer to parent bdev_io on raid bdev device 766 * returns: 767 * none 768 */ 769 static void 770 _raid_bdev_submit_null_payload_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 771 { 772 struct raid_bdev_io *raid_io; 773 774 raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 775 raid_io->ch = ch; 776 raid_io->base_bdev_io_submitted = 0; 777 raid_io->base_bdev_io_completed = 0; 778 raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS; 779 780 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev: type %d, range (0x%lx, 0x%lx)\n", 781 bdev_io->type, bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks); 782 783 _raid_bdev_submit_null_payload_request_next(bdev_io); 784 } 785 786 /* 787 * brief: 788 * Callback function to spdk_bdev_io_get_buf. 789 * params: 790 * ch - pointer to raid bdev io channel 791 * bdev_io - pointer to parent bdev_io on raid bdev device 792 * success - True if buffer is allocated or false otherwise. 793 * returns: 794 * none 795 */ 796 static void 797 raid_bdev_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, 798 bool success) 799 { 800 if (!success) { 801 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 802 return; 803 } 804 805 raid_bdev_start_rw_request(ch, bdev_io); 806 } 807 808 /* 809 * brief: 810 * raid_bdev_submit_request function is the submit_request function pointer of 811 * raid bdev function table. This is used to submit the io on raid_bdev to below 812 * layers. 
813 * params: 814 * ch - pointer to raid bdev io channel 815 * bdev_io - pointer to parent bdev_io on raid bdev device 816 * returns: 817 * none 818 */ 819 static void 820 raid_bdev_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 821 { 822 switch (bdev_io->type) { 823 case SPDK_BDEV_IO_TYPE_READ: 824 spdk_bdev_io_get_buf(bdev_io, raid_bdev_get_buf_cb, 825 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); 826 break; 827 case SPDK_BDEV_IO_TYPE_WRITE: 828 raid_bdev_start_rw_request(ch, bdev_io); 829 break; 830 831 case SPDK_BDEV_IO_TYPE_RESET: 832 _raid_bdev_submit_reset_request(ch, bdev_io); 833 break; 834 835 case SPDK_BDEV_IO_TYPE_FLUSH: 836 case SPDK_BDEV_IO_TYPE_UNMAP: 837 _raid_bdev_submit_null_payload_request(ch, bdev_io); 838 break; 839 840 default: 841 SPDK_ERRLOG("submit request, invalid io type %u\n", bdev_io->type); 842 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 843 break; 844 } 845 846 } 847 848 /* 849 * brief: 850 * _raid_bdev_io_type_supported checks whether io_type is supported in 851 * all base bdev modules of raid bdev module. If anyone among the base_bdevs 852 * doesn't support, the raid device doesn't supports. 
853 * 854 * params: 855 * raid_bdev - pointer to raid bdev context 856 * io_type - io type 857 * returns: 858 * true - io_type is supported 859 * false - io_type is not supported 860 */ 861 inline static bool 862 _raid_bdev_io_type_supported(struct raid_bdev *raid_bdev, enum spdk_bdev_io_type io_type) 863 { 864 uint8_t i; 865 866 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 867 if (raid_bdev->base_bdev_info[i].bdev == NULL) { 868 assert(false); 869 continue; 870 } 871 872 if (spdk_bdev_io_type_supported(raid_bdev->base_bdev_info[i].bdev, 873 io_type) == false) { 874 return false; 875 } 876 } 877 878 return true; 879 } 880 881 /* 882 * brief: 883 * raid_bdev_io_type_supported is the io_supported function for bdev function 884 * table which returns whether the particular io type is supported or not by 885 * raid bdev module 886 * params: 887 * ctx - pointer to raid bdev context 888 * type - io type 889 * returns: 890 * true - io_type is supported 891 * false - io_type is not supported 892 */ 893 static bool 894 raid_bdev_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 895 { 896 switch (io_type) { 897 case SPDK_BDEV_IO_TYPE_READ: 898 case SPDK_BDEV_IO_TYPE_WRITE: 899 return true; 900 901 case SPDK_BDEV_IO_TYPE_FLUSH: 902 case SPDK_BDEV_IO_TYPE_RESET: 903 case SPDK_BDEV_IO_TYPE_UNMAP: 904 return _raid_bdev_io_type_supported(ctx, io_type); 905 906 default: 907 return false; 908 } 909 910 return false; 911 } 912 913 /* 914 * brief: 915 * raid_bdev_get_io_channel is the get_io_channel function table pointer for 916 * raid bdev. 
This is used to return the io channel for this raid bdev 917 * params: 918 * ctxt - pointer to raid_bdev 919 * returns: 920 * pointer to io channel for raid bdev 921 */ 922 static struct spdk_io_channel * 923 raid_bdev_get_io_channel(void *ctxt) 924 { 925 struct raid_bdev *raid_bdev = ctxt; 926 927 return spdk_get_io_channel(raid_bdev); 928 } 929 930 /* 931 * brief: 932 * raid_bdev_dump_info_json is the function table pointer for raid bdev 933 * params: 934 * ctx - pointer to raid_bdev 935 * w - pointer to json context 936 * returns: 937 * 0 - success 938 * non zero - failure 939 */ 940 static int 941 raid_bdev_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) 942 { 943 struct raid_bdev *raid_bdev = ctx; 944 945 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_dump_config_json\n"); 946 assert(raid_bdev != NULL); 947 948 /* Dump the raid bdev configuration related information */ 949 spdk_json_write_named_object_begin(w, "raid"); 950 spdk_json_write_named_uint32(w, "strip_size", raid_bdev->strip_size); 951 spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb); 952 spdk_json_write_named_uint32(w, "state", raid_bdev->state); 953 spdk_json_write_named_uint32(w, "raid_level", raid_bdev->raid_level); 954 spdk_json_write_named_uint32(w, "destruct_called", raid_bdev->destruct_called); 955 spdk_json_write_named_uint32(w, "num_base_bdevs", raid_bdev->num_base_bdevs); 956 spdk_json_write_named_uint32(w, "num_base_bdevs_discovered", raid_bdev->num_base_bdevs_discovered); 957 spdk_json_write_name(w, "base_bdevs_list"); 958 spdk_json_write_array_begin(w); 959 for (uint8_t i = 0; i < raid_bdev->num_base_bdevs; i++) { 960 if (raid_bdev->base_bdev_info[i].bdev) { 961 spdk_json_write_string(w, raid_bdev->base_bdev_info[i].bdev->name); 962 } else { 963 spdk_json_write_null(w); 964 } 965 } 966 spdk_json_write_array_end(w); 967 spdk_json_write_object_end(w); 968 969 return 0; 970 } 971 972 /* 973 * brief: 974 * raid_bdev_write_config_json is the function table 
pointer for raid bdev 975 * params: 976 * bdev - pointer to spdk_bdev 977 * w - pointer to json context 978 * returns: 979 * none 980 */ 981 static void 982 raid_bdev_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 983 { 984 struct raid_bdev *raid_bdev = bdev->ctxt; 985 struct spdk_bdev *base; 986 uint8_t i; 987 988 spdk_json_write_object_begin(w); 989 990 spdk_json_write_named_string(w, "method", "construct_raid_bdev"); 991 992 spdk_json_write_named_object_begin(w, "params"); 993 spdk_json_write_named_string(w, "name", bdev->name); 994 spdk_json_write_named_uint32(w, "strip_size", raid_bdev->strip_size_kb); 995 spdk_json_write_named_uint32(w, "raid_level", raid_bdev->raid_level); 996 997 spdk_json_write_named_array_begin(w, "base_bdevs"); 998 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 999 base = raid_bdev->base_bdev_info[i].bdev; 1000 if (base) { 1001 spdk_json_write_string(w, base->name); 1002 } 1003 } 1004 spdk_json_write_array_end(w); 1005 spdk_json_write_object_end(w); 1006 1007 spdk_json_write_object_end(w); 1008 } 1009 1010 /* g_raid_bdev_fn_table is the function table for raid bdev */ 1011 static const struct spdk_bdev_fn_table g_raid_bdev_fn_table = { 1012 .destruct = raid_bdev_destruct, 1013 .submit_request = raid_bdev_submit_request, 1014 .io_type_supported = raid_bdev_io_type_supported, 1015 .get_io_channel = raid_bdev_get_io_channel, 1016 .dump_info_json = raid_bdev_dump_info_json, 1017 .write_config_json = raid_bdev_write_config_json, 1018 }; 1019 1020 /* 1021 * brief: 1022 * raid_bdev_config_cleanup function is used to free memory for one raid_bdev in configuration 1023 * params: 1024 * raid_cfg - pointer to raid_bdev_config structure 1025 * returns: 1026 * none 1027 */ 1028 void 1029 raid_bdev_config_cleanup(struct raid_bdev_config *raid_cfg) 1030 { 1031 uint8_t i; 1032 1033 TAILQ_REMOVE(&g_raid_config.raid_bdev_config_head, raid_cfg, link); 1034 g_raid_config.total_raid_bdev--; 1035 1036 if (raid_cfg->base_bdev) { 
1037 for (i = 0; i < raid_cfg->num_base_bdevs; i++) { 1038 free(raid_cfg->base_bdev[i].name); 1039 } 1040 free(raid_cfg->base_bdev); 1041 } 1042 free(raid_cfg->name); 1043 free(raid_cfg); 1044 } 1045 1046 /* 1047 * brief: 1048 * raid_bdev_free is the raid bdev function table function pointer. This is 1049 * called on bdev free path 1050 * params: 1051 * none 1052 * returns: 1053 * none 1054 */ 1055 static void 1056 raid_bdev_free(void) 1057 { 1058 struct raid_bdev_config *raid_cfg, *tmp; 1059 1060 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_free\n"); 1061 TAILQ_FOREACH_SAFE(raid_cfg, &g_raid_config.raid_bdev_config_head, link, tmp) { 1062 raid_bdev_config_cleanup(raid_cfg); 1063 } 1064 } 1065 1066 /* brief 1067 * raid_bdev_config_find_by_name is a helper function to find raid bdev config 1068 * by name as key. 1069 * 1070 * params: 1071 * raid_name - name for raid bdev. 1072 */ 1073 struct raid_bdev_config * 1074 raid_bdev_config_find_by_name(const char *raid_name) 1075 { 1076 struct raid_bdev_config *raid_cfg; 1077 1078 TAILQ_FOREACH(raid_cfg, &g_raid_config.raid_bdev_config_head, link) { 1079 if (!strcmp(raid_cfg->name, raid_name)) { 1080 return raid_cfg; 1081 } 1082 } 1083 1084 return raid_cfg; 1085 } 1086 1087 /* 1088 * brief 1089 * raid_bdev_config_add function adds config for newly created raid bdev. 1090 * 1091 * params: 1092 * raid_name - name for raid bdev. 1093 * strip_size - strip size in KB 1094 * num_base_bdevs - number of base bdevs. 1095 * raid_level - raid level, only raid level 0 is supported. 
1096 * _raid_cfg - Pointer to newly added configuration 1097 */ 1098 int 1099 raid_bdev_config_add(const char *raid_name, uint32_t strip_size, uint8_t num_base_bdevs, 1100 uint8_t raid_level, struct raid_bdev_config **_raid_cfg) 1101 { 1102 struct raid_bdev_config *raid_cfg; 1103 1104 raid_cfg = raid_bdev_config_find_by_name(raid_name); 1105 if (raid_cfg != NULL) { 1106 SPDK_ERRLOG("Duplicate raid bdev name found in config file %s\n", 1107 raid_name); 1108 return -EEXIST; 1109 } 1110 1111 if (spdk_u32_is_pow2(strip_size) == false) { 1112 SPDK_ERRLOG("Invalid strip size %" PRIu32 "\n", strip_size); 1113 return -EINVAL; 1114 } 1115 1116 if (num_base_bdevs == 0) { 1117 SPDK_ERRLOG("Invalid base device count %u\n", num_base_bdevs); 1118 return -EINVAL; 1119 } 1120 1121 if (raid_level != 0) { 1122 SPDK_ERRLOG("invalid raid level %u, only raid level 0 is supported\n", 1123 raid_level); 1124 return -EINVAL; 1125 } 1126 1127 raid_cfg = calloc(1, sizeof(*raid_cfg)); 1128 if (raid_cfg == NULL) { 1129 SPDK_ERRLOG("unable to allocate memory\n"); 1130 return -ENOMEM; 1131 } 1132 1133 raid_cfg->name = strdup(raid_name); 1134 if (!raid_cfg->name) { 1135 free(raid_cfg); 1136 SPDK_ERRLOG("unable to allocate memory\n"); 1137 return -ENOMEM; 1138 } 1139 raid_cfg->strip_size = strip_size; 1140 raid_cfg->num_base_bdevs = num_base_bdevs; 1141 raid_cfg->raid_level = raid_level; 1142 1143 raid_cfg->base_bdev = calloc(num_base_bdevs, sizeof(*raid_cfg->base_bdev)); 1144 if (raid_cfg->base_bdev == NULL) { 1145 free(raid_cfg->name); 1146 free(raid_cfg); 1147 SPDK_ERRLOG("unable to allocate memory\n"); 1148 return -ENOMEM; 1149 } 1150 1151 TAILQ_INSERT_TAIL(&g_raid_config.raid_bdev_config_head, raid_cfg, link); 1152 g_raid_config.total_raid_bdev++; 1153 1154 *_raid_cfg = raid_cfg; 1155 return 0; 1156 } 1157 1158 /* 1159 * brief: 1160 * raid_bdev_config_add_base_bdev function add base bdev to raid bdev config. 
1161 * 1162 * params: 1163 * raid_cfg - pointer to raid bdev configuration 1164 * base_bdev_name - name of base bdev 1165 * slot - Position to add base bdev 1166 */ 1167 int 1168 raid_bdev_config_add_base_bdev(struct raid_bdev_config *raid_cfg, const char *base_bdev_name, 1169 uint8_t slot) 1170 { 1171 uint8_t i; 1172 struct raid_bdev_config *tmp; 1173 1174 if (slot >= raid_cfg->num_base_bdevs) { 1175 return -EINVAL; 1176 } 1177 1178 TAILQ_FOREACH(tmp, &g_raid_config.raid_bdev_config_head, link) { 1179 for (i = 0; i < tmp->num_base_bdevs; i++) { 1180 if (tmp->base_bdev[i].name != NULL) { 1181 if (!strcmp(tmp->base_bdev[i].name, base_bdev_name)) { 1182 SPDK_ERRLOG("duplicate base bdev name %s mentioned\n", 1183 base_bdev_name); 1184 return -EEXIST; 1185 } 1186 } 1187 } 1188 } 1189 1190 raid_cfg->base_bdev[slot].name = strdup(base_bdev_name); 1191 if (raid_cfg->base_bdev[slot].name == NULL) { 1192 SPDK_ERRLOG("unable to allocate memory\n"); 1193 return -ENOMEM; 1194 } 1195 1196 return 0; 1197 } 1198 /* 1199 * brief: 1200 * raid_bdev_parse_raid is used to parse the raid bdev from config file based on 1201 * pre-defined raid bdev format in config file. 
1202 * Format of config file: 1203 * [RAID1] 1204 * Name raid1 1205 * StripSize 64 1206 * NumDevices 2 1207 * RaidLevel 0 1208 * Devices Nvme0n1 Nvme1n1 1209 * 1210 * [RAID2] 1211 * Name raid2 1212 * StripSize 64 1213 * NumDevices 3 1214 * RaidLevel 0 1215 * Devices Nvme2n1 Nvme3n1 Nvme4n1 1216 * 1217 * params: 1218 * conf_section - pointer to config section 1219 * returns: 1220 * 0 - success 1221 * non zero - failure 1222 */ 1223 static int 1224 raid_bdev_parse_raid(struct spdk_conf_section *conf_section) 1225 { 1226 const char *raid_name; 1227 uint32_t strip_size; 1228 uint8_t num_base_bdevs, raid_level; 1229 const char *base_bdev_name; 1230 struct raid_bdev_config *raid_cfg; 1231 int rc, i, val; 1232 1233 raid_name = spdk_conf_section_get_val(conf_section, "Name"); 1234 if (raid_name == NULL) { 1235 SPDK_ERRLOG("raid_name is null\n"); 1236 return -EINVAL; 1237 } 1238 1239 val = spdk_conf_section_get_intval(conf_section, "StripSize"); 1240 if (val < 0) { 1241 return -EINVAL; 1242 } 1243 strip_size = val; 1244 1245 val = spdk_conf_section_get_intval(conf_section, "NumDevices"); 1246 if (val < 0) { 1247 return -EINVAL; 1248 } 1249 num_base_bdevs = val; 1250 1251 val = spdk_conf_section_get_intval(conf_section, "RaidLevel"); 1252 if (val < 0) { 1253 return -EINVAL; 1254 } 1255 raid_level = val; 1256 1257 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "%s %" PRIu32 " %u %u\n", 1258 raid_name, strip_size, num_base_bdevs, raid_level); 1259 1260 rc = raid_bdev_config_add(raid_name, strip_size, num_base_bdevs, raid_level, 1261 &raid_cfg); 1262 if (rc != 0) { 1263 SPDK_ERRLOG("Failed to add raid bdev config\n"); 1264 return rc; 1265 } 1266 1267 for (i = 0; true; i++) { 1268 base_bdev_name = spdk_conf_section_get_nmval(conf_section, "Devices", 0, i); 1269 if (base_bdev_name == NULL) { 1270 break; 1271 } 1272 if (i >= num_base_bdevs) { 1273 raid_bdev_config_cleanup(raid_cfg); 1274 SPDK_ERRLOG("Number of devices mentioned is more than count\n"); 1275 return -EINVAL; 1276 } 1277 1278 rc = 
raid_bdev_config_add_base_bdev(raid_cfg, base_bdev_name, i); 1279 if (rc != 0) { 1280 raid_bdev_config_cleanup(raid_cfg); 1281 SPDK_ERRLOG("Failed to add base bdev to raid bdev config\n"); 1282 return rc; 1283 } 1284 } 1285 1286 if (i != raid_cfg->num_base_bdevs) { 1287 raid_bdev_config_cleanup(raid_cfg); 1288 SPDK_ERRLOG("Number of devices mentioned is less than count\n"); 1289 return -EINVAL; 1290 } 1291 1292 rc = raid_bdev_create(raid_cfg); 1293 if (rc != 0) { 1294 raid_bdev_config_cleanup(raid_cfg); 1295 SPDK_ERRLOG("Failed to create raid bdev\n"); 1296 return rc; 1297 } 1298 1299 rc = raid_bdev_add_base_devices(raid_cfg); 1300 if (rc != 0) { 1301 SPDK_ERRLOG("Failed to add any base bdev to raid bdev\n"); 1302 /* Config is not removed in this case. */ 1303 } 1304 1305 return 0; 1306 } 1307 1308 /* 1309 * brief: 1310 * raid_bdev_parse_config is used to find the raid bdev config section and parse it 1311 * Format of config file: 1312 * params: 1313 * none 1314 * returns: 1315 * 0 - success 1316 * non zero - failure 1317 */ 1318 static int 1319 raid_bdev_parse_config(void) 1320 { 1321 int ret; 1322 struct spdk_conf_section *conf_section; 1323 1324 conf_section = spdk_conf_first_section(NULL); 1325 while (conf_section != NULL) { 1326 if (spdk_conf_section_match_prefix(conf_section, "RAID")) { 1327 ret = raid_bdev_parse_raid(conf_section); 1328 if (ret < 0) { 1329 SPDK_ERRLOG("Unable to parse raid bdev section\n"); 1330 return ret; 1331 } 1332 } 1333 conf_section = spdk_conf_next_section(conf_section); 1334 } 1335 1336 return 0; 1337 } 1338 1339 /* 1340 * brief: 1341 * raid_bdev_fini_start is called when bdev layer is starting the 1342 * shutdown process 1343 * params: 1344 * none 1345 * returns: 1346 * none 1347 */ 1348 static void 1349 raid_bdev_fini_start(void) 1350 { 1351 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_fini_start\n"); 1352 g_shutdown_started = true; 1353 } 1354 1355 /* 1356 * brief: 1357 * raid_bdev_exit is called on raid bdev module exit time by 
bdev layer 1358 * params: 1359 * none 1360 * returns: 1361 * none 1362 */ 1363 static void 1364 raid_bdev_exit(void) 1365 { 1366 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_exit\n"); 1367 raid_bdev_free(); 1368 } 1369 1370 /* 1371 * brief: 1372 * raid_bdev_get_ctx_size is used to return the context size of bdev_io for raid 1373 * module 1374 * params: 1375 * none 1376 * returns: 1377 * size of spdk_bdev_io context for raid 1378 */ 1379 static int 1380 raid_bdev_get_ctx_size(void) 1381 { 1382 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_get_ctx_size\n"); 1383 return sizeof(struct raid_bdev_io); 1384 } 1385 1386 /* 1387 * brief: 1388 * raid_bdev_get_running_config is used to get the configuration options. 1389 * 1390 * params: 1391 * fp - The pointer to a file that will be written to the configuration options. 1392 * returns: 1393 * none 1394 */ 1395 static void 1396 raid_bdev_get_running_config(FILE *fp) 1397 { 1398 struct raid_bdev *raid_bdev; 1399 struct spdk_bdev *base; 1400 int index = 1; 1401 uint8_t i; 1402 1403 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_configured_list, state_link) { 1404 fprintf(fp, 1405 "\n" 1406 "[RAID%d]\n" 1407 " Name %s\n" 1408 " StripSize %" PRIu32 "\n" 1409 " NumDevices %u\n" 1410 " RaidLevel %hhu\n", 1411 index, raid_bdev->bdev.name, raid_bdev->strip_size_kb, 1412 raid_bdev->num_base_bdevs, raid_bdev->raid_level); 1413 fprintf(fp, 1414 " Devices "); 1415 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 1416 base = raid_bdev->base_bdev_info[i].bdev; 1417 if (base) { 1418 fprintf(fp, 1419 "%s ", 1420 base->name); 1421 } 1422 } 1423 fprintf(fp, 1424 "\n"); 1425 index++; 1426 } 1427 } 1428 1429 /* 1430 * brief: 1431 * raid_bdev_can_claim_bdev is the function to check if this base_bdev can be 1432 * claimed by raid bdev or not. 
1433 * params: 1434 * bdev_name - represents base bdev name 1435 * _raid_cfg - pointer to raid bdev config parsed from config file 1436 * base_bdev_slot - if bdev can be claimed, it represents the base_bdev correct 1437 * slot. This field is only valid if return value of this function is true 1438 * returns: 1439 * true - if bdev can be claimed 1440 * false - if bdev can't be claimed 1441 */ 1442 static bool 1443 raid_bdev_can_claim_bdev(const char *bdev_name, struct raid_bdev_config **_raid_cfg, 1444 uint8_t *base_bdev_slot) 1445 { 1446 struct raid_bdev_config *raid_cfg; 1447 uint8_t i; 1448 1449 TAILQ_FOREACH(raid_cfg, &g_raid_config.raid_bdev_config_head, link) { 1450 for (i = 0; i < raid_cfg->num_base_bdevs; i++) { 1451 /* 1452 * Check if the base bdev name is part of raid bdev configuration. 1453 * If match is found then return true and the slot information where 1454 * this base bdev should be inserted in raid bdev 1455 */ 1456 if (!strcmp(bdev_name, raid_cfg->base_bdev[i].name)) { 1457 *_raid_cfg = raid_cfg; 1458 *base_bdev_slot = i; 1459 return true; 1460 } 1461 } 1462 } 1463 1464 return false; 1465 } 1466 1467 1468 static struct spdk_bdev_module g_raid_if = { 1469 .name = "raid", 1470 .module_init = raid_bdev_init, 1471 .fini_start = raid_bdev_fini_start, 1472 .module_fini = raid_bdev_exit, 1473 .get_ctx_size = raid_bdev_get_ctx_size, 1474 .examine_config = raid_bdev_examine, 1475 .config_text = raid_bdev_get_running_config, 1476 .async_init = false, 1477 .async_fini = false, 1478 }; 1479 SPDK_BDEV_MODULE_REGISTER(raid, &g_raid_if) 1480 1481 /* 1482 * brief: 1483 * raid_bdev_init is the initialization function for raid bdev module 1484 * params: 1485 * none 1486 * returns: 1487 * 0 - success 1488 * non zero - failure 1489 */ 1490 static int 1491 raid_bdev_init(void) 1492 { 1493 int ret; 1494 1495 /* Parse config file for raids */ 1496 ret = raid_bdev_parse_config(); 1497 if (ret < 0) { 1498 SPDK_ERRLOG("raid bdev init failed parsing\n"); 1499 
raid_bdev_free(); 1500 return ret; 1501 } 1502 1503 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_init completed successfully\n"); 1504 1505 return 0; 1506 } 1507 1508 /* 1509 * brief: 1510 * raid_bdev_create allocates raid bdev based on passed configuration 1511 * params: 1512 * raid_cfg - configuration of raid bdev 1513 * returns: 1514 * 0 - success 1515 * non zero - failure 1516 */ 1517 int 1518 raid_bdev_create(struct raid_bdev_config *raid_cfg) 1519 { 1520 struct raid_bdev *raid_bdev; 1521 struct spdk_bdev *raid_bdev_gen; 1522 1523 raid_bdev = calloc(1, sizeof(*raid_bdev)); 1524 if (!raid_bdev) { 1525 SPDK_ERRLOG("Unable to allocate memory for raid bdev\n"); 1526 return -ENOMEM; 1527 } 1528 1529 assert(raid_cfg->num_base_bdevs != 0); 1530 raid_bdev->num_base_bdevs = raid_cfg->num_base_bdevs; 1531 raid_bdev->base_bdev_info = calloc(raid_bdev->num_base_bdevs, 1532 sizeof(struct raid_base_bdev_info)); 1533 if (!raid_bdev->base_bdev_info) { 1534 SPDK_ERRLOG("Unable able to allocate base bdev info\n"); 1535 free(raid_bdev); 1536 return -ENOMEM; 1537 } 1538 1539 /* strip_size_kb is from the rpc param. strip_size is in blocks and used 1540 * intnerally and set later. 
1541 */ 1542 raid_bdev->strip_size = 0; 1543 raid_bdev->strip_size_kb = raid_cfg->strip_size; 1544 raid_bdev->state = RAID_BDEV_STATE_CONFIGURING; 1545 raid_bdev->config = raid_cfg; 1546 1547 raid_bdev_gen = &raid_bdev->bdev; 1548 1549 raid_bdev_gen->name = strdup(raid_cfg->name); 1550 if (!raid_bdev_gen->name) { 1551 SPDK_ERRLOG("Unable to allocate name for raid\n"); 1552 free(raid_bdev->base_bdev_info); 1553 free(raid_bdev); 1554 return -ENOMEM; 1555 } 1556 1557 raid_bdev_gen->product_name = "Raid Volume"; 1558 raid_bdev_gen->ctxt = raid_bdev; 1559 raid_bdev_gen->fn_table = &g_raid_bdev_fn_table; 1560 raid_bdev_gen->module = &g_raid_if; 1561 raid_bdev_gen->write_cache = 0; 1562 1563 TAILQ_INSERT_TAIL(&g_raid_bdev_configuring_list, raid_bdev, state_link); 1564 TAILQ_INSERT_TAIL(&g_raid_bdev_list, raid_bdev, global_link); 1565 1566 raid_cfg->raid_bdev = raid_bdev; 1567 1568 return 0; 1569 } 1570 1571 /* 1572 * brief 1573 * raid_bdev_alloc_base_bdev_resource allocates resource of base bdev. 
1574 * params: 1575 * raid_bdev - pointer to raid bdev 1576 * bdev - pointer to base bdev 1577 * base_bdev_slot - position to add base bdev 1578 * returns: 1579 * 0 - success 1580 * non zero - failure 1581 */ 1582 static int 1583 raid_bdev_alloc_base_bdev_resource(struct raid_bdev *raid_bdev, struct spdk_bdev *bdev, 1584 uint8_t base_bdev_slot) 1585 { 1586 struct spdk_bdev_desc *desc; 1587 int rc; 1588 1589 rc = spdk_bdev_open(bdev, true, raid_bdev_remove_base_bdev, bdev, &desc); 1590 if (rc != 0) { 1591 SPDK_ERRLOG("Unable to create desc on bdev '%s'\n", bdev->name); 1592 return rc; 1593 } 1594 1595 rc = spdk_bdev_module_claim_bdev(bdev, NULL, &g_raid_if); 1596 if (rc != 0) { 1597 SPDK_ERRLOG("Unable to claim this bdev as it is already claimed\n"); 1598 spdk_bdev_close(desc); 1599 return rc; 1600 } 1601 1602 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "bdev %s is claimed\n", bdev->name); 1603 1604 assert(raid_bdev->state != RAID_BDEV_STATE_ONLINE); 1605 assert(base_bdev_slot < raid_bdev->num_base_bdevs); 1606 1607 raid_bdev->base_bdev_info[base_bdev_slot].bdev = bdev; 1608 raid_bdev->base_bdev_info[base_bdev_slot].desc = desc; 1609 raid_bdev->num_base_bdevs_discovered++; 1610 assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs); 1611 1612 return 0; 1613 } 1614 1615 /* 1616 * brief: 1617 * If raid bdev config is complete, then only register the raid bdev to 1618 * bdev layer and remove this raid bdev from configuring list and 1619 * insert the raid bdev to configured list 1620 * params: 1621 * raid_bdev - pointer to raid bdev 1622 * returns: 1623 * 0 - success 1624 * non zero - failure 1625 */ 1626 static int 1627 raid_bdev_configure(struct raid_bdev *raid_bdev) 1628 { 1629 uint32_t blocklen; 1630 uint64_t min_blockcnt; 1631 struct spdk_bdev *raid_bdev_gen; 1632 int rc = 0; 1633 1634 blocklen = raid_bdev->base_bdev_info[0].bdev->blocklen; 1635 min_blockcnt = raid_bdev->base_bdev_info[0].bdev->blockcnt; 1636 for (uint8_t i = 1; i < 
raid_bdev->num_base_bdevs; i++) { 1637 /* Calculate minimum block count from all base bdevs */ 1638 if (raid_bdev->base_bdev_info[i].bdev->blockcnt < min_blockcnt) { 1639 min_blockcnt = raid_bdev->base_bdev_info[i].bdev->blockcnt; 1640 } 1641 1642 /* Check blocklen for all base bdevs that it should be same */ 1643 if (blocklen != raid_bdev->base_bdev_info[i].bdev->blocklen) { 1644 /* 1645 * Assumption is that all the base bdevs for any raid bdev should 1646 * have same blocklen 1647 */ 1648 SPDK_ERRLOG("Blocklen of various bdevs not matching\n"); 1649 return -EINVAL; 1650 } 1651 } 1652 1653 /* The strip_size_kb is read in from user in KB. Convert to blocks here for 1654 * internal use. 1655 */ 1656 raid_bdev->strip_size = (raid_bdev->strip_size_kb * 1024) / blocklen; 1657 raid_bdev->strip_size_shift = spdk_u32log2(raid_bdev->strip_size); 1658 raid_bdev->blocklen_shift = spdk_u32log2(blocklen); 1659 1660 raid_bdev_gen = &raid_bdev->bdev; 1661 raid_bdev_gen->blocklen = blocklen; 1662 if (raid_bdev->num_base_bdevs > 1) { 1663 raid_bdev_gen->optimal_io_boundary = raid_bdev->strip_size; 1664 raid_bdev_gen->split_on_optimal_io_boundary = true; 1665 } else { 1666 /* Do not need to split reads/writes on single bdev RAID modules. 
*/ 1667 raid_bdev_gen->optimal_io_boundary = 0; 1668 raid_bdev_gen->split_on_optimal_io_boundary = false; 1669 } 1670 1671 /* 1672 * RAID bdev logic is for striping so take the minimum block count based 1673 * approach where total block count of raid bdev is the number of base 1674 * bdev times the minimum block count of any base bdev 1675 */ 1676 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "min blockcount %lu, numbasedev %u, strip size shift %u\n", 1677 min_blockcnt, 1678 raid_bdev->num_base_bdevs, raid_bdev->strip_size_shift); 1679 raid_bdev_gen->blockcnt = ((min_blockcnt >> raid_bdev->strip_size_shift) << 1680 raid_bdev->strip_size_shift) * raid_bdev->num_base_bdevs; 1681 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "io device register %p\n", raid_bdev); 1682 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "blockcnt %lu, blocklen %u\n", raid_bdev_gen->blockcnt, 1683 raid_bdev_gen->blocklen); 1684 if (raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) { 1685 raid_bdev->state = RAID_BDEV_STATE_ONLINE; 1686 spdk_io_device_register(raid_bdev, raid_bdev_create_cb, raid_bdev_destroy_cb, 1687 sizeof(struct raid_bdev_io_channel), 1688 raid_bdev->bdev.name); 1689 rc = spdk_bdev_register(raid_bdev_gen); 1690 if (rc != 0) { 1691 SPDK_ERRLOG("Unable to register raid bdev and stay at configuring state\n"); 1692 spdk_io_device_unregister(raid_bdev, NULL); 1693 raid_bdev->state = RAID_BDEV_STATE_CONFIGURING; 1694 return rc; 1695 } 1696 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev generic %p\n", raid_bdev_gen); 1697 TAILQ_REMOVE(&g_raid_bdev_configuring_list, raid_bdev, state_link); 1698 TAILQ_INSERT_TAIL(&g_raid_bdev_configured_list, raid_bdev, state_link); 1699 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev is created with name %s, raid_bdev %p\n", 1700 raid_bdev_gen->name, raid_bdev); 1701 } 1702 1703 return 0; 1704 } 1705 1706 /* 1707 * brief: 1708 * If raid bdev is online and registered, change the bdev state to 1709 * configuring and unregister this raid device. 
Queue this raid device
 * in configuring list
 * NOTE: as implemented below, an online raid bdev is moved to the offline
 * state and queued on the offline list before it is unregistered. A raid bdev
 * that is not online is left alone and the callback, if any, is invoked with 0.
 * params:
 * raid_bdev - pointer to raid bdev
 * cb_fn - callback function
 * cb_arg - argument to callback function
 * returns:
 * none
 */
static void
raid_bdev_deconfigure(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn,
		      void *cb_arg)
{
	if (raid_bdev->state == RAID_BDEV_STATE_ONLINE) {
		assert(raid_bdev->num_base_bdevs == raid_bdev->num_base_bdevs_discovered);

		/* configured list -> offline list */
		TAILQ_REMOVE(&g_raid_bdev_configured_list, raid_bdev, state_link);
		raid_bdev->state = RAID_BDEV_STATE_OFFLINE;
		assert(raid_bdev->num_base_bdevs_discovered);
		TAILQ_INSERT_TAIL(&g_raid_bdev_offline_list, raid_bdev, state_link);
		SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev state chaning from online to offline\n");

		/* The callback is handed over to the unregister call. */
		spdk_bdev_unregister(&raid_bdev->bdev, cb_fn, cb_arg);
		return;
	}

	/* Nothing to unregister; report success right away. */
	if (cb_fn != NULL) {
		cb_fn(cb_arg, 0);
	}
}

/*
 * brief:
 * raid_bdev_find_by_base_bdev function finds the raid bdev which has
 * claimed the base bdev.
 * params:
 * base_bdev - pointer to base bdev pointer
 * _raid_bdev - Reference to pointer to raid bdev
 * _base_bdev_slot - Reference to the slot of the base bdev.
 * returns:
 * true - if the raid bdev is found.
 * false - if the raid bdev is not found.
1750 */ 1751 static bool 1752 raid_bdev_find_by_base_bdev(struct spdk_bdev *base_bdev, struct raid_bdev **_raid_bdev, 1753 uint8_t *_base_bdev_slot) 1754 { 1755 struct raid_bdev *raid_bdev; 1756 uint8_t i; 1757 1758 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { 1759 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 1760 if (raid_bdev->base_bdev_info[i].bdev == base_bdev) { 1761 *_raid_bdev = raid_bdev; 1762 *_base_bdev_slot = i; 1763 return true; 1764 } 1765 } 1766 } 1767 1768 return false; 1769 } 1770 1771 /* 1772 * brief: 1773 * raid_bdev_remove_base_bdev function is called by below layers when base_bdev 1774 * is removed. This function checks if this base bdev is part of any raid bdev 1775 * or not. If yes, it takes necessary action on that particular raid bdev. 1776 * params: 1777 * ctx - pointer to base bdev pointer which got removed 1778 * returns: 1779 * none 1780 */ 1781 static void 1782 raid_bdev_remove_base_bdev(void *ctx) 1783 { 1784 struct spdk_bdev *base_bdev = ctx; 1785 struct raid_bdev *raid_bdev = NULL; 1786 uint8_t base_bdev_slot = 0; 1787 1788 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_remove_base_bdev\n"); 1789 1790 /* Find the raid_bdev which has claimed this base_bdev */ 1791 if (!raid_bdev_find_by_base_bdev(base_bdev, &raid_bdev, &base_bdev_slot)) { 1792 SPDK_ERRLOG("bdev to remove '%s' not found\n", base_bdev->name); 1793 return; 1794 } 1795 1796 assert(raid_bdev->base_bdev_info[base_bdev_slot].desc); 1797 raid_bdev->base_bdev_info[base_bdev_slot].remove_scheduled = true; 1798 1799 if (raid_bdev->destruct_called == true || 1800 raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) { 1801 /* 1802 * As raid bdev is not registered yet or already unregistered, 1803 * so cleanup should be done here itself. 1804 */ 1805 raid_bdev_free_base_bdev_resource(raid_bdev, base_bdev_slot); 1806 if (raid_bdev->num_base_bdevs_discovered == 0) { 1807 /* There is no base bdev for this raid, so free the raid device. 
*/ 1808 raid_bdev_cleanup(raid_bdev); 1809 return; 1810 } 1811 } 1812 1813 raid_bdev_deconfigure(raid_bdev, NULL, NULL); 1814 } 1815 1816 /* 1817 * brief: 1818 * Remove base bdevs from the raid bdev one by one. Skip any base bdev which 1819 * doesn't exist. 1820 * params: 1821 * raid_cfg - pointer to raid bdev config. 1822 * cb_fn - callback function 1823 * cb_ctx - argument to callback function 1824 */ 1825 void 1826 raid_bdev_remove_base_devices(struct raid_bdev_config *raid_cfg, 1827 raid_bdev_destruct_cb cb_fn, void *cb_arg) 1828 { 1829 struct raid_bdev *raid_bdev; 1830 struct raid_base_bdev_info *info; 1831 uint8_t i; 1832 1833 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_remove_base_devices\n"); 1834 1835 raid_bdev = raid_cfg->raid_bdev; 1836 if (raid_bdev == NULL) { 1837 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev %s doesn't exist now\n", raid_cfg->name); 1838 if (cb_fn) { 1839 cb_fn(cb_arg, 0); 1840 } 1841 return; 1842 } 1843 1844 if (raid_bdev->destroy_started) { 1845 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "destroying raid bdev %s is already started\n", 1846 raid_cfg->name); 1847 if (cb_fn) { 1848 cb_fn(cb_arg, -EALREADY); 1849 } 1850 return; 1851 } 1852 1853 raid_bdev->destroy_started = true; 1854 1855 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 1856 info = &raid_bdev->base_bdev_info[i]; 1857 1858 if (info->bdev == NULL) { 1859 continue; 1860 } 1861 1862 assert(info->desc); 1863 info->remove_scheduled = true; 1864 1865 if (raid_bdev->destruct_called == true || 1866 raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) { 1867 /* 1868 * As raid bdev is not registered yet or already unregistered, 1869 * so cleanup should be done here itself. 1870 */ 1871 raid_bdev_free_base_bdev_resource(raid_bdev, i); 1872 if (raid_bdev->num_base_bdevs_discovered == 0) { 1873 /* There is no base bdev for this raid, so free the raid device. 
*/ 1874 raid_bdev_cleanup(raid_bdev); 1875 if (cb_fn) { 1876 cb_fn(cb_arg, 0); 1877 } 1878 return; 1879 } 1880 } 1881 } 1882 1883 raid_bdev_deconfigure(raid_bdev, cb_fn, cb_arg); 1884 } 1885 1886 /* 1887 * brief: 1888 * raid_bdev_add_base_device function is the actual function which either adds 1889 * the nvme base device to existing raid bdev or create a new raid bdev. It also claims 1890 * the base device and keep the open descriptor. 1891 * params: 1892 * raid_cfg - pointer to raid bdev config 1893 * bdev - pointer to base bdev 1894 * base_bdev_slot - position to add base bdev 1895 * returns: 1896 * 0 - success 1897 * non zero - failure 1898 */ 1899 static int 1900 raid_bdev_add_base_device(struct raid_bdev_config *raid_cfg, struct spdk_bdev *bdev, 1901 uint8_t base_bdev_slot) 1902 { 1903 struct raid_bdev *raid_bdev; 1904 int rc; 1905 1906 raid_bdev = raid_cfg->raid_bdev; 1907 if (!raid_bdev) { 1908 SPDK_ERRLOG("Raid bdev '%s' is not created yet\n", raid_cfg->name); 1909 return -ENODEV; 1910 } 1911 1912 rc = raid_bdev_alloc_base_bdev_resource(raid_bdev, bdev, base_bdev_slot); 1913 if (rc != 0) { 1914 SPDK_ERRLOG("Failed to allocate resource for bdev '%s'\n", bdev->name); 1915 return rc; 1916 } 1917 1918 assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs); 1919 1920 if (raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs) { 1921 rc = raid_bdev_configure(raid_bdev); 1922 if (rc != 0) { 1923 SPDK_ERRLOG("Failed to configure raid bdev\n"); 1924 return rc; 1925 } 1926 } 1927 1928 return 0; 1929 } 1930 1931 /* 1932 * brief: 1933 * Add base bdevs to the raid bdev one by one. Skip any base bdev which doesn't 1934 * exist or fails to add. If all base bdevs are successfully added, the raid bdev 1935 * moves to the configured state and becomes available. Otherwise, the raid bdev 1936 * stays at the configuring state with added base bdevs. 
1937 * params: 1938 * raid_cfg - pointer to raid bdev config 1939 * returns: 1940 * 0 - The raid bdev moves to the configured state or stays at the configuring 1941 * state with added base bdevs due to any nonexistent base bdev. 1942 * non zero - Failed to add any base bdev and stays at the configuring state with 1943 * added base bdevs. 1944 */ 1945 int 1946 raid_bdev_add_base_devices(struct raid_bdev_config *raid_cfg) 1947 { 1948 struct spdk_bdev *base_bdev; 1949 uint8_t i; 1950 int rc = 0, _rc; 1951 1952 for (i = 0; i < raid_cfg->num_base_bdevs; i++) { 1953 base_bdev = spdk_bdev_get_by_name(raid_cfg->base_bdev[i].name); 1954 if (base_bdev == NULL) { 1955 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "base bdev %s doesn't exist now\n", 1956 raid_cfg->base_bdev[i].name); 1957 continue; 1958 } 1959 1960 _rc = raid_bdev_add_base_device(raid_cfg, base_bdev, i); 1961 if (_rc != 0) { 1962 SPDK_ERRLOG("Failed to add base bdev %s to RAID bdev %s: %s\n", 1963 raid_cfg->base_bdev[i].name, raid_cfg->name, 1964 spdk_strerror(-_rc)); 1965 if (rc == 0) { 1966 rc = _rc; 1967 } 1968 } 1969 } 1970 1971 return rc; 1972 } 1973 1974 /* 1975 * brief: 1976 * raid_bdev_examine function is the examine function call by the below layers 1977 * like bdev_nvme layer. This function will check if this base bdev can be 1978 * claimed by this raid bdev or not. 
1979 * params: 1980 * bdev - pointer to base bdev 1981 * returns: 1982 * none 1983 */ 1984 static void 1985 raid_bdev_examine(struct spdk_bdev *bdev) 1986 { 1987 struct raid_bdev_config *raid_cfg; 1988 uint8_t base_bdev_slot; 1989 1990 if (raid_bdev_can_claim_bdev(bdev->name, &raid_cfg, &base_bdev_slot)) { 1991 raid_bdev_add_base_device(raid_cfg, bdev, base_bdev_slot); 1992 } else { 1993 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "bdev %s can't be claimed\n", 1994 bdev->name); 1995 } 1996 1997 spdk_bdev_module_examine_done(&g_raid_if); 1998 } 1999 2000 /* Log component for bdev raid bdev module */ 2001 SPDK_LOG_REGISTER_COMPONENT("bdev_raid", SPDK_LOG_BDEV_RAID) 2002