1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#include "bdev_raid.h"
#include "spdk/env.h"
#include "spdk/io_channel.h"
#include "spdk/conf.h"
#include "spdk_internal/log.h"
#include "spdk/string.h"
#include "spdk/util.h"
#include "spdk/json.h"
#include "spdk/string.h"

/*
 * Consulted by raid_bdev_destruct(): when true, destruct releases every base
 * bdev slot and moves the raid bdev from the configured list to the offline list.
 * NOTE(review): the code that sets this flag is not visible in this part of the file.
 */
static bool g_shutdown_started = false;

/* raid bdev config as read from config file */
struct raid_config g_raid_config = {
	.raid_bdev_config_head = TAILQ_HEAD_INITIALIZER(g_raid_config.raid_bdev_config_head),
};

/*
 * List of raid bdev in configured list, these raid bdevs are registered with
 * bdev layer
 */
struct raid_configured_tailq g_raid_bdev_configured_list = TAILQ_HEAD_INITIALIZER(
			g_raid_bdev_configured_list);

/* List of raid bdev in configuring list */
struct raid_configuring_tailq g_raid_bdev_configuring_list = TAILQ_HEAD_INITIALIZER(
			g_raid_bdev_configuring_list);

/* List of all raid bdevs (linked through global_link, see raid_bdev_cleanup) */
struct raid_all_tailq g_raid_bdev_list = TAILQ_HEAD_INITIALIZER(g_raid_bdev_list);

/* List of all raid bdevs that are offline */
struct raid_offline_tailq g_raid_bdev_offline_list = TAILQ_HEAD_INITIALIZER(
			g_raid_bdev_offline_list);

/*
 * Function declarations.
 * raid_bdev_waitq_io_process is referenced by raid_bdev_io_submit_fail_process
 * before its definition; the remaining functions are defined elsewhere in this file.
 */
static void raid_bdev_examine(struct spdk_bdev *bdev);
static int raid_bdev_init(void);
static void raid_bdev_waitq_io_process(void *ctx);
static void raid_bdev_deconfigure(struct raid_bdev *raid_bdev,
				  raid_bdev_destruct_cb cb_fn, void *cb_arg);
static void raid_bdev_remove_base_bdev(void *ctx);

/*
 * brief:
 * raid_bdev_create_cb function is a cb function for raid bdev which creates the
 * hierarchy from raid bdev to base bdev io channels.
It will be called per core
 * params:
 * io_device - pointer to raid bdev io device represented by raid_bdev
 * ctx_buf - pointer to context buffer for raid bdev io channel
 * returns:
 * 0 - success
 * non zero - failure (-ENOMEM when the channel array or any base channel
 *            cannot be obtained)
 */
static int
raid_bdev_create_cb(void *io_device, void *ctx_buf)
{
	struct raid_bdev *rbdev = io_device;
	struct raid_bdev_io_channel *chan_ctx = ctx_buf;
	uint8_t idx;
	uint8_t undo;

	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_create_cb, %p\n", chan_ctx);

	assert(rbdev != NULL);
	assert(rbdev->state == RAID_BDEV_STATE_ONLINE);

	/* One slot per base bdev; the slot index matches the base_bdev_info index */
	chan_ctx->num_channels = rbdev->num_base_bdevs;
	chan_ctx->base_channel = calloc(chan_ctx->num_channels,
					sizeof(struct spdk_io_channel *));
	if (chan_ctx->base_channel == NULL) {
		SPDK_ERRLOG("Unable to allocate base bdevs io channel\n");
		return -ENOMEM;
	}

	/*
	 * Get the spdk_io_channel for all the base bdevs. This is used during
	 * split logic to send the respective child bdev ios to respective base
	 * bdev io channel.
	 */
	for (idx = 0; idx < chan_ctx->num_channels; idx++) {
		chan_ctx->base_channel[idx] = spdk_bdev_get_io_channel(
						      rbdev->base_bdev_info[idx].desc);
		if (chan_ctx->base_channel[idx] != NULL) {
			continue;
		}

		/* Roll back: release the channels acquired so far, then the array */
		for (undo = 0; undo < idx; undo++) {
			spdk_put_io_channel(chan_ctx->base_channel[undo]);
		}
		free(chan_ctx->base_channel);
		chan_ctx->base_channel = NULL;
		SPDK_ERRLOG("Unable to create io channel for base bdev\n");
		return -ENOMEM;
	}

	return 0;
}

/*
 * brief:
 * raid_bdev_destroy_cb function is a cb function for raid bdev which deletes the
 * hierarchy from raid bdev to base bdev io channels.
It will be called per core 133 * params: 134 * io_device - pointer to raid bdev io device represented by raid_bdev 135 * ctx_buf - pointer to context buffer for raid bdev io channel 136 * returns: 137 * none 138 */ 139 static void 140 raid_bdev_destroy_cb(void *io_device, void *ctx_buf) 141 { 142 struct raid_bdev_io_channel *raid_ch = ctx_buf; 143 144 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_destroy_cb\n"); 145 146 assert(raid_ch != NULL); 147 assert(raid_ch->base_channel); 148 for (uint8_t i = 0; i < raid_ch->num_channels; i++) { 149 /* Free base bdev channels */ 150 assert(raid_ch->base_channel[i] != NULL); 151 spdk_put_io_channel(raid_ch->base_channel[i]); 152 } 153 free(raid_ch->base_channel); 154 raid_ch->base_channel = NULL; 155 } 156 157 /* 158 * brief: 159 * raid_bdev_cleanup is used to cleanup and free raid_bdev related data 160 * structures. 161 * params: 162 * raid_bdev - pointer to raid_bdev 163 * returns: 164 * none 165 */ 166 static void 167 raid_bdev_cleanup(struct raid_bdev *raid_bdev) 168 { 169 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_cleanup, %p name %s, state %u, config %p\n", 170 raid_bdev, 171 raid_bdev->bdev.name, raid_bdev->state, raid_bdev->config); 172 if (raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) { 173 TAILQ_REMOVE(&g_raid_bdev_configuring_list, raid_bdev, state_link); 174 } else if (raid_bdev->state == RAID_BDEV_STATE_OFFLINE) { 175 TAILQ_REMOVE(&g_raid_bdev_offline_list, raid_bdev, state_link); 176 } else { 177 assert(0); 178 } 179 TAILQ_REMOVE(&g_raid_bdev_list, raid_bdev, global_link); 180 free(raid_bdev->bdev.name); 181 free(raid_bdev->base_bdev_info); 182 if (raid_bdev->config) { 183 raid_bdev->config->raid_bdev = NULL; 184 } 185 free(raid_bdev); 186 } 187 188 /* 189 * brief: 190 * free resource of base bdev for raid bdev 191 * params: 192 * raid_bdev - pointer to raid bdev 193 * base_bdev_slot - position to base bdev in raid bdev 194 * returns: 195 * 0 - success 196 * non zero - failure 197 */ 198 static void 199 
raid_bdev_free_base_bdev_resource(struct raid_bdev *raid_bdev, uint8_t base_bdev_slot) 200 { 201 struct raid_base_bdev_info *info; 202 203 info = &raid_bdev->base_bdev_info[base_bdev_slot]; 204 205 spdk_bdev_module_release_bdev(info->bdev); 206 spdk_bdev_close(info->desc); 207 info->desc = NULL; 208 info->bdev = NULL; 209 210 assert(raid_bdev->num_base_bdevs_discovered); 211 raid_bdev->num_base_bdevs_discovered--; 212 } 213 214 /* 215 * brief: 216 * raid_bdev_destruct is the destruct function table pointer for raid bdev 217 * params: 218 * ctxt - pointer to raid_bdev 219 * returns: 220 * 0 - success 221 * non zero - failure 222 */ 223 static int 224 raid_bdev_destruct(void *ctxt) 225 { 226 struct raid_bdev *raid_bdev = ctxt; 227 228 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_destruct\n"); 229 230 raid_bdev->destruct_called = true; 231 for (uint8_t i = 0; i < raid_bdev->num_base_bdevs; i++) { 232 /* 233 * Close all base bdev descriptors for which call has come from below 234 * layers. Also close the descriptors if we have started shutdown. 235 */ 236 if (g_shutdown_started || 237 ((raid_bdev->base_bdev_info[i].remove_scheduled == true) && 238 (raid_bdev->base_bdev_info[i].bdev != NULL))) { 239 raid_bdev_free_base_bdev_resource(raid_bdev, i); 240 } 241 } 242 243 if (g_shutdown_started) { 244 TAILQ_REMOVE(&g_raid_bdev_configured_list, raid_bdev, state_link); 245 raid_bdev->state = RAID_BDEV_STATE_OFFLINE; 246 TAILQ_INSERT_TAIL(&g_raid_bdev_offline_list, raid_bdev, state_link); 247 } 248 249 spdk_io_device_unregister(raid_bdev, NULL); 250 251 if (raid_bdev->num_base_bdevs_discovered == 0) { 252 /* Free raid_bdev when there are no base bdevs left */ 253 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev base bdevs is 0, going to free all in destruct\n"); 254 raid_bdev_cleanup(raid_bdev); 255 } 256 257 return 0; 258 } 259 260 /* 261 * brief: 262 * raid_bdev_io_completion function is called by lower layers to notify raid 263 * module that particular bdev_io is completed. 
264 * params: 265 * bdev_io - pointer to bdev io submitted to lower layers, like child io 266 * success - bdev_io status 267 * cb_arg - function callback context, like parent io pointer 268 * returns: 269 * none 270 */ 271 static void 272 raid_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 273 { 274 struct spdk_bdev_io *parent_io = cb_arg; 275 276 spdk_bdev_free_io(bdev_io); 277 278 if (success) { 279 spdk_bdev_io_complete(parent_io, SPDK_BDEV_IO_STATUS_SUCCESS); 280 } else { 281 spdk_bdev_io_complete(parent_io, SPDK_BDEV_IO_STATUS_FAILED); 282 } 283 } 284 285 /* 286 * brief: 287 * raid_bdev_submit_rw_request function is used to submit I/O to the correct 288 * member disk 289 * params: 290 * bdev_io - parent bdev io 291 * start_strip - start strip number of this io 292 * returns: 293 * 0 - success 294 * non zero - failure 295 */ 296 static int 297 raid_bdev_submit_rw_request(struct spdk_bdev_io *bdev_io, uint64_t start_strip) 298 { 299 struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 300 struct raid_bdev_io_channel *raid_ch = spdk_io_channel_get_ctx(raid_io->ch); 301 struct raid_bdev *raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt; 302 uint64_t pd_strip; 303 uint32_t offset_in_strip; 304 uint64_t pd_lba; 305 uint64_t pd_blocks; 306 uint8_t pd_idx; 307 int ret = 0; 308 309 pd_strip = start_strip / raid_bdev->num_base_bdevs; 310 pd_idx = start_strip % raid_bdev->num_base_bdevs; 311 offset_in_strip = bdev_io->u.bdev.offset_blocks & (raid_bdev->strip_size - 1); 312 pd_lba = (pd_strip << raid_bdev->strip_size_shift) + offset_in_strip; 313 pd_blocks = bdev_io->u.bdev.num_blocks; 314 if (raid_bdev->base_bdev_info[pd_idx].desc == NULL) { 315 SPDK_ERRLOG("base bdev desc null for pd_idx %u\n", pd_idx); 316 assert(0); 317 } 318 319 /* 320 * Submit child io to bdev layer with using base bdev descriptors, base 321 * bdev lba, base bdev child io length in blocks, buffer, completion 322 * function and function callback 
context 323 */ 324 assert(raid_ch != NULL); 325 assert(raid_ch->base_channel); 326 if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) { 327 ret = spdk_bdev_readv_blocks(raid_bdev->base_bdev_info[pd_idx].desc, 328 raid_ch->base_channel[pd_idx], 329 bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, 330 pd_lba, pd_blocks, raid_bdev_io_completion, 331 bdev_io); 332 } else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) { 333 ret = spdk_bdev_writev_blocks(raid_bdev->base_bdev_info[pd_idx].desc, 334 raid_ch->base_channel[pd_idx], 335 bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, 336 pd_lba, pd_blocks, raid_bdev_io_completion, 337 bdev_io); 338 } else { 339 SPDK_ERRLOG("Recvd not supported io type %u\n", bdev_io->type); 340 assert(0); 341 } 342 343 return ret; 344 } 345 346 /* 347 * brief: 348 * get_curr_base_bdev_index function calculates the base bdev index 349 * params: 350 * raid_bdev - pointer to raid bdev 351 * raid_io - pointer to parent io context 352 * returns: 353 * base bdev index 354 */ 355 static uint8_t 356 get_curr_base_bdev_index(struct raid_bdev *raid_bdev, struct raid_bdev_io *raid_io) 357 { 358 struct spdk_bdev_io *bdev_io; 359 uint64_t start_strip; 360 361 bdev_io = SPDK_CONTAINEROF(raid_io, struct spdk_bdev_io, driver_ctx); 362 start_strip = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift; 363 364 return (start_strip % raid_bdev->num_base_bdevs); 365 } 366 367 /* 368 * brief: 369 * raid_bdev_io_submit_fail_process function processes the IO which failed to submit. 370 * It will try to queue the IOs after storing the context to bdev wait queue logic. 
371 * params: 372 * bdev_io - pointer to bdev_io 373 * raid_io - pointer to raid bdev io 374 * ret - return code 375 * returns: 376 * none 377 */ 378 static void 379 raid_bdev_io_submit_fail_process(struct raid_bdev *raid_bdev, struct spdk_bdev_io *bdev_io, 380 struct raid_bdev_io *raid_io, int ret) 381 { 382 struct raid_bdev_io_channel *raid_ch; 383 uint8_t pd_idx; 384 385 if (ret != -ENOMEM) { 386 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 387 } else { 388 /* Queue the IO to bdev layer wait queue */ 389 pd_idx = get_curr_base_bdev_index(raid_bdev, raid_io); 390 raid_io->waitq_entry.bdev = raid_bdev->base_bdev_info[pd_idx].bdev; 391 raid_io->waitq_entry.cb_fn = raid_bdev_waitq_io_process; 392 raid_io->waitq_entry.cb_arg = raid_io; 393 raid_ch = spdk_io_channel_get_ctx(raid_io->ch); 394 if (spdk_bdev_queue_io_wait(raid_bdev->base_bdev_info[pd_idx].bdev, 395 raid_ch->base_channel[pd_idx], 396 &raid_io->waitq_entry) != 0) { 397 SPDK_ERRLOG("bdev io waitq error, it should not happen\n"); 398 assert(0); 399 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 400 } 401 } 402 } 403 404 /* 405 * brief: 406 * raid_bdev_waitq_io_process function is the callback function 407 * registered by raid bdev module to bdev when bdev_io was unavailable. 408 * params: 409 * ctx - pointer to raid_bdev_io 410 * returns: 411 * none 412 */ 413 static void 414 raid_bdev_waitq_io_process(void *ctx) 415 { 416 struct raid_bdev_io *raid_io = ctx; 417 struct spdk_bdev_io *bdev_io; 418 struct raid_bdev *raid_bdev; 419 int ret; 420 uint64_t start_strip; 421 422 bdev_io = SPDK_CONTAINEROF(raid_io, struct spdk_bdev_io, driver_ctx); 423 /* 424 * Try to submit childs of parent bdev io. If failed due to resource 425 * crunch then break the loop and don't try to process other queued IOs. 
426 */ 427 raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt; 428 start_strip = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift; 429 ret = raid_bdev_submit_rw_request(bdev_io, start_strip); 430 if (ret != 0) { 431 raid_bdev_io_submit_fail_process(raid_bdev, bdev_io, raid_io, ret); 432 } 433 } 434 435 /* 436 * brief: 437 * raid_bdev_start_rw_request function is the submit_request function for 438 * read/write requests 439 * params: 440 * ch - pointer to raid bdev io channel 441 * bdev_io - pointer to parent bdev_io on raid bdev device 442 * returns: 443 * none 444 */ 445 static void 446 raid_bdev_start_rw_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 447 { 448 struct raid_bdev_io *raid_io; 449 struct raid_bdev *raid_bdev; 450 uint64_t start_strip = 0; 451 uint64_t end_strip = 0; 452 int ret; 453 454 raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt; 455 raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 456 raid_io->ch = ch; 457 start_strip = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift; 458 end_strip = (bdev_io->u.bdev.offset_blocks + bdev_io->u.bdev.num_blocks - 1) >> 459 raid_bdev->strip_size_shift; 460 if (start_strip != end_strip && raid_bdev->num_base_bdevs > 1) { 461 assert(false); 462 SPDK_ERRLOG("I/O spans strip boundary!\n"); 463 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 464 return; 465 } 466 ret = raid_bdev_submit_rw_request(bdev_io, start_strip); 467 if (ret != 0) { 468 raid_bdev_io_submit_fail_process(raid_bdev, bdev_io, raid_io, ret); 469 } 470 } 471 472 /* 473 * brief: 474 * raid_bdev_base_io_completion is the completion callback for member disk requests 475 * params: 476 * bdev_io - pointer to member disk requested bdev_io 477 * success - true if successful, false if unsuccessful 478 * cb_arg - callback argument (parent raid bdev_io) 479 * returns: 480 * none 481 */ 482 static void 483 raid_bdev_base_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 484 { 
485 struct spdk_bdev_io *parent_io = cb_arg; 486 struct raid_bdev_io *raid_io = (struct raid_bdev_io *)parent_io->driver_ctx; 487 488 spdk_bdev_free_io(bdev_io); 489 490 if (!success) { 491 raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; 492 } 493 494 raid_io->base_bdev_io_completed++; 495 if (raid_io->base_bdev_io_completed == raid_io->base_bdev_io_expected) { 496 spdk_bdev_io_complete(parent_io, raid_io->base_bdev_io_status); 497 } 498 } 499 500 /* 501 * brief: 502 * raid_bdev_base_io_submit_fail_process processes IO requests for member disk 503 * which failed to submit 504 * params: 505 * raid_bdev_io - pointer to raid bdev_io 506 * pd_idx - base_dev index in raid_bdev 507 * cb_fn - callback when the spdk_bdev_io for base_bdev becomes available 508 * ret - return code 509 * returns: 510 * none 511 */ 512 static void 513 raid_bdev_base_io_submit_fail_process(struct spdk_bdev_io *raid_bdev_io, uint8_t pd_idx, 514 spdk_bdev_io_wait_cb cb_fn, int ret) 515 { 516 struct raid_bdev_io *raid_io = (struct raid_bdev_io *)raid_bdev_io->driver_ctx; 517 struct raid_bdev_io_channel *raid_ch = spdk_io_channel_get_ctx(raid_io->ch); 518 struct raid_bdev *raid_bdev = (struct raid_bdev *)raid_bdev_io->bdev->ctxt; 519 520 assert(ret != 0); 521 522 if (ret == -ENOMEM) { 523 raid_io->waitq_entry.bdev = raid_bdev->base_bdev_info[pd_idx].bdev; 524 raid_io->waitq_entry.cb_fn = cb_fn; 525 raid_io->waitq_entry.cb_arg = raid_bdev_io; 526 spdk_bdev_queue_io_wait(raid_bdev->base_bdev_info[pd_idx].bdev, 527 raid_ch->base_channel[pd_idx], 528 &raid_io->waitq_entry); 529 return; 530 } 531 532 SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); 533 assert(false); 534 spdk_bdev_io_complete(raid_bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 535 } 536 537 /* 538 * brief: 539 * _raid_bdev_submit_reset_request_next function submits the next batch of reset requests 540 * to member disks; it will submit as many as possible unless a reset fails with -ENOMEM, in 541 * which 
case it will queue it for later submission 542 * params: 543 * bdev_io - pointer to parent bdev_io on raid bdev device 544 * returns: 545 * none 546 */ 547 static void 548 _raid_bdev_submit_reset_request_next(void *_bdev_io) 549 { 550 struct spdk_bdev_io *bdev_io = _bdev_io; 551 struct raid_bdev_io *raid_io; 552 struct raid_bdev *raid_bdev; 553 struct raid_bdev_io_channel *raid_ch; 554 int ret; 555 uint8_t i; 556 557 raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt; 558 raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 559 raid_ch = spdk_io_channel_get_ctx(raid_io->ch); 560 561 while (raid_io->base_bdev_io_submitted < raid_bdev->num_base_bdevs) { 562 i = raid_io->base_bdev_io_submitted; 563 ret = spdk_bdev_reset(raid_bdev->base_bdev_info[i].desc, 564 raid_ch->base_channel[i], 565 raid_bdev_base_io_completion, bdev_io); 566 if (ret == 0) { 567 raid_io->base_bdev_io_submitted++; 568 } else { 569 raid_bdev_base_io_submit_fail_process(bdev_io, i, 570 _raid_bdev_submit_reset_request_next, ret); 571 return; 572 } 573 } 574 } 575 576 /* 577 * brief: 578 * _raid_bdev_submit_reset_request function is the submit_request function for 579 * reset requests 580 * params: 581 * ch - pointer to raid bdev io channel 582 * bdev_io - pointer to parent bdev_io on raid bdev device 583 * returns: 584 * none 585 */ 586 static void 587 _raid_bdev_submit_reset_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 588 { 589 struct raid_bdev_io *raid_io; 590 struct raid_bdev *raid_bdev; 591 592 raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt; 593 raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 594 raid_io->ch = ch; 595 raid_io->base_bdev_io_submitted = 0; 596 raid_io->base_bdev_io_completed = 0; 597 raid_io->base_bdev_io_expected = raid_bdev->num_base_bdevs; 598 raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS; 599 _raid_bdev_submit_reset_request_next(bdev_io); 600 } 601 602 /* raid0 IO range */ 603 struct raid_bdev_io_range { 604 uint64_t strip_size; 
605 uint64_t start_strip_in_disk; 606 uint64_t end_strip_in_disk; 607 uint64_t start_offset_in_strip; 608 uint64_t end_offset_in_strip; 609 uint8_t start_disk; 610 uint8_t end_disk; 611 uint8_t n_disks_involved; 612 }; 613 614 static inline void 615 _raid_bdev_get_io_range(struct raid_bdev_io_range *io_range, 616 uint8_t num_base_bdevs, uint64_t strip_size, uint64_t strip_size_shift, 617 uint64_t offset_blocks, uint64_t num_blocks) 618 { 619 uint64_t start_strip; 620 uint64_t end_strip; 621 622 io_range->strip_size = strip_size; 623 624 /* The start and end strip index in raid0 bdev scope */ 625 start_strip = offset_blocks >> strip_size_shift; 626 end_strip = (offset_blocks + num_blocks - 1) >> strip_size_shift; 627 io_range->start_strip_in_disk = start_strip / num_base_bdevs; 628 io_range->end_strip_in_disk = end_strip / num_base_bdevs; 629 630 /* The first strip may have unaligned start LBA offset. 631 * The end strip may have unaligned end LBA offset. 632 * Strips between them certainly have aligned offset and length to boundaries. 633 */ 634 io_range->start_offset_in_strip = offset_blocks % strip_size; 635 io_range->end_offset_in_strip = (offset_blocks + num_blocks - 1) % strip_size; 636 637 /* The base bdev indexes in which start and end strips are located */ 638 io_range->start_disk = start_strip % num_base_bdevs; 639 io_range->end_disk = end_strip % num_base_bdevs; 640 641 /* Calculate how many base_bdevs are involved in io operation. 642 * Number of base bdevs involved is between 1 and num_base_bdevs. 643 * It will be 1 if the first strip and last strip are the same one. 
644 */ 645 io_range->n_disks_involved = spdk_min((end_strip - start_strip + 1), num_base_bdevs); 646 } 647 648 static inline void 649 _raid_bdev_split_io_range(struct raid_bdev_io_range *io_range, uint8_t disk_idx, 650 uint64_t *_offset_in_disk, uint64_t *_nblocks_in_disk) 651 { 652 uint64_t n_strips_in_disk; 653 uint64_t start_offset_in_disk; 654 uint64_t end_offset_in_disk; 655 uint64_t offset_in_disk; 656 uint64_t nblocks_in_disk; 657 uint64_t start_strip_in_disk; 658 uint64_t end_strip_in_disk; 659 660 start_strip_in_disk = io_range->start_strip_in_disk; 661 if (disk_idx < io_range->start_disk) { 662 start_strip_in_disk += 1; 663 } 664 665 end_strip_in_disk = io_range->end_strip_in_disk; 666 if (disk_idx > io_range->end_disk) { 667 end_strip_in_disk -= 1; 668 } 669 670 assert(end_strip_in_disk >= start_strip_in_disk); 671 n_strips_in_disk = end_strip_in_disk - start_strip_in_disk + 1; 672 673 if (disk_idx == io_range->start_disk) { 674 start_offset_in_disk = io_range->start_offset_in_strip; 675 } else { 676 start_offset_in_disk = 0; 677 } 678 679 if (disk_idx == io_range->end_disk) { 680 end_offset_in_disk = io_range->end_offset_in_strip; 681 } else { 682 end_offset_in_disk = io_range->strip_size - 1; 683 } 684 685 offset_in_disk = start_offset_in_disk + start_strip_in_disk * io_range->strip_size; 686 nblocks_in_disk = (n_strips_in_disk - 1) * io_range->strip_size 687 + end_offset_in_disk - start_offset_in_disk + 1; 688 689 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, 690 "raid_bdev (strip_size 0x%lx) splits IO to base_bdev (%u) at (0x%lx, 0x%lx).\n", 691 io_range->strip_size, disk_idx, offset_in_disk, nblocks_in_disk); 692 693 *_offset_in_disk = offset_in_disk; 694 *_nblocks_in_disk = nblocks_in_disk; 695 } 696 697 /* 698 * brief: 699 * _raid_bdev_submit_null_payload_request_next function submits the next batch of 700 * io requests with range but without payload, like FLUSH and UNMAP, to member disks; 701 * it will submit as many as possible unless one base io request 
fails with -ENOMEM,
 * in which case it will queue itself for later submission.
 * params:
 * bdev_io - pointer to parent bdev_io on raid bdev device
 * returns:
 * none
 */
static void
_raid_bdev_submit_null_payload_request_next(void *_bdev_io)
{
	struct spdk_bdev_io *bdev_io = _bdev_io;
	struct raid_bdev *raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt;
	struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx;
	struct raid_bdev_io_channel *raid_ch = spdk_io_channel_get_ctx(raid_io->ch);
	struct raid_bdev_io_range range;
	uint64_t disk_offset;
	uint64_t disk_nblocks;
	uint8_t target;
	int rc;

	/* The range is recomputed on every entry, including re-entry from the wait queue */
	_raid_bdev_get_io_range(&range, raid_bdev->num_base_bdevs,
				raid_bdev->strip_size, raid_bdev->strip_size_shift,
				bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks);

	raid_io->base_bdev_io_expected = range.n_disks_involved;

	while (raid_io->base_bdev_io_submitted < raid_io->base_bdev_io_expected) {
		/*
		 * Members are visited from start_disk onwards, wrapping around,
		 * because start_disk may have a larger index than end_disk.
		 */
		target = (range.start_disk + raid_io->base_bdev_io_submitted) % raid_bdev->num_base_bdevs;

		_raid_bdev_split_io_range(&range, target, &disk_offset, &disk_nblocks);

		switch (bdev_io->type) {
		case SPDK_BDEV_IO_TYPE_UNMAP:
			rc = spdk_bdev_unmap_blocks(raid_bdev->base_bdev_info[target].desc,
						    raid_ch->base_channel[target],
						    disk_offset, disk_nblocks,
						    raid_bdev_base_io_completion, bdev_io);
			break;

		case SPDK_BDEV_IO_TYPE_FLUSH:
			rc = spdk_bdev_flush_blocks(raid_bdev->base_bdev_info[target].desc,
						    raid_ch->base_channel[target],
						    disk_offset, disk_nblocks,
						    raid_bdev_base_io_completion, bdev_io);
			break;

		default:
			SPDK_ERRLOG("submit request, invalid io type with null payload %u\n", bdev_io->type);
			assert(false);
			rc = -EIO;
			break;
		}

		if (rc != 0) {
			/* Stop here; the failure handler re-enters this function on -ENOMEM */
			raid_bdev_base_io_submit_fail_process(bdev_io, target,
							      _raid_bdev_submit_null_payload_request_next, rc);
			return;
		}

		raid_io->base_bdev_io_submitted++;
	}
}

/*
 * brief:
 * _raid_bdev_submit_null_payload_request function is the submit_request function
 * for io requests with range but without payload, like UNMAP and FLUSH.
775 * params: 776 * ch - pointer to raid bdev io channel 777 * bdev_io - pointer to parent bdev_io on raid bdev device 778 * returns: 779 * none 780 */ 781 static void 782 _raid_bdev_submit_null_payload_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 783 { 784 struct raid_bdev_io *raid_io; 785 786 raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 787 raid_io->ch = ch; 788 raid_io->base_bdev_io_submitted = 0; 789 raid_io->base_bdev_io_completed = 0; 790 raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS; 791 792 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev: type %d, range (0x%lx, 0x%lx)\n", 793 bdev_io->type, bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks); 794 795 _raid_bdev_submit_null_payload_request_next(bdev_io); 796 } 797 798 /* 799 * brief: 800 * Callback function to spdk_bdev_io_get_buf. 801 * params: 802 * ch - pointer to raid bdev io channel 803 * bdev_io - pointer to parent bdev_io on raid bdev device 804 * success - True if buffer is allocated or false otherwise. 805 * returns: 806 * none 807 */ 808 static void 809 raid_bdev_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, 810 bool success) 811 { 812 if (!success) { 813 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 814 return; 815 } 816 817 raid_bdev_start_rw_request(ch, bdev_io); 818 } 819 820 /* 821 * brief: 822 * raid_bdev_submit_request function is the submit_request function pointer of 823 * raid bdev function table. This is used to submit the io on raid_bdev to below 824 * layers. 
825 * params: 826 * ch - pointer to raid bdev io channel 827 * bdev_io - pointer to parent bdev_io on raid bdev device 828 * returns: 829 * none 830 */ 831 static void 832 raid_bdev_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 833 { 834 switch (bdev_io->type) { 835 case SPDK_BDEV_IO_TYPE_READ: 836 if (bdev_io->u.bdev.iovs == NULL || bdev_io->u.bdev.iovs[0].iov_base == NULL) { 837 spdk_bdev_io_get_buf(bdev_io, raid_bdev_get_buf_cb, 838 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); 839 } else { 840 /* Just call it directly if iov_base is already populated. */ 841 raid_bdev_start_rw_request(ch, bdev_io); 842 } 843 break; 844 case SPDK_BDEV_IO_TYPE_WRITE: 845 raid_bdev_start_rw_request(ch, bdev_io); 846 break; 847 848 case SPDK_BDEV_IO_TYPE_RESET: 849 _raid_bdev_submit_reset_request(ch, bdev_io); 850 break; 851 852 case SPDK_BDEV_IO_TYPE_FLUSH: 853 case SPDK_BDEV_IO_TYPE_UNMAP: 854 _raid_bdev_submit_null_payload_request(ch, bdev_io); 855 break; 856 857 default: 858 SPDK_ERRLOG("submit request, invalid io type %u\n", bdev_io->type); 859 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 860 break; 861 } 862 863 } 864 865 /* 866 * brief: 867 * _raid_bdev_io_type_supported checks whether io_type is supported in 868 * all base bdev modules of raid bdev module. If anyone among the base_bdevs 869 * doesn't support, the raid device doesn't supports. 
870 * 871 * params: 872 * raid_bdev - pointer to raid bdev context 873 * io_type - io type 874 * returns: 875 * true - io_type is supported 876 * false - io_type is not supported 877 */ 878 inline static bool 879 _raid_bdev_io_type_supported(struct raid_bdev *raid_bdev, enum spdk_bdev_io_type io_type) 880 { 881 uint8_t i; 882 883 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 884 if (raid_bdev->base_bdev_info[i].bdev == NULL) { 885 assert(false); 886 continue; 887 } 888 889 if (spdk_bdev_io_type_supported(raid_bdev->base_bdev_info[i].bdev, 890 io_type) == false) { 891 return false; 892 } 893 } 894 895 return true; 896 } 897 898 /* 899 * brief: 900 * raid_bdev_io_type_supported is the io_supported function for bdev function 901 * table which returns whether the particular io type is supported or not by 902 * raid bdev module 903 * params: 904 * ctx - pointer to raid bdev context 905 * type - io type 906 * returns: 907 * true - io_type is supported 908 * false - io_type is not supported 909 */ 910 static bool 911 raid_bdev_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 912 { 913 switch (io_type) { 914 case SPDK_BDEV_IO_TYPE_READ: 915 case SPDK_BDEV_IO_TYPE_WRITE: 916 return true; 917 918 case SPDK_BDEV_IO_TYPE_FLUSH: 919 case SPDK_BDEV_IO_TYPE_RESET: 920 case SPDK_BDEV_IO_TYPE_UNMAP: 921 return _raid_bdev_io_type_supported(ctx, io_type); 922 923 default: 924 return false; 925 } 926 927 return false; 928 } 929 930 /* 931 * brief: 932 * raid_bdev_get_io_channel is the get_io_channel function table pointer for 933 * raid bdev. 
This is used to return the io channel for this raid bdev 934 * params: 935 * ctxt - pointer to raid_bdev 936 * returns: 937 * pointer to io channel for raid bdev 938 */ 939 static struct spdk_io_channel * 940 raid_bdev_get_io_channel(void *ctxt) 941 { 942 struct raid_bdev *raid_bdev = ctxt; 943 944 return spdk_get_io_channel(raid_bdev); 945 } 946 947 /* 948 * brief: 949 * raid_bdev_dump_info_json is the function table pointer for raid bdev 950 * params: 951 * ctx - pointer to raid_bdev 952 * w - pointer to json context 953 * returns: 954 * 0 - success 955 * non zero - failure 956 */ 957 static int 958 raid_bdev_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) 959 { 960 struct raid_bdev *raid_bdev = ctx; 961 962 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_dump_config_json\n"); 963 assert(raid_bdev != NULL); 964 965 /* Dump the raid bdev configuration related information */ 966 spdk_json_write_named_object_begin(w, "raid"); 967 spdk_json_write_named_uint32(w, "strip_size", raid_bdev->strip_size); 968 spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb); 969 spdk_json_write_named_uint32(w, "state", raid_bdev->state); 970 spdk_json_write_named_uint32(w, "raid_level", raid_bdev->raid_level); 971 spdk_json_write_named_uint32(w, "destruct_called", raid_bdev->destruct_called); 972 spdk_json_write_named_uint32(w, "num_base_bdevs", raid_bdev->num_base_bdevs); 973 spdk_json_write_named_uint32(w, "num_base_bdevs_discovered", raid_bdev->num_base_bdevs_discovered); 974 spdk_json_write_name(w, "base_bdevs_list"); 975 spdk_json_write_array_begin(w); 976 for (uint8_t i = 0; i < raid_bdev->num_base_bdevs; i++) { 977 if (raid_bdev->base_bdev_info[i].bdev) { 978 spdk_json_write_string(w, raid_bdev->base_bdev_info[i].bdev->name); 979 } else { 980 spdk_json_write_null(w); 981 } 982 } 983 spdk_json_write_array_end(w); 984 spdk_json_write_object_end(w); 985 986 return 0; 987 } 988 989 /* 990 * brief: 991 * raid_bdev_write_config_json is the function table 
pointer for raid bdev 992 * params: 993 * bdev - pointer to spdk_bdev 994 * w - pointer to json context 995 * returns: 996 * none 997 */ 998 static void 999 raid_bdev_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 1000 { 1001 struct raid_bdev *raid_bdev = bdev->ctxt; 1002 struct spdk_bdev *base; 1003 uint8_t i; 1004 1005 spdk_json_write_object_begin(w); 1006 1007 spdk_json_write_named_string(w, "method", "construct_raid_bdev"); 1008 1009 spdk_json_write_named_object_begin(w, "params"); 1010 spdk_json_write_named_string(w, "name", bdev->name); 1011 spdk_json_write_named_uint32(w, "strip_size", raid_bdev->strip_size_kb); 1012 spdk_json_write_named_uint32(w, "raid_level", raid_bdev->raid_level); 1013 1014 spdk_json_write_named_array_begin(w, "base_bdevs"); 1015 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 1016 base = raid_bdev->base_bdev_info[i].bdev; 1017 if (base) { 1018 spdk_json_write_string(w, base->name); 1019 } 1020 } 1021 spdk_json_write_array_end(w); 1022 spdk_json_write_object_end(w); 1023 1024 spdk_json_write_object_end(w); 1025 } 1026 1027 /* g_raid_bdev_fn_table is the function table for raid bdev */ 1028 static const struct spdk_bdev_fn_table g_raid_bdev_fn_table = { 1029 .destruct = raid_bdev_destruct, 1030 .submit_request = raid_bdev_submit_request, 1031 .io_type_supported = raid_bdev_io_type_supported, 1032 .get_io_channel = raid_bdev_get_io_channel, 1033 .dump_info_json = raid_bdev_dump_info_json, 1034 .write_config_json = raid_bdev_write_config_json, 1035 }; 1036 1037 /* 1038 * brief: 1039 * raid_bdev_config_cleanup function is used to free memory for one raid_bdev in configuration 1040 * params: 1041 * raid_cfg - pointer to raid_bdev_config structure 1042 * returns: 1043 * none 1044 */ 1045 void 1046 raid_bdev_config_cleanup(struct raid_bdev_config *raid_cfg) 1047 { 1048 uint8_t i; 1049 1050 TAILQ_REMOVE(&g_raid_config.raid_bdev_config_head, raid_cfg, link); 1051 g_raid_config.total_raid_bdev--; 1052 1053 if 
(raid_cfg->base_bdev) { 1054 for (i = 0; i < raid_cfg->num_base_bdevs; i++) { 1055 free(raid_cfg->base_bdev[i].name); 1056 } 1057 free(raid_cfg->base_bdev); 1058 } 1059 free(raid_cfg->name); 1060 free(raid_cfg); 1061 } 1062 1063 /* 1064 * brief: 1065 * raid_bdev_free is the raid bdev function table function pointer. This is 1066 * called on bdev free path 1067 * params: 1068 * none 1069 * returns: 1070 * none 1071 */ 1072 static void 1073 raid_bdev_free(void) 1074 { 1075 struct raid_bdev_config *raid_cfg, *tmp; 1076 1077 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_free\n"); 1078 TAILQ_FOREACH_SAFE(raid_cfg, &g_raid_config.raid_bdev_config_head, link, tmp) { 1079 raid_bdev_config_cleanup(raid_cfg); 1080 } 1081 } 1082 1083 /* brief 1084 * raid_bdev_config_find_by_name is a helper function to find raid bdev config 1085 * by name as key. 1086 * 1087 * params: 1088 * raid_name - name for raid bdev. 1089 */ 1090 struct raid_bdev_config * 1091 raid_bdev_config_find_by_name(const char *raid_name) 1092 { 1093 struct raid_bdev_config *raid_cfg; 1094 1095 TAILQ_FOREACH(raid_cfg, &g_raid_config.raid_bdev_config_head, link) { 1096 if (!strcmp(raid_cfg->name, raid_name)) { 1097 return raid_cfg; 1098 } 1099 } 1100 1101 return raid_cfg; 1102 } 1103 1104 /* 1105 * brief 1106 * raid_bdev_config_add function adds config for newly created raid bdev. 1107 * 1108 * params: 1109 * raid_name - name for raid bdev. 1110 * strip_size - strip size in KB 1111 * num_base_bdevs - number of base bdevs. 1112 * raid_level - raid level, only raid level 0 is supported. 
1113 * _raid_cfg - Pointer to newly added configuration 1114 */ 1115 int 1116 raid_bdev_config_add(const char *raid_name, uint32_t strip_size, uint8_t num_base_bdevs, 1117 uint8_t raid_level, struct raid_bdev_config **_raid_cfg) 1118 { 1119 struct raid_bdev_config *raid_cfg; 1120 1121 raid_cfg = raid_bdev_config_find_by_name(raid_name); 1122 if (raid_cfg != NULL) { 1123 SPDK_ERRLOG("Duplicate raid bdev name found in config file %s\n", 1124 raid_name); 1125 return -EEXIST; 1126 } 1127 1128 if (spdk_u32_is_pow2(strip_size) == false) { 1129 SPDK_ERRLOG("Invalid strip size %" PRIu32 "\n", strip_size); 1130 return -EINVAL; 1131 } 1132 1133 if (num_base_bdevs == 0) { 1134 SPDK_ERRLOG("Invalid base device count %u\n", num_base_bdevs); 1135 return -EINVAL; 1136 } 1137 1138 if (raid_level != 0) { 1139 SPDK_ERRLOG("invalid raid level %u, only raid level 0 is supported\n", 1140 raid_level); 1141 return -EINVAL; 1142 } 1143 1144 raid_cfg = calloc(1, sizeof(*raid_cfg)); 1145 if (raid_cfg == NULL) { 1146 SPDK_ERRLOG("unable to allocate memory\n"); 1147 return -ENOMEM; 1148 } 1149 1150 raid_cfg->name = strdup(raid_name); 1151 if (!raid_cfg->name) { 1152 free(raid_cfg); 1153 SPDK_ERRLOG("unable to allocate memory\n"); 1154 return -ENOMEM; 1155 } 1156 raid_cfg->strip_size = strip_size; 1157 raid_cfg->num_base_bdevs = num_base_bdevs; 1158 raid_cfg->raid_level = raid_level; 1159 1160 raid_cfg->base_bdev = calloc(num_base_bdevs, sizeof(*raid_cfg->base_bdev)); 1161 if (raid_cfg->base_bdev == NULL) { 1162 free(raid_cfg->name); 1163 free(raid_cfg); 1164 SPDK_ERRLOG("unable to allocate memory\n"); 1165 return -ENOMEM; 1166 } 1167 1168 TAILQ_INSERT_TAIL(&g_raid_config.raid_bdev_config_head, raid_cfg, link); 1169 g_raid_config.total_raid_bdev++; 1170 1171 *_raid_cfg = raid_cfg; 1172 return 0; 1173 } 1174 1175 /* 1176 * brief: 1177 * raid_bdev_config_add_base_bdev function add base bdev to raid bdev config. 
1178 * 1179 * params: 1180 * raid_cfg - pointer to raid bdev configuration 1181 * base_bdev_name - name of base bdev 1182 * slot - Position to add base bdev 1183 */ 1184 int 1185 raid_bdev_config_add_base_bdev(struct raid_bdev_config *raid_cfg, const char *base_bdev_name, 1186 uint8_t slot) 1187 { 1188 uint8_t i; 1189 struct raid_bdev_config *tmp; 1190 1191 if (slot >= raid_cfg->num_base_bdevs) { 1192 return -EINVAL; 1193 } 1194 1195 TAILQ_FOREACH(tmp, &g_raid_config.raid_bdev_config_head, link) { 1196 for (i = 0; i < tmp->num_base_bdevs; i++) { 1197 if (tmp->base_bdev[i].name != NULL) { 1198 if (!strcmp(tmp->base_bdev[i].name, base_bdev_name)) { 1199 SPDK_ERRLOG("duplicate base bdev name %s mentioned\n", 1200 base_bdev_name); 1201 return -EEXIST; 1202 } 1203 } 1204 } 1205 } 1206 1207 raid_cfg->base_bdev[slot].name = strdup(base_bdev_name); 1208 if (raid_cfg->base_bdev[slot].name == NULL) { 1209 SPDK_ERRLOG("unable to allocate memory\n"); 1210 return -ENOMEM; 1211 } 1212 1213 return 0; 1214 } 1215 /* 1216 * brief: 1217 * raid_bdev_parse_raid is used to parse the raid bdev from config file based on 1218 * pre-defined raid bdev format in config file. 
1219 * Format of config file: 1220 * [RAID1] 1221 * Name raid1 1222 * StripSize 64 1223 * NumDevices 2 1224 * RaidLevel 0 1225 * Devices Nvme0n1 Nvme1n1 1226 * 1227 * [RAID2] 1228 * Name raid2 1229 * StripSize 64 1230 * NumDevices 3 1231 * RaidLevel 0 1232 * Devices Nvme2n1 Nvme3n1 Nvme4n1 1233 * 1234 * params: 1235 * conf_section - pointer to config section 1236 * returns: 1237 * 0 - success 1238 * non zero - failure 1239 */ 1240 static int 1241 raid_bdev_parse_raid(struct spdk_conf_section *conf_section) 1242 { 1243 const char *raid_name; 1244 uint32_t strip_size; 1245 uint8_t num_base_bdevs, raid_level; 1246 const char *base_bdev_name; 1247 struct raid_bdev_config *raid_cfg; 1248 int rc, i, val; 1249 1250 raid_name = spdk_conf_section_get_val(conf_section, "Name"); 1251 if (raid_name == NULL) { 1252 SPDK_ERRLOG("raid_name is null\n"); 1253 return -EINVAL; 1254 } 1255 1256 val = spdk_conf_section_get_intval(conf_section, "StripSize"); 1257 if (val < 0) { 1258 return -EINVAL; 1259 } 1260 strip_size = val; 1261 1262 val = spdk_conf_section_get_intval(conf_section, "NumDevices"); 1263 if (val < 0) { 1264 return -EINVAL; 1265 } 1266 num_base_bdevs = val; 1267 1268 val = spdk_conf_section_get_intval(conf_section, "RaidLevel"); 1269 if (val < 0) { 1270 return -EINVAL; 1271 } 1272 raid_level = val; 1273 1274 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "%s %" PRIu32 " %u %u\n", 1275 raid_name, strip_size, num_base_bdevs, raid_level); 1276 1277 rc = raid_bdev_config_add(raid_name, strip_size, num_base_bdevs, raid_level, 1278 &raid_cfg); 1279 if (rc != 0) { 1280 SPDK_ERRLOG("Failed to add raid bdev config\n"); 1281 return rc; 1282 } 1283 1284 for (i = 0; true; i++) { 1285 base_bdev_name = spdk_conf_section_get_nmval(conf_section, "Devices", 0, i); 1286 if (base_bdev_name == NULL) { 1287 break; 1288 } 1289 if (i >= num_base_bdevs) { 1290 raid_bdev_config_cleanup(raid_cfg); 1291 SPDK_ERRLOG("Number of devices mentioned is more than count\n"); 1292 return -EINVAL; 1293 } 1294 1295 rc = 
raid_bdev_config_add_base_bdev(raid_cfg, base_bdev_name, i); 1296 if (rc != 0) { 1297 raid_bdev_config_cleanup(raid_cfg); 1298 SPDK_ERRLOG("Failed to add base bdev to raid bdev config\n"); 1299 return rc; 1300 } 1301 } 1302 1303 if (i != raid_cfg->num_base_bdevs) { 1304 raid_bdev_config_cleanup(raid_cfg); 1305 SPDK_ERRLOG("Number of devices mentioned is less than count\n"); 1306 return -EINVAL; 1307 } 1308 1309 rc = raid_bdev_create(raid_cfg); 1310 if (rc != 0) { 1311 raid_bdev_config_cleanup(raid_cfg); 1312 SPDK_ERRLOG("Failed to create raid bdev\n"); 1313 return rc; 1314 } 1315 1316 rc = raid_bdev_add_base_devices(raid_cfg); 1317 if (rc != 0) { 1318 SPDK_ERRLOG("Failed to add any base bdev to raid bdev\n"); 1319 /* Config is not removed in this case. */ 1320 } 1321 1322 return 0; 1323 } 1324 1325 /* 1326 * brief: 1327 * raid_bdev_parse_config is used to find the raid bdev config section and parse it 1328 * Format of config file: 1329 * params: 1330 * none 1331 * returns: 1332 * 0 - success 1333 * non zero - failure 1334 */ 1335 static int 1336 raid_bdev_parse_config(void) 1337 { 1338 int ret; 1339 struct spdk_conf_section *conf_section; 1340 1341 conf_section = spdk_conf_first_section(NULL); 1342 while (conf_section != NULL) { 1343 if (spdk_conf_section_match_prefix(conf_section, "RAID")) { 1344 ret = raid_bdev_parse_raid(conf_section); 1345 if (ret < 0) { 1346 SPDK_ERRLOG("Unable to parse raid bdev section\n"); 1347 return ret; 1348 } 1349 } 1350 conf_section = spdk_conf_next_section(conf_section); 1351 } 1352 1353 return 0; 1354 } 1355 1356 /* 1357 * brief: 1358 * raid_bdev_fini_start is called when bdev layer is starting the 1359 * shutdown process 1360 * params: 1361 * none 1362 * returns: 1363 * none 1364 */ 1365 static void 1366 raid_bdev_fini_start(void) 1367 { 1368 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_fini_start\n"); 1369 g_shutdown_started = true; 1370 } 1371 1372 /* 1373 * brief: 1374 * raid_bdev_exit is called on raid bdev module exit time by 
bdev layer 1375 * params: 1376 * none 1377 * returns: 1378 * none 1379 */ 1380 static void 1381 raid_bdev_exit(void) 1382 { 1383 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_exit\n"); 1384 raid_bdev_free(); 1385 } 1386 1387 /* 1388 * brief: 1389 * raid_bdev_get_ctx_size is used to return the context size of bdev_io for raid 1390 * module 1391 * params: 1392 * none 1393 * returns: 1394 * size of spdk_bdev_io context for raid 1395 */ 1396 static int 1397 raid_bdev_get_ctx_size(void) 1398 { 1399 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_get_ctx_size\n"); 1400 return sizeof(struct raid_bdev_io); 1401 } 1402 1403 /* 1404 * brief: 1405 * raid_bdev_get_running_config is used to get the configuration options. 1406 * 1407 * params: 1408 * fp - The pointer to a file that will be written to the configuration options. 1409 * returns: 1410 * none 1411 */ 1412 static void 1413 raid_bdev_get_running_config(FILE *fp) 1414 { 1415 struct raid_bdev *raid_bdev; 1416 struct spdk_bdev *base; 1417 int index = 1; 1418 uint8_t i; 1419 1420 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_configured_list, state_link) { 1421 fprintf(fp, 1422 "\n" 1423 "[RAID%d]\n" 1424 " Name %s\n" 1425 " StripSize %" PRIu32 "\n" 1426 " NumDevices %u\n" 1427 " RaidLevel %hhu\n", 1428 index, raid_bdev->bdev.name, raid_bdev->strip_size_kb, 1429 raid_bdev->num_base_bdevs, raid_bdev->raid_level); 1430 fprintf(fp, 1431 " Devices "); 1432 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 1433 base = raid_bdev->base_bdev_info[i].bdev; 1434 if (base) { 1435 fprintf(fp, 1436 "%s ", 1437 base->name); 1438 } 1439 } 1440 fprintf(fp, 1441 "\n"); 1442 index++; 1443 } 1444 } 1445 1446 /* 1447 * brief: 1448 * raid_bdev_can_claim_bdev is the function to check if this base_bdev can be 1449 * claimed by raid bdev or not. 
1450 * params: 1451 * bdev_name - represents base bdev name 1452 * _raid_cfg - pointer to raid bdev config parsed from config file 1453 * base_bdev_slot - if bdev can be claimed, it represents the base_bdev correct 1454 * slot. This field is only valid if return value of this function is true 1455 * returns: 1456 * true - if bdev can be claimed 1457 * false - if bdev can't be claimed 1458 */ 1459 static bool 1460 raid_bdev_can_claim_bdev(const char *bdev_name, struct raid_bdev_config **_raid_cfg, 1461 uint8_t *base_bdev_slot) 1462 { 1463 struct raid_bdev_config *raid_cfg; 1464 uint8_t i; 1465 1466 TAILQ_FOREACH(raid_cfg, &g_raid_config.raid_bdev_config_head, link) { 1467 for (i = 0; i < raid_cfg->num_base_bdevs; i++) { 1468 /* 1469 * Check if the base bdev name is part of raid bdev configuration. 1470 * If match is found then return true and the slot information where 1471 * this base bdev should be inserted in raid bdev 1472 */ 1473 if (!strcmp(bdev_name, raid_cfg->base_bdev[i].name)) { 1474 *_raid_cfg = raid_cfg; 1475 *base_bdev_slot = i; 1476 return true; 1477 } 1478 } 1479 } 1480 1481 return false; 1482 } 1483 1484 1485 static struct spdk_bdev_module g_raid_if = { 1486 .name = "raid", 1487 .module_init = raid_bdev_init, 1488 .fini_start = raid_bdev_fini_start, 1489 .module_fini = raid_bdev_exit, 1490 .get_ctx_size = raid_bdev_get_ctx_size, 1491 .examine_config = raid_bdev_examine, 1492 .config_text = raid_bdev_get_running_config, 1493 .async_init = false, 1494 .async_fini = false, 1495 }; 1496 SPDK_BDEV_MODULE_REGISTER(raid, &g_raid_if) 1497 1498 /* 1499 * brief: 1500 * raid_bdev_init is the initialization function for raid bdev module 1501 * params: 1502 * none 1503 * returns: 1504 * 0 - success 1505 * non zero - failure 1506 */ 1507 static int 1508 raid_bdev_init(void) 1509 { 1510 int ret; 1511 1512 /* Parse config file for raids */ 1513 ret = raid_bdev_parse_config(); 1514 if (ret < 0) { 1515 SPDK_ERRLOG("raid bdev init failed parsing\n"); 1516 
raid_bdev_free(); 1517 return ret; 1518 } 1519 1520 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_init completed successfully\n"); 1521 1522 return 0; 1523 } 1524 1525 /* 1526 * brief: 1527 * raid_bdev_create allocates raid bdev based on passed configuration 1528 * params: 1529 * raid_cfg - configuration of raid bdev 1530 * returns: 1531 * 0 - success 1532 * non zero - failure 1533 */ 1534 int 1535 raid_bdev_create(struct raid_bdev_config *raid_cfg) 1536 { 1537 struct raid_bdev *raid_bdev; 1538 struct spdk_bdev *raid_bdev_gen; 1539 1540 raid_bdev = calloc(1, sizeof(*raid_bdev)); 1541 if (!raid_bdev) { 1542 SPDK_ERRLOG("Unable to allocate memory for raid bdev\n"); 1543 return -ENOMEM; 1544 } 1545 1546 assert(raid_cfg->num_base_bdevs != 0); 1547 raid_bdev->num_base_bdevs = raid_cfg->num_base_bdevs; 1548 raid_bdev->base_bdev_info = calloc(raid_bdev->num_base_bdevs, 1549 sizeof(struct raid_base_bdev_info)); 1550 if (!raid_bdev->base_bdev_info) { 1551 SPDK_ERRLOG("Unable able to allocate base bdev info\n"); 1552 free(raid_bdev); 1553 return -ENOMEM; 1554 } 1555 1556 /* strip_size_kb is from the rpc param. strip_size is in blocks and used 1557 * intnerally and set later. 
1558 */ 1559 raid_bdev->strip_size = 0; 1560 raid_bdev->strip_size_kb = raid_cfg->strip_size; 1561 raid_bdev->state = RAID_BDEV_STATE_CONFIGURING; 1562 raid_bdev->config = raid_cfg; 1563 1564 raid_bdev_gen = &raid_bdev->bdev; 1565 1566 raid_bdev_gen->name = strdup(raid_cfg->name); 1567 if (!raid_bdev_gen->name) { 1568 SPDK_ERRLOG("Unable to allocate name for raid\n"); 1569 free(raid_bdev->base_bdev_info); 1570 free(raid_bdev); 1571 return -ENOMEM; 1572 } 1573 1574 raid_bdev_gen->product_name = "Raid Volume"; 1575 raid_bdev_gen->ctxt = raid_bdev; 1576 raid_bdev_gen->fn_table = &g_raid_bdev_fn_table; 1577 raid_bdev_gen->module = &g_raid_if; 1578 raid_bdev_gen->write_cache = 0; 1579 1580 TAILQ_INSERT_TAIL(&g_raid_bdev_configuring_list, raid_bdev, state_link); 1581 TAILQ_INSERT_TAIL(&g_raid_bdev_list, raid_bdev, global_link); 1582 1583 raid_cfg->raid_bdev = raid_bdev; 1584 1585 return 0; 1586 } 1587 1588 /* 1589 * brief 1590 * raid_bdev_alloc_base_bdev_resource allocates resource of base bdev. 
1591 * params: 1592 * raid_bdev - pointer to raid bdev 1593 * bdev - pointer to base bdev 1594 * base_bdev_slot - position to add base bdev 1595 * returns: 1596 * 0 - success 1597 * non zero - failure 1598 */ 1599 static int 1600 raid_bdev_alloc_base_bdev_resource(struct raid_bdev *raid_bdev, struct spdk_bdev *bdev, 1601 uint8_t base_bdev_slot) 1602 { 1603 struct spdk_bdev_desc *desc; 1604 int rc; 1605 1606 rc = spdk_bdev_open(bdev, true, raid_bdev_remove_base_bdev, bdev, &desc); 1607 if (rc != 0) { 1608 SPDK_ERRLOG("Unable to create desc on bdev '%s'\n", bdev->name); 1609 return rc; 1610 } 1611 1612 rc = spdk_bdev_module_claim_bdev(bdev, NULL, &g_raid_if); 1613 if (rc != 0) { 1614 SPDK_ERRLOG("Unable to claim this bdev as it is already claimed\n"); 1615 spdk_bdev_close(desc); 1616 return rc; 1617 } 1618 1619 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "bdev %s is claimed\n", bdev->name); 1620 1621 assert(raid_bdev->state != RAID_BDEV_STATE_ONLINE); 1622 assert(base_bdev_slot < raid_bdev->num_base_bdevs); 1623 1624 raid_bdev->base_bdev_info[base_bdev_slot].bdev = bdev; 1625 raid_bdev->base_bdev_info[base_bdev_slot].desc = desc; 1626 raid_bdev->num_base_bdevs_discovered++; 1627 assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs); 1628 1629 return 0; 1630 } 1631 1632 /* 1633 * brief: 1634 * If raid bdev config is complete, then only register the raid bdev to 1635 * bdev layer and remove this raid bdev from configuring list and 1636 * insert the raid bdev to configured list 1637 * params: 1638 * raid_bdev - pointer to raid bdev 1639 * returns: 1640 * 0 - success 1641 * non zero - failure 1642 */ 1643 static int 1644 raid_bdev_configure(struct raid_bdev *raid_bdev) 1645 { 1646 uint32_t blocklen; 1647 uint64_t min_blockcnt; 1648 struct spdk_bdev *raid_bdev_gen; 1649 int rc = 0; 1650 1651 blocklen = raid_bdev->base_bdev_info[0].bdev->blocklen; 1652 min_blockcnt = raid_bdev->base_bdev_info[0].bdev->blockcnt; 1653 for (uint8_t i = 1; i < 
raid_bdev->num_base_bdevs; i++) { 1654 /* Calculate minimum block count from all base bdevs */ 1655 if (raid_bdev->base_bdev_info[i].bdev->blockcnt < min_blockcnt) { 1656 min_blockcnt = raid_bdev->base_bdev_info[i].bdev->blockcnt; 1657 } 1658 1659 /* Check blocklen for all base bdevs that it should be same */ 1660 if (blocklen != raid_bdev->base_bdev_info[i].bdev->blocklen) { 1661 /* 1662 * Assumption is that all the base bdevs for any raid bdev should 1663 * have same blocklen 1664 */ 1665 SPDK_ERRLOG("Blocklen of various bdevs not matching\n"); 1666 return -EINVAL; 1667 } 1668 } 1669 1670 /* The strip_size_kb is read in from user in KB. Convert to blocks here for 1671 * internal use. 1672 */ 1673 raid_bdev->strip_size = (raid_bdev->strip_size_kb * 1024) / blocklen; 1674 raid_bdev->strip_size_shift = spdk_u32log2(raid_bdev->strip_size); 1675 raid_bdev->blocklen_shift = spdk_u32log2(blocklen); 1676 1677 raid_bdev_gen = &raid_bdev->bdev; 1678 raid_bdev_gen->blocklen = blocklen; 1679 if (raid_bdev->num_base_bdevs > 1) { 1680 raid_bdev_gen->optimal_io_boundary = raid_bdev->strip_size; 1681 raid_bdev_gen->split_on_optimal_io_boundary = true; 1682 } else { 1683 /* Do not need to split reads/writes on single bdev RAID modules. 
*/ 1684 raid_bdev_gen->optimal_io_boundary = 0; 1685 raid_bdev_gen->split_on_optimal_io_boundary = false; 1686 } 1687 1688 /* 1689 * RAID bdev logic is for striping so take the minimum block count based 1690 * approach where total block count of raid bdev is the number of base 1691 * bdev times the minimum block count of any base bdev 1692 */ 1693 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "min blockcount %lu, numbasedev %u, strip size shift %u\n", 1694 min_blockcnt, 1695 raid_bdev->num_base_bdevs, raid_bdev->strip_size_shift); 1696 raid_bdev_gen->blockcnt = ((min_blockcnt >> raid_bdev->strip_size_shift) << 1697 raid_bdev->strip_size_shift) * raid_bdev->num_base_bdevs; 1698 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "io device register %p\n", raid_bdev); 1699 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "blockcnt %lu, blocklen %u\n", raid_bdev_gen->blockcnt, 1700 raid_bdev_gen->blocklen); 1701 if (raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) { 1702 raid_bdev->state = RAID_BDEV_STATE_ONLINE; 1703 spdk_io_device_register(raid_bdev, raid_bdev_create_cb, raid_bdev_destroy_cb, 1704 sizeof(struct raid_bdev_io_channel), 1705 raid_bdev->bdev.name); 1706 rc = spdk_bdev_register(raid_bdev_gen); 1707 if (rc != 0) { 1708 SPDK_ERRLOG("Unable to register raid bdev and stay at configuring state\n"); 1709 spdk_io_device_unregister(raid_bdev, NULL); 1710 raid_bdev->state = RAID_BDEV_STATE_CONFIGURING; 1711 return rc; 1712 } 1713 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev generic %p\n", raid_bdev_gen); 1714 TAILQ_REMOVE(&g_raid_bdev_configuring_list, raid_bdev, state_link); 1715 TAILQ_INSERT_TAIL(&g_raid_bdev_configured_list, raid_bdev, state_link); 1716 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev is created with name %s, raid_bdev %p\n", 1717 raid_bdev_gen->name, raid_bdev); 1718 } 1719 1720 return 0; 1721 } 1722 1723 /* 1724 * brief: 1725 * If raid bdev is online and registered, change the bdev state to 1726 * configuring and unregister this raid device. 
Queue this raid device 1727 * in configuring list 1728 * params: 1729 * raid_bdev - pointer to raid bdev 1730 * cb_fn - callback function 1731 * cb_arg - argument to callback function 1732 * returns: 1733 * none 1734 */ 1735 static void 1736 raid_bdev_deconfigure(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn, 1737 void *cb_arg) 1738 { 1739 if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) { 1740 if (cb_fn) { 1741 cb_fn(cb_arg, 0); 1742 } 1743 return; 1744 } 1745 1746 assert(raid_bdev->num_base_bdevs == raid_bdev->num_base_bdevs_discovered); 1747 TAILQ_REMOVE(&g_raid_bdev_configured_list, raid_bdev, state_link); 1748 raid_bdev->state = RAID_BDEV_STATE_OFFLINE; 1749 assert(raid_bdev->num_base_bdevs_discovered); 1750 TAILQ_INSERT_TAIL(&g_raid_bdev_offline_list, raid_bdev, state_link); 1751 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev state chaning from online to offline\n"); 1752 1753 spdk_bdev_unregister(&raid_bdev->bdev, cb_fn, cb_arg); 1754 } 1755 1756 /* 1757 * brief: 1758 * raid_bdev_find_by_base_bdev function finds the raid bdev which has 1759 * claimed the base bdev. 1760 * params: 1761 * base_bdev - pointer to base bdev pointer 1762 * _raid_bdev - Referenct to pointer to raid bdev 1763 * _base_bdev_slot - Reference to the slot of the base bdev. 1764 * returns: 1765 * true - if the raid bdev is found. 1766 * false - if the raid bdev is not found. 
1767 */ 1768 static bool 1769 raid_bdev_find_by_base_bdev(struct spdk_bdev *base_bdev, struct raid_bdev **_raid_bdev, 1770 uint8_t *_base_bdev_slot) 1771 { 1772 struct raid_bdev *raid_bdev; 1773 uint8_t i; 1774 1775 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { 1776 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 1777 if (raid_bdev->base_bdev_info[i].bdev == base_bdev) { 1778 *_raid_bdev = raid_bdev; 1779 *_base_bdev_slot = i; 1780 return true; 1781 } 1782 } 1783 } 1784 1785 return false; 1786 } 1787 1788 /* 1789 * brief: 1790 * raid_bdev_remove_base_bdev function is called by below layers when base_bdev 1791 * is removed. This function checks if this base bdev is part of any raid bdev 1792 * or not. If yes, it takes necessary action on that particular raid bdev. 1793 * params: 1794 * ctx - pointer to base bdev pointer which got removed 1795 * returns: 1796 * none 1797 */ 1798 static void 1799 raid_bdev_remove_base_bdev(void *ctx) 1800 { 1801 struct spdk_bdev *base_bdev = ctx; 1802 struct raid_bdev *raid_bdev = NULL; 1803 uint8_t base_bdev_slot = 0; 1804 1805 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_remove_base_bdev\n"); 1806 1807 /* Find the raid_bdev which has claimed this base_bdev */ 1808 if (!raid_bdev_find_by_base_bdev(base_bdev, &raid_bdev, &base_bdev_slot)) { 1809 SPDK_ERRLOG("bdev to remove '%s' not found\n", base_bdev->name); 1810 return; 1811 } 1812 1813 assert(raid_bdev->base_bdev_info[base_bdev_slot].desc); 1814 raid_bdev->base_bdev_info[base_bdev_slot].remove_scheduled = true; 1815 1816 if (raid_bdev->destruct_called == true || 1817 raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) { 1818 /* 1819 * As raid bdev is not registered yet or already unregistered, 1820 * so cleanup should be done here itself. 1821 */ 1822 raid_bdev_free_base_bdev_resource(raid_bdev, base_bdev_slot); 1823 if (raid_bdev->num_base_bdevs_discovered == 0) { 1824 /* There is no base bdev for this raid, so free the raid device. 
*/ 1825 raid_bdev_cleanup(raid_bdev); 1826 return; 1827 } 1828 } 1829 1830 raid_bdev_deconfigure(raid_bdev, NULL, NULL); 1831 } 1832 1833 /* 1834 * brief: 1835 * Remove base bdevs from the raid bdev one by one. Skip any base bdev which 1836 * doesn't exist. 1837 * params: 1838 * raid_cfg - pointer to raid bdev config. 1839 * cb_fn - callback function 1840 * cb_ctx - argument to callback function 1841 */ 1842 void 1843 raid_bdev_remove_base_devices(struct raid_bdev_config *raid_cfg, 1844 raid_bdev_destruct_cb cb_fn, void *cb_arg) 1845 { 1846 struct raid_bdev *raid_bdev; 1847 struct raid_base_bdev_info *info; 1848 uint8_t i; 1849 1850 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_remove_base_devices\n"); 1851 1852 raid_bdev = raid_cfg->raid_bdev; 1853 if (raid_bdev == NULL) { 1854 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev %s doesn't exist now\n", raid_cfg->name); 1855 if (cb_fn) { 1856 cb_fn(cb_arg, 0); 1857 } 1858 return; 1859 } 1860 1861 if (raid_bdev->destroy_started) { 1862 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "destroying raid bdev %s is already started\n", 1863 raid_cfg->name); 1864 if (cb_fn) { 1865 cb_fn(cb_arg, -EALREADY); 1866 } 1867 return; 1868 } 1869 1870 raid_bdev->destroy_started = true; 1871 1872 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 1873 info = &raid_bdev->base_bdev_info[i]; 1874 1875 if (info->bdev == NULL) { 1876 continue; 1877 } 1878 1879 assert(info->desc); 1880 info->remove_scheduled = true; 1881 1882 if (raid_bdev->destruct_called == true || 1883 raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) { 1884 /* 1885 * As raid bdev is not registered yet or already unregistered, 1886 * so cleanup should be done here itself. 1887 */ 1888 raid_bdev_free_base_bdev_resource(raid_bdev, i); 1889 if (raid_bdev->num_base_bdevs_discovered == 0) { 1890 /* There is no base bdev for this raid, so free the raid device. 
*/ 1891 raid_bdev_cleanup(raid_bdev); 1892 if (cb_fn) { 1893 cb_fn(cb_arg, 0); 1894 } 1895 return; 1896 } 1897 } 1898 } 1899 1900 raid_bdev_deconfigure(raid_bdev, cb_fn, cb_arg); 1901 } 1902 1903 /* 1904 * brief: 1905 * raid_bdev_add_base_device function is the actual function which either adds 1906 * the nvme base device to existing raid bdev or create a new raid bdev. It also claims 1907 * the base device and keep the open descriptor. 1908 * params: 1909 * raid_cfg - pointer to raid bdev config 1910 * bdev - pointer to base bdev 1911 * base_bdev_slot - position to add base bdev 1912 * returns: 1913 * 0 - success 1914 * non zero - failure 1915 */ 1916 static int 1917 raid_bdev_add_base_device(struct raid_bdev_config *raid_cfg, struct spdk_bdev *bdev, 1918 uint8_t base_bdev_slot) 1919 { 1920 struct raid_bdev *raid_bdev; 1921 int rc; 1922 1923 raid_bdev = raid_cfg->raid_bdev; 1924 if (!raid_bdev) { 1925 SPDK_ERRLOG("Raid bdev '%s' is not created yet\n", raid_cfg->name); 1926 return -ENODEV; 1927 } 1928 1929 rc = raid_bdev_alloc_base_bdev_resource(raid_bdev, bdev, base_bdev_slot); 1930 if (rc != 0) { 1931 SPDK_ERRLOG("Failed to allocate resource for bdev '%s'\n", bdev->name); 1932 return rc; 1933 } 1934 1935 assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs); 1936 1937 if (raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs) { 1938 rc = raid_bdev_configure(raid_bdev); 1939 if (rc != 0) { 1940 SPDK_ERRLOG("Failed to configure raid bdev\n"); 1941 return rc; 1942 } 1943 } 1944 1945 return 0; 1946 } 1947 1948 /* 1949 * brief: 1950 * Add base bdevs to the raid bdev one by one. Skip any base bdev which doesn't 1951 * exist or fails to add. If all base bdevs are successfully added, the raid bdev 1952 * moves to the configured state and becomes available. Otherwise, the raid bdev 1953 * stays at the configuring state with added base bdevs. 
1954 * params: 1955 * raid_cfg - pointer to raid bdev config 1956 * returns: 1957 * 0 - The raid bdev moves to the configured state or stays at the configuring 1958 * state with added base bdevs due to any nonexistent base bdev. 1959 * non zero - Failed to add any base bdev and stays at the configuring state with 1960 * added base bdevs. 1961 */ 1962 int 1963 raid_bdev_add_base_devices(struct raid_bdev_config *raid_cfg) 1964 { 1965 struct spdk_bdev *base_bdev; 1966 uint8_t i; 1967 int rc = 0, _rc; 1968 1969 for (i = 0; i < raid_cfg->num_base_bdevs; i++) { 1970 base_bdev = spdk_bdev_get_by_name(raid_cfg->base_bdev[i].name); 1971 if (base_bdev == NULL) { 1972 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "base bdev %s doesn't exist now\n", 1973 raid_cfg->base_bdev[i].name); 1974 continue; 1975 } 1976 1977 _rc = raid_bdev_add_base_device(raid_cfg, base_bdev, i); 1978 if (_rc != 0) { 1979 SPDK_ERRLOG("Failed to add base bdev %s to RAID bdev %s: %s\n", 1980 raid_cfg->base_bdev[i].name, raid_cfg->name, 1981 spdk_strerror(-_rc)); 1982 if (rc == 0) { 1983 rc = _rc; 1984 } 1985 } 1986 } 1987 1988 return rc; 1989 } 1990 1991 /* 1992 * brief: 1993 * raid_bdev_examine function is the examine function call by the below layers 1994 * like bdev_nvme layer. This function will check if this base bdev can be 1995 * claimed by this raid bdev or not. 
1996 * params: 1997 * bdev - pointer to base bdev 1998 * returns: 1999 * none 2000 */ 2001 static void 2002 raid_bdev_examine(struct spdk_bdev *bdev) 2003 { 2004 struct raid_bdev_config *raid_cfg; 2005 uint8_t base_bdev_slot; 2006 2007 if (raid_bdev_can_claim_bdev(bdev->name, &raid_cfg, &base_bdev_slot)) { 2008 raid_bdev_add_base_device(raid_cfg, bdev, base_bdev_slot); 2009 } else { 2010 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "bdev %s can't be claimed\n", 2011 bdev->name); 2012 } 2013 2014 spdk_bdev_module_examine_done(&g_raid_if); 2015 } 2016 2017 /* Log component for bdev raid bdev module */ 2018 SPDK_LOG_REGISTER_COMPONENT("bdev_raid", SPDK_LOG_BDEV_RAID) 2019