1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
32 */ 33 34 #include "bdev_raid.h" 35 #include "spdk/env.h" 36 #include "spdk/io_channel.h" 37 #include "spdk/conf.h" 38 #include "spdk_internal/log.h" 39 #include "spdk/string.h" 40 #include "spdk/util.h" 41 #include "spdk/json.h" 42 #include "spdk/string.h" 43 44 static bool g_shutdown_started = false; 45 46 /* raid bdev config as read from config file */ 47 struct raid_config g_raid_config = { 48 .raid_bdev_config_head = TAILQ_HEAD_INITIALIZER(g_raid_config.raid_bdev_config_head), 49 }; 50 51 /* 52 * List of raid bdev in configured list, these raid bdevs are registered with 53 * bdev layer 54 */ 55 struct raid_configured_tailq g_raid_bdev_configured_list = TAILQ_HEAD_INITIALIZER( 56 g_raid_bdev_configured_list); 57 58 /* List of raid bdev in configuring list */ 59 struct raid_configuring_tailq g_raid_bdev_configuring_list = TAILQ_HEAD_INITIALIZER( 60 g_raid_bdev_configuring_list); 61 62 /* List of all raid bdevs */ 63 struct raid_all_tailq g_raid_bdev_list = TAILQ_HEAD_INITIALIZER(g_raid_bdev_list); 64 65 /* List of all raid bdevs that are offline */ 66 struct raid_offline_tailq g_raid_bdev_offline_list = TAILQ_HEAD_INITIALIZER( 67 g_raid_bdev_offline_list); 68 69 /* Function declarations */ 70 static void raid_bdev_examine(struct spdk_bdev *bdev); 71 static int raid_bdev_init(void); 72 static void raid_bdev_deconfigure(struct raid_bdev *raid_bdev, 73 raid_bdev_destruct_cb cb_fn, void *cb_arg); 74 static void raid_bdev_remove_base_bdev(void *ctx); 75 76 /* 77 * brief: 78 * raid_bdev_create_cb function is a cb function for raid bdev which creates the 79 * hierarchy from raid bdev to base bdev io channels. 
It will be called per core 80 * params: 81 * io_device - pointer to raid bdev io device represented by raid_bdev 82 * ctx_buf - pointer to context buffer for raid bdev io channel 83 * returns: 84 * 0 - success 85 * non zero - failure 86 */ 87 static int 88 raid_bdev_create_cb(void *io_device, void *ctx_buf) 89 { 90 struct raid_bdev *raid_bdev = io_device; 91 struct raid_bdev_io_channel *raid_ch = ctx_buf; 92 93 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_create_cb, %p\n", raid_ch); 94 95 assert(raid_bdev != NULL); 96 assert(raid_bdev->state == RAID_BDEV_STATE_ONLINE); 97 98 raid_ch->num_channels = raid_bdev->num_base_bdevs; 99 100 raid_ch->base_channel = calloc(raid_ch->num_channels, 101 sizeof(struct spdk_io_channel *)); 102 if (!raid_ch->base_channel) { 103 SPDK_ERRLOG("Unable to allocate base bdevs io channel\n"); 104 return -ENOMEM; 105 } 106 for (uint8_t i = 0; i < raid_ch->num_channels; i++) { 107 /* 108 * Get the spdk_io_channel for all the base bdevs. This is used during 109 * split logic to send the respective child bdev ios to respective base 110 * bdev io channel. 111 */ 112 raid_ch->base_channel[i] = spdk_bdev_get_io_channel( 113 raid_bdev->base_bdev_info[i].desc); 114 if (!raid_ch->base_channel[i]) { 115 for (uint8_t j = 0; j < i; j++) { 116 spdk_put_io_channel(raid_ch->base_channel[j]); 117 } 118 free(raid_ch->base_channel); 119 raid_ch->base_channel = NULL; 120 SPDK_ERRLOG("Unable to create io channel for base bdev\n"); 121 return -ENOMEM; 122 } 123 } 124 125 return 0; 126 } 127 128 /* 129 * brief: 130 * raid_bdev_destroy_cb function is a cb function for raid bdev which deletes the 131 * hierarchy from raid bdev to base bdev io channels. 
It will be called per core 132 * params: 133 * io_device - pointer to raid bdev io device represented by raid_bdev 134 * ctx_buf - pointer to context buffer for raid bdev io channel 135 * returns: 136 * none 137 */ 138 static void 139 raid_bdev_destroy_cb(void *io_device, void *ctx_buf) 140 { 141 struct raid_bdev_io_channel *raid_ch = ctx_buf; 142 143 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_destroy_cb\n"); 144 145 assert(raid_ch != NULL); 146 assert(raid_ch->base_channel); 147 for (uint8_t i = 0; i < raid_ch->num_channels; i++) { 148 /* Free base bdev channels */ 149 assert(raid_ch->base_channel[i] != NULL); 150 spdk_put_io_channel(raid_ch->base_channel[i]); 151 } 152 free(raid_ch->base_channel); 153 raid_ch->base_channel = NULL; 154 } 155 156 /* 157 * brief: 158 * raid_bdev_cleanup is used to cleanup and free raid_bdev related data 159 * structures. 160 * params: 161 * raid_bdev - pointer to raid_bdev 162 * returns: 163 * none 164 */ 165 static void 166 raid_bdev_cleanup(struct raid_bdev *raid_bdev) 167 { 168 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_cleanup, %p name %s, state %u, config %p\n", 169 raid_bdev, 170 raid_bdev->bdev.name, raid_bdev->state, raid_bdev->config); 171 if (raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) { 172 TAILQ_REMOVE(&g_raid_bdev_configuring_list, raid_bdev, state_link); 173 } else if (raid_bdev->state == RAID_BDEV_STATE_OFFLINE) { 174 TAILQ_REMOVE(&g_raid_bdev_offline_list, raid_bdev, state_link); 175 } else { 176 assert(0); 177 } 178 TAILQ_REMOVE(&g_raid_bdev_list, raid_bdev, global_link); 179 free(raid_bdev->bdev.name); 180 free(raid_bdev->base_bdev_info); 181 if (raid_bdev->config) { 182 raid_bdev->config->raid_bdev = NULL; 183 } 184 free(raid_bdev); 185 } 186 187 /* 188 * brief: 189 * free resource of base bdev for raid bdev 190 * params: 191 * raid_bdev - pointer to raid bdev 192 * base_bdev_slot - position to base bdev in raid bdev 193 * returns: 194 * 0 - success 195 * non zero - failure 196 */ 197 static void 198 
raid_bdev_free_base_bdev_resource(struct raid_bdev *raid_bdev, uint8_t base_bdev_slot) 199 { 200 struct raid_base_bdev_info *info; 201 202 info = &raid_bdev->base_bdev_info[base_bdev_slot]; 203 204 spdk_bdev_module_release_bdev(info->bdev); 205 spdk_bdev_close(info->desc); 206 info->desc = NULL; 207 info->bdev = NULL; 208 209 assert(raid_bdev->num_base_bdevs_discovered); 210 raid_bdev->num_base_bdevs_discovered--; 211 } 212 213 /* 214 * brief: 215 * raid_bdev_destruct is the destruct function table pointer for raid bdev 216 * params: 217 * ctxt - pointer to raid_bdev 218 * returns: 219 * 0 - success 220 * non zero - failure 221 */ 222 static int 223 raid_bdev_destruct(void *ctxt) 224 { 225 struct raid_bdev *raid_bdev = ctxt; 226 227 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_destruct\n"); 228 229 raid_bdev->destruct_called = true; 230 for (uint8_t i = 0; i < raid_bdev->num_base_bdevs; i++) { 231 /* 232 * Close all base bdev descriptors for which call has come from below 233 * layers. Also close the descriptors if we have started shutdown. 
234 */ 235 if (g_shutdown_started || 236 ((raid_bdev->base_bdev_info[i].remove_scheduled == true) && 237 (raid_bdev->base_bdev_info[i].bdev != NULL))) { 238 raid_bdev_free_base_bdev_resource(raid_bdev, i); 239 } 240 } 241 242 if (g_shutdown_started) { 243 TAILQ_REMOVE(&g_raid_bdev_configured_list, raid_bdev, state_link); 244 raid_bdev->state = RAID_BDEV_STATE_OFFLINE; 245 TAILQ_INSERT_TAIL(&g_raid_bdev_offline_list, raid_bdev, state_link); 246 } 247 248 spdk_io_device_unregister(raid_bdev, NULL); 249 250 if (raid_bdev->num_base_bdevs_discovered == 0) { 251 /* Free raid_bdev when there are no base bdevs left */ 252 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev base bdevs is 0, going to free all in destruct\n"); 253 raid_bdev_cleanup(raid_bdev); 254 } 255 256 return 0; 257 } 258 259 /* 260 * brief: 261 * raid_bdev_base_io_completion is the completion callback for member disk requests 262 * params: 263 * bdev_io - pointer to member disk requested bdev_io 264 * success - true if successful, false if unsuccessful 265 * cb_arg - callback argument (parent raid bdev_io) 266 * returns: 267 * none 268 */ 269 void 270 raid_bdev_base_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 271 { 272 struct spdk_bdev_io *parent_io = cb_arg; 273 struct raid_bdev_io *raid_io = (struct raid_bdev_io *)parent_io->driver_ctx; 274 275 spdk_bdev_free_io(bdev_io); 276 277 if (!success) { 278 raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; 279 } 280 281 raid_io->base_bdev_io_completed++; 282 if (raid_io->base_bdev_io_completed == raid_io->base_bdev_io_expected) { 283 spdk_bdev_io_complete(parent_io, raid_io->base_bdev_io_status); 284 } 285 } 286 287 /* 288 * brief: 289 * raid_bdev_queue_io_wait function processes the IO which failed to submit. 290 * It will try to queue the IOs after storing the context to bdev wait queue logic. 
291 * params: 292 * raid_bdev_io - pointer to raid bdev_io 293 * pd_idx - base_dev index in raid_bdev 294 * cb_fn - callback when the spdk_bdev_io for base_bdev becomes available 295 * ret - return code 296 * returns: 297 * none 298 */ 299 void 300 raid_bdev_queue_io_wait(struct spdk_bdev_io *raid_bdev_io, uint8_t pd_idx, 301 spdk_bdev_io_wait_cb cb_fn, int ret) 302 { 303 struct raid_bdev_io *raid_io = (struct raid_bdev_io *)raid_bdev_io->driver_ctx; 304 struct raid_bdev_io_channel *raid_ch = spdk_io_channel_get_ctx(raid_io->ch); 305 struct raid_bdev *raid_bdev = (struct raid_bdev *)raid_bdev_io->bdev->ctxt; 306 307 assert(ret != 0); 308 309 if (ret == -ENOMEM) { 310 raid_io->waitq_entry.bdev = raid_bdev->base_bdev_info[pd_idx].bdev; 311 raid_io->waitq_entry.cb_fn = cb_fn; 312 raid_io->waitq_entry.cb_arg = raid_bdev_io; 313 spdk_bdev_queue_io_wait(raid_bdev->base_bdev_info[pd_idx].bdev, 314 raid_ch->base_channel[pd_idx], 315 &raid_io->waitq_entry); 316 return; 317 } 318 319 SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); 320 assert(false); 321 spdk_bdev_io_complete(raid_bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 322 } 323 324 /* 325 * brief: 326 * _raid_bdev_submit_reset_request_next function submits the next batch of reset requests 327 * to member disks; it will submit as many as possible unless a reset fails with -ENOMEM, in 328 * which case it will queue it for later submission 329 * params: 330 * bdev_io - pointer to parent bdev_io on raid bdev device 331 * returns: 332 * none 333 */ 334 static void 335 _raid_bdev_submit_reset_request_next(void *_bdev_io) 336 { 337 struct spdk_bdev_io *bdev_io = _bdev_io; 338 struct raid_bdev_io *raid_io; 339 struct raid_bdev *raid_bdev; 340 struct raid_bdev_io_channel *raid_ch; 341 int ret; 342 uint8_t i; 343 344 raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt; 345 raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 346 raid_ch = spdk_io_channel_get_ctx(raid_io->ch); 347 348 while 
(raid_io->base_bdev_io_submitted < raid_bdev->num_base_bdevs) { 349 i = raid_io->base_bdev_io_submitted; 350 ret = spdk_bdev_reset(raid_bdev->base_bdev_info[i].desc, 351 raid_ch->base_channel[i], 352 raid_bdev_base_io_completion, bdev_io); 353 if (ret == 0) { 354 raid_io->base_bdev_io_submitted++; 355 } else { 356 raid_bdev_queue_io_wait(bdev_io, i, 357 _raid_bdev_submit_reset_request_next, ret); 358 return; 359 } 360 } 361 } 362 363 /* 364 * brief: 365 * _raid_bdev_submit_reset_request function is the submit_request function for 366 * reset requests 367 * params: 368 * ch - pointer to raid bdev io channel 369 * bdev_io - pointer to parent bdev_io on raid bdev device 370 * returns: 371 * none 372 */ 373 static void 374 _raid_bdev_submit_reset_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 375 { 376 struct raid_bdev_io *raid_io; 377 struct raid_bdev *raid_bdev; 378 379 raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt; 380 raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 381 raid_io->ch = ch; 382 raid_io->base_bdev_io_submitted = 0; 383 raid_io->base_bdev_io_completed = 0; 384 raid_io->base_bdev_io_expected = raid_bdev->num_base_bdevs; 385 raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS; 386 _raid_bdev_submit_reset_request_next(bdev_io); 387 } 388 389 /* 390 * brief: 391 * _raid_bdev_submit_null_payload_request function is the submit_request function 392 * for io requests with range but without payload, like UNMAP and FLUSH. 
393 * params: 394 * ch - pointer to raid bdev io channel 395 * bdev_io - pointer to parent bdev_io on raid bdev device 396 * returns: 397 * none 398 */ 399 static void 400 _raid_bdev_submit_null_payload_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 401 { 402 struct raid_bdev_io *raid_io; 403 404 raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 405 raid_io->ch = ch; 406 raid_io->base_bdev_io_submitted = 0; 407 raid_io->base_bdev_io_completed = 0; 408 raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS; 409 410 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev: type %d, range (0x%lx, 0x%lx)\n", 411 bdev_io->type, bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks); 412 413 raid0_submit_null_payload_request(bdev_io); 414 } 415 416 /* 417 * brief: 418 * Callback function to spdk_bdev_io_get_buf. 419 * params: 420 * ch - pointer to raid bdev io channel 421 * bdev_io - pointer to parent bdev_io on raid bdev device 422 * success - True if buffer is allocated or false otherwise. 423 * returns: 424 * none 425 */ 426 static void 427 raid_bdev_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, 428 bool success) 429 { 430 if (!success) { 431 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 432 return; 433 } 434 435 raid0_start_rw_request(ch, bdev_io); 436 } 437 438 /* 439 * brief: 440 * raid_bdev_submit_request function is the submit_request function pointer of 441 * raid bdev function table. This is used to submit the io on raid_bdev to below 442 * layers. 
443 * params: 444 * ch - pointer to raid bdev io channel 445 * bdev_io - pointer to parent bdev_io on raid bdev device 446 * returns: 447 * none 448 */ 449 static void 450 raid_bdev_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 451 { 452 switch (bdev_io->type) { 453 case SPDK_BDEV_IO_TYPE_READ: 454 spdk_bdev_io_get_buf(bdev_io, raid_bdev_get_buf_cb, 455 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); 456 break; 457 case SPDK_BDEV_IO_TYPE_WRITE: 458 raid0_start_rw_request(ch, bdev_io); 459 break; 460 461 case SPDK_BDEV_IO_TYPE_RESET: 462 _raid_bdev_submit_reset_request(ch, bdev_io); 463 break; 464 465 case SPDK_BDEV_IO_TYPE_FLUSH: 466 case SPDK_BDEV_IO_TYPE_UNMAP: 467 _raid_bdev_submit_null_payload_request(ch, bdev_io); 468 break; 469 470 default: 471 SPDK_ERRLOG("submit request, invalid io type %u\n", bdev_io->type); 472 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 473 break; 474 } 475 476 } 477 478 /* 479 * brief: 480 * _raid_bdev_io_type_supported checks whether io_type is supported in 481 * all base bdev modules of raid bdev module. If anyone among the base_bdevs 482 * doesn't support, the raid device doesn't supports. 
483 * 484 * params: 485 * raid_bdev - pointer to raid bdev context 486 * io_type - io type 487 * returns: 488 * true - io_type is supported 489 * false - io_type is not supported 490 */ 491 inline static bool 492 _raid_bdev_io_type_supported(struct raid_bdev *raid_bdev, enum spdk_bdev_io_type io_type) 493 { 494 uint8_t i; 495 496 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 497 if (raid_bdev->base_bdev_info[i].bdev == NULL) { 498 assert(false); 499 continue; 500 } 501 502 if (spdk_bdev_io_type_supported(raid_bdev->base_bdev_info[i].bdev, 503 io_type) == false) { 504 return false; 505 } 506 } 507 508 return true; 509 } 510 511 /* 512 * brief: 513 * raid_bdev_io_type_supported is the io_supported function for bdev function 514 * table which returns whether the particular io type is supported or not by 515 * raid bdev module 516 * params: 517 * ctx - pointer to raid bdev context 518 * type - io type 519 * returns: 520 * true - io_type is supported 521 * false - io_type is not supported 522 */ 523 static bool 524 raid_bdev_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 525 { 526 switch (io_type) { 527 case SPDK_BDEV_IO_TYPE_READ: 528 case SPDK_BDEV_IO_TYPE_WRITE: 529 return true; 530 531 case SPDK_BDEV_IO_TYPE_FLUSH: 532 case SPDK_BDEV_IO_TYPE_RESET: 533 case SPDK_BDEV_IO_TYPE_UNMAP: 534 return _raid_bdev_io_type_supported(ctx, io_type); 535 536 default: 537 return false; 538 } 539 540 return false; 541 } 542 543 /* 544 * brief: 545 * raid_bdev_get_io_channel is the get_io_channel function table pointer for 546 * raid bdev. 
This is used to return the io channel for this raid bdev 547 * params: 548 * ctxt - pointer to raid_bdev 549 * returns: 550 * pointer to io channel for raid bdev 551 */ 552 static struct spdk_io_channel * 553 raid_bdev_get_io_channel(void *ctxt) 554 { 555 struct raid_bdev *raid_bdev = ctxt; 556 557 return spdk_get_io_channel(raid_bdev); 558 } 559 560 /* 561 * brief: 562 * raid_bdev_dump_info_json is the function table pointer for raid bdev 563 * params: 564 * ctx - pointer to raid_bdev 565 * w - pointer to json context 566 * returns: 567 * 0 - success 568 * non zero - failure 569 */ 570 static int 571 raid_bdev_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) 572 { 573 struct raid_bdev *raid_bdev = ctx; 574 575 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_dump_config_json\n"); 576 assert(raid_bdev != NULL); 577 578 /* Dump the raid bdev configuration related information */ 579 spdk_json_write_named_object_begin(w, "raid"); 580 spdk_json_write_named_uint32(w, "strip_size", raid_bdev->strip_size); 581 spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb); 582 spdk_json_write_named_uint32(w, "state", raid_bdev->state); 583 spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level)); 584 spdk_json_write_named_uint32(w, "destruct_called", raid_bdev->destruct_called); 585 spdk_json_write_named_uint32(w, "num_base_bdevs", raid_bdev->num_base_bdevs); 586 spdk_json_write_named_uint32(w, "num_base_bdevs_discovered", raid_bdev->num_base_bdevs_discovered); 587 spdk_json_write_name(w, "base_bdevs_list"); 588 spdk_json_write_array_begin(w); 589 for (uint8_t i = 0; i < raid_bdev->num_base_bdevs; i++) { 590 if (raid_bdev->base_bdev_info[i].bdev) { 591 spdk_json_write_string(w, raid_bdev->base_bdev_info[i].bdev->name); 592 } else { 593 spdk_json_write_null(w); 594 } 595 } 596 spdk_json_write_array_end(w); 597 spdk_json_write_object_end(w); 598 599 return 0; 600 } 601 602 /* 603 * brief: 604 * raid_bdev_write_config_json is 
the function table pointer for raid bdev 605 * params: 606 * bdev - pointer to spdk_bdev 607 * w - pointer to json context 608 * returns: 609 * none 610 */ 611 static void 612 raid_bdev_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 613 { 614 struct raid_bdev *raid_bdev = bdev->ctxt; 615 struct spdk_bdev *base; 616 uint8_t i; 617 618 spdk_json_write_object_begin(w); 619 620 spdk_json_write_named_string(w, "method", "bdev_raid_create"); 621 622 spdk_json_write_named_object_begin(w, "params"); 623 spdk_json_write_named_string(w, "name", bdev->name); 624 spdk_json_write_named_uint32(w, "strip_size", raid_bdev->strip_size_kb); 625 spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level)); 626 627 spdk_json_write_named_array_begin(w, "base_bdevs"); 628 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 629 base = raid_bdev->base_bdev_info[i].bdev; 630 if (base) { 631 spdk_json_write_string(w, base->name); 632 } 633 } 634 spdk_json_write_array_end(w); 635 spdk_json_write_object_end(w); 636 637 spdk_json_write_object_end(w); 638 } 639 640 /* g_raid_bdev_fn_table is the function table for raid bdev */ 641 static const struct spdk_bdev_fn_table g_raid_bdev_fn_table = { 642 .destruct = raid_bdev_destruct, 643 .submit_request = raid_bdev_submit_request, 644 .io_type_supported = raid_bdev_io_type_supported, 645 .get_io_channel = raid_bdev_get_io_channel, 646 .dump_info_json = raid_bdev_dump_info_json, 647 .write_config_json = raid_bdev_write_config_json, 648 }; 649 650 /* 651 * brief: 652 * raid_bdev_config_cleanup function is used to free memory for one raid_bdev in configuration 653 * params: 654 * raid_cfg - pointer to raid_bdev_config structure 655 * returns: 656 * none 657 */ 658 void 659 raid_bdev_config_cleanup(struct raid_bdev_config *raid_cfg) 660 { 661 uint8_t i; 662 663 TAILQ_REMOVE(&g_raid_config.raid_bdev_config_head, raid_cfg, link); 664 g_raid_config.total_raid_bdev--; 665 666 if (raid_cfg->base_bdev) { 667 
for (i = 0; i < raid_cfg->num_base_bdevs; i++) { 668 free(raid_cfg->base_bdev[i].name); 669 } 670 free(raid_cfg->base_bdev); 671 } 672 free(raid_cfg->name); 673 free(raid_cfg); 674 } 675 676 /* 677 * brief: 678 * raid_bdev_free is the raid bdev function table function pointer. This is 679 * called on bdev free path 680 * params: 681 * none 682 * returns: 683 * none 684 */ 685 static void 686 raid_bdev_free(void) 687 { 688 struct raid_bdev_config *raid_cfg, *tmp; 689 690 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_free\n"); 691 TAILQ_FOREACH_SAFE(raid_cfg, &g_raid_config.raid_bdev_config_head, link, tmp) { 692 raid_bdev_config_cleanup(raid_cfg); 693 } 694 } 695 696 /* brief 697 * raid_bdev_config_find_by_name is a helper function to find raid bdev config 698 * by name as key. 699 * 700 * params: 701 * raid_name - name for raid bdev. 702 */ 703 struct raid_bdev_config * 704 raid_bdev_config_find_by_name(const char *raid_name) 705 { 706 struct raid_bdev_config *raid_cfg; 707 708 TAILQ_FOREACH(raid_cfg, &g_raid_config.raid_bdev_config_head, link) { 709 if (!strcmp(raid_cfg->name, raid_name)) { 710 return raid_cfg; 711 } 712 } 713 714 return raid_cfg; 715 } 716 717 /* 718 * brief 719 * raid_bdev_config_add function adds config for newly created raid bdev. 720 * 721 * params: 722 * raid_name - name for raid bdev. 723 * strip_size - strip size in KB 724 * num_base_bdevs - number of base bdevs. 725 * level - raid level, only raid level 0 is supported. 
726 * _raid_cfg - Pointer to newly added configuration 727 */ 728 int 729 raid_bdev_config_add(const char *raid_name, uint32_t strip_size, uint8_t num_base_bdevs, 730 enum raid_level level, struct raid_bdev_config **_raid_cfg) 731 { 732 struct raid_bdev_config *raid_cfg; 733 734 raid_cfg = raid_bdev_config_find_by_name(raid_name); 735 if (raid_cfg != NULL) { 736 SPDK_ERRLOG("Duplicate raid bdev name found in config file %s\n", 737 raid_name); 738 return -EEXIST; 739 } 740 741 if (spdk_u32_is_pow2(strip_size) == false) { 742 SPDK_ERRLOG("Invalid strip size %" PRIu32 "\n", strip_size); 743 return -EINVAL; 744 } 745 746 if (num_base_bdevs == 0) { 747 SPDK_ERRLOG("Invalid base device count %u\n", num_base_bdevs); 748 return -EINVAL; 749 } 750 751 if (level != RAID0) { 752 SPDK_ERRLOG("invalid raid level %u, only raid level 0 is supported\n", 753 level); 754 return -EINVAL; 755 } 756 757 raid_cfg = calloc(1, sizeof(*raid_cfg)); 758 if (raid_cfg == NULL) { 759 SPDK_ERRLOG("unable to allocate memory\n"); 760 return -ENOMEM; 761 } 762 763 raid_cfg->name = strdup(raid_name); 764 if (!raid_cfg->name) { 765 free(raid_cfg); 766 SPDK_ERRLOG("unable to allocate memory\n"); 767 return -ENOMEM; 768 } 769 raid_cfg->strip_size = strip_size; 770 raid_cfg->num_base_bdevs = num_base_bdevs; 771 raid_cfg->level = level; 772 773 raid_cfg->base_bdev = calloc(num_base_bdevs, sizeof(*raid_cfg->base_bdev)); 774 if (raid_cfg->base_bdev == NULL) { 775 free(raid_cfg->name); 776 free(raid_cfg); 777 SPDK_ERRLOG("unable to allocate memory\n"); 778 return -ENOMEM; 779 } 780 781 TAILQ_INSERT_TAIL(&g_raid_config.raid_bdev_config_head, raid_cfg, link); 782 g_raid_config.total_raid_bdev++; 783 784 *_raid_cfg = raid_cfg; 785 return 0; 786 } 787 788 /* 789 * brief: 790 * raid_bdev_config_add_base_bdev function add base bdev to raid bdev config. 
791 * 792 * params: 793 * raid_cfg - pointer to raid bdev configuration 794 * base_bdev_name - name of base bdev 795 * slot - Position to add base bdev 796 */ 797 int 798 raid_bdev_config_add_base_bdev(struct raid_bdev_config *raid_cfg, const char *base_bdev_name, 799 uint8_t slot) 800 { 801 uint8_t i; 802 struct raid_bdev_config *tmp; 803 804 if (slot >= raid_cfg->num_base_bdevs) { 805 return -EINVAL; 806 } 807 808 TAILQ_FOREACH(tmp, &g_raid_config.raid_bdev_config_head, link) { 809 for (i = 0; i < tmp->num_base_bdevs; i++) { 810 if (tmp->base_bdev[i].name != NULL) { 811 if (!strcmp(tmp->base_bdev[i].name, base_bdev_name)) { 812 SPDK_ERRLOG("duplicate base bdev name %s mentioned\n", 813 base_bdev_name); 814 return -EEXIST; 815 } 816 } 817 } 818 } 819 820 raid_cfg->base_bdev[slot].name = strdup(base_bdev_name); 821 if (raid_cfg->base_bdev[slot].name == NULL) { 822 SPDK_ERRLOG("unable to allocate memory\n"); 823 return -ENOMEM; 824 } 825 826 return 0; 827 } 828 829 static struct { 830 const char *name; 831 enum raid_level value; 832 } g_raid_level_names[] = { 833 { "raid0", RAID0 }, 834 { "0", RAID0 }, 835 { } 836 }; 837 838 enum raid_level raid_bdev_parse_raid_level(const char *str) 839 { 840 unsigned int i; 841 842 for (i = 0; g_raid_level_names[i].name != NULL; i++) { 843 if (strcasecmp(g_raid_level_names[i].name, str) == 0) { 844 return g_raid_level_names[i].value; 845 } 846 } 847 848 return INVALID_RAID_LEVEL; 849 } 850 851 const char * 852 raid_bdev_level_to_str(enum raid_level level) 853 { 854 unsigned int i; 855 856 for (i = 0; g_raid_level_names[i].name != NULL; i++) { 857 if (g_raid_level_names[i].value == level) { 858 return g_raid_level_names[i].name; 859 } 860 } 861 862 return ""; 863 } 864 865 /* 866 * brief: 867 * raid_bdev_parse_raid is used to parse the raid bdev from config file based on 868 * pre-defined raid bdev format in config file. 
869 * Format of config file: 870 * [RAID1] 871 * Name raid1 872 * StripSize 64 873 * NumDevices 2 874 * RaidLevel 0 875 * Devices Nvme0n1 Nvme1n1 876 * 877 * [RAID2] 878 * Name raid2 879 * StripSize 64 880 * NumDevices 3 881 * RaidLevel 0 882 * Devices Nvme2n1 Nvme3n1 Nvme4n1 883 * 884 * params: 885 * conf_section - pointer to config section 886 * returns: 887 * 0 - success 888 * non zero - failure 889 */ 890 static int 891 raid_bdev_parse_raid(struct spdk_conf_section *conf_section) 892 { 893 const char *raid_name; 894 uint32_t strip_size; 895 uint8_t num_base_bdevs; 896 const char *raid_level_str; 897 enum raid_level level; 898 const char *base_bdev_name; 899 struct raid_bdev_config *raid_cfg; 900 int rc, i, val; 901 902 raid_name = spdk_conf_section_get_val(conf_section, "Name"); 903 if (raid_name == NULL) { 904 SPDK_ERRLOG("raid_name is null\n"); 905 return -EINVAL; 906 } 907 908 val = spdk_conf_section_get_intval(conf_section, "StripSize"); 909 if (val < 0) { 910 return -EINVAL; 911 } 912 strip_size = val; 913 914 val = spdk_conf_section_get_intval(conf_section, "NumDevices"); 915 if (val < 0) { 916 return -EINVAL; 917 } 918 num_base_bdevs = val; 919 920 raid_level_str = spdk_conf_section_get_val(conf_section, "RaidLevel"); 921 if (raid_level_str == NULL) { 922 SPDK_ERRLOG("Missing RaidLevel\n"); 923 return -EINVAL; 924 } 925 level = raid_bdev_parse_raid_level(raid_level_str); 926 if (level == INVALID_RAID_LEVEL) { 927 SPDK_ERRLOG("Invalid RaidLevel\n"); 928 return -EINVAL; 929 } 930 931 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "%s %" PRIu32 " %u %u\n", 932 raid_name, strip_size, num_base_bdevs, level); 933 934 rc = raid_bdev_config_add(raid_name, strip_size, num_base_bdevs, level, 935 &raid_cfg); 936 if (rc != 0) { 937 SPDK_ERRLOG("Failed to add raid bdev config\n"); 938 return rc; 939 } 940 941 for (i = 0; true; i++) { 942 base_bdev_name = spdk_conf_section_get_nmval(conf_section, "Devices", 0, i); 943 if (base_bdev_name == NULL) { 944 break; 945 } 946 if (i >= 
num_base_bdevs) { 947 raid_bdev_config_cleanup(raid_cfg); 948 SPDK_ERRLOG("Number of devices mentioned is more than count\n"); 949 return -EINVAL; 950 } 951 952 rc = raid_bdev_config_add_base_bdev(raid_cfg, base_bdev_name, i); 953 if (rc != 0) { 954 raid_bdev_config_cleanup(raid_cfg); 955 SPDK_ERRLOG("Failed to add base bdev to raid bdev config\n"); 956 return rc; 957 } 958 } 959 960 if (i != raid_cfg->num_base_bdevs) { 961 raid_bdev_config_cleanup(raid_cfg); 962 SPDK_ERRLOG("Number of devices mentioned is less than count\n"); 963 return -EINVAL; 964 } 965 966 rc = raid_bdev_create(raid_cfg); 967 if (rc != 0) { 968 raid_bdev_config_cleanup(raid_cfg); 969 SPDK_ERRLOG("Failed to create raid bdev\n"); 970 return rc; 971 } 972 973 rc = raid_bdev_add_base_devices(raid_cfg); 974 if (rc != 0) { 975 SPDK_ERRLOG("Failed to add any base bdev to raid bdev\n"); 976 /* Config is not removed in this case. */ 977 } 978 979 return 0; 980 } 981 982 /* 983 * brief: 984 * raid_bdev_parse_config is used to find the raid bdev config section and parse it 985 * Format of config file: 986 * params: 987 * none 988 * returns: 989 * 0 - success 990 * non zero - failure 991 */ 992 static int 993 raid_bdev_parse_config(void) 994 { 995 int ret; 996 struct spdk_conf_section *conf_section; 997 998 conf_section = spdk_conf_first_section(NULL); 999 while (conf_section != NULL) { 1000 if (spdk_conf_section_match_prefix(conf_section, "RAID")) { 1001 ret = raid_bdev_parse_raid(conf_section); 1002 if (ret < 0) { 1003 SPDK_ERRLOG("Unable to parse raid bdev section\n"); 1004 return ret; 1005 } 1006 } 1007 conf_section = spdk_conf_next_section(conf_section); 1008 } 1009 1010 return 0; 1011 } 1012 1013 /* 1014 * brief: 1015 * raid_bdev_fini_start is called when bdev layer is starting the 1016 * shutdown process 1017 * params: 1018 * none 1019 * returns: 1020 * none 1021 */ 1022 static void 1023 raid_bdev_fini_start(void) 1024 { 1025 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_fini_start\n"); 1026 
g_shutdown_started = true; 1027 } 1028 1029 /* 1030 * brief: 1031 * raid_bdev_exit is called on raid bdev module exit time by bdev layer 1032 * params: 1033 * none 1034 * returns: 1035 * none 1036 */ 1037 static void 1038 raid_bdev_exit(void) 1039 { 1040 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_exit\n"); 1041 raid_bdev_free(); 1042 } 1043 1044 /* 1045 * brief: 1046 * raid_bdev_get_ctx_size is used to return the context size of bdev_io for raid 1047 * module 1048 * params: 1049 * none 1050 * returns: 1051 * size of spdk_bdev_io context for raid 1052 */ 1053 static int 1054 raid_bdev_get_ctx_size(void) 1055 { 1056 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_get_ctx_size\n"); 1057 return sizeof(struct raid_bdev_io); 1058 } 1059 1060 /* 1061 * brief: 1062 * raid_bdev_get_running_config is used to get the configuration options. 1063 * 1064 * params: 1065 * fp - The pointer to a file that will be written to the configuration options. 1066 * returns: 1067 * none 1068 */ 1069 static void 1070 raid_bdev_get_running_config(FILE *fp) 1071 { 1072 struct raid_bdev *raid_bdev; 1073 struct spdk_bdev *base; 1074 int index = 1; 1075 uint8_t i; 1076 1077 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_configured_list, state_link) { 1078 fprintf(fp, 1079 "\n" 1080 "[RAID%d]\n" 1081 " Name %s\n" 1082 " StripSize %" PRIu32 "\n" 1083 " NumDevices %u\n" 1084 " RaidLevel %s\n", 1085 index, raid_bdev->bdev.name, raid_bdev->strip_size_kb, 1086 raid_bdev->num_base_bdevs, 1087 raid_bdev_level_to_str(raid_bdev->level)); 1088 fprintf(fp, 1089 " Devices "); 1090 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 1091 base = raid_bdev->base_bdev_info[i].bdev; 1092 if (base) { 1093 fprintf(fp, 1094 "%s ", 1095 base->name); 1096 } 1097 } 1098 fprintf(fp, 1099 "\n"); 1100 index++; 1101 } 1102 } 1103 1104 /* 1105 * brief: 1106 * raid_bdev_can_claim_bdev is the function to check if this base_bdev can be 1107 * claimed by raid bdev or not. 
1108 * params: 1109 * bdev_name - represents base bdev name 1110 * _raid_cfg - pointer to raid bdev config parsed from config file 1111 * base_bdev_slot - if bdev can be claimed, it represents the base_bdev correct 1112 * slot. This field is only valid if return value of this function is true 1113 * returns: 1114 * true - if bdev can be claimed 1115 * false - if bdev can't be claimed 1116 */ 1117 static bool 1118 raid_bdev_can_claim_bdev(const char *bdev_name, struct raid_bdev_config **_raid_cfg, 1119 uint8_t *base_bdev_slot) 1120 { 1121 struct raid_bdev_config *raid_cfg; 1122 uint8_t i; 1123 1124 TAILQ_FOREACH(raid_cfg, &g_raid_config.raid_bdev_config_head, link) { 1125 for (i = 0; i < raid_cfg->num_base_bdevs; i++) { 1126 /* 1127 * Check if the base bdev name is part of raid bdev configuration. 1128 * If match is found then return true and the slot information where 1129 * this base bdev should be inserted in raid bdev 1130 */ 1131 if (!strcmp(bdev_name, raid_cfg->base_bdev[i].name)) { 1132 *_raid_cfg = raid_cfg; 1133 *base_bdev_slot = i; 1134 return true; 1135 } 1136 } 1137 } 1138 1139 return false; 1140 } 1141 1142 1143 static struct spdk_bdev_module g_raid_if = { 1144 .name = "raid", 1145 .module_init = raid_bdev_init, 1146 .fini_start = raid_bdev_fini_start, 1147 .module_fini = raid_bdev_exit, 1148 .get_ctx_size = raid_bdev_get_ctx_size, 1149 .examine_config = raid_bdev_examine, 1150 .config_text = raid_bdev_get_running_config, 1151 .async_init = false, 1152 .async_fini = false, 1153 }; 1154 SPDK_BDEV_MODULE_REGISTER(raid, &g_raid_if) 1155 1156 /* 1157 * brief: 1158 * raid_bdev_init is the initialization function for raid bdev module 1159 * params: 1160 * none 1161 * returns: 1162 * 0 - success 1163 * non zero - failure 1164 */ 1165 static int 1166 raid_bdev_init(void) 1167 { 1168 int ret; 1169 1170 /* Parse config file for raids */ 1171 ret = raid_bdev_parse_config(); 1172 if (ret < 0) { 1173 SPDK_ERRLOG("raid bdev init failed parsing\n"); 1174 
raid_bdev_free(); 1175 return ret; 1176 } 1177 1178 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_init completed successfully\n"); 1179 1180 return 0; 1181 } 1182 1183 /* 1184 * brief: 1185 * raid_bdev_create allocates raid bdev based on passed configuration 1186 * params: 1187 * raid_cfg - configuration of raid bdev 1188 * returns: 1189 * 0 - success 1190 * non zero - failure 1191 */ 1192 int 1193 raid_bdev_create(struct raid_bdev_config *raid_cfg) 1194 { 1195 struct raid_bdev *raid_bdev; 1196 struct spdk_bdev *raid_bdev_gen; 1197 1198 raid_bdev = calloc(1, sizeof(*raid_bdev)); 1199 if (!raid_bdev) { 1200 SPDK_ERRLOG("Unable to allocate memory for raid bdev\n"); 1201 return -ENOMEM; 1202 } 1203 1204 assert(raid_cfg->num_base_bdevs != 0); 1205 raid_bdev->num_base_bdevs = raid_cfg->num_base_bdevs; 1206 raid_bdev->base_bdev_info = calloc(raid_bdev->num_base_bdevs, 1207 sizeof(struct raid_base_bdev_info)); 1208 if (!raid_bdev->base_bdev_info) { 1209 SPDK_ERRLOG("Unable able to allocate base bdev info\n"); 1210 free(raid_bdev); 1211 return -ENOMEM; 1212 } 1213 1214 /* strip_size_kb is from the rpc param. strip_size is in blocks and used 1215 * intnerally and set later. 
1216 */ 1217 raid_bdev->strip_size = 0; 1218 raid_bdev->strip_size_kb = raid_cfg->strip_size; 1219 raid_bdev->state = RAID_BDEV_STATE_CONFIGURING; 1220 raid_bdev->config = raid_cfg; 1221 raid_bdev->level = raid_cfg->level; 1222 1223 switch (raid_bdev->level) { 1224 case RAID0: 1225 break; 1226 default: 1227 SPDK_ERRLOG("invalid raid level %u\n", raid_bdev->level); 1228 free(raid_bdev); 1229 return -EINVAL; 1230 } 1231 1232 raid_bdev_gen = &raid_bdev->bdev; 1233 1234 raid_bdev_gen->name = strdup(raid_cfg->name); 1235 if (!raid_bdev_gen->name) { 1236 SPDK_ERRLOG("Unable to allocate name for raid\n"); 1237 free(raid_bdev->base_bdev_info); 1238 free(raid_bdev); 1239 return -ENOMEM; 1240 } 1241 1242 raid_bdev_gen->product_name = "Raid Volume"; 1243 raid_bdev_gen->ctxt = raid_bdev; 1244 raid_bdev_gen->fn_table = &g_raid_bdev_fn_table; 1245 raid_bdev_gen->module = &g_raid_if; 1246 raid_bdev_gen->write_cache = 0; 1247 1248 TAILQ_INSERT_TAIL(&g_raid_bdev_configuring_list, raid_bdev, state_link); 1249 TAILQ_INSERT_TAIL(&g_raid_bdev_list, raid_bdev, global_link); 1250 1251 raid_cfg->raid_bdev = raid_bdev; 1252 1253 return 0; 1254 } 1255 1256 /* 1257 * brief 1258 * raid_bdev_alloc_base_bdev_resource allocates resource of base bdev. 
1259 * params: 1260 * raid_bdev - pointer to raid bdev 1261 * bdev - pointer to base bdev 1262 * base_bdev_slot - position to add base bdev 1263 * returns: 1264 * 0 - success 1265 * non zero - failure 1266 */ 1267 static int 1268 raid_bdev_alloc_base_bdev_resource(struct raid_bdev *raid_bdev, struct spdk_bdev *bdev, 1269 uint8_t base_bdev_slot) 1270 { 1271 struct spdk_bdev_desc *desc; 1272 int rc; 1273 1274 rc = spdk_bdev_open(bdev, true, raid_bdev_remove_base_bdev, bdev, &desc); 1275 if (rc != 0) { 1276 SPDK_ERRLOG("Unable to create desc on bdev '%s'\n", bdev->name); 1277 return rc; 1278 } 1279 1280 rc = spdk_bdev_module_claim_bdev(bdev, NULL, &g_raid_if); 1281 if (rc != 0) { 1282 SPDK_ERRLOG("Unable to claim this bdev as it is already claimed\n"); 1283 spdk_bdev_close(desc); 1284 return rc; 1285 } 1286 1287 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "bdev %s is claimed\n", bdev->name); 1288 1289 assert(raid_bdev->state != RAID_BDEV_STATE_ONLINE); 1290 assert(base_bdev_slot < raid_bdev->num_base_bdevs); 1291 1292 raid_bdev->base_bdev_info[base_bdev_slot].bdev = bdev; 1293 raid_bdev->base_bdev_info[base_bdev_slot].desc = desc; 1294 raid_bdev->num_base_bdevs_discovered++; 1295 assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs); 1296 1297 return 0; 1298 } 1299 1300 /* 1301 * brief: 1302 * If raid bdev config is complete, then only register the raid bdev to 1303 * bdev layer and remove this raid bdev from configuring list and 1304 * insert the raid bdev to configured list 1305 * params: 1306 * raid_bdev - pointer to raid bdev 1307 * returns: 1308 * 0 - success 1309 * non zero - failure 1310 */ 1311 static int 1312 raid_bdev_configure(struct raid_bdev *raid_bdev) 1313 { 1314 uint32_t blocklen; 1315 uint64_t min_blockcnt; 1316 struct spdk_bdev *raid_bdev_gen; 1317 int rc = 0; 1318 1319 blocklen = raid_bdev->base_bdev_info[0].bdev->blocklen; 1320 min_blockcnt = raid_bdev->base_bdev_info[0].bdev->blockcnt; 1321 for (uint8_t i = 1; i < 
raid_bdev->num_base_bdevs; i++) { 1322 /* Calculate minimum block count from all base bdevs */ 1323 if (raid_bdev->base_bdev_info[i].bdev->blockcnt < min_blockcnt) { 1324 min_blockcnt = raid_bdev->base_bdev_info[i].bdev->blockcnt; 1325 } 1326 1327 /* Check blocklen for all base bdevs that it should be same */ 1328 if (blocklen != raid_bdev->base_bdev_info[i].bdev->blocklen) { 1329 /* 1330 * Assumption is that all the base bdevs for any raid bdev should 1331 * have same blocklen 1332 */ 1333 SPDK_ERRLOG("Blocklen of various bdevs not matching\n"); 1334 return -EINVAL; 1335 } 1336 } 1337 1338 /* The strip_size_kb is read in from user in KB. Convert to blocks here for 1339 * internal use. 1340 */ 1341 raid_bdev->strip_size = (raid_bdev->strip_size_kb * 1024) / blocklen; 1342 raid_bdev->strip_size_shift = spdk_u32log2(raid_bdev->strip_size); 1343 raid_bdev->blocklen_shift = spdk_u32log2(blocklen); 1344 1345 raid_bdev_gen = &raid_bdev->bdev; 1346 raid_bdev_gen->blocklen = blocklen; 1347 if (raid_bdev->num_base_bdevs > 1) { 1348 raid_bdev_gen->optimal_io_boundary = raid_bdev->strip_size; 1349 raid_bdev_gen->split_on_optimal_io_boundary = true; 1350 } else { 1351 /* Do not need to split reads/writes on single bdev RAID modules. 
*/ 1352 raid_bdev_gen->optimal_io_boundary = 0; 1353 raid_bdev_gen->split_on_optimal_io_boundary = false; 1354 } 1355 1356 /* 1357 * RAID bdev logic is for striping so take the minimum block count based 1358 * approach where total block count of raid bdev is the number of base 1359 * bdev times the minimum block count of any base bdev 1360 */ 1361 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "min blockcount %lu, numbasedev %u, strip size shift %u\n", 1362 min_blockcnt, 1363 raid_bdev->num_base_bdevs, raid_bdev->strip_size_shift); 1364 raid_bdev_gen->blockcnt = ((min_blockcnt >> raid_bdev->strip_size_shift) << 1365 raid_bdev->strip_size_shift) * raid_bdev->num_base_bdevs; 1366 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "io device register %p\n", raid_bdev); 1367 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "blockcnt %lu, blocklen %u\n", raid_bdev_gen->blockcnt, 1368 raid_bdev_gen->blocklen); 1369 if (raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) { 1370 raid_bdev->state = RAID_BDEV_STATE_ONLINE; 1371 spdk_io_device_register(raid_bdev, raid_bdev_create_cb, raid_bdev_destroy_cb, 1372 sizeof(struct raid_bdev_io_channel), 1373 raid_bdev->bdev.name); 1374 rc = spdk_bdev_register(raid_bdev_gen); 1375 if (rc != 0) { 1376 SPDK_ERRLOG("Unable to register raid bdev and stay at configuring state\n"); 1377 spdk_io_device_unregister(raid_bdev, NULL); 1378 raid_bdev->state = RAID_BDEV_STATE_CONFIGURING; 1379 return rc; 1380 } 1381 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev generic %p\n", raid_bdev_gen); 1382 TAILQ_REMOVE(&g_raid_bdev_configuring_list, raid_bdev, state_link); 1383 TAILQ_INSERT_TAIL(&g_raid_bdev_configured_list, raid_bdev, state_link); 1384 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev is created with name %s, raid_bdev %p\n", 1385 raid_bdev_gen->name, raid_bdev); 1386 } 1387 1388 return 0; 1389 } 1390 1391 /* 1392 * brief: 1393 * If raid bdev is online and registered, change the bdev state to 1394 * configuring and unregister this raid device. 
Queue this raid device
 * in the offline list. If the raid bdev is not online, nothing is unregistered
 * and cb_fn (when given) is invoked right away with status 0.
 * params:
 * raid_bdev - pointer to raid bdev
 * cb_fn - callback function
 * cb_arg - argument to callback function
 * returns:
 * none
 */
static void
raid_bdev_deconfigure(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn,
		      void *cb_arg)
{
	if (raid_bdev->state == RAID_BDEV_STATE_ONLINE) {
		assert(raid_bdev->num_base_bdevs == raid_bdev->num_base_bdevs_discovered);

		/* Move the raid bdev from the configured list to the offline list */
		TAILQ_REMOVE(&g_raid_bdev_configured_list, raid_bdev, state_link);
		raid_bdev->state = RAID_BDEV_STATE_OFFLINE;
		assert(raid_bdev->num_base_bdevs_discovered);
		TAILQ_INSERT_TAIL(&g_raid_bdev_offline_list, raid_bdev, state_link);
		SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev state chaning from online to offline\n");

		/* cb_fn and cb_arg are handed on to the bdev layer's unregister call */
		spdk_bdev_unregister(&raid_bdev->bdev, cb_fn, cb_arg);
		return;
	}

	/* Not online: there is nothing to unregister */
	if (cb_fn) {
		cb_fn(cb_arg, 0);
	}
}

/*
 * brief:
 * raid_bdev_find_by_base_bdev function finds the raid bdev which has
 * claimed the base bdev.
 * params:
 * base_bdev - pointer to base bdev
 * _raid_bdev - Reference to pointer to raid bdev
 * _base_bdev_slot - Reference to the slot of the base bdev.
 * returns:
 * true - if the raid bdev is found.
 * false - if the raid bdev is not found.
1435 */ 1436 static bool 1437 raid_bdev_find_by_base_bdev(struct spdk_bdev *base_bdev, struct raid_bdev **_raid_bdev, 1438 uint8_t *_base_bdev_slot) 1439 { 1440 struct raid_bdev *raid_bdev; 1441 uint8_t i; 1442 1443 TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { 1444 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 1445 if (raid_bdev->base_bdev_info[i].bdev == base_bdev) { 1446 *_raid_bdev = raid_bdev; 1447 *_base_bdev_slot = i; 1448 return true; 1449 } 1450 } 1451 } 1452 1453 return false; 1454 } 1455 1456 /* 1457 * brief: 1458 * raid_bdev_remove_base_bdev function is called by below layers when base_bdev 1459 * is removed. This function checks if this base bdev is part of any raid bdev 1460 * or not. If yes, it takes necessary action on that particular raid bdev. 1461 * params: 1462 * ctx - pointer to base bdev pointer which got removed 1463 * returns: 1464 * none 1465 */ 1466 static void 1467 raid_bdev_remove_base_bdev(void *ctx) 1468 { 1469 struct spdk_bdev *base_bdev = ctx; 1470 struct raid_bdev *raid_bdev = NULL; 1471 uint8_t base_bdev_slot = 0; 1472 1473 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_remove_base_bdev\n"); 1474 1475 /* Find the raid_bdev which has claimed this base_bdev */ 1476 if (!raid_bdev_find_by_base_bdev(base_bdev, &raid_bdev, &base_bdev_slot)) { 1477 SPDK_ERRLOG("bdev to remove '%s' not found\n", base_bdev->name); 1478 return; 1479 } 1480 1481 assert(raid_bdev->base_bdev_info[base_bdev_slot].desc); 1482 raid_bdev->base_bdev_info[base_bdev_slot].remove_scheduled = true; 1483 1484 if (raid_bdev->destruct_called == true || 1485 raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) { 1486 /* 1487 * As raid bdev is not registered yet or already unregistered, 1488 * so cleanup should be done here itself. 1489 */ 1490 raid_bdev_free_base_bdev_resource(raid_bdev, base_bdev_slot); 1491 if (raid_bdev->num_base_bdevs_discovered == 0) { 1492 /* There is no base bdev for this raid, so free the raid device. 
*/ 1493 raid_bdev_cleanup(raid_bdev); 1494 return; 1495 } 1496 } 1497 1498 raid_bdev_deconfigure(raid_bdev, NULL, NULL); 1499 } 1500 1501 /* 1502 * brief: 1503 * Remove base bdevs from the raid bdev one by one. Skip any base bdev which 1504 * doesn't exist. 1505 * params: 1506 * raid_cfg - pointer to raid bdev config. 1507 * cb_fn - callback function 1508 * cb_ctx - argument to callback function 1509 */ 1510 void 1511 raid_bdev_remove_base_devices(struct raid_bdev_config *raid_cfg, 1512 raid_bdev_destruct_cb cb_fn, void *cb_arg) 1513 { 1514 struct raid_bdev *raid_bdev; 1515 struct raid_base_bdev_info *info; 1516 uint8_t i; 1517 1518 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_remove_base_devices\n"); 1519 1520 raid_bdev = raid_cfg->raid_bdev; 1521 if (raid_bdev == NULL) { 1522 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev %s doesn't exist now\n", raid_cfg->name); 1523 if (cb_fn) { 1524 cb_fn(cb_arg, 0); 1525 } 1526 return; 1527 } 1528 1529 if (raid_bdev->destroy_started) { 1530 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "destroying raid bdev %s is already started\n", 1531 raid_cfg->name); 1532 if (cb_fn) { 1533 cb_fn(cb_arg, -EALREADY); 1534 } 1535 return; 1536 } 1537 1538 raid_bdev->destroy_started = true; 1539 1540 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 1541 info = &raid_bdev->base_bdev_info[i]; 1542 1543 if (info->bdev == NULL) { 1544 continue; 1545 } 1546 1547 assert(info->desc); 1548 info->remove_scheduled = true; 1549 1550 if (raid_bdev->destruct_called == true || 1551 raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) { 1552 /* 1553 * As raid bdev is not registered yet or already unregistered, 1554 * so cleanup should be done here itself. 1555 */ 1556 raid_bdev_free_base_bdev_resource(raid_bdev, i); 1557 if (raid_bdev->num_base_bdevs_discovered == 0) { 1558 /* There is no base bdev for this raid, so free the raid device. 
*/ 1559 raid_bdev_cleanup(raid_bdev); 1560 if (cb_fn) { 1561 cb_fn(cb_arg, 0); 1562 } 1563 return; 1564 } 1565 } 1566 } 1567 1568 raid_bdev_deconfigure(raid_bdev, cb_fn, cb_arg); 1569 } 1570 1571 /* 1572 * brief: 1573 * raid_bdev_add_base_device function is the actual function which either adds 1574 * the nvme base device to existing raid bdev or create a new raid bdev. It also claims 1575 * the base device and keep the open descriptor. 1576 * params: 1577 * raid_cfg - pointer to raid bdev config 1578 * bdev - pointer to base bdev 1579 * base_bdev_slot - position to add base bdev 1580 * returns: 1581 * 0 - success 1582 * non zero - failure 1583 */ 1584 static int 1585 raid_bdev_add_base_device(struct raid_bdev_config *raid_cfg, struct spdk_bdev *bdev, 1586 uint8_t base_bdev_slot) 1587 { 1588 struct raid_bdev *raid_bdev; 1589 int rc; 1590 1591 raid_bdev = raid_cfg->raid_bdev; 1592 if (!raid_bdev) { 1593 SPDK_ERRLOG("Raid bdev '%s' is not created yet\n", raid_cfg->name); 1594 return -ENODEV; 1595 } 1596 1597 rc = raid_bdev_alloc_base_bdev_resource(raid_bdev, bdev, base_bdev_slot); 1598 if (rc != 0) { 1599 SPDK_ERRLOG("Failed to allocate resource for bdev '%s'\n", bdev->name); 1600 return rc; 1601 } 1602 1603 assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs); 1604 1605 if (raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs) { 1606 rc = raid_bdev_configure(raid_bdev); 1607 if (rc != 0) { 1608 SPDK_ERRLOG("Failed to configure raid bdev\n"); 1609 return rc; 1610 } 1611 } 1612 1613 return 0; 1614 } 1615 1616 /* 1617 * brief: 1618 * Add base bdevs to the raid bdev one by one. Skip any base bdev which doesn't 1619 * exist or fails to add. If all base bdevs are successfully added, the raid bdev 1620 * moves to the configured state and becomes available. Otherwise, the raid bdev 1621 * stays at the configuring state with added base bdevs. 
1622 * params: 1623 * raid_cfg - pointer to raid bdev config 1624 * returns: 1625 * 0 - The raid bdev moves to the configured state or stays at the configuring 1626 * state with added base bdevs due to any nonexistent base bdev. 1627 * non zero - Failed to add any base bdev and stays at the configuring state with 1628 * added base bdevs. 1629 */ 1630 int 1631 raid_bdev_add_base_devices(struct raid_bdev_config *raid_cfg) 1632 { 1633 struct spdk_bdev *base_bdev; 1634 uint8_t i; 1635 int rc = 0, _rc; 1636 1637 for (i = 0; i < raid_cfg->num_base_bdevs; i++) { 1638 base_bdev = spdk_bdev_get_by_name(raid_cfg->base_bdev[i].name); 1639 if (base_bdev == NULL) { 1640 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "base bdev %s doesn't exist now\n", 1641 raid_cfg->base_bdev[i].name); 1642 continue; 1643 } 1644 1645 _rc = raid_bdev_add_base_device(raid_cfg, base_bdev, i); 1646 if (_rc != 0) { 1647 SPDK_ERRLOG("Failed to add base bdev %s to RAID bdev %s: %s\n", 1648 raid_cfg->base_bdev[i].name, raid_cfg->name, 1649 spdk_strerror(-_rc)); 1650 if (rc == 0) { 1651 rc = _rc; 1652 } 1653 } 1654 } 1655 1656 return rc; 1657 } 1658 1659 /* 1660 * brief: 1661 * raid_bdev_examine function is the examine function call by the below layers 1662 * like bdev_nvme layer. This function will check if this base bdev can be 1663 * claimed by this raid bdev or not. 
1664 * params: 1665 * bdev - pointer to base bdev 1666 * returns: 1667 * none 1668 */ 1669 static void 1670 raid_bdev_examine(struct spdk_bdev *bdev) 1671 { 1672 struct raid_bdev_config *raid_cfg; 1673 uint8_t base_bdev_slot; 1674 1675 if (raid_bdev_can_claim_bdev(bdev->name, &raid_cfg, &base_bdev_slot)) { 1676 raid_bdev_add_base_device(raid_cfg, bdev, base_bdev_slot); 1677 } else { 1678 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "bdev %s can't be claimed\n", 1679 bdev->name); 1680 } 1681 1682 spdk_bdev_module_examine_done(&g_raid_if); 1683 } 1684 1685 /* Log component for bdev raid bdev module */ 1686 SPDK_LOG_REGISTER_COMPONENT("bdev_raid", SPDK_LOG_BDEV_RAID) 1687