1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2022 Intel Corporation. 3 * All rights reserved. 4 */ 5 6 #include "bdev_raid.h" 7 8 #include "spdk/likely.h" 9 #include "spdk/log.h" 10 11 struct raid1_info { 12 /* The parent raid bdev */ 13 struct raid_bdev *raid_bdev; 14 }; 15 16 struct raid1_io_channel { 17 /* Array of per-base_bdev counters of outstanding read blocks on this channel */ 18 uint64_t read_blocks_outstanding[0]; 19 }; 20 21 static void 22 raid1_channel_inc_read_counters(struct raid_bdev_io_channel *raid_ch, uint8_t idx, 23 uint64_t num_blocks) 24 { 25 struct raid1_io_channel *raid1_ch = raid_bdev_channel_get_module_ctx(raid_ch); 26 27 assert(raid1_ch->read_blocks_outstanding[idx] <= UINT64_MAX - num_blocks); 28 raid1_ch->read_blocks_outstanding[idx] += num_blocks; 29 } 30 31 static void 32 raid1_channel_dec_read_counters(struct raid_bdev_io_channel *raid_ch, uint8_t idx, 33 uint64_t num_blocks) 34 { 35 struct raid1_io_channel *raid1_ch = raid_bdev_channel_get_module_ctx(raid_ch); 36 37 assert(raid1_ch->read_blocks_outstanding[idx] >= num_blocks); 38 raid1_ch->read_blocks_outstanding[idx] -= num_blocks; 39 } 40 41 static void 42 raid1_init_ext_io_opts(struct spdk_bdev_ext_io_opts *opts, struct raid_bdev_io *raid_io) 43 { 44 memset(opts, 0, sizeof(*opts)); 45 opts->size = sizeof(*opts); 46 opts->memory_domain = raid_io->memory_domain; 47 opts->memory_domain_ctx = raid_io->memory_domain_ctx; 48 opts->metadata = raid_io->md_buf; 49 } 50 51 static void 52 raid1_write_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 53 { 54 struct raid_bdev_io *raid_io = cb_arg; 55 56 if (!success) { 57 struct raid_base_bdev_info *base_info; 58 59 base_info = raid_bdev_channel_get_base_info(raid_io->raid_ch, bdev_io->bdev); 60 if (base_info) { 61 raid_bdev_fail_base_bdev(base_info); 62 } 63 } 64 65 spdk_bdev_free_io(bdev_io); 66 67 raid_bdev_io_complete_part(raid_io, 1, success ? 68 SPDK_BDEV_IO_STATUS_SUCCESS : 69 SPDK_BDEV_IO_STATUS_FAILED); 70 } 71 72 static struct raid_base_bdev_info * 73 raid1_get_read_io_base_bdev(struct raid_bdev_io *raid_io) 74 { 75 assert(raid_io->type == SPDK_BDEV_IO_TYPE_READ); 76 return &raid_io->raid_bdev->base_bdev_info[raid_io->base_bdev_io_submitted]; 77 } 78 79 static void 80 raid1_correct_read_error_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 81 { 82 struct raid_bdev_io *raid_io = cb_arg; 83 84 spdk_bdev_free_io(bdev_io); 85 86 if (!success) { 87 struct raid_base_bdev_info *base_info = raid1_get_read_io_base_bdev(raid_io); 88 89 /* Writing to the bdev that had the read error failed so fail the base bdev 90 * but complete the raid_io successfully. */ 91 raid_bdev_fail_base_bdev(base_info); 92 } 93 94 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_SUCCESS); 95 } 96 97 static void 98 raid1_correct_read_error(void *_raid_io) 99 { 100 struct raid_bdev_io *raid_io = _raid_io; 101 struct raid_bdev *raid_bdev = raid_io->raid_bdev; 102 struct spdk_bdev_ext_io_opts io_opts; 103 struct raid_base_bdev_info *base_info; 104 struct spdk_io_channel *base_ch; 105 uint8_t i; 106 int ret; 107 108 i = raid_io->base_bdev_io_submitted; 109 base_info = &raid_bdev->base_bdev_info[i]; 110 base_ch = raid_bdev_channel_get_base_channel(raid_io->raid_ch, i); 111 assert(base_ch != NULL); 112 113 raid1_init_ext_io_opts(&io_opts, raid_io); 114 ret = raid_bdev_writev_blocks_ext(base_info, base_ch, raid_io->iovs, raid_io->iovcnt, 115 raid_io->offset_blocks, raid_io->num_blocks, 116 raid1_correct_read_error_completion, raid_io, &io_opts); 117 if (spdk_unlikely(ret != 0)) { 118 if (ret == -ENOMEM) { 119 raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), 120 base_ch, raid1_correct_read_error); 121 } else { 122 raid_bdev_fail_base_bdev(base_info); 123 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_SUCCESS); 124 } 125 } 126 } 127 128 static void raid1_read_other_base_bdev(void *_raid_io); 129 130 static void 131 raid1_read_other_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 132 { 133 struct raid_bdev_io *raid_io = cb_arg; 134 135 spdk_bdev_free_io(bdev_io); 136 137 if (!success) { 138 assert(raid_io->base_bdev_io_remaining > 0); 139 raid_io->base_bdev_io_remaining--; 140 raid1_read_other_base_bdev(raid_io); 141 return; 142 } 143 144 /* try to correct the read error by writing data read from the other base bdev */ 145 raid1_correct_read_error(raid_io); 146 } 147 148 static void 149 raid1_read_other_base_bdev(void *_raid_io) 150 { 151 struct raid_bdev_io *raid_io = _raid_io; 152 struct raid_bdev *raid_bdev = raid_io->raid_bdev; 153 struct spdk_bdev_ext_io_opts io_opts; 154 struct raid_base_bdev_info *base_info; 155 struct spdk_io_channel *base_ch; 156 uint8_t i; 157 int ret; 158 159 for (i = raid_bdev->num_base_bdevs - raid_io->base_bdev_io_remaining; i < raid_bdev->num_base_bdevs; 160 i++) { 161 base_info = &raid_bdev->base_bdev_info[i]; 162 base_ch = raid_bdev_channel_get_base_channel(raid_io->raid_ch, i); 163 164 if (base_ch == NULL || i == raid_io->base_bdev_io_submitted) { 165 raid_io->base_bdev_io_remaining--; 166 continue; 167 } 168 169 raid1_init_ext_io_opts(&io_opts, raid_io); 170 ret = raid_bdev_readv_blocks_ext(base_info, base_ch, raid_io->iovs, raid_io->iovcnt, 171 raid_io->offset_blocks, raid_io->num_blocks, 172 raid1_read_other_completion, raid_io, &io_opts); 173 if (spdk_unlikely(ret != 0)) { 174 if (ret == -ENOMEM) { 175 raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), 176 base_ch, raid1_read_other_base_bdev); 177 } else { 178 break; 179 } 180 } 181 return; 182 } 183 184 base_info = raid1_get_read_io_base_bdev(raid_io); 185 raid_bdev_fail_base_bdev(base_info); 186 187 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 188 } 189 190 static void 191 raid1_read_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 192 { 193 struct raid_bdev_io *raid_io = cb_arg; 194 195 spdk_bdev_free_io(bdev_io); 196 197 raid1_channel_dec_read_counters(raid_io->raid_ch, raid_io->base_bdev_io_submitted, 198 raid_io->num_blocks); 199 200 if (!success) { 201 raid_io->base_bdev_io_remaining = raid_io->raid_bdev->num_base_bdevs; 202 raid1_read_other_base_bdev(raid_io); 203 return; 204 } 205 206 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_SUCCESS); 207 } 208 209 static void raid1_submit_rw_request(struct raid_bdev_io *raid_io); 210 211 static void 212 _raid1_submit_rw_request(void *_raid_io) 213 { 214 struct raid_bdev_io *raid_io = _raid_io; 215 216 raid1_submit_rw_request(raid_io); 217 } 218 219 static uint8_t 220 raid1_channel_next_read_base_bdev(struct raid_bdev *raid_bdev, struct raid_bdev_io_channel *raid_ch) 221 { 222 struct raid1_io_channel *raid1_ch = raid_bdev_channel_get_module_ctx(raid_ch); 223 uint64_t read_blocks_min = UINT64_MAX; 224 uint8_t idx = UINT8_MAX; 225 uint8_t i; 226 227 for (i = 0; i < raid_bdev->num_base_bdevs; i++) { 228 if (raid_bdev_channel_get_base_channel(raid_ch, i) != NULL && 229 raid1_ch->read_blocks_outstanding[i] < read_blocks_min) { 230 read_blocks_min = raid1_ch->read_blocks_outstanding[i]; 231 idx = i; 232 } 233 } 234 235 return idx; 236 } 237 238 static int 239 raid1_submit_read_request(struct raid_bdev_io *raid_io) 240 { 241 struct raid_bdev *raid_bdev = raid_io->raid_bdev; 242 struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch; 243 struct spdk_bdev_ext_io_opts io_opts; 244 struct raid_base_bdev_info *base_info; 245 struct spdk_io_channel *base_ch; 246 uint8_t idx; 247 int ret; 248 249 idx = raid1_channel_next_read_base_bdev(raid_bdev, raid_ch); 250 if (spdk_unlikely(idx == UINT8_MAX)) { 251 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 252 return 0; 253 } 254 255 base_info = &raid_bdev->base_bdev_info[idx]; 256 base_ch = raid_bdev_channel_get_base_channel(raid_ch, idx); 257 258 raid1_init_ext_io_opts(&io_opts, raid_io); 259 ret = raid_bdev_readv_blocks_ext(base_info, base_ch, raid_io->iovs, raid_io->iovcnt, 260 raid_io->offset_blocks, raid_io->num_blocks, 261 raid1_read_bdev_io_completion, raid_io, &io_opts); 262 263 if (spdk_likely(ret == 0)) { 264 raid1_channel_inc_read_counters(raid_ch, idx, raid_io->num_blocks); 265 raid_io->base_bdev_io_submitted = idx; 266 } else if (spdk_unlikely(ret == -ENOMEM)) { 267 raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), 268 base_ch, _raid1_submit_rw_request); 269 return 0; 270 } 271 272 return ret; 273 } 274 275 static int 276 raid1_submit_write_request(struct raid_bdev_io *raid_io) 277 { 278 struct raid_bdev *raid_bdev = raid_io->raid_bdev; 279 struct spdk_bdev_ext_io_opts io_opts; 280 struct raid_base_bdev_info *base_info; 281 struct spdk_io_channel *base_ch; 282 uint8_t idx; 283 uint64_t base_bdev_io_not_submitted; 284 int ret = 0; 285 286 if (raid_io->base_bdev_io_submitted == 0) { 287 raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs; 288 raid_bdev_io_set_default_status(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 289 } 290 291 raid1_init_ext_io_opts(&io_opts, raid_io); 292 for (idx = raid_io->base_bdev_io_submitted; idx < raid_bdev->num_base_bdevs; idx++) { 293 base_info = &raid_bdev->base_bdev_info[idx]; 294 base_ch = raid_bdev_channel_get_base_channel(raid_io->raid_ch, idx); 295 296 if (base_ch == NULL) { 297 /* skip a missing base bdev's slot */ 298 raid_io->base_bdev_io_submitted++; 299 raid_bdev_io_complete_part(raid_io, 1, SPDK_BDEV_IO_STATUS_FAILED); 300 continue; 301 } 302 303 ret = raid_bdev_writev_blocks_ext(base_info, base_ch, raid_io->iovs, raid_io->iovcnt, 304 raid_io->offset_blocks, raid_io->num_blocks, 305 raid1_write_bdev_io_completion, raid_io, &io_opts); 306 if (spdk_unlikely(ret != 0)) { 307 if (spdk_unlikely(ret == -ENOMEM)) { 308 raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), 309 base_ch, _raid1_submit_rw_request); 310 return 0; 311 } 312 313 base_bdev_io_not_submitted = raid_bdev->num_base_bdevs - 314 raid_io->base_bdev_io_submitted; 315 raid_bdev_io_complete_part(raid_io, base_bdev_io_not_submitted, 316 SPDK_BDEV_IO_STATUS_FAILED); 317 return 0; 318 } 319 320 raid_io->base_bdev_io_submitted++; 321 } 322 323 if (raid_io->base_bdev_io_submitted == 0) { 324 ret = -ENODEV; 325 } 326 327 return ret; 328 } 329 330 static void 331 raid1_submit_rw_request(struct raid_bdev_io *raid_io) 332 { 333 int ret; 334 335 switch (raid_io->type) { 336 case SPDK_BDEV_IO_TYPE_READ: 337 ret = raid1_submit_read_request(raid_io); 338 break; 339 case SPDK_BDEV_IO_TYPE_WRITE: 340 ret = raid1_submit_write_request(raid_io); 341 break; 342 default: 343 ret = -EINVAL; 344 break; 345 } 346 347 if (spdk_unlikely(ret != 0)) { 348 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 349 } 350 } 351 352 static void 353 raid1_ioch_destroy(void *io_device, void *ctx_buf) 354 { 355 } 356 357 static int 358 raid1_ioch_create(void *io_device, void *ctx_buf) 359 { 360 return 0; 361 } 362 363 static void 364 raid1_io_device_unregister_done(void *io_device) 365 { 366 struct raid1_info *r1info = io_device; 367 368 raid_bdev_module_stop_done(r1info->raid_bdev); 369 370 free(r1info); 371 } 372 373 static int 374 raid1_start(struct raid_bdev *raid_bdev) 375 { 376 uint64_t min_blockcnt = UINT64_MAX; 377 struct raid_base_bdev_info *base_info; 378 struct raid1_info *r1info; 379 char name[256]; 380 381 r1info = calloc(1, sizeof(*r1info)); 382 if (!r1info) { 383 SPDK_ERRLOG("Failed to allocate RAID1 info device structure\n"); 384 return -ENOMEM; 385 } 386 r1info->raid_bdev = raid_bdev; 387 388 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 389 min_blockcnt = spdk_min(min_blockcnt, base_info->data_size); 390 } 391 392 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 393 base_info->data_size = min_blockcnt; 394 } 395 396 raid_bdev->bdev.blockcnt = min_blockcnt; 397 raid_bdev->module_private = r1info; 398 399 snprintf(name, sizeof(name), "raid1_%s", raid_bdev->bdev.name); 400 spdk_io_device_register(r1info, raid1_ioch_create, raid1_ioch_destroy, 401 sizeof(struct raid1_io_channel) + raid_bdev->num_base_bdevs * sizeof(uint64_t), 402 name); 403 404 return 0; 405 } 406 407 static bool 408 raid1_stop(struct raid_bdev *raid_bdev) 409 { 410 struct raid1_info *r1info = raid_bdev->module_private; 411 412 spdk_io_device_unregister(r1info, raid1_io_device_unregister_done); 413 414 return false; 415 } 416 417 static struct spdk_io_channel * 418 raid1_get_io_channel(struct raid_bdev *raid_bdev) 419 { 420 struct raid1_info *r1info = raid_bdev->module_private; 421 422 return spdk_get_io_channel(r1info); 423 } 424 425 static void 426 raid1_process_write_completed(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 427 { 428 struct raid_bdev_process_request *process_req = cb_arg; 429 430 spdk_bdev_free_io(bdev_io); 431 432 raid_bdev_process_request_complete(process_req, success ? 0 : -EIO); 433 } 434 435 static void raid1_process_submit_write(struct raid_bdev_process_request *process_req); 436 437 static void 438 _raid1_process_submit_write(void *ctx) 439 { 440 struct raid_bdev_process_request *process_req = ctx; 441 442 raid1_process_submit_write(process_req); 443 } 444 445 static void 446 raid1_process_submit_write(struct raid_bdev_process_request *process_req) 447 { 448 struct raid_bdev_io *raid_io = &process_req->raid_io; 449 struct spdk_bdev_ext_io_opts io_opts; 450 int ret; 451 452 raid1_init_ext_io_opts(&io_opts, raid_io); 453 ret = raid_bdev_writev_blocks_ext(process_req->target, process_req->target_ch, 454 raid_io->iovs, raid_io->iovcnt, 455 raid_io->offset_blocks, raid_io->num_blocks, 456 raid1_process_write_completed, process_req, &io_opts); 457 if (spdk_unlikely(ret != 0)) { 458 if (ret == -ENOMEM) { 459 raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(process_req->target->desc), 460 process_req->target_ch, _raid1_process_submit_write); 461 } else { 462 raid_bdev_process_request_complete(process_req, ret); 463 } 464 } 465 } 466 467 static void 468 raid1_process_read_completed(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status) 469 { 470 struct raid_bdev_process_request *process_req = SPDK_CONTAINEROF(raid_io, 471 struct raid_bdev_process_request, raid_io); 472 473 if (status != SPDK_BDEV_IO_STATUS_SUCCESS) { 474 raid_bdev_process_request_complete(process_req, -EIO); 475 return; 476 } 477 478 raid1_process_submit_write(process_req); 479 } 480 481 static int 482 raid1_submit_process_request(struct raid_bdev_process_request *process_req, 483 struct raid_bdev_io_channel *raid_ch) 484 { 485 struct raid_bdev_io *raid_io = &process_req->raid_io; 486 int ret; 487 488 raid_bdev_io_init(raid_io, raid_ch, SPDK_BDEV_IO_TYPE_READ, 489 process_req->offset_blocks, process_req->num_blocks, 490 &process_req->iov, 1, process_req->md_buf, NULL, NULL); 491 raid_io->completion_cb = raid1_process_read_completed; 492 493 ret = raid1_submit_read_request(raid_io); 494 if (spdk_likely(ret == 0)) { 495 return process_req->num_blocks; 496 } else if (ret < 0) { 497 return ret; 498 } else { 499 return -EINVAL; 500 } 501 } 502 503 static bool 504 raid1_resize(struct raid_bdev *raid_bdev) 505 { 506 int rc; 507 uint64_t min_blockcnt = UINT64_MAX; 508 struct raid_base_bdev_info *base_info; 509 510 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 511 struct spdk_bdev *base_bdev; 512 513 if (base_info->desc == NULL) { 514 continue; 515 } 516 base_bdev = spdk_bdev_desc_get_bdev(base_info->desc); 517 min_blockcnt = spdk_min(min_blockcnt, base_bdev->blockcnt - base_info->data_offset); 518 } 519 520 if (min_blockcnt == raid_bdev->bdev.blockcnt) { 521 return false; 522 } 523 524 rc = spdk_bdev_notify_blockcnt_change(&raid_bdev->bdev, min_blockcnt); 525 if (rc != 0) { 526 SPDK_ERRLOG("Failed to notify blockcount change\n"); 527 return false; 528 } 529 530 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 531 base_info->data_size = min_blockcnt; 532 } 533 return true; 534 } 535 536 static struct raid_bdev_module g_raid1_module = { 537 .level = RAID1, 538 .base_bdevs_min = 2, 539 .base_bdevs_constraint = {CONSTRAINT_MIN_BASE_BDEVS_OPERATIONAL, 1}, 540 .memory_domains_supported = true, 541 .start = raid1_start, 542 .stop = raid1_stop, 543 .submit_rw_request = raid1_submit_rw_request, 544 .get_io_channel = raid1_get_io_channel, 545 .submit_process_request = raid1_submit_process_request, 546 .resize = raid1_resize, 547 }; 548 RAID_MODULE_REGISTER(&g_raid1_module) 549 550 SPDK_LOG_REGISTER_COMPONENT(bdev_raid1) 551