1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2022 Intel Corporation. 3 * All rights reserved. 4 */ 5 6 #include "bdev_raid.h" 7 8 #include "spdk/env.h" 9 #include "spdk/thread.h" 10 #include "spdk/string.h" 11 #include "spdk/util.h" 12 #include "spdk/likely.h" 13 #include "spdk/log.h" 14 #include "spdk/xor.h" 15 16 /* Maximum concurrent full stripe writes per io channel */ 17 #define RAID5F_MAX_STRIPES 32 18 19 struct chunk { 20 /* Corresponds to base_bdev index */ 21 uint8_t index; 22 23 /* Array of iovecs */ 24 struct iovec *iovs; 25 26 /* Number of used iovecs */ 27 int iovcnt; 28 29 /* Total number of available iovecs in the array */ 30 int iovcnt_max; 31 32 /* Pointer to buffer with I/O metadata */ 33 void *md_buf; 34 35 /* Shallow copy of IO request parameters */ 36 struct spdk_bdev_ext_io_opts ext_opts; 37 }; 38 39 struct stripe_request { 40 struct raid5f_io_channel *r5ch; 41 42 /* The associated raid_bdev_io */ 43 struct raid_bdev_io *raid_io; 44 45 /* The stripe's index in the raid array. */ 46 uint64_t stripe_index; 47 48 /* The stripe's parity chunk */ 49 struct chunk *parity_chunk; 50 51 /* Buffer for stripe parity */ 52 void *parity_buf; 53 54 /* Buffer for stripe io metadata parity */ 55 void *parity_md_buf; 56 57 TAILQ_ENTRY(stripe_request) link; 58 59 /* Array of chunks corresponding to base_bdevs */ 60 struct chunk chunks[0]; 61 }; 62 63 struct raid5f_info { 64 /* The parent raid bdev */ 65 struct raid_bdev *raid_bdev; 66 67 /* Number of data blocks in a stripe (without parity) */ 68 uint64_t stripe_blocks; 69 70 /* Number of stripes on this array */ 71 uint64_t total_stripes; 72 73 /* Alignment for buffer allocation */ 74 size_t buf_alignment; 75 }; 76 77 struct raid5f_io_channel { 78 /* All available stripe requests on this channel */ 79 TAILQ_HEAD(, stripe_request) free_stripe_requests; 80 81 /* Array of iovec iterators for each data chunk */ 82 struct iov_iter { 83 struct iovec *iovs; 84 int iovcnt; 85 int index; 86 size_t offset; 87 } *chunk_iov_iters; 88 89 /* Array of source buffer pointers for parity calculation */ 90 void **chunk_xor_buffers; 91 92 /* Array of source buffer pointers for parity calculation of io metadata */ 93 void **chunk_xor_md_buffers; 94 95 /* Bounce buffers for parity calculation in case of unaligned source buffers */ 96 struct iovec *chunk_xor_bounce_buffers; 97 }; 98 99 #define __CHUNK_IN_RANGE(req, c) \ 100 c < req->chunks + raid5f_ch_to_r5f_info(req->r5ch)->raid_bdev->num_base_bdevs 101 102 #define FOR_EACH_CHUNK_FROM(req, c, from) \ 103 for (c = from; __CHUNK_IN_RANGE(req, c); c++) 104 105 #define FOR_EACH_CHUNK(req, c) \ 106 FOR_EACH_CHUNK_FROM(req, c, req->chunks) 107 108 #define __NEXT_DATA_CHUNK(req, c) \ 109 c == req->parity_chunk ? c+1 : c 110 111 #define FOR_EACH_DATA_CHUNK(req, c) \ 112 for (c = __NEXT_DATA_CHUNK(req, req->chunks); __CHUNK_IN_RANGE(req, c); \ 113 c = __NEXT_DATA_CHUNK(req, c+1)) 114 115 static inline struct raid5f_info * 116 raid5f_ch_to_r5f_info(struct raid5f_io_channel *r5ch) 117 { 118 return spdk_io_channel_get_io_device(spdk_io_channel_from_ctx(r5ch)); 119 } 120 121 static inline struct stripe_request * 122 raid5f_chunk_stripe_req(struct chunk *chunk) 123 { 124 return SPDK_CONTAINEROF((chunk - chunk->index), struct stripe_request, chunks); 125 } 126 127 static inline uint8_t 128 raid5f_stripe_data_chunks_num(const struct raid_bdev *raid_bdev) 129 { 130 return raid_bdev->min_base_bdevs_operational; 131 } 132 133 static inline uint8_t 134 raid5f_stripe_parity_chunk_index(const struct raid_bdev *raid_bdev, uint64_t stripe_index) 135 { 136 return raid5f_stripe_data_chunks_num(raid_bdev) - stripe_index % raid_bdev->num_base_bdevs; 137 } 138 139 static inline void 140 raid5f_stripe_request_release(struct stripe_request *stripe_req) 141 { 142 TAILQ_INSERT_HEAD(&stripe_req->r5ch->free_stripe_requests, stripe_req, link); 143 } 144 145 static int 146 raid5f_xor_stripe(struct stripe_request *stripe_req) 147 { 148 struct raid_bdev_io *raid_io = stripe_req->raid_io; 149 struct raid5f_io_channel *r5ch = stripe_req->r5ch; 150 struct raid_bdev *raid_bdev = raid_io->raid_bdev; 151 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); 152 size_t remaining = raid_bdev->strip_size << raid_bdev->blocklen_shift; 153 uint8_t n_src = raid5f_stripe_data_chunks_num(raid_bdev); 154 void *dest = stripe_req->parity_buf; 155 size_t alignment_mask = spdk_xor_get_optimal_alignment() - 1; 156 void *raid_md = spdk_bdev_io_get_md_buf(bdev_io); 157 uint32_t raid_md_size = spdk_bdev_get_md_size(&raid_bdev->bdev); 158 struct chunk *chunk; 159 int ret; 160 uint8_t c; 161 162 c = 0; 163 FOR_EACH_DATA_CHUNK(stripe_req, chunk) { 164 struct iov_iter *iov_iter = &r5ch->chunk_iov_iters[c]; 165 bool aligned = true; 166 int i; 167 168 for (i = 0; i < chunk->iovcnt; i++) { 169 if (((uintptr_t)chunk->iovs[i].iov_base & alignment_mask) || 170 (chunk->iovs[i].iov_len & alignment_mask)) { 171 aligned = false; 172 break; 173 } 174 } 175 176 if (aligned) { 177 iov_iter->iovs = chunk->iovs; 178 iov_iter->iovcnt = chunk->iovcnt; 179 } else { 180 iov_iter->iovs = &r5ch->chunk_xor_bounce_buffers[c]; 181 iov_iter->iovcnt = 1; 182 spdk_iovcpy(chunk->iovs, chunk->iovcnt, iov_iter->iovs, iov_iter->iovcnt); 183 } 184 185 iov_iter->index = 0; 186 iov_iter->offset = 0; 187 188 c++; 189 } 190 191 while (remaining > 0) { 192 size_t len = remaining; 193 uint8_t i; 194 195 for (i = 0; i < n_src; i++) { 196 struct iov_iter *iov_iter = &r5ch->chunk_iov_iters[i]; 197 struct iovec *iov = &iov_iter->iovs[iov_iter->index]; 198 199 len = spdk_min(len, iov->iov_len - iov_iter->offset); 200 r5ch->chunk_xor_buffers[i] = iov->iov_base + iov_iter->offset; 201 } 202 203 assert(len > 0); 204 205 ret = spdk_xor_gen(dest, r5ch->chunk_xor_buffers, n_src, len); 206 if (spdk_unlikely(ret)) { 207 SPDK_ERRLOG("stripe xor failed\n"); 208 return ret; 209 } 210 211 for (i = 0; i < n_src; i++) { 212 struct iov_iter *iov_iter = &r5ch->chunk_iov_iters[i]; 213 struct iovec *iov = &iov_iter->iovs[iov_iter->index]; 214 215 iov_iter->offset += len; 216 if (iov_iter->offset == iov->iov_len) { 217 iov_iter->offset = 0; 218 iov_iter->index++; 219 } 220 } 221 dest += len; 222 223 remaining -= len; 224 } 225 226 if (raid_md != NULL) { 227 uint64_t len = raid_bdev->strip_size * raid_md_size; 228 c = 0; 229 FOR_EACH_DATA_CHUNK(stripe_req, chunk) { 230 r5ch->chunk_xor_md_buffers[c] = chunk->md_buf; 231 c++; 232 } 233 ret = spdk_xor_gen(stripe_req->parity_md_buf, r5ch->chunk_xor_md_buffers, n_src, len); 234 if (spdk_unlikely(ret)) { 235 SPDK_ERRLOG("stripe io metadata xor failed\n"); 236 return ret; 237 } 238 } 239 240 return 0; 241 } 242 243 static void 244 raid5f_chunk_write_complete(struct chunk *chunk, enum spdk_bdev_io_status status) 245 { 246 struct stripe_request *stripe_req = raid5f_chunk_stripe_req(chunk); 247 248 if (raid_bdev_io_complete_part(stripe_req->raid_io, 1, status)) { 249 raid5f_stripe_request_release(stripe_req); 250 } 251 } 252 253 static void 254 raid5f_chunk_write_complete_bdev_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 255 { 256 struct chunk *chunk = cb_arg; 257 258 spdk_bdev_free_io(bdev_io); 259 260 raid5f_chunk_write_complete(chunk, success ? SPDK_BDEV_IO_STATUS_SUCCESS : 261 SPDK_BDEV_IO_STATUS_FAILED); 262 } 263 264 static void raid5f_stripe_request_submit_chunks(struct stripe_request *stripe_req); 265 266 static void 267 raid5f_chunk_write_retry(void *_raid_io) 268 { 269 struct raid_bdev_io *raid_io = _raid_io; 270 struct stripe_request *stripe_req = raid_io->module_private; 271 272 raid5f_stripe_request_submit_chunks(stripe_req); 273 } 274 275 static inline void 276 raid5f_init_ext_io_opts(struct spdk_bdev_io *bdev_io, struct spdk_bdev_ext_io_opts *opts) 277 { 278 memset(opts, 0, sizeof(*opts)); 279 opts->size = sizeof(*opts); 280 opts->memory_domain = bdev_io->u.bdev.memory_domain; 281 opts->memory_domain_ctx = bdev_io->u.bdev.memory_domain_ctx; 282 opts->metadata = bdev_io->u.bdev.md_buf; 283 } 284 285 static int 286 raid5f_chunk_write(struct chunk *chunk) 287 { 288 struct stripe_request *stripe_req = raid5f_chunk_stripe_req(chunk); 289 struct raid_bdev_io *raid_io = stripe_req->raid_io; 290 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); 291 struct raid_bdev *raid_bdev = raid_io->raid_bdev; 292 struct raid_base_bdev_info *base_info = &raid_bdev->base_bdev_info[chunk->index]; 293 struct spdk_io_channel *base_ch = raid_io->raid_ch->base_channel[chunk->index]; 294 uint64_t base_offset_blocks = (stripe_req->stripe_index << raid_bdev->strip_size_shift); 295 int ret; 296 297 raid5f_init_ext_io_opts(bdev_io, &chunk->ext_opts); 298 chunk->ext_opts.metadata = chunk->md_buf; 299 300 ret = spdk_bdev_writev_blocks_ext(base_info->desc, base_ch, chunk->iovs, chunk->iovcnt, 301 base_offset_blocks, raid_bdev->strip_size, raid5f_chunk_write_complete_bdev_io, 302 chunk, &chunk->ext_opts); 303 304 if (spdk_unlikely(ret)) { 305 if (ret == -ENOMEM) { 306 raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch, 307 raid5f_chunk_write_retry); 308 } else { 309 /* 310 * Implicitly complete any I/Os not yet submitted as FAILED. If completing 311 * these means there are no more to complete for the stripe request, we can 312 * release the stripe request as well. 313 */ 314 uint64_t base_bdev_io_not_submitted = raid_bdev->num_base_bdevs - 315 raid_io->base_bdev_io_submitted; 316 317 if (raid_bdev_io_complete_part(stripe_req->raid_io, base_bdev_io_not_submitted, 318 SPDK_BDEV_IO_STATUS_FAILED)) { 319 raid5f_stripe_request_release(stripe_req); 320 } 321 } 322 } 323 324 return ret; 325 } 326 327 static int 328 raid5f_stripe_request_map_iovecs(struct stripe_request *stripe_req) 329 { 330 struct raid_bdev *raid_bdev = stripe_req->raid_io->raid_bdev; 331 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(stripe_req->raid_io); 332 const struct iovec *raid_io_iovs = bdev_io->u.bdev.iovs; 333 int raid_io_iovcnt = bdev_io->u.bdev.iovcnt; 334 void *raid_io_md = spdk_bdev_io_get_md_buf(bdev_io); 335 uint32_t raid_io_md_size = spdk_bdev_get_md_size(&raid_bdev->bdev); 336 struct chunk *chunk; 337 int raid_io_iov_idx = 0; 338 size_t raid_io_offset = 0; 339 size_t raid_io_iov_offset = 0; 340 int i; 341 342 FOR_EACH_DATA_CHUNK(stripe_req, chunk) { 343 int chunk_iovcnt = 0; 344 uint64_t len = raid_bdev->strip_size << raid_bdev->blocklen_shift; 345 size_t off = raid_io_iov_offset; 346 347 for (i = raid_io_iov_idx; i < raid_io_iovcnt; i++) { 348 chunk_iovcnt++; 349 off += raid_io_iovs[i].iov_len; 350 if (off >= raid_io_offset + len) { 351 break; 352 } 353 } 354 355 assert(raid_io_iov_idx + chunk_iovcnt <= raid_io_iovcnt); 356 357 if (chunk_iovcnt > chunk->iovcnt_max) { 358 struct iovec *iovs = chunk->iovs; 359 360 iovs = realloc(iovs, chunk_iovcnt * sizeof(*iovs)); 361 if (!iovs) { 362 return -ENOMEM; 363 } 364 chunk->iovs = iovs; 365 chunk->iovcnt_max = chunk_iovcnt; 366 } 367 chunk->iovcnt = chunk_iovcnt; 368 369 if (raid_io_md) { 370 chunk->md_buf = raid_io_md + 371 (raid_io_offset >> raid_bdev->blocklen_shift) * raid_io_md_size; 372 } 373 374 for (i = 0; i < chunk_iovcnt; i++) { 375 struct iovec *chunk_iov = &chunk->iovs[i]; 376 const struct iovec *raid_io_iov = &raid_io_iovs[raid_io_iov_idx]; 377 size_t chunk_iov_offset = raid_io_offset - raid_io_iov_offset; 378 379 chunk_iov->iov_base = raid_io_iov->iov_base + chunk_iov_offset; 380 chunk_iov->iov_len = spdk_min(len, raid_io_iov->iov_len - chunk_iov_offset); 381 raid_io_offset += chunk_iov->iov_len; 382 len -= chunk_iov->iov_len; 383 384 if (raid_io_offset >= raid_io_iov_offset + raid_io_iov->iov_len) { 385 raid_io_iov_idx++; 386 raid_io_iov_offset += raid_io_iov->iov_len; 387 } 388 } 389 390 if (spdk_unlikely(len > 0)) { 391 return -EINVAL; 392 } 393 } 394 395 stripe_req->parity_chunk->iovs[0].iov_base = stripe_req->parity_buf; 396 stripe_req->parity_chunk->iovs[0].iov_len = raid_bdev->strip_size << 397 raid_bdev->blocklen_shift; 398 stripe_req->parity_chunk->md_buf = stripe_req->parity_md_buf; 399 stripe_req->parity_chunk->iovcnt = 1; 400 401 return 0; 402 } 403 404 static void 405 raid5f_stripe_request_submit_chunks(struct stripe_request *stripe_req) 406 { 407 struct raid_bdev_io *raid_io = stripe_req->raid_io; 408 struct chunk *start = &stripe_req->chunks[raid_io->base_bdev_io_submitted]; 409 struct chunk *chunk; 410 411 FOR_EACH_CHUNK_FROM(stripe_req, chunk, start) { 412 if (spdk_unlikely(raid5f_chunk_write(chunk) != 0)) { 413 break; 414 } 415 raid_io->base_bdev_io_submitted++; 416 } 417 } 418 419 static void 420 raid5f_submit_stripe_request(struct stripe_request *stripe_req) 421 { 422 if (spdk_unlikely(raid5f_xor_stripe(stripe_req) != 0)) { 423 raid_bdev_io_complete(stripe_req->raid_io, SPDK_BDEV_IO_STATUS_FAILED); 424 return; 425 } 426 427 raid5f_stripe_request_submit_chunks(stripe_req); 428 } 429 430 static int 431 raid5f_submit_write_request(struct raid_bdev_io *raid_io, uint64_t stripe_index) 432 { 433 struct raid_bdev *raid_bdev = raid_io->raid_bdev; 434 struct raid5f_io_channel *r5ch = spdk_io_channel_get_ctx(raid_io->raid_ch->module_channel); 435 struct stripe_request *stripe_req; 436 int ret; 437 438 stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests); 439 if (!stripe_req) { 440 return -ENOMEM; 441 } 442 443 stripe_req->stripe_index = stripe_index; 444 stripe_req->parity_chunk = stripe_req->chunks + raid5f_stripe_parity_chunk_index(raid_bdev, 445 stripe_req->stripe_index); 446 stripe_req->raid_io = raid_io; 447 448 ret = raid5f_stripe_request_map_iovecs(stripe_req); 449 if (spdk_unlikely(ret)) { 450 return ret; 451 } 452 453 TAILQ_REMOVE(&r5ch->free_stripe_requests, stripe_req, link); 454 455 raid_io->module_private = stripe_req; 456 raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs; 457 458 raid5f_submit_stripe_request(stripe_req); 459 460 return 0; 461 } 462 463 static void 464 raid5f_chunk_read_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 465 { 466 struct raid_bdev_io *raid_io = cb_arg; 467 468 spdk_bdev_free_io(bdev_io); 469 470 raid_bdev_io_complete(raid_io, success ? SPDK_BDEV_IO_STATUS_SUCCESS : 471 SPDK_BDEV_IO_STATUS_FAILED); 472 } 473 474 static void raid5f_submit_rw_request(struct raid_bdev_io *raid_io); 475 476 static void 477 _raid5f_submit_rw_request(void *_raid_io) 478 { 479 struct raid_bdev_io *raid_io = _raid_io; 480 481 raid5f_submit_rw_request(raid_io); 482 } 483 484 static int 485 raid5f_submit_read_request(struct raid_bdev_io *raid_io, uint64_t stripe_index, 486 uint64_t stripe_offset) 487 { 488 struct raid_bdev *raid_bdev = raid_io->raid_bdev; 489 uint8_t chunk_data_idx = stripe_offset >> raid_bdev->strip_size_shift; 490 uint8_t p_idx = raid5f_stripe_parity_chunk_index(raid_bdev, stripe_index); 491 uint8_t chunk_idx = chunk_data_idx < p_idx ? chunk_data_idx : chunk_data_idx + 1; 492 struct raid_base_bdev_info *base_info = &raid_bdev->base_bdev_info[chunk_idx]; 493 struct spdk_io_channel *base_ch = raid_io->raid_ch->base_channel[chunk_idx]; 494 uint64_t chunk_offset = stripe_offset - (chunk_data_idx << raid_bdev->strip_size_shift); 495 uint64_t base_offset_blocks = (stripe_index << raid_bdev->strip_size_shift) + chunk_offset; 496 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); 497 struct spdk_bdev_ext_io_opts io_opts; 498 int ret; 499 500 raid5f_init_ext_io_opts(bdev_io, &io_opts); 501 ret = spdk_bdev_readv_blocks_ext(base_info->desc, base_ch, bdev_io->u.bdev.iovs, 502 bdev_io->u.bdev.iovcnt, 503 base_offset_blocks, bdev_io->u.bdev.num_blocks, raid5f_chunk_read_complete, raid_io, 504 &io_opts); 505 506 if (spdk_unlikely(ret == -ENOMEM)) { 507 raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch, 508 _raid5f_submit_rw_request); 509 return 0; 510 } 511 512 return ret; 513 } 514 515 static void 516 raid5f_submit_rw_request(struct raid_bdev_io *raid_io) 517 { 518 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); 519 struct raid_bdev *raid_bdev = raid_io->raid_bdev; 520 struct raid5f_info *r5f_info = raid_bdev->module_private; 521 uint64_t offset_blocks = bdev_io->u.bdev.offset_blocks; 522 uint64_t stripe_index = offset_blocks / r5f_info->stripe_blocks; 523 uint64_t stripe_offset = offset_blocks % r5f_info->stripe_blocks; 524 int ret; 525 526 switch (bdev_io->type) { 527 case SPDK_BDEV_IO_TYPE_READ: 528 assert(bdev_io->u.bdev.num_blocks <= raid_bdev->strip_size); 529 ret = raid5f_submit_read_request(raid_io, stripe_index, stripe_offset); 530 break; 531 case SPDK_BDEV_IO_TYPE_WRITE: 532 assert(stripe_offset == 0); 533 assert(bdev_io->u.bdev.num_blocks == r5f_info->stripe_blocks); 534 ret = raid5f_submit_write_request(raid_io, stripe_index); 535 break; 536 default: 537 ret = -EINVAL; 538 break; 539 } 540 541 if (spdk_unlikely(ret)) { 542 raid_bdev_io_complete(raid_io, ret == -ENOMEM ? SPDK_BDEV_IO_STATUS_NOMEM : 543 SPDK_BDEV_IO_STATUS_FAILED); 544 } 545 } 546 547 static void 548 raid5f_stripe_request_free(struct stripe_request *stripe_req) 549 { 550 struct chunk *chunk; 551 552 FOR_EACH_CHUNK(stripe_req, chunk) { 553 free(chunk->iovs); 554 } 555 556 spdk_dma_free(stripe_req->parity_buf); 557 spdk_dma_free(stripe_req->parity_md_buf); 558 559 free(stripe_req); 560 } 561 562 static struct stripe_request * 563 raid5f_stripe_request_alloc(struct raid5f_io_channel *r5ch) 564 { 565 struct raid5f_info *r5f_info = raid5f_ch_to_r5f_info(r5ch); 566 struct raid_bdev *raid_bdev = r5f_info->raid_bdev; 567 uint32_t raid_io_md_size = spdk_bdev_get_md_size(&raid_bdev->bdev); 568 struct stripe_request *stripe_req; 569 struct chunk *chunk; 570 571 stripe_req = calloc(1, sizeof(*stripe_req) + 572 sizeof(struct chunk) * raid_bdev->num_base_bdevs); 573 if (!stripe_req) { 574 return NULL; 575 } 576 577 stripe_req->r5ch = r5ch; 578 579 FOR_EACH_CHUNK(stripe_req, chunk) { 580 chunk->index = chunk - stripe_req->chunks; 581 chunk->iovcnt_max = 4; 582 chunk->iovs = calloc(chunk->iovcnt_max, sizeof(chunk->iovs[0])); 583 if (!chunk->iovs) { 584 goto err; 585 } 586 } 587 588 stripe_req->parity_buf = spdk_dma_malloc(raid_bdev->strip_size << raid_bdev->blocklen_shift, 589 r5f_info->buf_alignment, NULL); 590 if (!stripe_req->parity_buf) { 591 goto err; 592 } 593 594 if (raid_io_md_size != 0) { 595 stripe_req->parity_md_buf = spdk_dma_malloc(raid_bdev->strip_size * raid_io_md_size, 596 r5f_info->buf_alignment, NULL); 597 if (!stripe_req->parity_md_buf) { 598 goto err; 599 } 600 } 601 602 return stripe_req; 603 err: 604 raid5f_stripe_request_free(stripe_req); 605 return NULL; 606 } 607 608 static void 609 raid5f_ioch_destroy(void *io_device, void *ctx_buf) 610 { 611 struct raid5f_io_channel *r5ch = ctx_buf; 612 struct raid5f_info *r5f_info = io_device; 613 struct raid_bdev *raid_bdev = r5f_info->raid_bdev; 614 struct stripe_request *stripe_req; 615 int i; 616 617 while ((stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests))) { 618 TAILQ_REMOVE(&r5ch->free_stripe_requests, stripe_req, link); 619 raid5f_stripe_request_free(stripe_req); 620 } 621 622 if (r5ch->chunk_xor_bounce_buffers) { 623 for (i = 0; i < raid5f_stripe_data_chunks_num(raid_bdev); i++) { 624 free(r5ch->chunk_xor_bounce_buffers[i].iov_base); 625 } 626 free(r5ch->chunk_xor_bounce_buffers); 627 } 628 629 free(r5ch->chunk_xor_buffers); 630 free(r5ch->chunk_xor_md_buffers); 631 free(r5ch->chunk_iov_iters); 632 } 633 634 static int 635 raid5f_ioch_create(void *io_device, void *ctx_buf) 636 { 637 struct raid5f_io_channel *r5ch = ctx_buf; 638 struct raid5f_info *r5f_info = io_device; 639 struct raid_bdev *raid_bdev = r5f_info->raid_bdev; 640 size_t chunk_len = raid_bdev->strip_size << raid_bdev->blocklen_shift; 641 int status = 0; 642 int i; 643 644 TAILQ_INIT(&r5ch->free_stripe_requests); 645 646 for (i = 0; i < RAID5F_MAX_STRIPES; i++) { 647 struct stripe_request *stripe_req; 648 649 stripe_req = raid5f_stripe_request_alloc(r5ch); 650 if (!stripe_req) { 651 status = -ENOMEM; 652 goto out; 653 } 654 655 TAILQ_INSERT_HEAD(&r5ch->free_stripe_requests, stripe_req, link); 656 } 657 658 r5ch->chunk_iov_iters = calloc(raid5f_stripe_data_chunks_num(raid_bdev), 659 sizeof(r5ch->chunk_iov_iters[0])); 660 if (!r5ch->chunk_iov_iters) { 661 status = -ENOMEM; 662 goto out; 663 } 664 665 r5ch->chunk_xor_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev), 666 sizeof(r5ch->chunk_xor_buffers[0])); 667 if (!r5ch->chunk_xor_buffers) { 668 status = -ENOMEM; 669 goto out; 670 } 671 672 r5ch->chunk_xor_md_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev), 673 sizeof(r5ch->chunk_xor_md_buffers[0])); 674 if (!r5ch->chunk_xor_md_buffers) { 675 status = -ENOMEM; 676 goto out; 677 } 678 679 r5ch->chunk_xor_bounce_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev), 680 sizeof(r5ch->chunk_xor_bounce_buffers[0])); 681 if (!r5ch->chunk_xor_bounce_buffers) { 682 status = -ENOMEM; 683 goto out; 684 } 685 686 for (i = 0; i < raid5f_stripe_data_chunks_num(raid_bdev); i++) { 687 status = posix_memalign(&r5ch->chunk_xor_bounce_buffers[i].iov_base, 688 spdk_xor_get_optimal_alignment(), chunk_len); 689 if (status) { 690 goto out; 691 } 692 r5ch->chunk_xor_bounce_buffers[i].iov_len = chunk_len; 693 } 694 out: 695 if (status) { 696 SPDK_ERRLOG("Failed to initialize io channel\n"); 697 raid5f_ioch_destroy(r5f_info, r5ch); 698 } 699 return status; 700 } 701 702 static int 703 raid5f_start(struct raid_bdev *raid_bdev) 704 { 705 uint64_t min_blockcnt = UINT64_MAX; 706 struct raid_base_bdev_info *base_info; 707 struct raid5f_info *r5f_info; 708 size_t alignment; 709 710 r5f_info = calloc(1, sizeof(*r5f_info)); 711 if (!r5f_info) { 712 SPDK_ERRLOG("Failed to allocate r5f_info\n"); 713 return -ENOMEM; 714 } 715 r5f_info->raid_bdev = raid_bdev; 716 717 alignment = spdk_xor_get_optimal_alignment(); 718 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 719 min_blockcnt = spdk_min(min_blockcnt, base_info->bdev->blockcnt); 720 alignment = spdk_max(alignment, spdk_bdev_get_buf_align(base_info->bdev)); 721 } 722 723 r5f_info->total_stripes = min_blockcnt / raid_bdev->strip_size; 724 r5f_info->stripe_blocks = raid_bdev->strip_size * raid5f_stripe_data_chunks_num(raid_bdev); 725 r5f_info->buf_alignment = alignment; 726 727 raid_bdev->bdev.blockcnt = r5f_info->stripe_blocks * r5f_info->total_stripes; 728 raid_bdev->bdev.optimal_io_boundary = raid_bdev->strip_size; 729 raid_bdev->bdev.split_on_optimal_io_boundary = true; 730 raid_bdev->bdev.write_unit_size = r5f_info->stripe_blocks; 731 raid_bdev->bdev.split_on_write_unit = true; 732 733 raid_bdev->module_private = r5f_info; 734 735 spdk_io_device_register(r5f_info, raid5f_ioch_create, raid5f_ioch_destroy, 736 sizeof(struct raid5f_io_channel), NULL); 737 738 return 0; 739 } 740 741 static void 742 raid5f_io_device_unregister_done(void *io_device) 743 { 744 struct raid5f_info *r5f_info = io_device; 745 746 raid_bdev_module_stop_done(r5f_info->raid_bdev); 747 748 free(r5f_info); 749 } 750 751 static bool 752 raid5f_stop(struct raid_bdev *raid_bdev) 753 { 754 struct raid5f_info *r5f_info = raid_bdev->module_private; 755 756 spdk_io_device_unregister(r5f_info, raid5f_io_device_unregister_done); 757 758 return false; 759 } 760 761 static struct spdk_io_channel * 762 raid5f_get_io_channel(struct raid_bdev *raid_bdev) 763 { 764 struct raid5f_info *r5f_info = raid_bdev->module_private; 765 766 return spdk_get_io_channel(r5f_info); 767 } 768 769 static struct raid_bdev_module g_raid5f_module = { 770 .level = RAID5F, 771 .base_bdevs_min = 3, 772 .base_bdevs_constraint = {CONSTRAINT_MAX_BASE_BDEVS_REMOVED, 1}, 773 .start = raid5f_start, 774 .stop = raid5f_stop, 775 .submit_rw_request = raid5f_submit_rw_request, 776 .get_io_channel = raid5f_get_io_channel, 777 }; 778 RAID_MODULE_REGISTER(&g_raid5f_module) 779 780 SPDK_LOG_REGISTER_COMPONENT(bdev_raid5f) 781